Merge tag 'write-page-prefaulting' of git://git.kernel.org/pub/scm/linux/kernel/git...

diff --git a/mm/filemap.c b/mm/filemap.c
index 7672022..647d72b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -21,6 +21,7 @@
 #include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
+#include <linux/swapops.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/cpuset.h>
 #include <linux/hugetlb.h>
 #include <linux/memcontrol.h>
-#include <linux/cleancache.h>
 #include <linux/shmem_fs.h>
 #include <linux/rmap.h>
 #include <linux/delayacct.h>
 #include <linux/psi.h>
 #include <linux/ramfs.h>
 #include <linux/page_idle.h>
+#include <linux/migrate.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -71,7 +72,7 @@
  * Lock ordering:
  *
  *  ->i_mmap_rwsem             (truncate_pagecache)
- *    ->private_lock           (__free_pte->__set_page_dirty_buffers)
+ *    ->private_lock           (__free_pte->block_dirty_folio)
  *      ->swap_lock            (exclusive_swap_page, others)
  *        ->i_pages lock
  *
  *    ->memcg->move_lock       (page_remove_rmap->lock_page_memcg)
  *    bdi.wb->list_lock                (zap_pte_range->set_page_dirty)
  *    ->inode->i_lock          (zap_pte_range->set_page_dirty)
- *    ->private_lock           (zap_pte_range->__set_page_dirty_buffers)
+ *    ->private_lock           (zap_pte_range->block_dirty_folio)
  *
  * ->i_mmap_rwsem
  *   ->tasklist_lock            (memory_failure, collect_procs_ao)
  */
 
 static void page_cache_delete(struct address_space *mapping,
-                                  struct page *page, void *shadow)
+                                  struct folio *folio, void *shadow)
 {
-       XA_STATE(xas, &mapping->i_pages, page->index);
-       unsigned int nr = 1;
+       XA_STATE(xas, &mapping->i_pages, folio->index);
+       long nr = 1;
 
        mapping_set_update(&xas, mapping);
 
        /* hugetlb pages are represented by a single entry in the xarray */
-       if (!PageHuge(page)) {
-               xas_set_order(&xas, page->index, compound_order(page));
-               nr = compound_nr(page);
+       if (!folio_test_hugetlb(folio)) {
+               xas_set_order(&xas, folio->index, folio_order(folio));
+               nr = folio_nr_pages(folio);
        }
 
-       VM_BUG_ON_PAGE(!PageLocked(page), page);
-       VM_BUG_ON_PAGE(PageTail(page), page);
-       VM_BUG_ON_PAGE(nr != 1 && shadow, page);
+       VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 
        xas_store(&xas, shadow);
        xas_init_marks(&xas);
 
-       page->mapping = NULL;
+       folio->mapping = NULL;
        /* Leave page->index set: truncation lookup relies upon it */
        mapping->nrpages -= nr;
 }
 
-static void unaccount_page_cache_page(struct address_space *mapping,
-                                     struct page *page)
+static void filemap_unaccount_folio(struct address_space *mapping,
+               struct folio *folio)
 {
-       int nr;
-
-       /*
-        * if we're uptodate, flush out into the cleancache, otherwise
-        * invalidate any existing cleancache entries.  We can't leave
-        * stale data around in the cleancache once our page is gone
-        */
-       if (PageUptodate(page) && PageMappedToDisk(page))
-               cleancache_put_page(page);
-       else
-               cleancache_invalidate_page(mapping, page);
-
-       VM_BUG_ON_PAGE(PageTail(page), page);
-       VM_BUG_ON_PAGE(page_mapped(page), page);
-       if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
-               int mapcount;
+       long nr;
 
+       VM_BUG_ON_FOLIO(folio_mapped(folio), folio);
+       if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) {
                pr_alert("BUG: Bad page cache in process %s  pfn:%05lx\n",
-                        current->comm, page_to_pfn(page));
-               dump_page(page, "still mapped when deleted");
+                        current->comm, folio_pfn(folio));
+               dump_page(&folio->page, "still mapped when deleted");
                dump_stack();
                add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 
-               mapcount = page_mapcount(page);
-               if (mapping_exiting(mapping) &&
-                   page_count(page) >= mapcount + 2) {
-                       /*
-                        * All vmas have already been torn down, so it's
-                        * a good bet that actually the page is unmapped,
-                        * and we'd prefer not to leak it: if we're wrong,
-                        * some other bad page check should catch it later.
-                        */
-                       page_mapcount_reset(page);
-                       page_ref_sub(page, mapcount);
+               if (mapping_exiting(mapping) && !folio_test_large(folio)) {
+                       int mapcount = page_mapcount(&folio->page);
+
+                       if (folio_ref_count(folio) >= mapcount + 2) {
+                               /*
+                                * All vmas have already been torn down, so it's
+                                * a good bet that actually the page is unmapped
+                                * and we'd rather not leak it: if we're wrong,
+                                * another bad page check should catch it later.
+                                */
+                               page_mapcount_reset(&folio->page);
+                               folio_ref_sub(folio, mapcount);
+                       }
                }
        }
 
-       /* hugetlb pages do not participate in page cache accounting. */
-       if (PageHuge(page))
+       /* hugetlb folios do not participate in page cache accounting. */
+       if (folio_test_hugetlb(folio))
                return;
 
-       nr = thp_nr_pages(page);
+       nr = folio_nr_pages(folio);
 
-       __mod_lruvec_page_state(page, NR_FILE_PAGES, -nr);
-       if (PageSwapBacked(page)) {
-               __mod_lruvec_page_state(page, NR_SHMEM, -nr);
-               if (PageTransHuge(page))
-                       __mod_lruvec_page_state(page, NR_SHMEM_THPS, -nr);
-       } else if (PageTransHuge(page)) {
-               __mod_lruvec_page_state(page, NR_FILE_THPS, -nr);
+       __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
+       if (folio_test_swapbacked(folio)) {
+               __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr);
+               if (folio_test_pmd_mappable(folio))
+                       __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr);
+       } else if (folio_test_pmd_mappable(folio)) {
+               __lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr);
                filemap_nr_thps_dec(mapping);
        }
 
        /*
-        * At this point page must be either written or cleaned by
-        * truncate.  Dirty page here signals a bug and loss of
-        * unwritten data.
+        * At this point folio must be either written or cleaned by
+        * truncate.  Dirty folio here signals a bug and loss of
+        * unwritten data - on ordinary filesystems.
+        *
+        * But it's harmless on in-memory filesystems like tmpfs; and can
+        * occur when a driver which did get_user_pages() sets page dirty
+        * before putting it, while the inode is being finally evicted.
         *
-        * This fixes dirty accounting after removing the page entirely
-        * but leaves PageDirty set: it has no effect for truncated
-        * page and anyway will be cleared before returning page into
+        * Below fixes dirty accounting after removing the folio entirely
+        * but leaves the dirty flag set: it has no effect for truncated
+        * folio and anyway will be cleared before returning folio to
         * buddy allocator.
         */
-       if (WARN_ON_ONCE(PageDirty(page)))
-               account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
+       if (WARN_ON_ONCE(folio_test_dirty(folio) &&
+                        mapping_can_writeback(mapping)))
+               folio_account_cleaned(folio, inode_to_wb(mapping->host));
 }
 
 /*
@@ -221,87 +214,81 @@ static void unaccount_page_cache_page(struct address_space *mapping,
  * sure the page is locked and that nobody else uses it - or that usage
  * is safe.  The caller must hold the i_pages lock.
  */
-void __delete_from_page_cache(struct page *page, void *shadow)
+void __filemap_remove_folio(struct folio *folio, void *shadow)
 {
-       struct address_space *mapping = page->mapping;
+       struct address_space *mapping = folio->mapping;
 
-       trace_mm_filemap_delete_from_page_cache(page);
-
-       unaccount_page_cache_page(mapping, page);
-       page_cache_delete(mapping, page, shadow);
+       trace_mm_filemap_delete_from_page_cache(folio);
+       filemap_unaccount_folio(mapping, folio);
+       page_cache_delete(mapping, folio, shadow);
 }
 
-static void page_cache_free_page(struct address_space *mapping,
-                               struct page *page)
+void filemap_free_folio(struct address_space *mapping, struct folio *folio)
 {
        void (*freepage)(struct page *);
+       int refs = 1;
 
        freepage = mapping->a_ops->freepage;
        if (freepage)
-               freepage(page);
+               freepage(&folio->page);
 
-       if (PageTransHuge(page) && !PageHuge(page)) {
-               page_ref_sub(page, thp_nr_pages(page));
-               VM_BUG_ON_PAGE(page_count(page) <= 0, page);
-       } else {
-               put_page(page);
-       }
+       if (folio_test_large(folio) && !folio_test_hugetlb(folio))
+               refs = folio_nr_pages(folio);
+       folio_put_refs(folio, refs);
 }
 
 /**
- * delete_from_page_cache - delete page from page cache
- * @page: the page which the kernel is trying to remove from page cache
+ * filemap_remove_folio - Remove folio from page cache.
+ * @folio: The folio.
  *
- * This must be called only on pages that have been verified to be in the page
- * cache and locked.  It will never put the page into the free list, the caller
- * has a reference on the page.
+ * This must be called only on folios that are locked and have been
+ * verified to be in the page cache.  It will never put the folio into
+ * the free list because the caller has a reference on the page.
  */
-void delete_from_page_cache(struct page *page)
+void filemap_remove_folio(struct folio *folio)
 {
-       struct address_space *mapping = page_mapping(page);
+       struct address_space *mapping = folio->mapping;
 
-       BUG_ON(!PageLocked(page));
+       BUG_ON(!folio_test_locked(folio));
        spin_lock(&mapping->host->i_lock);
        xa_lock_irq(&mapping->i_pages);
-       __delete_from_page_cache(page, NULL);
+       __filemap_remove_folio(folio, NULL);
        xa_unlock_irq(&mapping->i_pages);
        if (mapping_shrinkable(mapping))
                inode_add_lru(mapping->host);
        spin_unlock(&mapping->host->i_lock);
 
-       page_cache_free_page(mapping, page);
+       filemap_free_folio(mapping, folio);
 }
-EXPORT_SYMBOL(delete_from_page_cache);
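
[Illustrative aside, not part of the diff.]  A minimal sketch of the caller
contract described above: the folio must be locked and the caller must hold
its own reference, which survives the removal and is dropped afterwards.
example_drop_folio() and the mapping check are assumptions for illustration,
loosely modelled on how truncation-style callers behave.

#include <linux/mm.h>           /* folio_put() */
#include <linux/pagemap.h>      /* folio_lock(), filemap_remove_folio() */

/* Hypothetical helper: remove one locked, referenced folio from @mapping. */
static void example_drop_folio(struct address_space *mapping,
                               struct folio *folio)
{
        folio_lock(folio);
        if (folio->mapping == mapping)  /* may already have been truncated */
                filemap_remove_folio(folio);
        folio_unlock(folio);
        folio_put(folio);               /* drop the caller's own reference */
}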
 
 /*
- * page_cache_delete_batch - delete several pages from page cache
- * @mapping: the mapping to which pages belong
- * @pvec: pagevec with pages to delete
+ * page_cache_delete_batch - delete several folios from page cache
+ * @mapping: the mapping to which folios belong
+ * @fbatch: batch of folios to delete
  *
- * The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index
- * and is optimised for it to be dense.
- * It tolerates holes in @pvec (mapping entries at those indices are not
- * modified). The function expects only THP head pages to be present in the
- * @pvec.
+ * The function walks over mapping->i_pages and removes folios passed in
+ * @fbatch from the mapping. The function expects @fbatch to be sorted
+ * by page index and is optimised for it to be dense.
+ * It tolerates holes in @fbatch (mapping entries at those indices are not
+ * modified).
  *
  * The function expects the i_pages lock to be held.
  */
 static void page_cache_delete_batch(struct address_space *mapping,
-                            struct pagevec *pvec)
+                            struct folio_batch *fbatch)
 {
-       XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
-       int total_pages = 0;
+       XA_STATE(xas, &mapping->i_pages, fbatch->folios[0]->index);
+       long total_pages = 0;
        int i = 0;
-       struct page *page;
+       struct folio *folio;
 
        mapping_set_update(&xas, mapping);
-       xas_for_each(&xas, page, ULONG_MAX) {
-               if (i >= pagevec_count(pvec))
+       xas_for_each(&xas, folio, ULONG_MAX) {
+               if (i >= folio_batch_count(fbatch))
                        break;
 
                /* A swap/dax/shadow entry got inserted? Skip it. */
-               if (xa_is_value(page))
+               if (xa_is_value(folio))
                        continue;
                /*
                 * A page got inserted in our range? Skip it. We have our
@@ -310,54 +297,48 @@ static void page_cache_delete_batch(struct address_space *mapping,
                 * means our page has been removed, which shouldn't be
                 * possible because we're holding the PageLock.
                 */
-               if (page != pvec->pages[i]) {
-                       VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
-                                       page);
+               if (folio != fbatch->folios[i]) {
+                       VM_BUG_ON_FOLIO(folio->index >
+                                       fbatch->folios[i]->index, folio);
                        continue;
                }
 
-               WARN_ON_ONCE(!PageLocked(page));
+               WARN_ON_ONCE(!folio_test_locked(folio));
 
-               if (page->index == xas.xa_index)
-                       page->mapping = NULL;
-               /* Leave page->index set: truncation lookup relies on it */
+               folio->mapping = NULL;
+               /* Leave folio->index set: truncation lookup relies on it */
 
-               /*
-                * Move to the next page in the vector if this is a regular
-                * page or the index is of the last sub-page of this compound
-                * page.
-                */
-               if (page->index + compound_nr(page) - 1 == xas.xa_index)
-                       i++;
+               i++;
                xas_store(&xas, NULL);
-               total_pages++;
+               total_pages += folio_nr_pages(folio);
        }
        mapping->nrpages -= total_pages;
 }
 
 void delete_from_page_cache_batch(struct address_space *mapping,
-                                 struct pagevec *pvec)
+                                 struct folio_batch *fbatch)
 {
        int i;
 
-       if (!pagevec_count(pvec))
+       if (!folio_batch_count(fbatch))
                return;
 
        spin_lock(&mapping->host->i_lock);
        xa_lock_irq(&mapping->i_pages);
-       for (i = 0; i < pagevec_count(pvec); i++) {
-               trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
+       for (i = 0; i < folio_batch_count(fbatch); i++) {
+               struct folio *folio = fbatch->folios[i];
 
-               unaccount_page_cache_page(mapping, pvec->pages[i]);
+               trace_mm_filemap_delete_from_page_cache(folio);
+               filemap_unaccount_folio(mapping, folio);
        }
-       page_cache_delete_batch(mapping, pvec);
+       page_cache_delete_batch(mapping, fbatch);
        xa_unlock_irq(&mapping->i_pages);
        if (mapping_shrinkable(mapping))
                inode_add_lru(mapping->host);
        spin_unlock(&mapping->host->i_lock);
 
-       for (i = 0; i < pagevec_count(pvec); i++)
-               page_cache_free_page(mapping, pvec->pages[i]);
+       for (i = 0; i < folio_batch_count(fbatch); i++)
+               filemap_free_folio(mapping, fbatch->folios[i]);
 }
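
[Illustrative aside, not part of the diff.]  The folio_batch used here replaces
the old pagevec; its helpers live in <linux/pagevec.h>.  A rough sketch of the
calling pattern, where example_next_locked_folio() is an assumed helper (not a
real kernel API) that yields locked folios of @mapping with a reference held:

#include <linux/pagevec.h>      /* struct folio_batch and helpers */
#include <linux/pagemap.h>      /* delete_from_page_cache_batch() */

static struct folio *example_next_locked_folio(struct address_space *mapping);

static void example_batched_delete(struct address_space *mapping)
{
        struct folio_batch fbatch;
        struct folio *folio;
        unsigned int i;

        folio_batch_init(&fbatch);
        while ((folio = example_next_locked_folio(mapping)) != NULL) {
                if (!folio_batch_add(&fbatch, folio))
                        break;          /* batch is full (PAGEVEC_SIZE slots) */
        }

        delete_from_page_cache_batch(mapping, &fbatch);
        for (i = 0; i < folio_batch_count(&fbatch); i++)
                folio_unlock(fbatch.folios[i]);
        folio_batch_release(&fbatch);   /* drops the batch's references */
}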
 
 int filemap_check_errors(struct address_space *mapping)
@@ -646,8 +627,8 @@ static bool mapping_needs_writeback(struct address_space *mapping)
        return mapping->nrpages;
 }
 
-static bool filemap_range_has_writeback(struct address_space *mapping,
-                                       loff_t start_byte, loff_t end_byte)
+bool filemap_range_has_writeback(struct address_space *mapping,
+                                loff_t start_byte, loff_t end_byte)
 {
        XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
        pgoff_t max = end_byte >> PAGE_SHIFT;
@@ -667,34 +648,8 @@ static bool filemap_range_has_writeback(struct address_space *mapping,
        }
        rcu_read_unlock();
        return page != NULL;
-
-}
-
-/**
- * filemap_range_needs_writeback - check if range potentially needs writeback
- * @mapping:           address space within which to check
- * @start_byte:        offset in bytes where the range starts
- * @end_byte:          offset in bytes where the range ends (inclusive)
- *
- * Find at least one page in the range supplied, usually used to check if
- * direct writing in this range will trigger a writeback. Used by O_DIRECT
- * read/write with IOCB_NOWAIT, to see if the caller needs to do
- * filemap_write_and_wait_range() before proceeding.
- *
- * Return: %true if the caller should do filemap_write_and_wait_range() before
- * doing O_DIRECT to a page in this range, %false otherwise.
- */
-bool filemap_range_needs_writeback(struct address_space *mapping,
-                                  loff_t start_byte, loff_t end_byte)
-{
-       if (!mapping_needs_writeback(mapping))
-               return false;
-       if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
-           !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
-               return false;
-       return filemap_range_has_writeback(mapping, start_byte, end_byte);
 }
-EXPORT_SYMBOL_GPL(filemap_range_needs_writeback);
+EXPORT_SYMBOL_GPL(filemap_range_has_writeback);
 
 /**
  * filemap_write_and_wait_range - write out & wait on a file range
@@ -891,26 +846,27 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 {
        XA_STATE(xas, &mapping->i_pages, index);
        int huge = folio_test_hugetlb(folio);
-       int error;
        bool charged = false;
+       long nr = 1;
 
        VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
        VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
        mapping_set_update(&xas, mapping);
 
-       folio_get(folio);
-       folio->mapping = mapping;
-       folio->index = index;
-
        if (!huge) {
-               error = mem_cgroup_charge(folio, NULL, gfp);
+               int error = mem_cgroup_charge(folio, NULL, gfp);
                VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
                if (error)
-                       goto error;
+                       return error;
                charged = true;
+               xas_set_order(&xas, index, folio_order(folio));
+               nr = folio_nr_pages(folio);
        }
 
        gfp &= GFP_RECLAIM_MASK;
+       folio_ref_add(folio, nr);
+       folio->mapping = mapping;
+       folio->index = xas.xa_index;
 
        do {
                unsigned int order = xa_get_order(xas.xa, xas.xa_index);
@@ -934,6 +890,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
                        /* entry may have been split before we acquired lock */
                        order = xa_get_order(xas.xa, xas.xa_index);
                        if (order > folio_order(folio)) {
+                               /* How to handle large swap entries? */
+                               BUG_ON(shmem_mapping(mapping));
                                xas_split(&xas, old, order);
                                xas_reset(&xas);
                        }
@@ -943,29 +901,31 @@ noinline int __filemap_add_folio(struct address_space *mapping,
                if (xas_error(&xas))
                        goto unlock;
 
-               mapping->nrpages++;
+               mapping->nrpages += nr;
 
                /* hugetlb pages do not participate in page cache accounting */
-               if (!huge)
-                       __lruvec_stat_add_folio(folio, NR_FILE_PAGES);
+               if (!huge) {
+                       __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+                       if (folio_test_pmd_mappable(folio))
+                               __lruvec_stat_mod_folio(folio,
+                                               NR_FILE_THPS, nr);
+               }
 unlock:
                xas_unlock_irq(&xas);
        } while (xas_nomem(&xas, gfp));
 
-       if (xas_error(&xas)) {
-               error = xas_error(&xas);
-               if (charged)
-                       mem_cgroup_uncharge(folio);
+       if (xas_error(&xas))
                goto error;
-       }
 
-       trace_mm_filemap_add_to_page_cache(&folio->page);
+       trace_mm_filemap_add_to_page_cache(folio);
        return 0;
 error:
+       if (charged)
+               mem_cgroup_uncharge(folio);
        folio->mapping = NULL;
        /* Leave page->index set: truncation relies upon it */
-       folio_put(folio);
-       return error;
+       folio_put_refs(folio, nr);
+       return xas_error(&xas);
 }
 ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
 
@@ -1103,6 +1063,12 @@ void __init pagecache_init(void)
                init_waitqueue_head(&folio_wait_table[i]);
 
        page_writeback_init();
+
+       /*
+        * tmpfs uses the ZERO_PAGE for reading holes: it is up-to-date,
+        * and splice's page_cache_pipe_buf_confirm() needs to see that.
+        */
+       SetPageUptodate(ZERO_PAGE(0));
 }
 
 /*
@@ -1223,24 +1189,17 @@ static void folio_wake_bit(struct folio *folio, int bit_nr)
        }
 
        /*
-        * It is possible for other pages to have collided on the waitqueue
-        * hash, so in that case check for a page match. That prevents a long-
-        * term waiter
+        * It's possible to miss clearing waiters here, when we woke our page
+        * waiters, but the hashed waitqueue has waiters for other pages on it.
+        * That's okay, it's a rare case. The next waker will clear it.
         *
-        * It is still possible to miss a case here, when we woke page waiters
-        * and removed them from the waitqueue, but there are still other
-        * page waiters.
+        * Note that, depending on the page pool (buddy, hugetlb, ZONE_DEVICE,
+        * other), the flag may be cleared in the course of freeing the page;
+        * but that is not required for correctness.
         */
-       if (!waitqueue_active(q) || !key.page_match) {
+       if (!waitqueue_active(q) || !key.page_match)
                folio_clear_waiters(folio);
-               /*
-                * It's possible to miss clearing Waiters here, when we woke
-                * our page waiters, but the hashed waitqueue has waiters for
-                * other pages on it.
-                *
-                * That's okay, it's a rare case. The next waker will clear it.
-                */
-       }
+
        spin_unlock_irqrestore(&q->lock, flags);
 }
 
@@ -1259,10 +1218,10 @@ enum behavior {
                         * __folio_lock() waiting on then setting PG_locked.
                         */
        SHARED,         /* Hold ref to page and check the bit when woken, like
-                        * wait_on_page_writeback() waiting on PG_writeback.
+                        * folio_wait_writeback() waiting on PG_writeback.
                         */
        DROP,           /* Drop ref to page before wait, no check when woken,
-                        * like put_and_wait_on_page_locked() on PG_locked.
+                        * like folio_put_wait_locked() on PG_locked.
                         */
 };
 
@@ -1426,6 +1385,95 @@ repeat:
        return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
 }
 
+#ifdef CONFIG_MIGRATION
+/**
+ * migration_entry_wait_on_locked - Wait for a migration entry to be removed
+ * @entry: migration swap entry.
+ * @ptep: mapped pte pointer. Will return with the ptep unmapped. Only required
+ *        for pte entries, pass NULL for pmd entries.
+ * @ptl: already locked ptl. This function will drop the lock.
+ *
+ * Wait for a migration entry referencing the given page to be removed. This is
+ * equivalent to put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE) except
+ * this can be called without taking a reference on the page. Instead this
+ * should be called while holding the ptl for the migration entry referencing
+ * the page.
+ *
+ * Returns after unmapping and unlocking the pte/ptl with pte_unmap_unlock().
+ *
+ * This follows the same logic as folio_wait_bit_common() so see the comments
+ * there.
+ */
+void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
+                               spinlock_t *ptl)
+{
+       struct wait_page_queue wait_page;
+       wait_queue_entry_t *wait = &wait_page.wait;
+       bool thrashing = false;
+       bool delayacct = false;
+       unsigned long pflags;
+       wait_queue_head_t *q;
+       struct folio *folio = page_folio(pfn_swap_entry_to_page(entry));
+
+       q = folio_waitqueue(folio);
+       if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) {
+               if (!folio_test_swapbacked(folio)) {
+                       delayacct_thrashing_start();
+                       delayacct = true;
+               }
+               psi_memstall_enter(&pflags);
+               thrashing = true;
+       }
+
+       init_wait(wait);
+       wait->func = wake_page_function;
+       wait_page.folio = folio;
+       wait_page.bit_nr = PG_locked;
+       wait->flags = 0;
+
+       spin_lock_irq(&q->lock);
+       folio_set_waiters(folio);
+       if (!folio_trylock_flag(folio, PG_locked, wait))
+               __add_wait_queue_entry_tail(q, wait);
+       spin_unlock_irq(&q->lock);
+
+       /*
+        * If a migration entry exists for the page the migration path must hold
+        * a valid reference to the page, and it must take the ptl to remove the
+        * migration entry. So the page is valid until the ptl is dropped.
+        */
+       if (ptep)
+               pte_unmap_unlock(ptep, ptl);
+       else
+               spin_unlock(ptl);
+
+       for (;;) {
+               unsigned int flags;
+
+               set_current_state(TASK_UNINTERRUPTIBLE);
+
+               /* Loop until we've been woken or interrupted */
+               flags = smp_load_acquire(&wait->flags);
+               if (!(flags & WQ_FLAG_WOKEN)) {
+                       if (signal_pending_state(TASK_UNINTERRUPTIBLE, current))
+                               break;
+
+                       io_schedule();
+                       continue;
+               }
+               break;
+       }
+
+       finish_wait(q, wait);
+
+       if (thrashing) {
+               if (delayacct)
+                       delayacct_thrashing_end();
+               psi_memstall_leave(&pflags);
+       }
+}
+#endif
+
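
[Illustrative aside, not part of the diff.]  A rough sketch of the expected
caller, close to what the migration wait path in mm/migrate.c does: map and
lock the pte, confirm it holds a migration entry, then let the helper above
unmap the pte, drop the lock and sleep until the entry is removed.  It assumes
the declaration of migration_entry_wait_on_locked() from <linux/migrate.h> and
the swap-entry helpers from <linux/swapops.h>.

#include <linux/mm.h>
#include <linux/migrate.h>
#include <linux/swapops.h>

static void example_wait_for_migration(struct mm_struct *mm, pmd_t *pmd,
                                       unsigned long address)
{
        spinlock_t *ptl = pte_lockptr(mm, pmd);
        pte_t *ptep = pte_offset_map(pmd, address);
        swp_entry_t entry;

        spin_lock(ptl);
        if (!is_swap_pte(*ptep))
                goto out;
        entry = pte_to_swp_entry(*ptep);
        if (!is_migration_entry(entry))
                goto out;
        /* Unmaps ptep and drops ptl before sleeping. */
        migration_entry_wait_on_locked(entry, ptep, ptl);
        return;
out:
        pte_unmap_unlock(ptep, ptl);
}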
 void folio_wait_bit(struct folio *folio, int bit_nr)
 {
        folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
@@ -1439,22 +1487,21 @@ int folio_wait_bit_killable(struct folio *folio, int bit_nr)
 EXPORT_SYMBOL(folio_wait_bit_killable);
 
 /**
- * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
- * @page: The page to wait for.
+ * folio_put_wait_locked - Drop a reference and wait for it to be unlocked
+ * @folio: The folio to wait for.
  * @state: The sleep state (TASK_KILLABLE, TASK_UNINTERRUPTIBLE, etc).
  *
- * The caller should hold a reference on @page.  They expect the page to
+ * The caller should hold a reference on @folio.  They expect the page to
  * become unlocked relatively soon, but do not wish to hold up migration
- * (for example) by holding the reference while waiting for the page to
+ * (for example) by holding the reference while waiting for the folio to
  * come unlocked.  After this function returns, the caller should not
- * dereference @page.
+ * dereference @folio.
  *
- * Return: 0 if the page was unlocked or -EINTR if interrupted by a signal.
+ * Return: 0 if the folio was unlocked or -EINTR if interrupted by a signal.
  */
-int put_and_wait_on_page_locked(struct page *page, int state)
+int folio_put_wait_locked(struct folio *folio, int state)
 {
-       return folio_wait_bit_common(page_folio(page), PG_locked, state,
-                       DROP);
+       return folio_wait_bit_common(folio, PG_locked, state, DROP);
 }
 
 /**
@@ -1979,37 +2026,36 @@ no_page:
 }
 EXPORT_SYMBOL(__filemap_get_folio);
 
-static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max,
+static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
                xa_mark_t mark)
 {
-       struct page *page;
+       struct folio *folio;
 
 retry:
        if (mark == XA_PRESENT)
-               page = xas_find(xas, max);
+               folio = xas_find(xas, max);
        else
-               page = xas_find_marked(xas, max, mark);
+               folio = xas_find_marked(xas, max, mark);
 
-       if (xas_retry(xas, page))
+       if (xas_retry(xas, folio))
                goto retry;
        /*
         * A shadow entry of a recently evicted page, a swap
         * entry from shmem/tmpfs or a DAX entry.  Return it
         * without attempting to raise page count.
         */
-       if (!page || xa_is_value(page))
-               return page;
+       if (!folio || xa_is_value(folio))
+               return folio;
 
-       if (!page_cache_get_speculative(page))
+       if (!folio_try_get_rcu(folio))
                goto reset;
 
-       /* Has the page moved or been split? */
-       if (unlikely(page != xas_reload(xas))) {
-               put_page(page);
+       if (unlikely(folio != xas_reload(xas))) {
+               folio_put(folio);
                goto reset;
        }
 
-       return page;
+       return folio;
 reset:
        xas_reset(xas);
        goto retry;
@@ -2020,56 +2066,36 @@ reset:
  * @mapping:   The address_space to search
  * @start:     The starting page cache index
  * @end:       The final page index (inclusive).
- * @pvec:      Where the resulting entries are placed.
+ * @fbatch:    Where the resulting entries are placed.
  * @indices:   The cache indices corresponding to the entries in @entries
  *
  * find_get_entries() will search for and return a batch of entries in
- * the mapping.  The entries are placed in @pvec.  find_get_entries()
- * takes a reference on any actual pages it returns.
+ * the mapping.  The entries are placed in @fbatch.  find_get_entries()
+ * takes a reference on any actual folios it returns.
  *
- * The search returns a group of mapping-contiguous page cache entries
- * with ascending indexes.  There may be holes in the indices due to
- * not-present pages.
+ * The entries have ascending indexes.  The indices may not be consecutive
+ * due to not-present entries or large folios.
  *
- * Any shadow entries of evicted pages, or swap entries from
+ * Any shadow entries of evicted folios, or swap entries from
  * shmem/tmpfs, are included in the returned array.
  *
- * If it finds a Transparent Huge Page, head or tail, find_get_entries()
- * stops at that page: the caller is likely to have a better way to handle
- * the compound page as a whole, and then skip its extent, than repeatedly
- * calling find_get_entries() to return all its tails.
- *
- * Return: the number of pages and shadow entries which were found.
+ * Return: The number of entries which were found.
  */
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
-               pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
+               pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
 {
        XA_STATE(xas, &mapping->i_pages, start);
-       struct page *page;
-       unsigned int ret = 0;
-       unsigned nr_entries = PAGEVEC_SIZE;
+       struct folio *folio;
 
        rcu_read_lock();
-       while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
-               /*
-                * Terminate early on finding a THP, to allow the caller to
-                * handle it all at once; but continue if this is hugetlbfs.
-                */
-               if (!xa_is_value(page) && PageTransHuge(page) &&
-                               !PageHuge(page)) {
-                       page = find_subpage(page, xas.xa_index);
-                       nr_entries = ret + 1;
-               }
-
-               indices[ret] = xas.xa_index;
-               pvec->pages[ret] = page;
-               if (++ret == nr_entries)
+       while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) {
+               indices[fbatch->nr] = xas.xa_index;
+               if (!folio_batch_add(fbatch, folio))
                        break;
        }
        rcu_read_unlock();
 
-       pvec->nr = ret;
-       return ret;
+       return folio_batch_count(fbatch);
 }
 
 /**
@@ -2077,63 +2103,64 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
  * @mapping:   The address_space to search.
  * @start:     The starting page cache index.
  * @end:       The final page index (inclusive).
- * @pvec:      Where the resulting entries are placed.
- * @indices:   The cache indices of the entries in @pvec.
+ * @fbatch:    Where the resulting entries are placed.
+ * @indices:   The cache indices of the entries in @fbatch.
  *
  * find_lock_entries() will return a batch of entries from @mapping.
- * Swap, shadow and DAX entries are included.  Pages are returned
- * locked and with an incremented refcount.  Pages which are locked by
- * somebody else or under writeback are skipped.  Only the head page of
- * a THP is returned.  Pages which are partially outside the range are
- * not returned.
+ * Swap, shadow and DAX entries are included.  Folios are returned
+ * locked and with an incremented refcount.  Folios which are locked
+ * by somebody else or under writeback are skipped.  Folios which are
+ * partially outside the range are not returned.
  *
  * The entries have ascending indexes.  The indices may not be consecutive
- * due to not-present entries, THP pages, pages which could not be locked
- * or pages under writeback.
+ * due to not-present entries, large folios, folios which could not be
+ * locked or folios under writeback.
  *
  * Return: The number of entries which were found.
  */
 unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
-               pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
+               pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
 {
        XA_STATE(xas, &mapping->i_pages, start);
-       struct page *page;
+       struct folio *folio;
 
        rcu_read_lock();
-       while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
-               if (!xa_is_value(page)) {
-                       if (page->index < start)
+       while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
+               if (!xa_is_value(folio)) {
+                       if (folio->index < start)
                                goto put;
-                       if (page->index + thp_nr_pages(page) - 1 > end)
+                       if (folio->index + folio_nr_pages(folio) - 1 > end)
                                goto put;
-                       if (!trylock_page(page))
+                       if (!folio_trylock(folio))
                                goto put;
-                       if (page->mapping != mapping || PageWriteback(page))
+                       if (folio->mapping != mapping ||
+                           folio_test_writeback(folio))
                                goto unlock;
-                       VM_BUG_ON_PAGE(!thp_contains(page, xas.xa_index),
-                                       page);
+                       VM_BUG_ON_FOLIO(!folio_contains(folio, xas.xa_index),
+                                       folio);
                }
-               indices[pvec->nr] = xas.xa_index;
-               if (!pagevec_add(pvec, page))
+               indices[fbatch->nr] = xas.xa_index;
+               if (!folio_batch_add(fbatch, folio))
                        break;
-               goto next;
+               continue;
 unlock:
-               unlock_page(page);
+               folio_unlock(folio);
 put:
-               put_page(page);
-next:
-               if (!xa_is_value(page) && PageTransHuge(page)) {
-                       unsigned int nr_pages = thp_nr_pages(page);
-
-                       /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */
-                       xas_set(&xas, page->index + nr_pages);
-                       if (xas.xa_index < nr_pages)
-                               break;
-               }
+               folio_put(folio);
        }
        rcu_read_unlock();
 
-       return pagevec_count(pvec);
+       return folio_batch_count(fbatch);
+}
+
+static inline
+bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max)
+{
+       if (!folio_test_large(folio) || folio_test_hugetlb(folio))
+               return false;
+       if (index >= max)
+               return false;
+       return index < folio->index + folio_nr_pages(folio) - 1;
 }
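
[Illustrative aside, not part of the diff.]  A concrete reading of
folio_more_pages(), assuming a 16-page (non-hugetlb) folio whose first page is
at index 32, so its sub-pages cover indices 32..47:

        folio_more_pages(folio, 40, 100);  /* true:  sub-pages 41..47 remain   */
        folio_more_pages(folio, 47, 100);  /* false: 47 is the last sub-page   */
        folio_more_pages(folio, 40, 40);   /* false: caller's range ends at 40 */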
 
 /**
@@ -2162,23 +2189,29 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
                              struct page **pages)
 {
        XA_STATE(xas, &mapping->i_pages, *start);
-       struct page *page;
+       struct folio *folio;
        unsigned ret = 0;
 
        if (unlikely(!nr_pages))
                return 0;
 
        rcu_read_lock();
-       while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
+       while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
                /* Skip over shadow, swap and DAX entries */
-               if (xa_is_value(page))
+               if (xa_is_value(folio))
                        continue;
 
-               pages[ret] = find_subpage(page, xas.xa_index);
+again:
+               pages[ret] = folio_file_page(folio, xas.xa_index);
                if (++ret == nr_pages) {
                        *start = xas.xa_index + 1;
                        goto out;
                }
+               if (folio_more_pages(folio, xas.xa_index, end)) {
+                       xas.xa_index++;
+                       folio_ref_inc(folio);
+                       goto again;
+               }
        }
 
        /*
@@ -2204,8 +2237,9 @@ out:
  * @nr_pages:  The maximum number of pages
  * @pages:     Where the resulting pages are placed
  *
- * find_get_pages_contig() works exactly like find_get_pages(), except
- * that the returned number of pages are guaranteed to be contiguous.
+ * find_get_pages_contig() works exactly like find_get_pages_range(),
+ * except that the returned number of pages are guaranteed to be
+ * contiguous.
  *
  * Return: the number of pages which were found.
  */
@@ -2213,36 +2247,41 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
                               unsigned int nr_pages, struct page **pages)
 {
        XA_STATE(xas, &mapping->i_pages, index);
-       struct page *page;
+       struct folio *folio;
        unsigned int ret = 0;
 
        if (unlikely(!nr_pages))
                return 0;
 
        rcu_read_lock();
-       for (page = xas_load(&xas); page; page = xas_next(&xas)) {
-               if (xas_retry(&xas, page))
+       for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) {
+               if (xas_retry(&xas, folio))
                        continue;
                /*
                 * If the entry has been swapped out, we can stop looking.
                 * No current caller is looking for DAX entries.
                 */
-               if (xa_is_value(page))
+               if (xa_is_value(folio))
                        break;
 
-               if (!page_cache_get_speculative(page))
+               if (!folio_try_get_rcu(folio))
                        goto retry;
 
-               /* Has the page moved or been split? */
-               if (unlikely(page != xas_reload(&xas)))
+               if (unlikely(folio != xas_reload(&xas)))
                        goto put_page;
 
-               pages[ret] = find_subpage(page, xas.xa_index);
+again:
+               pages[ret] = folio_file_page(folio, xas.xa_index);
                if (++ret == nr_pages)
                        break;
+               if (folio_more_pages(folio, xas.xa_index, ULONG_MAX)) {
+                       xas.xa_index++;
+                       folio_ref_inc(folio);
+                       goto again;
+               }
                continue;
 put_page:
-               put_page(page);
+               folio_put(folio);
 retry:
                xas_reset(&xas);
        }
@@ -2260,9 +2299,9 @@ EXPORT_SYMBOL(find_get_pages_contig);
  * @nr_pages:  the maximum number of pages
  * @pages:     where the resulting pages are placed
  *
- * Like find_get_pages(), except we only return head pages which are tagged
- * with @tag.  @index is updated to the index immediately after the last
- * page we return, ready for the next iteration.
+ * Like find_get_pages_range(), except we only return head pages which are
+ * tagged with @tag.  @index is updated to the index immediately after the
+ * last page we return, ready for the next iteration.
  *
  * Return: the number of pages which were found.
  */
@@ -2271,25 +2310,25 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
                        struct page **pages)
 {
        XA_STATE(xas, &mapping->i_pages, *index);
-       struct page *page;
+       struct folio *folio;
        unsigned ret = 0;
 
        if (unlikely(!nr_pages))
                return 0;
 
        rcu_read_lock();
-       while ((page = find_get_entry(&xas, end, tag))) {
+       while ((folio = find_get_entry(&xas, end, tag))) {
                /*
                 * Shadow entries should never be tagged, but this iteration
                 * is lockless so there is a window for page reclaim to evict
                 * a page we saw tagged.  Skip over it.
                 */
-               if (xa_is_value(page))
+               if (xa_is_value(folio))
                        continue;
 
-               pages[ret] = page;
+               pages[ret] = &folio->page;
                if (++ret == nr_pages) {
-                       *index = page->index + thp_nr_pages(page);
+                       *index = folio->index + folio_nr_pages(folio);
                        goto out;
                }
        }
@@ -2332,52 +2371,50 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra)
 }
 
 /*
- * filemap_get_read_batch - Get a batch of pages for read
+ * filemap_get_read_batch - Get a batch of folios for read
  *
- * Get a batch of pages which represent a contiguous range of bytes
- * in the file.  No tail pages will be returned.  If @index is in the
- * middle of a THP, the entire THP will be returned.  The last page in
- * the batch may have Readahead set or be not Uptodate so that the
- * caller can take the appropriate action.
+ * Get a batch of folios which represent a contiguous range of bytes in
+ * the file.  No exceptional entries will be returned.  If @index is in
+ * the middle of a folio, the entire folio will be returned.  The last
+ * folio in the batch may have the readahead flag set or the uptodate flag
+ * clear so that the caller can take the appropriate action.
  */
 static void filemap_get_read_batch(struct address_space *mapping,
-               pgoff_t index, pgoff_t max, struct pagevec *pvec)
+               pgoff_t index, pgoff_t max, struct folio_batch *fbatch)
 {
        XA_STATE(xas, &mapping->i_pages, index);
-       struct page *head;
+       struct folio *folio;
 
        rcu_read_lock();
-       for (head = xas_load(&xas); head; head = xas_next(&xas)) {
-               if (xas_retry(&xas, head))
+       for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) {
+               if (xas_retry(&xas, folio))
                        continue;
-               if (xas.xa_index > max || xa_is_value(head))
+               if (xas.xa_index > max || xa_is_value(folio))
                        break;
-               if (!page_cache_get_speculative(head))
+               if (!folio_try_get_rcu(folio))
                        goto retry;
 
-               /* Has the page moved or been split? */
-               if (unlikely(head != xas_reload(&xas)))
-                       goto put_page;
+               if (unlikely(folio != xas_reload(&xas)))
+                       goto put_folio;
 
-               if (!pagevec_add(pvec, head))
+               if (!folio_batch_add(fbatch, folio))
                        break;
-               if (!PageUptodate(head))
+               if (!folio_test_uptodate(folio))
                        break;
-               if (PageReadahead(head))
+               if (folio_test_readahead(folio))
                        break;
-               xas.xa_index = head->index + thp_nr_pages(head) - 1;
-               xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK;
+               xas_advance(&xas, folio->index + folio_nr_pages(folio) - 1);
                continue;
-put_page:
-               put_page(head);
+put_folio:
+               folio_put(folio);
 retry:
                xas_reset(&xas);
        }
        rcu_read_unlock();
 }
 
-static int filemap_read_page(struct file *file, struct address_space *mapping,
-               struct page *page)
+static int filemap_read_folio(struct file *file, struct address_space *mapping,
+               struct folio *folio)
 {
        int error;
 
@@ -2386,52 +2423,51 @@ static int filemap_read_page(struct file *file, struct address_space *mapping,
         * eg. multipath errors.  PG_error will be set again if readpage
         * fails.
         */
-       ClearPageError(page);
+       folio_clear_error(folio);
        /* Start the actual read. The read will unlock the page. */
-       error = mapping->a_ops->readpage(file, page);
+       error = mapping->a_ops->readpage(file, &folio->page);
        if (error)
                return error;
 
-       error = wait_on_page_locked_killable(page);
+       error = folio_wait_locked_killable(folio);
        if (error)
                return error;
-       if (PageUptodate(page))
+       if (folio_test_uptodate(folio))
                return 0;
        shrink_readahead_size_eio(&file->f_ra);
        return -EIO;
 }
 
 static bool filemap_range_uptodate(struct address_space *mapping,
-               loff_t pos, struct iov_iter *iter, struct page *page)
+               loff_t pos, struct iov_iter *iter, struct folio *folio)
 {
        int count;
 
-       if (PageUptodate(page))
+       if (folio_test_uptodate(folio))
                return true;
        /* pipes can't handle partially uptodate pages */
        if (iov_iter_is_pipe(iter))
                return false;
        if (!mapping->a_ops->is_partially_uptodate)
                return false;
-       if (mapping->host->i_blkbits >= (PAGE_SHIFT + thp_order(page)))
+       if (mapping->host->i_blkbits >= folio_shift(folio))
                return false;
 
        count = iter->count;
-       if (page_offset(page) > pos) {
-               count -= page_offset(page) - pos;
+       if (folio_pos(folio) > pos) {
+               count -= folio_pos(folio) - pos;
                pos = 0;
        } else {
-               pos -= page_offset(page);
+               pos -= folio_pos(folio);
        }
 
-       return mapping->a_ops->is_partially_uptodate(page, pos, count);
+       return mapping->a_ops->is_partially_uptodate(folio, pos, count);
 }
 
 static int filemap_update_page(struct kiocb *iocb,
                struct address_space *mapping, struct iov_iter *iter,
-               struct page *page)
+               struct folio *folio)
 {
-       struct folio *folio = page_folio(page);
        int error;
 
        if (iocb->ki_flags & IOCB_NOWAIT) {
@@ -2447,7 +2483,11 @@ static int filemap_update_page(struct kiocb *iocb,
                        goto unlock_mapping;
                if (!(iocb->ki_flags & IOCB_WAITQ)) {
                        filemap_invalidate_unlock_shared(mapping);
-                       put_and_wait_on_page_locked(&folio->page, TASK_KILLABLE);
+                       /*
+                        * This is where we usually end up waiting for a
+                        * previously submitted readahead to finish.
+                        */
+                       folio_put_wait_locked(folio, TASK_KILLABLE);
                        return AOP_TRUNCATED_PAGE;
                }
                error = __folio_lock_async(folio, iocb->ki_waitq);
@@ -2460,14 +2500,14 @@ static int filemap_update_page(struct kiocb *iocb,
                goto unlock;
 
        error = 0;
-       if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, &folio->page))
+       if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, folio))
                goto unlock;
 
        error = -EAGAIN;
        if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
                goto unlock;
 
-       error = filemap_read_page(iocb->ki_filp, mapping, &folio->page);
+       error = filemap_read_folio(iocb->ki_filp, mapping, folio);
        goto unlock_mapping;
 unlock:
        folio_unlock(folio);
@@ -2478,70 +2518,72 @@ unlock_mapping:
        return error;
 }
 
-static int filemap_create_page(struct file *file,
+static int filemap_create_folio(struct file *file,
                struct address_space *mapping, pgoff_t index,
-               struct pagevec *pvec)
+               struct folio_batch *fbatch)
 {
-       struct page *page;
+       struct folio *folio;
        int error;
 
-       page = page_cache_alloc(mapping);
-       if (!page)
+       folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
+       if (!folio)
                return -ENOMEM;
 
        /*
-        * Protect against truncate / hole punch. Grabbing invalidate_lock here
-        * assures we cannot instantiate and bring uptodate new pagecache pages
-        * after evicting page cache during truncate and before actually
-        * freeing blocks.  Note that we could release invalidate_lock after
-        * inserting the page into page cache as the locked page would then be
-        * enough to synchronize with hole punching. But there are code paths
-        * such as filemap_update_page() filling in partially uptodate pages or
-        * ->readpages() that need to hold invalidate_lock while mapping blocks
-        * for IO so let's hold the lock here as well to keep locking rules
-        * simple.
+        * Protect against truncate / hole punch. Grabbing invalidate_lock
+        * here assures we cannot instantiate and bring uptodate new
+        * pagecache folios after evicting page cache during truncate
+        * and before actually freeing blocks.  Note that we could
+        * release invalidate_lock after inserting the folio into
+        * the page cache as the locked folio would then be enough to
+        * synchronize with hole punching. But there are code paths
+        * such as filemap_update_page() filling in partially uptodate
+        * pages or ->readpages() that need to hold invalidate_lock
+        * while mapping blocks for IO so let's hold the lock here as
+        * well to keep locking rules simple.
         */
        filemap_invalidate_lock_shared(mapping);
-       error = add_to_page_cache_lru(page, mapping, index,
+       error = filemap_add_folio(mapping, folio, index,
                        mapping_gfp_constraint(mapping, GFP_KERNEL));
        if (error == -EEXIST)
                error = AOP_TRUNCATED_PAGE;
        if (error)
                goto error;
 
-       error = filemap_read_page(file, mapping, page);
+       error = filemap_read_folio(file, mapping, folio);
        if (error)
                goto error;
 
        filemap_invalidate_unlock_shared(mapping);
-       pagevec_add(pvec, page);
+       folio_batch_add(fbatch, folio);
        return 0;
 error:
        filemap_invalidate_unlock_shared(mapping);
-       put_page(page);
+       folio_put(folio);
        return error;
 }
 
 static int filemap_readahead(struct kiocb *iocb, struct file *file,
-               struct address_space *mapping, struct page *page,
+               struct address_space *mapping, struct folio *folio,
                pgoff_t last_index)
 {
+       DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, folio->index);
+
        if (iocb->ki_flags & IOCB_NOIO)
                return -EAGAIN;
-       page_cache_async_readahead(mapping, &file->f_ra, file, page,
-                       page->index, last_index - page->index);
+       page_cache_async_ra(&ractl, folio, last_index - folio->index);
        return 0;
 }
 
 static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
-               struct pagevec *pvec)
+               struct folio_batch *fbatch)
 {
        struct file *filp = iocb->ki_filp;
        struct address_space *mapping = filp->f_mapping;
        struct file_ra_state *ra = &filp->f_ra;
        pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
        pgoff_t last_index;
-       struct page *page;
+       struct folio *folio;
        int err = 0;
 
        last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
@@ -2549,34 +2591,35 @@ retry:
        if (fatal_signal_pending(current))
                return -EINTR;
 
-       filemap_get_read_batch(mapping, index, last_index, pvec);
-       if (!pagevec_count(pvec)) {
+       filemap_get_read_batch(mapping, index, last_index, fbatch);
+       if (!folio_batch_count(fbatch)) {
                if (iocb->ki_flags & IOCB_NOIO)
                        return -EAGAIN;
                page_cache_sync_readahead(mapping, ra, filp, index,
                                last_index - index);
-               filemap_get_read_batch(mapping, index, last_index, pvec);
+               filemap_get_read_batch(mapping, index, last_index, fbatch);
        }
-       if (!pagevec_count(pvec)) {
+       if (!folio_batch_count(fbatch)) {
                if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
                        return -EAGAIN;
-               err = filemap_create_page(filp, mapping,
-                               iocb->ki_pos >> PAGE_SHIFT, pvec);
+               err = filemap_create_folio(filp, mapping,
+                               iocb->ki_pos >> PAGE_SHIFT, fbatch);
                if (err == AOP_TRUNCATED_PAGE)
                        goto retry;
                return err;
        }
 
-       page = pvec->pages[pagevec_count(pvec) - 1];
-       if (PageReadahead(page)) {
-               err = filemap_readahead(iocb, filp, mapping, page, last_index);
+       folio = fbatch->folios[folio_batch_count(fbatch) - 1];
+       if (folio_test_readahead(folio)) {
+               err = filemap_readahead(iocb, filp, mapping, folio, last_index);
                if (err)
                        goto err;
        }
-       if (!PageUptodate(page)) {
-               if ((iocb->ki_flags & IOCB_WAITQ) && pagevec_count(pvec) > 1)
+       if (!folio_test_uptodate(folio)) {
+               if ((iocb->ki_flags & IOCB_WAITQ) &&
+                   folio_batch_count(fbatch) > 1)
                        iocb->ki_flags |= IOCB_NOWAIT;
-               err = filemap_update_page(iocb, mapping, iter, page);
+               err = filemap_update_page(iocb, mapping, iter, folio);
                if (err)
                        goto err;
        }
@@ -2584,8 +2627,8 @@ retry:
        return 0;
 err:
        if (err < 0)
-               put_page(page);
-       if (likely(--pvec->nr))
+               folio_put(folio);
+       if (likely(--fbatch->nr))
                return 0;
        if (err == AOP_TRUNCATED_PAGE)
                goto retry;
@@ -2612,7 +2655,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
        struct file_ra_state *ra = &filp->f_ra;
        struct address_space *mapping = filp->f_mapping;
        struct inode *inode = mapping->host;
-       struct pagevec pvec;
+       struct folio_batch fbatch;
        int i, error = 0;
        bool writably_mapped;
        loff_t isize, end_offset;
@@ -2623,7 +2666,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
                return 0;
 
        iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
-       pagevec_init(&pvec);
+       folio_batch_init(&fbatch);
 
        do {
                cond_resched();
@@ -2639,7 +2682,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
                if (unlikely(iocb->ki_pos >= i_size_read(inode)))
                        break;
 
-               error = filemap_get_pages(iocb, iter, &pvec);
+               error = filemap_get_pages(iocb, iter, &fbatch);
                if (error < 0)
                        break;
 
@@ -2653,7 +2696,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
                 */
                isize = i_size_read(inode);
                if (unlikely(iocb->ki_pos >= isize))
-                       goto put_pages;
+                       goto put_folios;
                end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
 
                /*
@@ -2668,33 +2711,29 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
                 */
                if (iocb->ki_pos >> PAGE_SHIFT !=
                    ra->prev_pos >> PAGE_SHIFT)
-                       mark_page_accessed(pvec.pages[0]);
+                       folio_mark_accessed(fbatch.folios[0]);
 
-               for (i = 0; i < pagevec_count(&pvec); i++) {
-                       struct page *page = pvec.pages[i];
-                       size_t page_size = thp_size(page);
-                       size_t offset = iocb->ki_pos & (page_size - 1);
+               for (i = 0; i < folio_batch_count(&fbatch); i++) {
+                       struct folio *folio = fbatch.folios[i];
+                       size_t fsize = folio_size(folio);
+                       size_t offset = iocb->ki_pos & (fsize - 1);
                        size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
-                                            page_size - offset);
+                                            fsize - offset);
                        size_t copied;
 
-                       if (end_offset < page_offset(page))
+                       if (end_offset < folio_pos(folio))
                                break;
                        if (i > 0)
-                               mark_page_accessed(page);
+                               folio_mark_accessed(folio);
                        /*
-                        * If users can be writing to this page using arbitrary
-                        * virtual addresses, take care about potential aliasing
-                        * before reading the page on the kernel side.
+                        * If users can be writing to this folio using arbitrary
+                        * virtual addresses, take care of potential aliasing
+                        * before reading the folio on the kernel side.
                         */
-                       if (writably_mapped) {
-                               int j;
-
-                               for (j = 0; j < thp_nr_pages(page); j++)
-                                       flush_dcache_page(page + j);
-                       }
+                       if (writably_mapped)
+                               flush_dcache_folio(folio);
 
-                       copied = copy_page_to_iter(page, offset, bytes, iter);
+                       copied = copy_folio_to_iter(folio, offset, bytes, iter);
 
                        already_read += copied;
                        iocb->ki_pos += copied;
@@ -2705,10 +2744,10 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
                                break;
                        }
                }
-put_pages:
-               for (i = 0; i < pagevec_count(&pvec); i++)
-                       put_page(pvec.pages[i]);
-               pagevec_reinit(&pvec);
+put_folios:
+               for (i = 0; i < folio_batch_count(&fbatch); i++)
+                       folio_put(fbatch.folios[i]);
+               folio_batch_init(&fbatch);
        } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
 
        file_accessed(filp);
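
The put_folios block above is the complete life cycle of a folio_batch in this loop: fill, consume, drop the references, reset. A minimal sketch of that pattern on its own, assuming only the linux/pagevec.h helpers already used in this patch (the helper name is invented for illustration):

        #include <linux/pagevec.h>

        /*
         * Hypothetical helper, for illustration only: drop every reference a
         * filled batch holds and reset it so it can be refilled, mirroring
         * the put_folios: block above.
         */
        static void example_drain_folio_batch(struct folio_batch *fbatch)
        {
                unsigned int i;

                for (i = 0; i < folio_batch_count(fbatch); i++)
                        folio_put(fbatch->folios[i]);
                folio_batch_init(fbatch);
        }

folio_batch deliberately mirrors the old pagevec layout, which is why the conversion in filemap_get_pages() and filemap_read() is largely mechanical.
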
@@ -2793,44 +2832,44 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 }
 EXPORT_SYMBOL(generic_file_read_iter);
 
-static inline loff_t page_seek_hole_data(struct xa_state *xas,
-               struct address_space *mapping, struct page *page,
+static inline loff_t folio_seek_hole_data(struct xa_state *xas,
+               struct address_space *mapping, struct folio *folio,
                loff_t start, loff_t end, bool seek_data)
 {
        const struct address_space_operations *ops = mapping->a_ops;
        size_t offset, bsz = i_blocksize(mapping->host);
 
-       if (xa_is_value(page) || PageUptodate(page))
+       if (xa_is_value(folio) || folio_test_uptodate(folio))
                return seek_data ? start : end;
        if (!ops->is_partially_uptodate)
                return seek_data ? end : start;
 
        xas_pause(xas);
        rcu_read_unlock();
-       lock_page(page);
-       if (unlikely(page->mapping != mapping))
+       folio_lock(folio);
+       if (unlikely(folio->mapping != mapping))
                goto unlock;
 
-       offset = offset_in_thp(page, start) & ~(bsz - 1);
+       offset = offset_in_folio(folio, start) & ~(bsz - 1);
 
        do {
-               if (ops->is_partially_uptodate(page, offset, bsz) == seek_data)
+               if (ops->is_partially_uptodate(folio, offset, bsz) ==
+                                                       seek_data)
                        break;
                start = (start + bsz) & ~(bsz - 1);
                offset += bsz;
-       } while (offset < thp_size(page));
+       } while (offset < folio_size(folio));
 unlock:
-       unlock_page(page);
+       folio_unlock(folio);
        rcu_read_lock();
        return start;
 }
 
-static inline
-unsigned int seek_page_size(struct xa_state *xas, struct page *page)
+static inline size_t seek_folio_size(struct xa_state *xas, struct folio *folio)
 {
-       if (xa_is_value(page))
+       if (xa_is_value(folio))
                return PAGE_SIZE << xa_get_order(xas->xa, xas->xa_index);
-       return thp_size(page);
+       return folio_size(folio);
 }
 
 /**
@@ -2857,15 +2896,15 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
        XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT);
        pgoff_t max = (end - 1) >> PAGE_SHIFT;
        bool seek_data = (whence == SEEK_DATA);
-       struct page *page;
+       struct folio *folio;
 
        if (end <= start)
                return -ENXIO;
 
        rcu_read_lock();
-       while ((page = find_get_entry(&xas, max, XA_PRESENT))) {
+       while ((folio = find_get_entry(&xas, max, XA_PRESENT))) {
                loff_t pos = (u64)xas.xa_index << PAGE_SHIFT;
-               unsigned int seek_size;
+               size_t seek_size;
 
                if (start < pos) {
                        if (!seek_data)
@@ -2873,9 +2912,9 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
                        start = pos;
                }
 
-               seek_size = seek_page_size(&xas, page);
-               pos = round_up(pos + 1, seek_size);
-               start = page_seek_hole_data(&xas, mapping, page, start, pos,
+               seek_size = seek_folio_size(&xas, folio);
+               pos = round_up((u64)pos + 1, seek_size);
+               start = folio_seek_hole_data(&xas, mapping, folio, start, pos,
                                seek_data);
                if (start < pos)
                        goto unlock;
@@ -2883,15 +2922,15 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
                        break;
                if (seek_size > PAGE_SIZE)
                        xas_set(&xas, pos >> PAGE_SHIFT);
-               if (!xa_is_value(page))
-                       put_page(page);
+               if (!xa_is_value(folio))
+                       folio_put(folio);
        }
        if (seek_data)
                start = -ENXIO;
 unlock:
        rcu_read_unlock();
-       if (page && !xa_is_value(page))
-               put_page(page);
+       if (folio && !xa_is_value(folio))
+               folio_put(folio);
        if (start > end)
                return end;
        return start;
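
mapping_seek_hole_data() is what filesystems call from ->llseek to resolve SEEK_DATA/SEEK_HOLE against the page cache. A hedged sketch of such a caller, loosely modelled on existing in-tree users; the function name is invented and the locking is reduced to the essentials:

        /*
         * Hypothetical ->llseek, for illustration: defer SEEK_SET/CUR/END to
         * the generic helper and use the page cache for SEEK_DATA/SEEK_HOLE.
         */
        static loff_t example_llseek(struct file *file, loff_t offset, int whence)
        {
                struct inode *inode = file->f_mapping->host;

                if (whence != SEEK_DATA && whence != SEEK_HOLE)
                        return generic_file_llseek(file, offset, whence);

                inode_lock(inode);
                offset = mapping_seek_hole_data(file->f_mapping, offset,
                                                i_size_read(inode), whence);
                if (offset >= 0)
                        offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
                inode_unlock(inode);
                return offset;
        }
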
@@ -2900,21 +2939,20 @@ unlock:
 #ifdef CONFIG_MMU
 #define MMAP_LOTSAMISS  (100)
 /*
- * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
+ * lock_folio_maybe_drop_mmap - lock the folio, possibly dropping the mmap_lock
  * @vmf - the vm_fault for this fault.
- * @page - the page to lock.
+ * @folio - the folio to lock.
  * @fpin - the pointer to the file we may pin (or is already pinned).
  *
- * This works similar to lock_page_or_retry in that it can drop the mmap_lock.
- * It differs in that it actually returns the page locked if it returns 1 and 0
- * if it couldn't lock the page.  If we did have to drop the mmap_lock then fpin
- * will point to the pinned file and needs to be fput()'ed at a later point.
+ * This works similarly to folio_lock_or_retry in that it can drop the
+ * mmap_lock.  It differs in that it returns 1 with the folio locked,
+ * and 0 if it could not lock the folio.  If we did have
+ * to drop the mmap_lock then fpin will point to the pinned file and
+ * needs to be fput()'ed at a later point.
  */
-static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
+static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio,
                                     struct file **fpin)
 {
-       struct folio *folio = page_folio(page);
-
        if (folio_trylock(folio))
                return 1;
 
@@ -2961,6 +2999,24 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
        struct file *fpin = NULL;
        unsigned int mmap_miss;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       /* Use the readahead code, even if readahead is disabled */
+       if (vmf->vma->vm_flags & VM_HUGEPAGE) {
+               fpin = maybe_unlock_mmap_for_io(vmf, fpin);
+               ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
+               ra->size = HPAGE_PMD_NR;
+               /*
+                * Fetch two PMD folios, so we get the chance to actually
+                * readahead, unless we've been told not to.
+                */
+               if (!(vmf->vma->vm_flags & VM_RAND_READ))
+                       ra->size *= 2;
+               ra->async_size = HPAGE_PMD_NR;
+               page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER);
+               return fpin;
+       }
+#endif
+
        /* If we don't want any read-ahead, don't bother */
        if (vmf->vma->vm_flags & VM_RAND_READ)
                return fpin;
@@ -2993,7 +3049,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
        ra->size = ra->ra_pages;
        ra->async_size = ra->ra_pages / 4;
        ractl._index = ra->start;
-       do_page_cache_ra(&ractl, ra->size, ra->async_size);
+       page_cache_ra_order(&ractl, ra, 0);
        return fpin;
 }
 
@@ -3003,25 +3059,25 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
  * was pinned if we have to drop the mmap_lock in order to do IO.
  */
 static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
-                                           struct page *page)
+                                           struct folio *folio)
 {
        struct file *file = vmf->vma->vm_file;
        struct file_ra_state *ra = &file->f_ra;
-       struct address_space *mapping = file->f_mapping;
+       DEFINE_READAHEAD(ractl, file, ra, file->f_mapping, vmf->pgoff);
        struct file *fpin = NULL;
        unsigned int mmap_miss;
-       pgoff_t offset = vmf->pgoff;
 
        /* If we don't want any read-ahead, don't bother */
        if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
                return fpin;
+
        mmap_miss = READ_ONCE(ra->mmap_miss);
        if (mmap_miss)
                WRITE_ONCE(ra->mmap_miss, --mmap_miss);
-       if (PageReadahead(page)) {
+
+       if (folio_test_readahead(folio)) {
                fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-               page_cache_async_readahead(mapping, ra, file,
-                                          page, offset, ra->ra_pages);
+               page_cache_async_ra(&ractl, folio, ra->ra_pages);
        }
        return fpin;
 }
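
Both readahead paths above now funnel their arguments through a readahead_control initialised with DEFINE_READAHEAD(). A minimal sketch of driving that API directly, under an invented helper name (page_cache_sync_ra() is the synchronous entry point; the request size here is arbitrary):

        /*
         * Hypothetical helper, for illustration: kick off synchronous
         * readahead around @index on @file's mapping via a readahead_control.
         */
        static void example_start_readahead(struct file *file, pgoff_t index)
        {
                DEFINE_READAHEAD(ractl, file, &file->f_ra, file->f_mapping, index);

                page_cache_sync_ra(&ractl, 32);         /* read up to 32 pages ahead */
        }
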
@@ -3040,7 +3096,7 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
  * vma->vm_mm->mmap_lock must be held on entry.
  *
  * If our return value has VM_FAULT_RETRY set, it's because the mmap_lock
- * may be dropped before doing I/O or by lock_page_maybe_drop_mmap().
+ * may be dropped before doing I/O or by lock_folio_maybe_drop_mmap().
  *
  * If our return value does not have VM_FAULT_RETRY set, the mmap_lock
  * has not been released.
@@ -3056,28 +3112,27 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
        struct file *fpin = NULL;
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = mapping->host;
-       pgoff_t offset = vmf->pgoff;
-       pgoff_t max_off;
-       struct page *page;
+       pgoff_t max_idx, index = vmf->pgoff;
+       struct folio *folio;
        vm_fault_t ret = 0;
        bool mapping_locked = false;
 
-       max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
-       if (unlikely(offset >= max_off))
+       max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+       if (unlikely(index >= max_idx))
                return VM_FAULT_SIGBUS;
 
        /*
         * Do we have something in the page cache already?
         */
-       page = find_get_page(mapping, offset);
-       if (likely(page)) {
+       folio = filemap_get_folio(mapping, index);
+       if (likely(folio)) {
                /*
                 * We found the page, so try async readahead before waiting for
                 * the lock.
                 */
                if (!(vmf->flags & FAULT_FLAG_TRIED))
-                       fpin = do_async_mmap_readahead(vmf, page);
-               if (unlikely(!PageUptodate(page))) {
+                       fpin = do_async_mmap_readahead(vmf, folio);
+               if (unlikely(!folio_test_uptodate(folio))) {
                        filemap_invalidate_lock_shared(mapping);
                        mapping_locked = true;
                }
@@ -3089,17 +3144,17 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
                fpin = do_sync_mmap_readahead(vmf);
 retry_find:
                /*
-                * See comment in filemap_create_page() why we need
+                * See comment in filemap_create_folio() for why we need
                 * invalidate_lock
                 */
                if (!mapping_locked) {
                        filemap_invalidate_lock_shared(mapping);
                        mapping_locked = true;
                }
-               page = pagecache_get_page(mapping, offset,
+               folio = __filemap_get_folio(mapping, index,
                                          FGP_CREAT|FGP_FOR_MMAP,
                                          vmf->gfp_mask);
-               if (!page) {
+               if (!folio) {
                        if (fpin)
                                goto out_retry;
                        filemap_invalidate_unlock_shared(mapping);
@@ -3107,22 +3162,22 @@ retry_find:
                }
        }
 
-       if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
+       if (!lock_folio_maybe_drop_mmap(vmf, folio, &fpin))
                goto out_retry;
 
        /* Did it get truncated? */
-       if (unlikely(compound_head(page)->mapping != mapping)) {
-               unlock_page(page);
-               put_page(page);
+       if (unlikely(folio->mapping != mapping)) {
+               folio_unlock(folio);
+               folio_put(folio);
                goto retry_find;
        }
-       VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
+       VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
 
        /*
         * We have a locked page in the page cache, now we need to check
         * that it's up-to-date. If not, it is going to be due to an error.
         */
-       if (unlikely(!PageUptodate(page))) {
+       if (unlikely(!folio_test_uptodate(folio))) {
                /*
                 * The page was in cache and uptodate and now it is not.
                 * Strange but possible since we didn't hold the page lock all
@@ -3130,8 +3185,8 @@ retry_find:
                 * try again.
                 */
                if (!mapping_locked) {
-                       unlock_page(page);
-                       put_page(page);
+                       folio_unlock(folio);
+                       folio_put(folio);
                        goto retry_find;
                }
                goto page_not_uptodate;
@@ -3143,7 +3198,7 @@ retry_find:
         * redo the fault.
         */
        if (fpin) {
-               unlock_page(page);
+               folio_unlock(folio);
                goto out_retry;
        }
        if (mapping_locked)
@@ -3153,14 +3208,14 @@ retry_find:
         * Found the page and have a reference on it.
         * We must recheck i_size under page lock.
         */
-       max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
-       if (unlikely(offset >= max_off)) {
-               unlock_page(page);
-               put_page(page);
+       max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+       if (unlikely(index >= max_idx)) {
+               folio_unlock(folio);
+               folio_put(folio);
                return VM_FAULT_SIGBUS;
        }
 
-       vmf->page = page;
+       vmf->page = folio_file_page(folio, index);
        return ret | VM_FAULT_LOCKED;
 
 page_not_uptodate:
@@ -3171,10 +3226,10 @@ page_not_uptodate:
         * and we need to check for errors.
         */
        fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-       error = filemap_read_page(file, mapping, page);
+       error = filemap_read_folio(file, mapping, folio);
        if (fpin)
                goto out_retry;
-       put_page(page);
+       folio_put(folio);
 
        if (!error || error == AOP_TRUNCATED_PAGE)
                goto retry_find;
@@ -3188,8 +3243,8 @@ out_retry:
         * re-find the vma and come back and find our hopefully still populated
         * page.
         */
-       if (page)
-               put_page(page);
+       if (folio)
+               folio_put(folio);
        if (mapping_locked)
                filemap_invalidate_unlock_shared(mapping);
        if (fpin)
@@ -3231,50 +3286,48 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
        return false;
 }
 
-static struct page *next_uptodate_page(struct page *page,
+static struct folio *next_uptodate_page(struct folio *folio,
                                       struct address_space *mapping,
                                       struct xa_state *xas, pgoff_t end_pgoff)
 {
        unsigned long max_idx;
 
        do {
-               if (!page)
+               if (!folio)
                        return NULL;
-               if (xas_retry(xas, page))
+               if (xas_retry(xas, folio))
                        continue;
-               if (xa_is_value(page))
+               if (xa_is_value(folio))
                        continue;
-               if (PageLocked(page))
+               if (folio_test_locked(folio))
                        continue;
-               if (!page_cache_get_speculative(page))
+               if (!folio_try_get_rcu(folio))
                        continue;
                /* Has the page moved or been split? */
-               if (unlikely(page != xas_reload(xas)))
-                       goto skip;
-               if (!PageUptodate(page) || PageReadahead(page))
+               if (unlikely(folio != xas_reload(xas)))
                        goto skip;
-               if (PageHWPoison(page))
+               if (!folio_test_uptodate(folio) || folio_test_readahead(folio))
                        goto skip;
-               if (!trylock_page(page))
+               if (!folio_trylock(folio))
                        goto skip;
-               if (page->mapping != mapping)
+               if (folio->mapping != mapping)
                        goto unlock;
-               if (!PageUptodate(page))
+               if (!folio_test_uptodate(folio))
                        goto unlock;
                max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
                if (xas->xa_index >= max_idx)
                        goto unlock;
-               return page;
+               return folio;
 unlock:
-               unlock_page(page);
+               folio_unlock(folio);
 skip:
-               put_page(page);
-       } while ((page = xas_next_entry(xas, end_pgoff)) != NULL);
+               folio_put(folio);
+       } while ((folio = xas_next_entry(xas, end_pgoff)) != NULL);
 
        return NULL;
 }
 
-static inline struct page *first_map_page(struct address_space *mapping,
+static inline struct folio *first_map_page(struct address_space *mapping,
                                          struct xa_state *xas,
                                          pgoff_t end_pgoff)
 {
@@ -3282,7 +3335,7 @@ static inline struct page *first_map_page(struct address_space *mapping,
                                  mapping, xas, end_pgoff);
 }
 
-static inline struct page *next_map_page(struct address_space *mapping,
+static inline struct folio *next_map_page(struct address_space *mapping,
                                         struct xa_state *xas,
                                         pgoff_t end_pgoff)
 {
@@ -3299,16 +3352,17 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
        pgoff_t last_pgoff = start_pgoff;
        unsigned long addr;
        XA_STATE(xas, &mapping->i_pages, start_pgoff);
-       struct page *head, *page;
+       struct folio *folio;
+       struct page *page;
        unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
        vm_fault_t ret = 0;
 
        rcu_read_lock();
-       head = first_map_page(mapping, &xas, end_pgoff);
-       if (!head)
+       folio = first_map_page(mapping, &xas, end_pgoff);
+       if (!folio)
                goto out;
 
-       if (filemap_map_pmd(vmf, head)) {
+       if (filemap_map_pmd(vmf, &folio->page)) {
                ret = VM_FAULT_NOPAGE;
                goto out;
        }
@@ -3316,7 +3370,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
        addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
        vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
        do {
-               page = find_subpage(head, xas.xa_index);
+again:
+               page = folio_file_page(folio, xas.xa_index);
                if (PageHWPoison(page))
                        goto unlock;
 
@@ -3337,12 +3392,21 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
                do_set_pte(vmf, page, addr);
                /* no need to invalidate: a not-present page won't be cached */
                update_mmu_cache(vma, addr, vmf->pte);
-               unlock_page(head);
+               if (folio_more_pages(folio, xas.xa_index, end_pgoff)) {
+                       xas.xa_index++;
+                       folio_ref_inc(folio);
+                       goto again;
+               }
+               folio_unlock(folio);
                continue;
 unlock:
-               unlock_page(head);
-               put_page(head);
-       } while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL);
+               if (folio_more_pages(folio, xas.xa_index, end_pgoff)) {
+                       xas.xa_index++;
+                       goto again;
+               }
+               folio_unlock(folio);
+               folio_put(folio);
+       } while ((folio = next_map_page(mapping, &xas, end_pgoff)) != NULL);
        pte_unmap_unlock(vmf->pte, vmf->ptl);
 out:
        rcu_read_unlock();
@@ -3354,24 +3418,24 @@ EXPORT_SYMBOL(filemap_map_pages);
 vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
 {
        struct address_space *mapping = vmf->vma->vm_file->f_mapping;
-       struct page *page = vmf->page;
+       struct folio *folio = page_folio(vmf->page);
        vm_fault_t ret = VM_FAULT_LOCKED;
 
        sb_start_pagefault(mapping->host->i_sb);
        file_update_time(vmf->vma->vm_file);
-       lock_page(page);
-       if (page->mapping != mapping) {
-               unlock_page(page);
+       folio_lock(folio);
+       if (folio->mapping != mapping) {
+               folio_unlock(folio);
                ret = VM_FAULT_NOPAGE;
                goto out;
        }
        /*
-        * We mark the page dirty already here so that when freeze is in
+        * We mark the folio dirty already here so that when freeze is in
         * progress, we are guaranteed that writeback during freezing will
-        * see the dirty page and writeprotect it again.
+        * see the dirty folio and writeprotect it again.
         */
-       set_page_dirty(page);
-       wait_for_stable_page(page);
+       folio_mark_dirty(folio);
+       folio_wait_stable(folio);
 out:
        sb_end_pagefault(mapping->host->i_sb);
        return ret;
@@ -3424,35 +3488,20 @@ EXPORT_SYMBOL(filemap_page_mkwrite);
 EXPORT_SYMBOL(generic_file_mmap);
 EXPORT_SYMBOL(generic_file_readonly_mmap);
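
filemap_fault(), filemap_map_pages() and filemap_page_mkwrite() are the exported pieces a filesystem stitches into its vm_operations_struct for page-cache-backed mappings. A minimal sketch of that wiring, with invented names; many in-tree filesystems use some or all of these helpers in exactly this way:

        /*
         * Hypothetical wiring, for illustration: a filesystem that needs no
         * extra work at fault or write-fault time can use the exported
         * helpers directly.
         */
        static const struct vm_operations_struct example_file_vm_ops = {
                .fault          = filemap_fault,
                .map_pages      = filemap_map_pages,
                .page_mkwrite   = filemap_page_mkwrite,
        };

        static int example_file_mmap(struct file *file, struct vm_area_struct *vma)
        {
                file_accessed(file);
                vma->vm_ops = &example_file_vm_ops;
                return 0;
        }
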
 
-static struct page *wait_on_page_read(struct page *page)
-{
-       if (!IS_ERR(page)) {
-               wait_on_page_locked(page);
-               if (!PageUptodate(page)) {
-                       put_page(page);
-                       page = ERR_PTR(-EIO);
-               }
-       }
-       return page;
-}
-
-static struct page *do_read_cache_page(struct address_space *mapping,
-                               pgoff_t index,
-                               int (*filler)(void *, struct page *),
-                               void *data,
-                               gfp_t gfp)
+static struct folio *do_read_cache_folio(struct address_space *mapping,
+               pgoff_t index, filler_t filler, void *data, gfp_t gfp)
 {
-       struct page *page;
+       struct folio *folio;
        int err;
 repeat:
-       page = find_get_page(mapping, index);
-       if (!page) {
-               page = __page_cache_alloc(gfp);
-               if (!page)
+       folio = filemap_get_folio(mapping, index);
+       if (!folio) {
+               folio = filemap_alloc_folio(gfp, 0);
+               if (!folio)
                        return ERR_PTR(-ENOMEM);
-               err = add_to_page_cache_lru(page, mapping, index, gfp);
+               err = filemap_add_folio(mapping, folio, index, gfp);
                if (unlikely(err)) {
-                       put_page(page);
+                       folio_put(folio);
                        if (err == -EEXIST)
                                goto repeat;
                        /* Presumably ENOMEM for xarray node */
@@ -3461,71 +3510,41 @@ repeat:
 
 filler:
                if (filler)
-                       err = filler(data, page);
+                       err = filler(data, &folio->page);
                else
-                       err = mapping->a_ops->readpage(data, page);
+                       err = mapping->a_ops->readpage(data, &folio->page);
 
                if (err < 0) {
-                       put_page(page);
+                       folio_put(folio);
                        return ERR_PTR(err);
                }
 
-               page = wait_on_page_read(page);
-               if (IS_ERR(page))
-                       return page;
+               folio_wait_locked(folio);
+               if (!folio_test_uptodate(folio)) {
+                       folio_put(folio);
+                       return ERR_PTR(-EIO);
+               }
+
                goto out;
        }
-       if (PageUptodate(page))
-               goto out;
-
-       /*
-        * Page is not up to date and may be locked due to one of the following
-        * case a: Page is being filled and the page lock is held
-        * case b: Read/write error clearing the page uptodate status
-        * case c: Truncation in progress (page locked)
-        * case d: Reclaim in progress
-        *
-        * Case a, the page will be up to date when the page is unlocked.
-        *    There is no need to serialise on the page lock here as the page
-        *    is pinned so the lock gives no additional protection. Even if the
-        *    page is truncated, the data is still valid if PageUptodate as
-        *    it's a race vs truncate race.
-        * Case b, the page will not be up to date
-        * Case c, the page may be truncated but in itself, the data may still
-        *    be valid after IO completes as it's a read vs truncate race. The
-        *    operation must restart if the page is not uptodate on unlock but
-        *    otherwise serialising on page lock to stabilise the mapping gives
-        *    no additional guarantees to the caller as the page lock is
-        *    released before return.
-        * Case d, similar to truncation. If reclaim holds the page lock, it
-        *    will be a race with remove_mapping that determines if the mapping
-        *    is valid on unlock but otherwise the data is valid and there is
-        *    no need to serialise with page lock.
-        *
-        * As the page lock gives no additional guarantee, we optimistically
-        * wait on the page to be unlocked and check if it's up to date and
-        * use the page if it is. Otherwise, the page lock is required to
-        * distinguish between the different cases. The motivation is that we
-        * avoid spurious serialisations and wakeups when multiple processes
-        * wait on the same page for IO to complete.
-        */
-       wait_on_page_locked(page);
-       if (PageUptodate(page))
+       if (folio_test_uptodate(folio))
                goto out;
 
-       /* Distinguish between all the cases under the safety of the lock */
-       lock_page(page);
+       if (!folio_trylock(folio)) {
+               folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
+               goto repeat;
+       }
 
-       /* Case c or d, restart the operation */
-       if (!page->mapping) {
-               unlock_page(page);
-               put_page(page);
+       /* Folio was truncated from mapping */
+       if (!folio->mapping) {
+               folio_unlock(folio);
+               folio_put(folio);
                goto repeat;
        }
 
        /* Someone else locked and filled the page in a very small window */
-       if (PageUptodate(page)) {
-               unlock_page(page);
+       if (folio_test_uptodate(folio)) {
+               folio_unlock(folio);
                goto out;
        }
 
@@ -3535,16 +3554,16 @@ filler:
         * Clear page error before actual read, PG_error will be
         * set again if read page fails.
         */
-       ClearPageError(page);
+       folio_clear_error(folio);
        goto filler;
 
 out:
-       mark_page_accessed(page);
-       return page;
+       folio_mark_accessed(folio);
+       return folio;
 }
 
 /**
- * read_cache_page - read into page cache, fill it if needed
+ * read_cache_folio - read into page cache, fill it if needed
  * @mapping:   the page's address_space
  * @index:     the page index
  * @filler:    function to perform the read
@@ -3559,10 +3578,27 @@ out:
  *
  * Return: up to date page on success, ERR_PTR() on failure.
  */
+struct folio *read_cache_folio(struct address_space *mapping, pgoff_t index,
+               filler_t filler, void *data)
+{
+       return do_read_cache_folio(mapping, index, filler, data,
+                       mapping_gfp_mask(mapping));
+}
+EXPORT_SYMBOL(read_cache_folio);
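
A hedged sketch of a read_cache_folio() caller (names invented): with a NULL filler the data argument is passed straight to ->readpage, exactly as do_read_cache_folio() above does, and the returned folio comes back uptodate with a reference held.

        /*
         * Hypothetical helper, for illustration: read one byte at @pos,
         * filling the folio via ->readpage if it was not already cached.
         * kmap_local_folio() and offset_in_folio() come from highmem.h/mm.h.
         */
        static int example_read_byte(struct address_space *mapping, loff_t pos,
                                     struct file *file, u8 *out)
        {
                struct folio *folio;
                u8 *kaddr;

                folio = read_cache_folio(mapping, pos >> PAGE_SHIFT, NULL, file);
                if (IS_ERR(folio))
                        return PTR_ERR(folio);          /* -ENOMEM or -EIO */

                /* The folio is uptodate; map the byte, copy it, drop our ref. */
                kaddr = kmap_local_folio(folio, offset_in_folio(folio, pos));
                *out = *kaddr;
                kunmap_local(kaddr);
                folio_put(folio);
                return 0;
        }
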
+
+static struct page *do_read_cache_page(struct address_space *mapping,
+               pgoff_t index, filler_t *filler, void *data, gfp_t gfp)
+{
+       struct folio *folio;
+
+       folio = do_read_cache_folio(mapping, index, filler, data, gfp);
+       if (IS_ERR(folio))
+               return &folio->page;
+       return folio_file_page(folio, index);
+}
+
 struct page *read_cache_page(struct address_space *mapping,
-                               pgoff_t index,
-                               int (*filler)(void *, struct page *),
-                               void *data)
+                               pgoff_t index, filler_t *filler, void *data)
 {
        return do_read_cache_page(mapping, index, filler, data,
                        mapping_gfp_mask(mapping));
@@ -3922,33 +3958,32 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 EXPORT_SYMBOL(generic_file_write_iter);
 
 /**
- * try_to_release_page() - release old fs-specific metadata on a page
+ * filemap_release_folio() - Release fs-specific metadata on a folio.
+ * @folio: The folio which the kernel is trying to free.
+ * @gfp: Memory allocation flags (and I/O mode).
  *
- * @page: the page which the kernel is trying to free
- * @gfp_mask: memory allocation flags (and I/O mode)
+ * The address_space is trying to release any data attached to a folio
+ * (presumably at folio->private).
  *
- * The address_space is to try to release any data against the page
- * (presumably at page->private).
+ * This will also be called if the private_2 flag is set on a folio,
+ * indicating that the folio has other metadata associated with it.
  *
- * This may also be called if PG_fscache is set on a page, indicating that the
- * page is known to the local caching routines.
+ * The @gfp argument specifies whether I/O may be performed to release
+ * this folio (__GFP_IO), and whether the call may block
+ * (__GFP_RECLAIM & __GFP_FS).
  *
- * The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS).
- *
- * Return: %1 if the release was successful, otherwise return zero.
+ * Return: %true if the release was successful, otherwise %false.
  */
-int try_to_release_page(struct page *page, gfp_t gfp_mask)
+bool filemap_release_folio(struct folio *folio, gfp_t gfp)
 {
-       struct address_space * const mapping = page->mapping;
+       struct address_space * const mapping = folio->mapping;
 
-       BUG_ON(!PageLocked(page));
-       if (PageWriteback(page))
-               return 0;
+       BUG_ON(!folio_test_locked(folio));
+       if (folio_test_writeback(folio))
+               return false;
 
        if (mapping && mapping->a_ops->releasepage)
-               return mapping->a_ops->releasepage(page, gfp_mask);
-       return try_to_free_buffers(page);
+               return mapping->a_ops->releasepage(&folio->page, gfp);
+       return try_to_free_buffers(&folio->page);
 }
-
-EXPORT_SYMBOL(try_to_release_page);
+EXPORT_SYMBOL(filemap_release_folio);
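
A hedged usage sketch for the renamed helper (the caller name is invented): this mirrors what reclaim and truncation do before freeing a folio that may still have fs-private data attached.

        /*
         * Hypothetical caller, for illustration: try to strip fs-private
         * data from a locked folio before freeing it; returns false if the
         * filesystem (or pending writeback) says it cannot be released now.
         */
        static bool example_try_to_free(struct folio *folio)
        {
                VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);

                if (!folio_test_private(folio) && !folio_test_private_2(folio))
                        return true;                    /* nothing attached */
                return filemap_release_folio(folio, GFP_KERNEL);
        }
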