Merge tag 'x86_shstk_for_6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...
index 3937479..b56a7f0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -76,10 +76,10 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 static bool ignore_rlimit_data;
 core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
-static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
+static void unmap_region(struct mm_struct *mm, struct ma_state *mas,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
                struct vm_area_struct *next, unsigned long start,
-               unsigned long end, bool mm_wr_locked);
+               unsigned long end, unsigned long tree_end, bool mm_wr_locked);
 
 static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
 {
@@ -154,18 +154,6 @@ static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *vmi,
        return mas_prev(&vmi->mas, min);
 }
 
-static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
-                       unsigned long start, unsigned long end, gfp_t gfp)
-{
-       vmi->mas.index = start;
-       vmi->mas.last = end - 1;
-       mas_store_gfp(&vmi->mas, NULL, gfp);
-       if (unlikely(mas_is_err(&vmi->mas)))
-               return -ENOMEM;
-
-       return 0;
-}
-
 /*
  * check_brk_limits() - Use platform specific check of range & verify mlock
  * limits.
@@ -409,17 +397,17 @@ static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
        VMA_ITERATOR(vmi, mm, 0);
        struct address_space *mapping = NULL;
 
-       if (vma_iter_prealloc(&vmi))
+       vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
+       if (vma_iter_prealloc(&vmi, vma))
                return -ENOMEM;
 
-       if (vma->vm_file) {
-               mapping = vma->vm_file->f_mapping;
-               i_mmap_lock_write(mapping);
-       }
+       vma_start_write(vma);
 
        vma_iter_store(&vmi, vma);
 
-       if (mapping) {
+       if (vma->vm_file) {
+               mapping = vma->vm_file->f_mapping;
+               i_mmap_lock_write(mapping);
                __vma_link_file(vma, mapping);
                i_mmap_unlock_write(mapping);
        }
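
Note: vma_link() now sizes the maple-tree preallocation for the exact entry being stored and write-locks the VMA before it becomes visible in the tree; the i_mmap lock is only held around the file linkage. Condensed from the hunk above, the new insertion sequence is:

	vma_iter_config(&vmi, vma->vm_start, vma->vm_end);	/* range the store will cover */
	if (vma_iter_prealloc(&vmi, vma))			/* preallocate for that entry */
		return -ENOMEM;
	vma_start_write(vma);					/* lock before it is reachable */
	vma_iter_store(&vmi, vma);				/* insert into mm->mm_mt */
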
@@ -474,15 +462,6 @@ static inline void init_vma_prep(struct vma_prepare *vp,
  */
 static inline void vma_prepare(struct vma_prepare *vp)
 {
-       vma_start_write(vp->vma);
-       if (vp->adj_next)
-               vma_start_write(vp->adj_next);
-       /* vp->insert is always a newly created VMA, no need for locking */
-       if (vp->remove)
-               vma_start_write(vp->remove);
-       if (vp->remove2)
-               vma_start_write(vp->remove2);
-
        if (vp->file) {
                uprobe_munmap(vp->vma, vp->vma->vm_start, vp->vma->vm_end);
 
@@ -597,6 +576,7 @@ again:
        }
        if (vp->insert && vp->file)
                uprobe_mmap(vp->insert);
+       validate_mm(mm);
 }
 
 /*
@@ -615,7 +595,7 @@ static inline int dup_anon_vma(struct vm_area_struct *dst,
         * anon pages imported.
         */
        if (src->anon_vma && !dst->anon_vma) {
-               vma_start_write(dst);
+               vma_assert_write_locked(dst);
                dst->anon_vma = src->anon_vma;
                return anon_vma_clone(dst, src);
        }
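
Note: dup_anon_vma() no longer takes the per-VMA write lock itself; it only asserts that the destination is already locked. Callers must lock every VMA they are about to modify before duplicating anon_vma state, as the vma_expand() and vma_merge() hunks below do:

	vma_start_write(vma);			/* caller locks the destination ...          */
	vma_start_write(next);			/* ... and the VMA that will be removed      */
	ret = dup_anon_vma(vma, next);		/* which now only vma_assert_write_locked()s */
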
@@ -647,10 +627,12 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
        bool remove_next = false;
        struct vma_prepare vp;
 
+       vma_start_write(vma);
        if (next && (vma != next) && (end == next->vm_end)) {
                int ret;
 
                remove_next = true;
+               vma_start_write(next);
                ret = dup_anon_vma(vma, next);
                if (ret)
                        return ret;
@@ -663,23 +645,19 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
        /* Only handles expanding */
        VM_WARN_ON(vma->vm_start < start || vma->vm_end > end);
 
-       if (vma_iter_prealloc(vmi))
+       /* Note: vma iterator must be pointing to 'start' */
+       vma_iter_config(vmi, start, end);
+       if (vma_iter_prealloc(vmi, vma))
                goto nomem;
 
        vma_prepare(&vp);
        vma_adjust_trans_huge(vma, start, end, 0);
-       /* VMA iterator points to previous, so set to start if necessary */
-       if (vma_iter_addr(vmi) != start)
-               vma_iter_set(vmi, start);
-
        vma->vm_start = start;
        vma->vm_end = end;
        vma->vm_pgoff = pgoff;
-       /* Note: mas must be pointing to the expanding VMA */
        vma_iter_store(vmi, vma);
 
        vma_complete(&vp, vmi, vma->vm_mm);
-       validate_mm(vma->vm_mm);
        return 0;
 
 nomem:
@@ -702,24 +680,25 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
 
        WARN_ON((vma->vm_start != start) && (vma->vm_end != end));
 
-       if (vma_iter_prealloc(vmi))
+       if (vma->vm_start < start)
+               vma_iter_config(vmi, vma->vm_start, start);
+       else
+               vma_iter_config(vmi, end, vma->vm_end);
+
+       if (vma_iter_prealloc(vmi, NULL))
                return -ENOMEM;
 
+       vma_start_write(vma);
+
        init_vma_prep(&vp, vma);
        vma_prepare(&vp);
        vma_adjust_trans_huge(vma, start, end, 0);
 
-       if (vma->vm_start < start)
-               vma_iter_clear(vmi, vma->vm_start, start);
-
-       if (vma->vm_end > end)
-               vma_iter_clear(vmi, end, vma->vm_end);
-
+       vma_iter_clear(vmi);
        vma->vm_start = start;
        vma->vm_end = end;
        vma->vm_pgoff = pgoff;
        vma_complete(&vp, vmi, vma->vm_mm);
-       validate_mm(vma->vm_mm);
        return 0;
 }
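
Note: vma_shrink() now configures the iterator up front for whichever side of the VMA is being trimmed and preallocates for a NULL store, so the later clear takes no range argument:

	if (vma->vm_start < start)
		vma_iter_config(vmi, vma->vm_start, start);	/* trimming the front */
	else
		vma_iter_config(vmi, end, vma->vm_end);		/* trimming the tail  */
	if (vma_iter_prealloc(vmi, NULL))			/* NULL: storing a hole */
		return -ENOMEM;
	/* ... */
	vma_iter_clear(vmi);					/* clears the preconfigured range */
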
 
@@ -892,7 +871,6 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
        pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
        long adj_start = 0;
 
-       validate_mm(mm);
        /*
         * We later require that vma->vm_flags == vm_flags,
         * so this tests vma->vm_flags & VM_SPECIAL, too.
@@ -937,16 +915,21 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
        if (!merge_prev && !merge_next)
                return NULL; /* Not mergeable. */
 
+       if (merge_prev)
+               vma_start_write(prev);
+
        res = vma = prev;
        remove = remove2 = adjust = NULL;
 
        /* Can we merge both the predecessor and the successor? */
        if (merge_prev && merge_next &&
            is_mergeable_anon_vma(prev->anon_vma, next->anon_vma, NULL)) {
+               vma_start_write(next);
                remove = next;                          /* case 1 */
                vma_end = next->vm_end;
                err = dup_anon_vma(prev, next);
                if (curr) {                             /* case 6 */
+                       vma_start_write(curr);
                        remove = curr;
                        remove2 = next;
                        if (!next->anon_vma)
@@ -954,6 +937,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
                }
        } else if (merge_prev) {                        /* case 2 */
                if (curr) {
+                       vma_start_write(curr);
                        err = dup_anon_vma(prev, curr);
                        if (end == curr->vm_end) {      /* case 7 */
                                remove = curr;
@@ -963,8 +947,10 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
                        }
                }
        } else { /* merge_next */
+               vma_start_write(next);
                res = next;
                if (prev && addr < prev->vm_end) {      /* case 4 */
+                       vma_start_write(prev);
                        vma_end = addr;
                        adjust = next;
                        adj_start = -(prev->vm_end - addr);
@@ -980,6 +966,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
                        vma_pgoff = next->vm_pgoff - pglen;
                        if (curr) {                     /* case 8 */
                                vma_pgoff = curr->vm_pgoff;
+                               vma_start_write(curr);
                                remove = curr;
                                err = dup_anon_vma(next, curr);
                        }
@@ -990,7 +977,17 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
        if (err)
                return NULL;
 
-       if (vma_iter_prealloc(vmi))
+       if (vma_start < vma->vm_start || vma_end > vma->vm_end)
+               vma_expanded = true;
+
+       if (vma_expanded) {
+               vma_iter_config(vmi, vma_start, vma_end);
+       } else {
+               vma_iter_config(vmi, adjust->vm_start + adj_start,
+                               adjust->vm_end);
+       }
+
+       if (vma_iter_prealloc(vmi, vma))
                return NULL;
 
        init_multi_vma_prep(&vp, vma, adjust, remove, remove2);
@@ -999,8 +996,6 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
 
        vma_prepare(&vp);
        vma_adjust_trans_huge(vma, vma_start, vma_end, adj_start);
-       if (vma_start < vma->vm_start || vma_end > vma->vm_end)
-               vma_expanded = true;
 
        vma->vm_start = vma_start;
        vma->vm_end = vma_end;
@@ -1019,10 +1014,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
        }
 
        vma_complete(&vp, vmi, mm);
-       vma_iter_free(vmi);
-       validate_mm(mm);
        khugepaged_enter_vma(res, vm_flags);
-
        return res;
 }
 
@@ -1190,14 +1182,13 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
  */
 unsigned long do_mmap(struct file *file, unsigned long addr,
                        unsigned long len, unsigned long prot,
-                       unsigned long flags, unsigned long pgoff,
-                       unsigned long *populate, struct list_head *uf)
+                       unsigned long flags, vm_flags_t vm_flags,
+                       unsigned long pgoff, unsigned long *populate,
+                       struct list_head *uf)
 {
        struct mm_struct *mm = current->mm;
-       vm_flags_t vm_flags;
        int pkey = 0;
 
-       validate_mm(mm);
        *populate = 0;
 
        if (!len)
@@ -1255,7 +1246,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
         * to. we assume access permissions have been handled by the open
         * of the memory object, so we don't do any here.
         */
-       vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
+       vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
                        mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
        if (flags & MAP_LOCKED)
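
Note: do_mmap() now accepts a vm_flags_t from its caller and ORs the computed protection and flag bits into it instead of always starting from zero. Callers with nothing to seed pass 0, as the remap_file_pages() hunk later in this diff does:

	ret = do_mmap(vma->vm_file, start, size, prot, flags, 0, pgoff, &populate, NULL);
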
@@ -1573,7 +1564,7 @@ retry:
        gap = mas.index;
        gap += (info->align_offset - gap) & info->align_mask;
        tmp = mas_next(&mas, ULONG_MAX);
-       if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+       if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
                if (vm_start_gap(tmp) < gap + length - 1) {
                        low_limit = tmp->vm_end;
                        mas_reset(&mas);
@@ -1625,7 +1616,7 @@ retry:
        gap -= (gap - info->align_offset) & info->align_mask;
        gap_end = mas.last;
        tmp = mas_next(&mas, ULONG_MAX);
-       if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+       if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
                if (vm_start_gap(tmp) <= gap_end) {
                        high_limit = vm_start_gap(tmp);
                        mas_reset(&mas);
@@ -1944,7 +1935,7 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
        struct vm_area_struct *next;
        unsigned long gap_addr;
        int error = 0;
-       MA_STATE(mas, &mm->mm_mt, 0, 0);
+       MA_STATE(mas, &mm->mm_mt, vma->vm_start, address);
 
        if (!(vma->vm_flags & VM_GROWSUP))
                return -EFAULT;
@@ -1969,7 +1960,11 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                /* Check that both stack segments have the same anon_vma? */
        }
 
-       if (mas_preallocate(&mas, GFP_KERNEL))
+       if (next)
+               mas_prev_range(&mas, address);
+
+       __mas_set_range(&mas, vma->vm_start, address - 1);
+       if (mas_preallocate(&mas, vma, GFP_KERNEL))
                return -ENOMEM;
 
        /* We must make sure the anon_vma is allocated. */
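
Note: expand_upwards() (and expand_downwards() below) now sets the maple state to the final range of the grown VMA and preallocates for that specific store, so the write under page_table_lock can use the preallocation directly without a later mas_set_range(). Condensed from this hunk and the store further down:

	__mas_set_range(&mas, vma->vm_start, address - 1);	/* span after growth          */
	if (mas_preallocate(&mas, vma, GFP_KERNEL))		/* nodes sized for this store */
		return -ENOMEM;
	/* ... */
	mas_store_prealloc(&mas, vma);				/* under the lock: cannot fail */
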
@@ -2014,7 +2009,6 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_end = address;
                                /* Overwrite old entry in mtree. */
-                               mas_set_range(&mas, vma->vm_start, address - 1);
                                mas_store_prealloc(&mas, vma);
                                anon_vma_interval_tree_post_update_vma(vma);
                                spin_unlock(&mm->page_table_lock);
@@ -2026,6 +2020,7 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
        anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma(vma, vma->vm_flags);
        mas_destroy(&mas);
+       validate_mm(mm);
        return error;
 }
 #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -2058,7 +2053,11 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
                        return -ENOMEM;
        }
 
-       if (mas_preallocate(&mas, GFP_KERNEL))
+       if (prev)
+               mas_next_range(&mas, vma->vm_start);
+
+       __mas_set_range(&mas, address, vma->vm_end - 1);
+       if (mas_preallocate(&mas, vma, GFP_KERNEL))
                return -ENOMEM;
 
        /* We must make sure the anon_vma is allocated. */
@@ -2104,7 +2103,6 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
                                vma->vm_start = address;
                                vma->vm_pgoff -= grow;
                                /* Overwrite old entry in mtree. */
-                               mas_set_range(&mas, address, vma->vm_end - 1);
                                mas_store_prealloc(&mas, vma);
                                anon_vma_interval_tree_post_update_vma(vma);
                                spin_unlock(&mm->page_table_lock);
@@ -2116,6 +2114,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
        anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma(vma, vma->vm_flags);
        mas_destroy(&mas);
+       validate_mm(mm);
        return error;
 }
 
@@ -2293,7 +2292,6 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
                remove_vma(vma, false);
        }
        vm_unacct_memory(nr_accounted);
-       validate_mm(mm);
 }
 
 /*
@@ -2301,18 +2299,20 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
  *
  * Called with the mm semaphore held.
  */
-static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
+static void unmap_region(struct mm_struct *mm, struct ma_state *mas,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
-               struct vm_area_struct *next,
-               unsigned long start, unsigned long end, bool mm_wr_locked)
+               struct vm_area_struct *next, unsigned long start,
+               unsigned long end, unsigned long tree_end, bool mm_wr_locked)
 {
        struct mmu_gather tlb;
+       unsigned long mt_start = mas->index;
 
        lru_add_drain();
        tlb_gather_mmu(&tlb, mm);
        update_hiwater_rss(mm);
-       unmap_vmas(&tlb, mt, vma, start, end, mm_wr_locked);
-       free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+       unmap_vmas(&tlb, mas, vma, start, end, tree_end, mm_wr_locked);
+       mas_set(mas, mt_start);
+       free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
                                 next ? next->vm_start : USER_PGTABLES_CEILING,
                                 mm_wr_locked);
        tlb_finish_mmu(&tlb);
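
Note: unmap_region() now walks a caller-supplied ma_state (over the detached on-stack tree in the munmap path) rather than a maple_tree pointer, and takes a separate tree_end so the tree walk and the address range can differ; it saves mas->index and rewinds the same state before free_pgtables(). The munmap caller below uses it as:

	mas_set(&mas_detach, 1);	/* remaining detached VMAs; the first is passed as 'vma' */
	unmap_region(mm, &mas_detach, vma, prev, next, start, end, count, !unlock);
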
@@ -2330,8 +2330,6 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
        struct vm_area_struct *new;
        int err;
 
-       validate_mm(vma->vm_mm);
-
        WARN_ON(vma->vm_start >= addr);
        WARN_ON(vma->vm_end <= addr);
 
@@ -2345,10 +2343,6 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
        if (!new)
                return -ENOMEM;
 
-       err = -ENOMEM;
-       if (vma_iter_prealloc(vmi))
-               goto out_free_vma;
-
        if (new_below) {
                new->vm_end = addr;
        } else {
@@ -2356,6 +2350,11 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
                new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
        }
 
+       err = -ENOMEM;
+       vma_iter_config(vmi, new->vm_start, new->vm_end);
+       if (vma_iter_prealloc(vmi, new))
+               goto out_free_vma;
+
        err = vma_dup_policy(vma, new);
        if (err)
                goto out_free_vmi;
@@ -2370,6 +2369,9 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
        if (new->vm_ops && new->vm_ops->open)
                new->vm_ops->open(new);
 
+       vma_start_write(vma);
+       vma_start_write(new);
+
        init_vma_prep(&vp, vma);
        vp.insert = new;
        vma_prepare(&vp);
@@ -2388,7 +2390,6 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
        /* Success. */
        if (new_below)
                vma_next(vmi);
-       validate_mm(vma->vm_mm);
        return 0;
 
 out_free_mpol:
@@ -2397,7 +2398,6 @@ out_free_vmi:
        vma_iter_free(vmi);
 out_free_vma:
        vm_area_free(new);
-       validate_mm(vma->vm_mm);
        return err;
 }
 
@@ -2440,7 +2440,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
        unsigned long locked_vm = 0;
        MA_STATE(mas_detach, &mt_detach, 0, 0);
        mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
-       mt_set_external_lock(&mt_detach, &mm->mmap_lock);
+       mt_on_stack(mt_detach);
 
        /*
         * If we need to split any vma, do it now to save pain later.
@@ -2461,22 +2461,17 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
                if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
                        goto map_count_exceeded;
 
-               error = __split_vma(vmi, vma, start, 0);
+               error = __split_vma(vmi, vma, start, 1);
                if (error)
                        goto start_split_failed;
-
-               vma = vma_iter_load(vmi);
        }
 
-       prev = vma_prev(vmi);
-       if (unlikely((!prev)))
-               vma_iter_set(vmi, start);
-
        /*
         * Detach a range of VMAs from the mm. Using next as a temp variable as
         * it is always overwritten.
         */
-       for_each_vma_range(*vmi, next, end) {
+       next = vma;
+       do {
                /* Does it split the end? */
                if (next->vm_end > end) {
                        error = __split_vma(vmi, next, end, 0);
@@ -2484,7 +2479,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
                                goto end_split_failed;
                }
                vma_start_write(next);
-               mas_set_range(&mas_detach, next->vm_start, next->vm_end - 1);
+               mas_set(&mas_detach, count);
                error = mas_store_gfp(&mas_detach, next, GFP_KERNEL);
                if (error)
                        goto munmap_gather_failed;
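
Note: detached VMAs are now stored in mt_detach under a simple counter index (0, 1, 2, ...) rather than under their address range, which is why the later passes re-seed the state with mas_set(&mas_detach, 1) before unmapping and mas_set(&mas_detach, 0) before the accounting/free walk:

	mas_set(&mas_detach, count);				/* index of this detached VMA */
	error = mas_store_gfp(&mas_detach, next, GFP_KERNEL);
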
@@ -2512,34 +2507,31 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
                BUG_ON(next->vm_start < start);
                BUG_ON(next->vm_start > end);
 #endif
-       }
-
-       if (vma_iter_end(vmi) > end)
-               next = vma_iter_load(vmi);
-
-       if (!next)
-               next = vma_next(vmi);
+       } for_each_vma_range(*vmi, next, end);
 
 #if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
        /* Make sure no VMAs are about to be lost. */
        {
-               MA_STATE(test, &mt_detach, start, end - 1);
+               MA_STATE(test, &mt_detach, 0, 0);
                struct vm_area_struct *vma_mas, *vma_test;
                int test_count = 0;
 
                vma_iter_set(vmi, start);
                rcu_read_lock();
-               vma_test = mas_find(&test, end - 1);
+               vma_test = mas_find(&test, count - 1);
                for_each_vma_range(*vmi, vma_mas, end) {
                        BUG_ON(vma_mas != vma_test);
                        test_count++;
-                       vma_test = mas_next(&test, end - 1);
+                       vma_test = mas_next(&test, count - 1);
                }
                rcu_read_unlock();
                BUG_ON(count != test_count);
        }
 #endif
-       vma_iter_set(vmi, start);
+
+       while (vma_iter_addr(vmi) > start)
+               vma_iter_prev_range(vmi);
+
        error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL);
        if (error)
                goto clear_tree_failed;
@@ -2550,19 +2542,26 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
        if (unlock)
                mmap_write_downgrade(mm);
 
+       prev = vma_iter_prev_range(vmi);
+       next = vma_next(vmi);
+       if (next)
+               vma_iter_prev_range(vmi);
+
        /*
         * We can free page tables without write-locking mmap_lock because VMAs
         * were isolated before we downgraded mmap_lock.
         */
-       unmap_region(mm, &mt_detach, vma, prev, next, start, end, !unlock);
+       mas_set(&mas_detach, 1);
+       unmap_region(mm, &mas_detach, vma, prev, next, start, end, count,
+                    !unlock);
        /* Statistics and freeing VMAs */
-       mas_set(&mas_detach, start);
+       mas_set(&mas_detach, 0);
        remove_mt(mm, &mas_detach);
-       __mt_destroy(&mt_detach);
        validate_mm(mm);
        if (unlock)
                mmap_read_unlock(mm);
 
+       __mt_destroy(&mt_detach);
        return 0;
 
 clear_tree_failed:
@@ -2686,8 +2685,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
        next = vma_next(&vmi);
        prev = vma_prev(&vmi);
-       if (vm_flags & VM_SPECIAL)
+       if (vm_flags & VM_SPECIAL) {
+               if (prev)
+                       vma_iter_next_range(&vmi);
                goto cannot_expand;
+       }
 
        /* Attempt to expand an old mapping */
        /* Check next */
@@ -2708,9 +2710,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
                merge_start = prev->vm_start;
                vma = prev;
                vm_pgoff = prev->vm_pgoff;
+       } else if (prev) {
+               vma_iter_next_range(&vmi);
        }
 
-
        /* Actually expand, if possible */
        if (vma &&
            !vma_expand(&vmi, vma, merge_start, merge_end, vm_pgoff, next)) {
@@ -2718,9 +2721,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
                goto expanded;
        }
 
+       if (vma == prev)
+               vma_iter_set(&vmi, addr);
 cannot_expand:
-       if (prev)
-               vma_iter_next_range(&vmi);
 
        /*
         * Determine the object being mapped and call the appropriate
@@ -2733,7 +2736,7 @@ cannot_expand:
                goto unacct_error;
        }
 
-       vma_iter_set(&vmi, addr);
+       vma_iter_config(&vmi, addr, end);
        vma->vm_start = addr;
        vma->vm_end = end;
        vm_flags_init(vma, vm_flags);
@@ -2760,7 +2763,7 @@ cannot_expand:
                if (WARN_ON((addr != vma->vm_start)))
                        goto close_and_free_vma;
 
-               vma_iter_set(&vmi, addr);
+               vma_iter_config(&vmi, addr, end);
                /*
                 * If vm_flags changed after call_mmap(), we should try merge
                 * vma again as we may succeed this time.
@@ -2807,17 +2810,15 @@ cannot_expand:
                goto close_and_free_vma;
 
        error = -ENOMEM;
-       if (vma_iter_prealloc(&vmi))
+       if (vma_iter_prealloc(&vmi, vma))
                goto close_and_free_vma;
 
        /* Lock the VMA since it is modified after insertion into VMA tree */
        vma_start_write(vma);
-       if (vma->vm_file)
-               i_mmap_lock_write(vma->vm_file->f_mapping);
-
        vma_iter_store(&vmi, vma);
        mm->map_count++;
        if (vma->vm_file) {
+               i_mmap_lock_write(vma->vm_file->f_mapping);
                if (vma->vm_flags & VM_SHARED)
                        mapping_allow_writable(vma->vm_file->f_mapping);
 
@@ -2878,9 +2879,10 @@ unmap_and_free_vma:
                fput(vma->vm_file);
                vma->vm_file = NULL;
 
+               vma_iter_set(&vmi, vma->vm_end);
                /* Undo any partial mapping done by a device driver. */
-               unmap_region(mm, &mm->mm_mt, vma, prev, next, vma->vm_start,
-                            vma->vm_end, true);
+               unmap_region(mm, &vmi.mas, vma, prev, next, vma->vm_start,
+                            vma->vm_end, vma->vm_end, true);
        }
        if (file && (vm_flags & VM_SHARED))
                mapping_unmap_writable(file->f_mapping);
@@ -2996,7 +2998,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 
        file = get_file(vma->vm_file);
        ret = do_mmap(vma->vm_file, start, size,
-                       prot, flags, pgoff, &populate, NULL);
+                       prot, flags, 0, pgoff, &populate, NULL);
        fput(file);
 out:
        mmap_write_unlock(mm);
@@ -3050,7 +3052,6 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
        struct mm_struct *mm = current->mm;
        struct vma_prepare vp;
 
-       validate_mm(mm);
        /*
         * Check against address space limits by the changed size
         * Note: This happens *after* clearing old mappings in some code paths.
@@ -3072,9 +3073,12 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
        if (vma && vma->vm_end == addr && !vma_policy(vma) &&
            can_vma_merge_after(vma, flags, NULL, NULL,
                                addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) {
-               if (vma_iter_prealloc(vmi))
+               vma_iter_config(vmi, vma->vm_start, addr + len);
+               if (vma_iter_prealloc(vmi, vma))
                        goto unacct_fail;
 
+               vma_start_write(vma);
+
                init_vma_prep(&vp, vma);
                vma_prepare(&vp);
                vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0);
@@ -3087,6 +3091,8 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
                goto out;
        }
 
+       if (vma)
+               vma_iter_next_range(vmi);
        /* create a vma struct for an anonymous mapping */
        vma = vm_area_alloc(mm);
        if (!vma)
@@ -3098,10 +3104,12 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
        vma->vm_pgoff = addr >> PAGE_SHIFT;
        vm_flags_init(vma, flags);
        vma->vm_page_prot = vm_get_page_prot(flags);
+       vma_start_write(vma);
        if (vma_iter_store_gfp(vmi, vma, GFP_KERNEL))
                goto mas_store_fail;
 
        mm->map_count++;
+       validate_mm(mm);
        ksm_add_vma(vma);
 out:
        perf_event_mmap(vma);
@@ -3110,7 +3118,6 @@ out:
        if (flags & VM_LOCKED)
                mm->locked_vm += (len >> PAGE_SHIFT);
        vm_flags_set(vma, VM_SOFTDIRTY);
-       validate_mm(mm);
        return 0;
 
 mas_store_fail:
@@ -3200,7 +3207,7 @@ void exit_mmap(struct mm_struct *mm)
        tlb_gather_mmu_fullmm(&tlb, mm);
        /* update_hiwater_rss(mm) here? but nobody should be looking */
        /* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
-       unmap_vmas(&tlb, &mm->mm_mt, vma, 0, ULONG_MAX, false);
+       unmap_vmas(&tlb, &mas, vma, 0, ULONG_MAX, ULONG_MAX, false);
        mmap_read_unlock(mm);
 
        /*
@@ -3210,7 +3217,8 @@ void exit_mmap(struct mm_struct *mm)
        set_bit(MMF_OOM_SKIP, &mm->flags);
        mmap_write_lock(mm);
        mt_clear_in_rcu(&mm->mm_mt);
-       free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS,
+       mas_set(&mas, vma->vm_end);
+       free_pgtables(&tlb, &mas, vma, FIRST_USER_ADDRESS,
                      USER_PGTABLES_CEILING, true);
        tlb_finish_mmu(&tlb);
 
@@ -3219,6 +3227,7 @@ void exit_mmap(struct mm_struct *mm)
         * enabled, without holding any MM locks besides the unreachable
         * mmap_write_lock.
         */
+       mas_set(&mas, vma->vm_end);
        do {
                if (vma->vm_flags & VM_ACCOUNT)
                        nr_accounted += vma_pages(vma);
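
Note: exit_mmap() reuses a single ma_state for three passes over the VMAs (unmap, page-table freeing, accounting/freeing), so it is re-seeded before each pass once the previous walk has consumed it; mas_set(&mas, vma->vm_end) positions the walk just past the first VMA, which is passed explicitly:

	unmap_vmas(&tlb, &mas, vma, 0, ULONG_MAX, ULONG_MAX, false);
	/* ... */
	mas_set(&mas, vma->vm_end);	/* rewind for the page-table pass  */
	free_pgtables(&tlb, &mas, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING, true);
	/* ... */
	mas_set(&mas, vma->vm_end);	/* and again for the remove loop   */
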
@@ -3291,7 +3300,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        bool faulted_in_anon_vma = true;
        VMA_ITERATOR(vmi, mm, addr);
 
-       validate_mm(mm);
        /*
         * If anonymous vma has not yet been faulted, update new pgoff
         * to match new location, to increase its chance of merging.
@@ -3345,12 +3353,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                        get_file(new_vma->vm_file);
                if (new_vma->vm_ops && new_vma->vm_ops->open)
                        new_vma->vm_ops->open(new_vma);
-               vma_start_write(new_vma);
                if (vma_link(mm, new_vma))
                        goto out_vma_link;
                *need_rmap_locks = false;
        }
-       validate_mm(mm);
        return new_vma;
 
 out_vma_link:
@@ -3366,7 +3372,6 @@ out_free_mempol:
 out_free_vma:
        vm_area_free(new_vma);
 out:
-       validate_mm(mm);
        return NULL;
 }
 
@@ -3503,7 +3508,6 @@ static struct vm_area_struct *__install_special_mapping(
        int ret;
        struct vm_area_struct *vma;
 
-       validate_mm(mm);
        vma = vm_area_alloc(mm);
        if (unlikely(vma == NULL))
                return ERR_PTR(-ENOMEM);
@@ -3526,12 +3530,10 @@ static struct vm_area_struct *__install_special_mapping(
 
        perf_event_mmap(vma);
 
-       validate_mm(mm);
        return vma;
 
 out:
        vm_area_free(vma);
-       validate_mm(mm);
        return ERR_PTR(ret);
 }
 
@@ -3663,6 +3665,12 @@ int mm_take_all_locks(struct mm_struct *mm)
 
        mutex_lock(&mm_all_locks_mutex);
 
+       /*
+        * vma_start_write() does not have a complement in mm_drop_all_locks()
+        * because vma_start_write() is always asymmetrical; it marks a VMA as
+        * being written to until mmap_write_unlock() or mmap_write_downgrade()
+        * is reached.
+        */
        mas_for_each(&mas, vma, ULONG_MAX) {
                if (signal_pending(current))
                        goto out_unlock;
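
Note: a rough sketch of the asymmetry the comment above describes (per-VMA locking behaviour, not part of this hunk): vma_start_write() marks one VMA as being written and there is no per-VMA unlock; every mark is dropped collectively when the mmap write lock is released or downgraded. This is also why the mm_drop_all_locks() hunk below drops its vma_end_write_all() call.

	vma_start_write(vma);		/* lockless readers now fall back to mmap_lock        */
	/* ... */
	mmap_write_unlock(mm);		/* releases every VMA marked during this write section */
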
@@ -3759,7 +3767,6 @@ void mm_drop_all_locks(struct mm_struct *mm)
                if (vma->vm_file && vma->vm_file->f_mapping)
                        vm_unlock_mapping(vma->vm_file->f_mapping);
        }
-       vma_end_write_all(mm);
 
        mutex_unlock(&mm_all_locks_mutex);
 }
@@ -3789,7 +3796,7 @@ static int init_user_reserve(void)
 {
        unsigned long free_kbytes;
 
-       free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
+       free_kbytes = K(global_zone_page_state(NR_FREE_PAGES));
 
        sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
        return 0;
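
Note: K() is the kernel's pages-to-KiB helper, so these conversions are arithmetically unchanged; assuming the usual definition:

	#define K(x) ((x) << (PAGE_SHIFT - 10))		/* pages -> KiB */
	free_kbytes = K(global_zone_page_state(NR_FREE_PAGES));
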
@@ -3810,7 +3817,7 @@ static int init_admin_reserve(void)
 {
        unsigned long free_kbytes;
 
-       free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
+       free_kbytes = K(global_zone_page_state(NR_FREE_PAGES));
 
        sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
        return 0;
@@ -3854,7 +3861,7 @@ static int reserve_mem_notifier(struct notifier_block *nb,
 
                break;
        case MEM_OFFLINE:
-               free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
+               free_kbytes = K(global_zone_page_state(NR_FREE_PAGES));
 
                if (sysctl_user_reserve_kbytes > free_kbytes) {
                        init_user_reserve();