goto retry;
}
- migrate_page_add(page, qp->pagelist, flags);
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ if (!vma_migratable(vma))
+ break;
+ migrate_page_add(page, qp->pagelist, flags);
+ } else
+ break;
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
- return 0;
+ return addr != end ? -EIO : 0;
}
static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
unsigned long endvma = vma->vm_end;
unsigned long flags = qp->flags;
- if (!vma_migratable(vma))
+ /*
+ * Need check MPOL_MF_STRICT to return -EIO if possible
+ * regardless of vma_migratable
+ */
+ if (!vma_migratable(vma) &&
+ !(flags & MPOL_MF_STRICT))
return 1;
if (endvma > end)
}
/* queue pages from current vma */
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ if (flags & MPOL_MF_VALID)
return 0;
return 1;
}
*policy |= (pol->flags & MPOL_MODE_FLAGS);
}
- if (vma) {
- up_read(¤t->mm->mmap_sem);
- vma = NULL;
- }
-
err = 0;
if (nmask) {
if (mpol_store_user_nodemask(pol)) {
unsigned long maxnode)
{
unsigned long k;
+ unsigned long t;
unsigned long nlongs;
unsigned long endmask;
else
endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1;
- /* When the user specified more nodes than supported just check
- if the non supported part is all zero. */
+ /*
+ * When the user specified more nodes than supported just check
+ * if the non supported part is all zero.
+ *
+ * If maxnode have more longs than MAX_NUMNODES, check
+ * the bits in that area first. And then go through to
+ * check the rest bits which equal or bigger than MAX_NUMNODES.
+ * Otherwise, just check bits [MAX_NUMNODES, maxnode).
+ */
if (nlongs > BITS_TO_LONGS(MAX_NUMNODES)) {
if (nlongs > PAGE_SIZE/sizeof(long))
return -EINVAL;
for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) {
- unsigned long t;
if (get_user(t, nmask + k))
return -EFAULT;
if (k == nlongs - 1) {
endmask = ~0UL;
}
+ if (maxnode > MAX_NUMNODES && MAX_NUMNODES % BITS_PER_LONG != 0) {
+ unsigned long valid_mask = endmask;
+
+ valid_mask &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1);
+ if (get_user(t, nmask + nlongs - 1))
+ return -EFAULT;
+ if (t & valid_mask)
+ return -EINVAL;
+ }
+
if (copy_from_user(nodes_addr(*nodes), nmask, nlongs*sizeof(unsigned long)))
return -EFAULT;
nodes_addr(*nodes)[nlongs-1] &= endmask;
nodemask_t *nodes)
{
unsigned long copy = ALIGN(maxnode-1, 64) / 8;
- const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long);
+ unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long);
if (copy > nbytes) {
if (copy > PAGE_SIZE)
goto out_put;
}
- if (!nodes_subset(*new, node_states[N_MEMORY])) {
- err = -EINVAL;
+ task_nodes = cpuset_mems_allowed(current);
+ nodes_and(*new, *new, task_nodes);
+ if (nodes_empty(*new))
+ goto out_put;
+
+ nodes_and(*new, *new, node_states[N_MEMORY]);
+ if (nodes_empty(*new))
goto out_put;
- }
err = security_task_movememory(task);
if (err)
int uninitialized_var(pval);
nodemask_t nodes;
- if (nmask != NULL && maxnode < MAX_NUMNODES)
+ if (nmask != NULL && maxnode < nr_node_ids)
return -EINVAL;
err = do_get_mempolicy(&pval, &nodes, addr, flags);
unsigned long nr_bits, alloc_size;
DECLARE_BITMAP(bm, MAX_NUMNODES);
- nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
+ nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids);
alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
if (nmask)
COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
compat_ulong_t, maxnode)
{
- long err = 0;
unsigned long __user *nm = NULL;
unsigned long nr_bits, alloc_size;
DECLARE_BITMAP(bm, MAX_NUMNODES);
alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
if (nmask) {
- err = compat_get_bitmap(bm, nmask, nr_bits);
+ if (compat_get_bitmap(bm, nmask, nr_bits))
+ return -EFAULT;
nm = compat_alloc_user_space(alloc_size);
- err |= copy_to_user(nm, bm, alloc_size);
+ if (copy_to_user(nm, bm, alloc_size))
+ return -EFAULT;
}
- if (err)
- return -EFAULT;
-
return sys_set_mempolicy(mode, nm, nr_bits+1);
}
compat_ulong_t, mode, compat_ulong_t __user *, nmask,
compat_ulong_t, maxnode, compat_ulong_t, flags)
{
- long err = 0;
unsigned long __user *nm = NULL;
unsigned long nr_bits, alloc_size;
nodemask_t bm;
alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
if (nmask) {
- err = compat_get_bitmap(nodes_addr(bm), nmask, nr_bits);
+ if (compat_get_bitmap(nodes_addr(bm), nmask, nr_bits))
+ return -EFAULT;
nm = compat_alloc_user_space(alloc_size);
- err |= copy_to_user(nm, nodes_addr(bm), alloc_size);
+ if (copy_to_user(nm, nodes_addr(bm), alloc_size))
+ return -EFAULT;
}
- if (err)
- return -EFAULT;
-
return sys_mbind(start, len, mode, nm, nr_bits+1, flags);
}
nmask = policy_nodemask(gfp, pol);
if (!nmask || node_isset(hpage_node, *nmask)) {
mpol_cond_put(pol);
- page = __alloc_pages_node(hpage_node,
- gfp | __GFP_THISNODE, order);
+ /*
+ * We cannot invoke reclaim if __GFP_THISNODE
+ * is set. Invoking reclaim with
+ * __GFP_THISNODE set, would cause THP
+ * allocations to trigger heavy swapping
+ * despite there may be tons of free memory
+ * (including potentially plenty of THP
+ * already available in the buddy) on all the
+ * other NUMA nodes.
+ *
+ * At most we could invoke compaction when
+ * __GFP_THISNODE is set (but we would need to
+ * refrain from invoking reclaim even if
+ * compaction returned COMPACT_SKIPPED because
+ * there wasn't not enough memory to succeed
+ * compaction). For now just avoid
+ * __GFP_THISNODE instead of limiting the
+ * allocation path to a strict and single
+ * compaction invocation.
+ *
+ * Supposedly if direct reclaim was enabled by
+ * the caller, the app prefers THP regardless
+ * of the node it comes from so this would be
+ * more desiderable behavior than only
+ * providing THP originated from the local
+ * node in such case.
+ */
+ if (!(gfp & __GFP_DIRECT_RECLAIM))
+ gfp |= __GFP_THISNODE;
+ page = __alloc_pages_node(hpage_node, gfp, order);
goto out;
}
}
nmask = policy_nodemask(gfp, pol);
zl = policy_zonelist(gfp, pol, node);
- mpol_cond_put(pol);
page = __alloc_pages_nodemask(gfp, order, zl, nmask);
+ mpol_cond_put(pol);
out:
if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
goto retry_cpuset;
case MPOL_INTERLEAVE:
return !!nodes_equal(a->v.nodes, b->v.nodes);
case MPOL_PREFERRED:
+ /* a's ->flags is the same as b's */
+ if (a->flags & MPOL_F_LOCAL)
+ return true;
return a->v.preferred_node == b->v.preferred_node;
default:
BUG();