Merge branch 'akpm' (patches from Andrew)

Merge misc mm fixes from Andrew Morton:
 "15 patches.

  VM subsystems affected by this patch series: userfaultfd, kfence,
  highmem, pagealloc, memblock, pagecache, secretmem, pagemap, and
  hugetlbfs"

* akpm:
  hugetlbfs: fix mount mode command line processing
  mm: fix the deadlock in finish_fault()
  mm: mmap_lock: fix disabling preemption directly
  mm/secretmem: wire up ->set_page_dirty
  writeback, cgroup: do not reparent dax inodes
  writeback, cgroup: remove wb from offline list before releasing refcnt
  memblock: make for_each_mem_range() traverse MEMBLOCK_HOTPLUG regions
  mm: page_alloc: fix page_poison=1 / INIT_ON_ALLOC_DEFAULT_ON interaction
  mm: use kmap_local_page in memzero_page
  mm: call flush_dcache_page() in memcpy_to_page() and memzero_page()
  kfence: skip all GFP_ZONEMASK allocations
  kfence: move the size check to the beginning of __kfence_alloc()
  kfence: defer kfence_test_init to ensure that kunit debugfs is created
  selftest: use mmap instead of posix_memalign to allocate memory
  userfaultfd: do not untag user pointers

Author: Linus Torvalds
Date:   2021-07-24 12:27:16 -07:00
commit bca1d4de39
15 changed files with 92 additions and 50 deletions

Documentation/arm64/tagged-address-abi.rst

@@ -45,14 +45,24 @@ how the user addresses are used by the kernel:
 1. User addresses not accessed by the kernel but used for address space
    management (e.g. ``mprotect()``, ``madvise()``). The use of valid
-   tagged pointers in this context is allowed with the exception of
-   ``brk()``, ``mmap()`` and the ``new_address`` argument to
-   ``mremap()`` as these have the potential to alias with existing
-   user addresses.
+   tagged pointers in this context is allowed with these exceptions:
-   NOTE: This behaviour changed in v5.6 and so some earlier kernels may
-   incorrectly accept valid tagged pointers for the ``brk()``,
-   ``mmap()`` and ``mremap()`` system calls.
+   - ``brk()``, ``mmap()`` and the ``new_address`` argument to
+     ``mremap()`` as these have the potential to alias with existing
+     user addresses.
+     NOTE: This behaviour changed in v5.6 and so some earlier kernels may
+     incorrectly accept valid tagged pointers for the ``brk()``,
+     ``mmap()`` and ``mremap()`` system calls.
+   - The ``range.start``, ``start`` and ``dst`` arguments to the
+     ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from
+     ``userfaultfd()``, as fault addresses subsequently obtained by reading
+     the file descriptor will be untagged, which may otherwise confuse
+     tag-unaware programs.
+     NOTE: This behaviour changed in v5.14 and so some earlier kernels may
+     incorrectly accept valid tagged pointers for this system call.
 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
    relaxation is disabled by default and the application thread needs to
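
To make the documented rule concrete, here is a minimal arm64-only userspace sketch (illustrative, not taken from this series; the 0x56 tag value is arbitrary). It forges a tagged alias of a heap pointer, uses it for ordinary loads and stores, and strips the tag before the address would be passed to one of the interfaces listed above, such as the userfaultfd UFFDIO_* ioctls.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TAG_SHIFT 56
#define TAG_MASK  (0xffull << TAG_SHIFT)

int main(void)
{
	uint64_t *p = malloc(sizeof(*p));
	if (!p)
		return 1;

	/* Forge a tagged alias; arm64 TBI ignores the top byte on access. */
	uint64_t *tagged = (uint64_t *)((uintptr_t)p | (0x56ull << TAG_SHIFT));
	*tagged = 42;

	/* Strip the tag before handing the address to brk()/mmap()/mremap()
	 * or to the userfaultfd UFFDIO_* ioctls. */
	uintptr_t untagged = (uintptr_t)tagged & ~TAG_MASK;

	printf("value=%llu untagged=%#llx\n",
	       (unsigned long long)*p, (unsigned long long)untagged);
	free(p);
	return 0;
}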

fs/fs-writeback.c

@@ -521,6 +521,9 @@ static bool inode_prepare_wbs_switch(struct inode *inode,
 	 */
 	smp_mb();
+	if (IS_DAX(inode))
+		return false;
 	/* while holding I_WB_SWITCH, no one else can update the association */
 	spin_lock(&inode->i_lock);
 	if (!(inode->i_sb->s_flags & SB_ACTIVE) ||

fs/hugetlbfs/inode.c

@@ -77,7 +77,7 @@ enum hugetlb_param {
 static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
 	fsparam_u32   ("gid",		Opt_gid),
 	fsparam_string("min_size",	Opt_min_size),
-	fsparam_u32   ("mode",		Opt_mode),
+	fsparam_u32oct("mode",		Opt_mode),
 	fsparam_string("nr_inodes",	Opt_nr_inodes),
 	fsparam_string("pagesize",	Opt_pagesize),
 	fsparam_string("size",		Opt_size),

fs/userfaultfd.c

@@ -1236,23 +1236,21 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
 }
 static __always_inline int validate_range(struct mm_struct *mm,
-					  __u64 *start, __u64 len)
+					  __u64 start, __u64 len)
 {
 	__u64 task_size = mm->task_size;
-	*start = untagged_addr(*start);
-	if (*start & ~PAGE_MASK)
+	if (start & ~PAGE_MASK)
 		return -EINVAL;
 	if (len & ~PAGE_MASK)
 		return -EINVAL;
 	if (!len)
 		return -EINVAL;
-	if (*start < mmap_min_addr)
+	if (start < mmap_min_addr)
 		return -EINVAL;
-	if (*start >= task_size)
+	if (start >= task_size)
 		return -EINVAL;
-	if (len > task_size - *start)
+	if (len > task_size - start)
 		return -EINVAL;
 	return 0;
 }
@@ -1316,7 +1314,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		vm_flags |= VM_UFFD_MINOR;
 	}
-	ret = validate_range(mm, &uffdio_register.range.start,
+	ret = validate_range(mm, uffdio_register.range.start,
 			     uffdio_register.range.len);
 	if (ret)
 		goto out;
@@ -1522,7 +1520,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 	if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
 		goto out;
-	ret = validate_range(mm, &uffdio_unregister.start,
+	ret = validate_range(mm, uffdio_unregister.start,
 			     uffdio_unregister.len);
 	if (ret)
 		goto out;
@@ -1671,7 +1669,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx,
 	if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
 		goto out;
-	ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
+	ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
 	if (ret)
 		goto out;
@@ -1711,7 +1709,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 			   sizeof(uffdio_copy)-sizeof(__s64)))
 		goto out;
-	ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
+	ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
 	if (ret)
 		goto out;
 	/*
@@ -1768,7 +1766,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 			   sizeof(uffdio_zeropage)-sizeof(__s64)))
 		goto out;
-	ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
+	ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
 			     uffdio_zeropage.range.len);
 	if (ret)
 		goto out;
@@ -1818,7 +1816,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 			   sizeof(struct uffdio_writeprotect)))
 		return -EFAULT;
-	ret = validate_range(ctx->mm, &uffdio_wp.range.start,
+	ret = validate_range(ctx->mm, uffdio_wp.range.start,
 			     uffdio_wp.range.len);
 	if (ret)
 		return ret;
@@ -1866,7 +1864,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
 			     sizeof(uffdio_continue) - (sizeof(__s64))))
 		goto out;
-	ret = validate_range(ctx->mm, &uffdio_continue.range.start,
+	ret = validate_range(ctx->mm, uffdio_continue.range.start,
 			     uffdio_continue.range.len);
 	if (ret)
 		goto out;
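
With the untagging removed from validate_range(), callers are expected to pass plain, page-aligned, untagged addresses in the uffdio structures; the arm64 tagged-address ABI document above states the same rule. A minimal userspace registration sketch (illustrative only; assumes a kernel with userfaultfd enabled and a caller permitted to use it, error handling reduced to perror):

#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);

	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0) { perror("userfaultfd"); return 1; }

	struct uffdio_api api = { .api = UFFD_API };
	if (ioctl(uffd, UFFDIO_API, &api)) { perror("UFFDIO_API"); return 1; }

	void *area = mmap(NULL, page, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (area == MAP_FAILED) { perror("mmap"); return 1; }

	/* range.start must be page aligned and, as of v5.14, untagged */
	struct uffdio_register reg = {
		.range = { .start = (unsigned long)area, .len = page },
		.mode  = UFFDIO_REGISTER_MODE_MISSING,
	};
	if (ioctl(uffd, UFFDIO_REGISTER, &reg)) {
		perror("UFFDIO_REGISTER");
		return 1;
	}
	printf("registered %ld bytes at %p\n", page, area);
	return 0;
}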

include/linux/highmem.h

@@ -318,14 +318,16 @@ static inline void memcpy_to_page(struct page *page, size_t offset,
 	VM_BUG_ON(offset + len > PAGE_SIZE);
 	memcpy(to + offset, from, len);
+	flush_dcache_page(page);
 	kunmap_local(to);
 }
 static inline void memzero_page(struct page *page, size_t offset, size_t len)
 {
-	char *addr = kmap_atomic(page);
+	char *addr = kmap_local_page(page);
 	memset(addr + offset, 0, len);
-	kunmap_atomic(addr);
+	flush_dcache_page(page);
+	kunmap_local(addr);
 }
 #endif /* _LINUX_HIGHMEM_H */
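
For context, a hypothetical kernel-side caller (sketch only, builds only inside the kernel tree against <linux/highmem.h>; the function name is made up): after the hunk above, memzero_page() and memcpy_to_page() handle the local mapping and the flush_dcache_page() call themselves, so a filesystem zeroing the tail of a partially written page needs no explicit cache maintenance of its own.

/* Sketch only: zero everything past 'valid' bytes in a pagecache page so
 * stale data never becomes visible. Assumes the caller owns 'page'. */
static void zero_page_tail(struct page *page, size_t valid)
{
	if (valid < PAGE_SIZE)
		memzero_page(page, valid, PAGE_SIZE - valid);
	/* memzero_page() maps with kmap_local_page(), memsets, calls
	 * flush_dcache_page() and unmaps with kunmap_local(). */
}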

include/linux/memblock.h

@@ -209,7 +209,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
  */
 #define for_each_mem_range(i, p_start, p_end) \
 	__for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,	\
-			     MEMBLOCK_NONE, p_start, p_end, NULL)
+			     MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
 /**
  * for_each_mem_range_rev - reverse iterate through memblock areas from
@@ -220,7 +220,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
  */
 #define for_each_mem_range_rev(i, p_start, p_end)		\
 	__for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \
-				 MEMBLOCK_NONE, p_start, p_end, NULL)
+				 MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
 /**
  * for_each_reserved_mem_range - iterate over all reserved memblock areas

mm/backing-dev.c

@@ -398,12 +398,12 @@ static void cgwb_release_workfn(struct work_struct *work)
 	blkcg_unpin_online(blkcg);
 	fprop_local_destroy_percpu(&wb->memcg_completions);
-	percpu_ref_exit(&wb->refcnt);
 	spin_lock_irq(&cgwb_lock);
 	list_del(&wb->offline_node);
 	spin_unlock_irq(&cgwb_lock);
+	percpu_ref_exit(&wb->refcnt);
 	wb_exit(wb);
 	WARN_ON_ONCE(!list_empty(&wb->b_attached));
 	kfree_rcu(wb, rcu);

mm/kfence/core.c

@@ -733,6 +733,22 @@ void kfence_shutdown_cache(struct kmem_cache *s)
 void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
 {
+	/*
+	 * Perform size check before switching kfence_allocation_gate, so that
+	 * we don't disable KFENCE without making an allocation.
+	 */
+	if (size > PAGE_SIZE)
+		return NULL;
+	/*
+	 * Skip allocations from non-default zones, including DMA. We cannot
+	 * guarantee that pages in the KFENCE pool will have the requested
+	 * properties (e.g. reside in DMAable memory).
+	 */
+	if ((flags & GFP_ZONEMASK) ||
+	    (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32)))
+		return NULL;
 	/*
 	 * allocation_gate only needs to become non-zero, so it doesn't make
 	 * sense to continue writing to it and pay the associated contention
@@ -757,9 +773,6 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
 	if (!READ_ONCE(kfence_enabled))
 		return NULL;
-	if (size > PAGE_SIZE)
-		return NULL;
 	return kfence_guarded_alloc(s, size, flags);
 }

mm/kfence/kfence_test.c

@@ -852,7 +852,7 @@ static void kfence_test_exit(void)
 	tracepoint_synchronize_unregister();
 }
-late_initcall(kfence_test_init);
+late_initcall_sync(kfence_test_init);
 module_exit(kfence_test_exit);
 MODULE_LICENSE("GPL v2");

mm/memblock.c

@@ -947,7 +947,8 @@ static bool should_skip_region(struct memblock_type *type,
 		return true;
 	/* skip hotpluggable memory regions if needed */
-	if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
+	if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
+	    !(flags & MEMBLOCK_HOTPLUG))
 		return true;
 	/* if we want mirror memory skip non-mirror memory regions */
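
Together with the include/linux/memblock.h hunk above, the visible effect is that a plain walk of system memory now also covers hotpluggable regions when movable_node is enabled. A kernel-internal sketch of such a walk (illustrative only, not part of the patch; the function name is made up):

/* Sketch only: log every usable memory range known to memblock. After this
 * change the iteration includes MEMBLOCK_HOTPLUG regions as well. */
static void __init dump_memory_ranges(void)
{
	phys_addr_t start, end;
	u64 i;

	for_each_mem_range(i, &start, &end)
		pr_info("memblock range: %pa - %pa\n", &start, &end);
}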

mm/memory.c

@@ -4026,8 +4026,17 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 				return ret;
 		}
-		if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
+		if (vmf->prealloc_pte) {
+			vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+			if (likely(pmd_none(*vmf->pmd))) {
+				mm_inc_nr_ptes(vma->vm_mm);
+				pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
+				vmf->prealloc_pte = NULL;
+			}
+			spin_unlock(vmf->ptl);
+		} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
 			return VM_FAULT_OOM;
+		}
 	}
 	/* See comment in handle_pte_fault() */

mm/mmap_lock.c

@@ -156,14 +156,14 @@ static inline void put_memcg_path_buf(void)
 #define TRACE_MMAP_LOCK_EVENT(type, mm, ...)				       \
 	do {								       \
 		const char *memcg_path;					       \
-		preempt_disable();					       \
+		local_lock(&memcg_paths.lock);				       \
 		memcg_path = get_mm_memcg_path(mm);			       \
 		trace_mmap_lock_##type(mm,				       \
 				       memcg_path != NULL ? memcg_path : "",   \
 				       ##__VA_ARGS__);			       \
 		if (likely(memcg_path != NULL))				       \
 			put_memcg_path_buf();				       \
-		preempt_enable();					       \
+		local_unlock(&memcg_paths.lock);			       \
 	} while (0)
 #else /* !CONFIG_MEMCG */
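
The replacement above swaps open-coded preempt_disable()/preempt_enable() for the local_lock API, which documents what is being protected and becomes a per-CPU spinlock on PREEMPT_RT instead of silently disabling preemption. A generic kernel-internal sketch of the pattern with hypothetical names (not from this file; needs <linux/local_lock.h>, <linux/percpu.h> and <linux/string.h>):

/* Sketch only: a per-CPU scratch buffer serialised by a local_lock. */
struct scratch_buf {
	local_lock_t lock;
	char buf[256];
};

static DEFINE_PER_CPU(struct scratch_buf, scratch_buf) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void fill_scratch(const char *msg)
{
	struct scratch_buf *sb;

	local_lock(&scratch_buf.lock);		/* protects this CPU's buffer */
	sb = this_cpu_ptr(&scratch_buf);
	strscpy(sb->buf, msg, sizeof(sb->buf));
	local_unlock(&scratch_buf.lock);
}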

mm/page_alloc.c

@@ -840,21 +840,24 @@
 	}
 #endif
-	if (_init_on_alloc_enabled_early) {
-		if (page_poisoning_requested)
-			pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
-				"will take precedence over init_on_alloc\n");
-		else
-			static_branch_enable(&init_on_alloc);
-	}
-	if (_init_on_free_enabled_early) {
-		if (page_poisoning_requested)
-			pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
-				"will take precedence over init_on_free\n");
-		else
-			static_branch_enable(&init_on_free);
+	if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) &&
+	    page_poisoning_requested) {
+		pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
+			"will take precedence over init_on_alloc and init_on_free\n");
+		_init_on_alloc_enabled_early = false;
+		_init_on_free_enabled_early = false;
 	}
+	if (_init_on_alloc_enabled_early)
+		static_branch_enable(&init_on_alloc);
+	else
+		static_branch_disable(&init_on_alloc);
+	if (_init_on_free_enabled_early)
+		static_branch_enable(&init_on_free);
+	else
+		static_branch_disable(&init_on_free);
 #ifdef CONFIG_DEBUG_PAGEALLOC
 	if (!debug_pagealloc_enabled())
 		return;
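
The rewritten logic above has two visible effects: page_poison=1 now takes precedence over both init_on_alloc and init_on_free, and the static keys are written in both directions, so CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y can no longer leave init_on_alloc enabled behind the poisoning message. A standalone sketch of that precedence (ordinary userspace C, illustrative only):

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the precedence decision made in init_mem_debugging_and_hardening(). */
static void resolve(bool poison, bool on_alloc, bool on_free)
{
	if ((on_alloc || on_free) && poison) {
		on_alloc = false;	/* poisoning wins */
		on_free = false;
	}
	printf("page_poison=%d -> init_on_alloc=%d init_on_free=%d\n",
	       poison, on_alloc, on_free);
}

int main(void)
{
	resolve(true, true, false);	/* page_poison=1 init_on_alloc=1 */
	resolve(true, false, true);	/* page_poison=1 init_on_free=1  */
	resolve(false, true, true);	/* no poisoning requested        */
	return 0;
}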

mm/secretmem.c

@@ -152,6 +152,7 @@ static void secretmem_freepage(struct page *page)
 }
 const struct address_space_operations secretmem_aops = {
+	.set_page_dirty	= __set_page_dirty_no_writeback,
 	.freepage	= secretmem_freepage,
 	.migratepage	= secretmem_migratepage,
 	.isolate_page	= secretmem_isolate_page,

tools/testing/selftests/vm/userfaultfd.c

@@ -210,8 +210,10 @@ static void anon_release_pages(char *rel_area)
 static void anon_allocate_area(void **alloc_area)
 {
-	if (posix_memalign(alloc_area, page_size, nr_pages * page_size))
-		err("posix_memalign() failed");
+	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+			   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (*alloc_area == MAP_FAILED)
+		err("mmap of anonymous memory failed");
 }
 static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)