diff --git a/.mailmap b/.mailmap index bf076bbc36b1..650689d00930 100644 --- a/.mailmap +++ b/.mailmap @@ -233,6 +233,7 @@ Jisheng Zhang Johan Hovold Johan Hovold John Crispin +John Keeping John Paul Adrian Glaubitz John Stultz diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile index 6f5e2727963c..78473d69cd2b 100644 --- a/arch/powerpc/purgatory/Makefile +++ b/arch/powerpc/purgatory/Makefile @@ -5,6 +5,11 @@ KCSAN_SANITIZE := n targets += trampoline_$(BITS).o purgatory.ro +# When profile-guided optimization is enabled, llvm emits two different +# overlapping text sections, which is not supported by kexec. Remove profile +# optimization flags. +KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) + LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined $(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index 5730797a6b40..bd2e27f82532 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -35,6 +35,11 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS CFLAGS_string.o := -D__DISABLE_EXPORTS CFLAGS_ctype.o := -D__DISABLE_EXPORTS +# When profile-guided optimization is enabled, llvm emits two different +# overlapping text sections, which is not supported by kexec. Remove profile +# optimization flags. +KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) + # When linking purgatory.ro with -r unresolved symbols are not checked, # also link a purgatory.chk binary without -r to check for unresolved symbols. PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 82fec66d46d2..42abd6af1198 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -14,6 +14,11 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE CFLAGS_sha256.o := -D__DISABLE_EXPORTS +# When profile-guided optimization is enabled, llvm emits two different +# overlapping text sections, which is not supported by kexec. Remove profile +# optimization flags. +KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) + # When linking purgatory.ro with -r unresolved symbols are not checked, # also link a purgatory.chk binary without -r to check for unresolved symbols. PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 980483455cc0..266d45c7685b 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1805,7 +1805,11 @@ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry, { int ret = default_wake_function(wq_entry, mode, sync, key); - list_del_init(&wq_entry->entry); + /* + * Pairs with list_empty_careful in ep_poll, and ensures future loop + * iterations see the cause of this wakeup. + */ + list_del_init_careful(&wq_entry->entry); return ret; } diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index e956f886a1a1..5710833ac1cc 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -285,6 +285,14 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc, if (nbh == NULL) { /* blocksize == pagesize */ xa_erase_irq(&btnc->i_pages, newkey); unlock_page(ctxt->bh->b_page); - } else - brelse(nbh); + } else { + /* + * When canceling a buffer that a prepare operation has + * allocated to copy a node block to another location, use + * nilfs_btnode_delete() to initialize and release the buffer + * so that the buffer flags will not be in an inconsistent + * state when it is reallocated. + */ + nilfs_btnode_delete(nbh); + } } diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index dc359b56fdfa..2c6078a6b8ec 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -779,6 +779,15 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) goto out_header; sui->ncleansegs -= nsegs - newnsegs; + + /* + * If the sufile is successfully truncated, immediately adjust + * the segment allocation space while locking the semaphore + * "mi_sem" so that nilfs_sufile_alloc() never allocates + * segments in the truncated space. + */ + sui->allocmax = newnsegs - 1; + sui->allocmin = 0; } kaddr = kmap_atomic(header_bh->b_page); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 2894152a6b25..0f0667957c81 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -405,6 +405,18 @@ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs) 100)); } +/** + * nilfs_max_segment_count - calculate the maximum number of segments + * @nilfs: nilfs object + */ +static u64 nilfs_max_segment_count(struct the_nilfs *nilfs) +{ + u64 max_count = U64_MAX; + + do_div(max_count, nilfs->ns_blocks_per_segment); + return min_t(u64, max_count, ULONG_MAX); +} + void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs) { nilfs->ns_nsegments = nsegs; @@ -414,6 +426,8 @@ void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs) static int nilfs_store_disk_layout(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) { + u64 nsegments, nblocks; + if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) { nilfs_err(nilfs->ns_sb, "unsupported revision (superblock rev.=%d.%d, current rev.=%d.%d). Please check the version of mkfs.nilfs(2).", @@ -457,7 +471,34 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, return -EINVAL; } - nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments)); + nsegments = le64_to_cpu(sbp->s_nsegments); + if (nsegments > nilfs_max_segment_count(nilfs)) { + nilfs_err(nilfs->ns_sb, + "segment count %llu exceeds upper limit (%llu segments)", + (unsigned long long)nsegments, + (unsigned long long)nilfs_max_segment_count(nilfs)); + return -EINVAL; + } + + nblocks = sb_bdev_nr_blocks(nilfs->ns_sb); + if (nblocks) { + u64 min_block_count = nsegments * nilfs->ns_blocks_per_segment; + /* + * To avoid failing to mount early device images without a + * second superblock, exclude that block count from the + * "min_block_count" calculation. + */ + + if (nblocks < min_block_count) { + nilfs_err(nilfs->ns_sb, + "total number of segment blocks %llu exceeds device size (%llu blocks)", + (unsigned long long)min_block_count, + (unsigned long long)nblocks); + return -EINVAL; + } + } + + nilfs_set_nsegments(nilfs, nsegments); nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); return 0; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index efb09de4343d..b173c36bcab3 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2100,14 +2100,20 @@ static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, struct ocfs2_space_resv sr; int change_size = 1; int cmd = OCFS2_IOC_RESVSP64; + int ret = 0; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; if (!ocfs2_writes_unwritten_extents(osb)) return -EOPNOTSUPP; - if (mode & FALLOC_FL_KEEP_SIZE) + if (mode & FALLOC_FL_KEEP_SIZE) { change_size = 0; + } else { + ret = inode_newsize_ok(inode, offset + len); + if (ret) + return ret; + } if (mode & FALLOC_FL_PUNCH_HOLE) cmd = OCFS2_IOC_UNRESVSP64; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 0b0e6a132101..988d1c076861 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -952,8 +952,10 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (!sb_has_quota_loaded(sb, type)) continue; - oinfo = sb_dqinfo(sb, type)->dqi_priv; - cancel_delayed_work_sync(&oinfo->dqi_sync_work); + if (!sb_has_quota_suspended(sb, type)) { + oinfo = sb_dqinfo(sb, type)->dqi_priv; + cancel_delayed_work_sync(&oinfo->dqi_sync_work); + } inode = igrab(sb->s_dquot.files[type]); /* Turn off quotas. This will remove all dquot structures from * memory and so they will be automatically synced to global diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 0fd96d6e39ce..4e800bb7d2ab 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1332,6 +1332,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, bool basic_ioctls; unsigned long start, end, vma_end; struct vma_iterator vmi; + pgoff_t pgoff; user_uffdio_register = (struct uffdio_register __user *) arg; @@ -1459,6 +1460,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vma_iter_set(&vmi, start); prev = vma_prev(&vmi); + if (vma->vm_start < start) + prev = vma; ret = 0; for_each_vma_range(vmi, vma, end) { @@ -1482,8 +1485,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vma_end = min(end, vma->vm_end); new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags; + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags, - vma->anon_vma, vma->vm_file, vma->vm_pgoff, + vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), ((struct vm_userfaultfd_ctx){ ctx }), anon_vma_name(vma)); @@ -1563,6 +1567,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, unsigned long start, end, vma_end; const void __user *buf = (void __user *)arg; struct vma_iterator vmi; + pgoff_t pgoff; ret = -EFAULT; if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) @@ -1625,6 +1630,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, vma_iter_set(&vmi, start); prev = vma_prev(&vmi); + if (vma->vm_start < start) + prev = vma; + ret = 0; for_each_vma_range(vmi, vma, end) { cond_resched(); @@ -1662,8 +1670,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, uffd_wp_range(vma, start, vma_end - start, false); new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS; + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags, - vma->anon_vma, vma->vm_file, vma->vm_pgoff, + vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), NULL_VM_UFFD_CTX, anon_vma_name(vma)); if (prev) { diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index f989f5f1933b..69ee4a29136f 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -901,10 +901,22 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, } offset = ALIGN(offset, align); + + /* + * Check if the segment contains the entry point, if so, + * calculate the value of image->start based on it. + * If the compiler has produced more than one .text section + * (Eg: .text.hot), they are generally after the main .text + * section, and they shall not be used to calculate + * image->start. So do not re-calculate image->start if it + * is not set to the initial value, and warn the user so they + * have a chance to fix their purgatory's linker script. + */ if (sechdrs[i].sh_flags & SHF_EXECINSTR && pi->ehdr->e_entry >= sechdrs[i].sh_addr && pi->ehdr->e_entry < (sechdrs[i].sh_addr - + sechdrs[i].sh_size)) { + + sechdrs[i].sh_size) && + !WARN_ON(kbuf->image->start != pi->ehdr->e_entry)) { kbuf->image->start -= sechdrs[i].sh_addr; kbuf->image->start += kbuf->mem + offset; } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 049ba132f7ef..1a31065b2036 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -27,6 +27,8 @@ #include #include +#include "radix-tree.h" + /* * Radix tree node cache. */ diff --git a/lib/radix-tree.h b/lib/radix-tree.h new file mode 100644 index 000000000000..40d5c03e2b09 --- /dev/null +++ b/lib/radix-tree.h @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* radix-tree helpers that are only shared with xarray */ + +struct kmem_cache; +struct rcu_head; + +extern struct kmem_cache *radix_tree_node_cachep; +extern void radix_tree_node_rcu_free(struct rcu_head *head); diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 9dd9745d365f..3718d9886407 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -369,7 +369,7 @@ vm_map_ram_test(void) int i; map_nr_pages = nr_pages > 0 ? nr_pages:1; - pages = kmalloc(map_nr_pages * sizeof(struct page), GFP_KERNEL); + pages = kcalloc(map_nr_pages, sizeof(struct page *), GFP_KERNEL); if (!pages) return -1; diff --git a/lib/xarray.c b/lib/xarray.c index ea9ce1f0b386..2071a3718f4e 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -12,6 +12,8 @@ #include #include +#include "radix-tree.h" + /* * Coding conventions in this file: * @@ -247,10 +249,6 @@ void *xas_load(struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_load); -/* Move the radix tree node cache here */ -extern struct kmem_cache *radix_tree_node_cachep; -extern void radix_tree_node_rcu_free(struct rcu_head *head); - #define XA_RCU_FREE ((struct xarray *)1) static void xa_node_free(struct xa_node *node) diff --git a/mm/damon/core.c b/mm/damon/core.c index d9ef62047bf5..91cff7f2997e 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -551,6 +551,8 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) return -EINVAL; if (attrs->min_nr_regions > attrs->max_nr_regions) return -EINVAL; + if (attrs->sample_interval > attrs->aggr_interval) + return -EINVAL; damon_update_monitoring_results(ctx, attrs); ctx->attrs = *attrs; diff --git a/mm/filemap.c b/mm/filemap.c index b4c9bd368b7e..83dda76d1fc3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1760,7 +1760,9 @@ bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, * * Return: The index of the gap if found, otherwise an index outside the * range specified (in which case 'return - index >= max_scan' will be true). - * In the rare case of index wrap-around, 0 will be returned. + * In the rare case of index wrap-around, 0 will be returned. 0 will also + * be returned if index == 0 and there is a gap at the index. We can not + * wrap-around if passed index == 0. */ pgoff_t page_cache_next_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan) @@ -1770,12 +1772,13 @@ pgoff_t page_cache_next_miss(struct address_space *mapping, while (max_scan--) { void *entry = xas_next(&xas); if (!entry || xa_is_value(entry)) - break; - if (xas.xa_index == 0) - break; + return xas.xa_index; + if (xas.xa_index == 0 && index != 0) + return xas.xa_index; } - return xas.xa_index; + /* No gaps in range and no wrap-around, return index beyond range */ + return xas.xa_index + 1; } EXPORT_SYMBOL(page_cache_next_miss); @@ -1796,7 +1799,9 @@ EXPORT_SYMBOL(page_cache_next_miss); * * Return: The index of the gap if found, otherwise an index outside the * range specified (in which case 'index - return >= max_scan' will be true). - * In the rare case of wrap-around, ULONG_MAX will be returned. + * In the rare case of wrap-around, ULONG_MAX will be returned. ULONG_MAX + * will also be returned if index == ULONG_MAX and there is a gap at the + * index. We can not wrap-around if passed index == ULONG_MAX. */ pgoff_t page_cache_prev_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan) @@ -1806,12 +1811,13 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, while (max_scan--) { void *entry = xas_prev(&xas); if (!entry || xa_is_value(entry)) - break; - if (xas.xa_index == ULONG_MAX) - break; + return xas.xa_index; + if (xas.xa_index == ULONG_MAX && index != ULONG_MAX) + return xas.xa_index; } - return xas.xa_index; + /* No gaps in range and no wrap-around, return index beyond range */ + return xas.xa_index - 1; } EXPORT_SYMBOL(page_cache_prev_miss); diff --git a/mm/gup_test.c b/mm/gup_test.c index 8ae7307a1bb6..c0421b786dcd 100644 --- a/mm/gup_test.c +++ b/mm/gup_test.c @@ -381,6 +381,7 @@ static int gup_test_release(struct inode *inode, struct file *file) static const struct file_operations gup_test_fops = { .open = nonseekable_open, .unlocked_ioctl = gup_test_ioctl, + .compat_ioctl = compat_ptr_ioctl, .release = gup_test_release, }; diff --git a/mm/zswap.c b/mm/zswap.c index 59da2a415fbb..30092d9a3b23 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1174,9 +1174,16 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, goto reject; } + /* + * XXX: zswap reclaim does not work with cgroups yet. Without a + * cgroup-aware entry LRU, we will push out entries system-wide based on + * local cgroup limits. + */ objcg = get_obj_cgroup_from_page(page); - if (objcg && !obj_cgroup_may_zswap(objcg)) - goto shrink; + if (objcg && !obj_cgroup_may_zswap(objcg)) { + ret = -ENOMEM; + goto reject; + } /* reclaim space if needed */ if (zswap_is_full()) { diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index caf32a9b9608..7527f738b4a1 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \ - -fsanitize=undefined +CFLAGS += -I. -I../../include -I../../../lib -g -Og -Wall \ + -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined LDFLAGS += -fsanitize=address -fsanitize=undefined LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder xarray maple @@ -49,6 +49,7 @@ $(OFILES): Makefile *.h */*.h generated/map-shift.h generated/bit-length.h \ ../../../include/linux/xarray.h \ ../../../include/linux/maple_tree.h \ ../../../include/linux/radix-tree.h \ + ../../../lib/radix-tree.h \ ../../../include/linux/idr.h radix-tree.c: ../../../lib/radix-tree.c