Fourteen hotfixes, eleven of which are cc:stable. The remainder pertain

to issues which were introduced after 6.5.
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCZRmSDAAKCRDdBJ7gKXxA
 jlSaAQCe3SnBdjRmuzbp5iIfNJOY7GXLN4NwMsArRUxRGY27IwD+KWhXZP/ydVnt
 ZgS4x9rmarHuh5Pxds+6SRGhihRz/Ak=
 =sf/5
 -----END PGP SIGNATURE-----

Merge tag 'mm-hotfixes-stable-2023-10-01-08-34' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "Fourteen hotfixes, eleven of which are cc:stable. The remainder
  pertain to issues which were introduced after 6.5"

* tag 'mm-hotfixes-stable-2023-10-01-08-34' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  Crash: add lock to serialize crash hotplug handling
  selftests/mm: fix awk usage in charge_reserved_hugetlb.sh and hugetlb_reparenting_test.sh that may cause error
  mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified
  mm/damon/vaddr-test: fix memory leak in damon_do_test_apply_three_regions()
  mm, memcg: reconsider kmem.limit_in_bytes deprecation
  mm: zswap: fix potential memory corruption on duplicate store
  arm64: hugetlb: fix set_huge_pte_at() to work with all swap entries
  mm: hugetlb: add huge page size param to set_huge_pte_at()
  maple_tree: add MAS_UNDERFLOW and MAS_OVERFLOW states
  maple_tree: add mas_is_active() to detect in-tree walks
  nilfs2: fix potential use after free in nilfs_gccache_submit_read_data()
  mm: abstract moving to the next PFN
  mm: report success more often from filemap_map_folio_range()
  fs: binfmt_elf_efpic: fix personality for ELF-FDPIC
This commit is contained in:
Linus Torvalds 2023-10-01 13:33:25 -07:00
commit d2c5231581
38 changed files with 458 additions and 172 deletions

View File

@ -92,6 +92,13 @@ Brief summary of control files.
memory.oom_control set/show oom controls.
memory.numa_stat show the number of memory usage per numa
node
memory.kmem.limit_in_bytes Deprecated knob to set and read the kernel
memory hard limit. Kernel hard limit is not
supported since 5.16. Writing any value to
do file will not have any effect same as if
nokmem kernel parameter was specified.
Kernel memory is still charged and reported
by memory.kmem.usage_in_bytes.
memory.kmem.usage_in_bytes show current kernel memory allocation
memory.kmem.failcnt show the number of kernel memory usage
hits limits

View File

@ -28,7 +28,7 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
#define arch_make_huge_pte arch_make_huge_pte
#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
pte_t *ptep, pte_t pte, unsigned long sz);
#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,

View File

@ -241,15 +241,8 @@ static void clear_flush(struct mm_struct *mm,
flush_tlb_range(&vma, saddr, addr);
}
static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
{
VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
return page_folio(pfn_to_page(swp_offset_pfn(entry)));
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
pte_t *ptep, pte_t pte, unsigned long sz)
{
size_t pgsize;
int i;
@ -257,13 +250,10 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
unsigned long pfn, dpfn;
pgprot_t hugeprot;
ncontig = num_contig_ptes(sz, &pgsize);
if (!pte_present(pte)) {
struct folio *folio;
folio = hugetlb_swap_entry_to_folio(pte_to_swp_entry(pte));
ncontig = num_contig_ptes(folio_size(folio), &pgsize);
for (i = 0; i < ncontig; i++, ptep++)
for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
set_pte_at(mm, addr, ptep, pte);
return;
}
@ -273,7 +263,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
return;
}
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
pfn = pte_pfn(pte);
dpfn = pgsize >> PAGE_SHIFT;
hugeprot = pte_pgprot(pte);
@ -571,5 +560,7 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr
void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
pte_t old_pte, pte_t pte)
{
set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
unsigned long psize = huge_page_size(hstate_vma(vma));
set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
}

View File

@ -6,7 +6,7 @@
#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
pte_t *ptep, pte_t pte, unsigned long sz);
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,

View File

@ -140,7 +140,7 @@ static void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry)
pte_t *ptep, pte_t entry, unsigned long sz)
{
__set_huge_pte_at(mm, addr, ptep, entry);
}

View File

@ -46,7 +46,8 @@ static inline int check_and_get_huge_psize(int shift)
}
#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte);
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
pte_t pte, unsigned long sz);
#define __HAVE_ARCH_HUGE_PTE_CLEAR
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,

View File

@ -143,11 +143,14 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep, pte_t old_pte, pte_t pte)
{
unsigned long psize;
if (radix_enabled())
return radix__huge_ptep_modify_prot_commit(vma, addr, ptep,
old_pte, pte);
set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
psize = huge_page_size(hstate_vma(vma));
set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
}
void __init hugetlbpage_init_defaultsize(void)

View File

@ -47,6 +47,7 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
pte_t old_pte, pte_t pte)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long psize = huge_page_size(hstate_vma(vma));
/*
* POWER9 NMMU must flush the TLB after clearing the PTE before
@ -58,5 +59,5 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
atomic_read(&mm->context.copros) > 0)
radix__flush_hugetlb_page(vma, addr);
set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
}

View File

@ -91,7 +91,8 @@ static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa,
if (new && WARN_ON(pte_present(*ptep) && pgprot_val(prot)))
return -EINVAL;
set_huge_pte_at(&init_mm, va, ptep, pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)));
set_huge_pte_at(&init_mm, va, ptep,
pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)), psize);
return 0;
}

View File

@ -288,7 +288,8 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
}
#if defined(CONFIG_PPC_8xx)
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
pte_t pte, unsigned long sz)
{
pmd_t *pmd = pmd_off(mm, addr);
pte_basic_t val;

View File

@ -18,7 +18,8 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
void set_huge_pte_at(struct mm_struct *mm,
unsigned long addr, pte_t *ptep, pte_t pte);
unsigned long addr, pte_t *ptep, pte_t pte,
unsigned long sz);
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,

View File

@ -180,7 +180,8 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
void set_huge_pte_at(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep,
pte_t pte)
pte_t pte,
unsigned long sz)
{
int i, pte_num;

View File

@ -16,6 +16,8 @@
#define hugepages_supported() (MACHINE_HAS_EDAT1)
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned long sz);
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
pte_t huge_ptep_get(pte_t *ptep);
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
@ -65,7 +67,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
int changed = !pte_same(huge_ptep_get(ptep), pte);
if (changed) {
huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
__set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
return changed;
}
@ -74,7 +76,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
__set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
}
static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)

View File

@ -142,7 +142,7 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
__storage_key_init_range(paddr, paddr + size - 1);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
unsigned long rste;
@ -163,6 +163,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
set_pte(ptep, __pte(rste));
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned long sz)
{
__set_huge_pte_at(mm, addr, ptep, pte);
}
pte_t huge_ptep_get(pte_t *ptep)
{
return __rste_to_pte(pte_val(*ptep));

View File

@ -14,6 +14,8 @@ extern struct pud_huge_patch_entry __pud_huge_patch, __pud_huge_patch_end;
#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned long sz);
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
@ -32,7 +34,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
pte_t old_pte = *ptep;
set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
__set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
}
#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
@ -42,7 +44,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
{
int changed = !pte_same(*ptep, pte);
if (changed) {
set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
__set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
flush_tlb_page(vma, addr);
}
return changed;

View File

@ -328,7 +328,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return pte_offset_huge(pmd, addr);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry)
{
unsigned int nptes, orig_shift, shift;
@ -364,6 +364,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
orig_shift);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry, unsigned long sz)
{
__set_huge_pte_at(mm, addr, ptep, entry);
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{

View File

@ -955,6 +955,14 @@ static inline int pte_same(pte_t a, pte_t b)
return a.pte == b.pte;
}
static inline pte_t pte_next_pfn(pte_t pte)
{
if (__pte_needs_invert(pte_val(pte)))
return __pte(pte_val(pte) - (1UL << PFN_PTE_SHIFT));
return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
}
#define pte_next_pfn pte_next_pfn
static inline int pte_present(pte_t a)
{
return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);

View File

@ -345,10 +345,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
/* there's now no turning back... the old userspace image is dead,
* defunct, deceased, etc.
*/
SET_PERSONALITY(exec_params.hdr);
if (elf_check_fdpic(&exec_params.hdr))
set_personality(PER_LINUX_FDPIC);
else
set_personality(PER_LINUX);
current->personality |= PER_LINUX_FDPIC;
if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
current->personality |= READ_IMPLIES_EXEC;

View File

@ -73,10 +73,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
brelse(bh);
if (unlikely(err)) /* -EIO, -ENOMEM, -ENOENT */
goto failed;
}
}
lock_buffer(bh);
@ -102,6 +100,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
failed:
unlock_page(bh->b_page);
put_page(bh->b_page);
if (unlikely(err))
brelse(bh);
return err;
}

View File

@ -76,7 +76,7 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
#ifndef __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
pte_t *ptep, pte_t pte, unsigned long sz)
{
set_pte_at(mm, addr, ptep, pte);
}

View File

@ -984,7 +984,9 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t old_pte, pte_t pte)
{
set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
unsigned long psize = huge_page_size(hstate_vma(vma));
set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
}
#endif
@ -1173,7 +1175,7 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
}
static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
pte_t *ptep, pte_t pte, unsigned long sz)
{
}

View File

@ -428,6 +428,8 @@ struct ma_wr_state {
#define MAS_ROOT ((struct maple_enode *)5UL)
#define MAS_NONE ((struct maple_enode *)9UL)
#define MAS_PAUSE ((struct maple_enode *)17UL)
#define MAS_OVERFLOW ((struct maple_enode *)33UL)
#define MAS_UNDERFLOW ((struct maple_enode *)65UL)
#define MA_ERROR(err) \
((struct maple_enode *)(((unsigned long)err << 2) | 2UL))
@ -511,6 +513,15 @@ static inline bool mas_is_paused(const struct ma_state *mas)
return mas->node == MAS_PAUSE;
}
/* Check if the mas is pointing to a node or not */
static inline bool mas_is_active(struct ma_state *mas)
{
if ((unsigned long)mas->node >= MAPLE_RESERVED_RANGE)
return true;
return false;
}
/**
* mas_reset() - Reset a Maple Tree operation state.
* @mas: Maple Tree operation state.

View File

@ -206,6 +206,14 @@ static inline int pmd_young(pmd_t pmd)
#endif
#ifndef set_ptes
#ifndef pte_next_pfn
static inline pte_t pte_next_pfn(pte_t pte)
{
return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
}
#endif
/**
* set_ptes - Map consecutive pages to a contiguous range of addresses.
* @mm: Address space to map the pages into.
@ -231,7 +239,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
if (--nr == 0)
break;
ptep++;
pte = __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
pte = pte_next_pfn(pte);
}
arch_leave_lazy_mmu_mode();
}

View File

@ -739,6 +739,17 @@ subsys_initcall(crash_notes_memory_init);
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt
/*
* Different than kexec/kdump loading/unloading/jumping/shrinking which
* usually rarely happen, there will be many crash hotplug events notified
* during one short period, e.g one memory board is hot added and memory
* regions are online. So mutex lock __crash_hotplug_lock is used to
* serialize the crash hotplug handling specifically.
*/
DEFINE_MUTEX(__crash_hotplug_lock);
#define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock)
#define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock)
/*
* This routine utilized when the crash_hotplug sysfs node is read.
* It reflects the kernel's ability/permission to update the crash
@ -748,9 +759,11 @@ int crash_check_update_elfcorehdr(void)
{
int rc = 0;
crash_hotplug_lock();
/* Obtain lock while reading crash information */
if (!kexec_trylock()) {
pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
crash_hotplug_unlock();
return 0;
}
if (kexec_crash_image) {
@ -761,6 +774,7 @@ int crash_check_update_elfcorehdr(void)
}
/* Release lock now that update complete */
kexec_unlock();
crash_hotplug_unlock();
return rc;
}
@ -783,9 +797,11 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
{
struct kimage *image;
crash_hotplug_lock();
/* Obtain lock while changing crash information */
if (!kexec_trylock()) {
pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
crash_hotplug_unlock();
return;
}
@ -852,6 +868,7 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
out:
/* Release lock now that update complete */
kexec_unlock();
crash_hotplug_unlock();
}
static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)

View File

@ -256,6 +256,22 @@ bool mas_is_err(struct ma_state *mas)
return xa_is_err(mas->node);
}
static __always_inline bool mas_is_overflow(struct ma_state *mas)
{
if (unlikely(mas->node == MAS_OVERFLOW))
return true;
return false;
}
static __always_inline bool mas_is_underflow(struct ma_state *mas)
{
if (unlikely(mas->node == MAS_UNDERFLOW))
return true;
return false;
}
static inline bool mas_searchable(struct ma_state *mas)
{
if (mas_is_none(mas))
@ -4415,10 +4431,13 @@ no_entry:
*
* @mas: The maple state
* @max: The minimum starting range
* @empty: Can be empty
* @set_underflow: Set the @mas->node to underflow state on limit.
*
* Return: The entry in the previous slot which is possibly NULL
*/
static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty)
static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty,
bool set_underflow)
{
void *entry;
void __rcu **slots;
@ -4435,7 +4454,6 @@ retry:
if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
goto retry;
again:
if (mas->min <= min) {
pivot = mas_safe_min(mas, pivots, mas->offset);
@ -4443,9 +4461,10 @@ again:
goto retry;
if (pivot <= min)
return NULL;
goto underflow;
}
again:
if (likely(mas->offset)) {
mas->offset--;
mas->last = mas->index - 1;
@ -4457,7 +4476,7 @@ again:
}
if (mas_is_none(mas))
return NULL;
goto underflow;
mas->last = mas->max;
node = mas_mn(mas);
@ -4474,10 +4493,19 @@ again:
if (likely(entry))
return entry;
if (!empty)
if (!empty) {
if (mas->index <= min)
goto underflow;
goto again;
}
return entry;
underflow:
if (set_underflow)
mas->node = MAS_UNDERFLOW;
return NULL;
}
/*
@ -4567,10 +4595,13 @@ no_entry:
* @mas: The maple state
* @max: The maximum starting range
* @empty: Can be empty
* @set_overflow: Should @mas->node be set to overflow when the limit is
* reached.
*
* Return: The entry in the next slot which is possibly NULL
*/
static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty)
static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty,
bool set_overflow)
{
void __rcu **slots;
unsigned long *pivots;
@ -4589,22 +4620,22 @@ retry:
if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
goto retry;
again:
if (mas->max >= max) {
if (likely(mas->offset < data_end))
pivot = pivots[mas->offset];
else
return NULL; /* must be mas->max */
goto overflow;
if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
goto retry;
if (pivot >= max)
return NULL;
goto overflow;
}
if (likely(mas->offset < data_end)) {
mas->index = pivots[mas->offset] + 1;
again:
mas->offset++;
if (likely(mas->offset < data_end))
mas->last = pivots[mas->offset];
@ -4616,8 +4647,11 @@ again:
goto retry;
}
if (mas_is_none(mas))
if (WARN_ON_ONCE(mas_is_none(mas))) {
mas->node = MAS_OVERFLOW;
return NULL;
goto overflow;
}
mas->offset = 0;
mas->index = mas->min;
@ -4636,12 +4670,20 @@ again:
return entry;
if (!empty) {
if (!mas->offset)
data_end = 2;
if (mas->last >= max)
goto overflow;
mas->index = mas->last + 1;
/* Node cannot end on NULL, so it's safe to short-cut here */
goto again;
}
return entry;
overflow:
if (set_overflow)
mas->node = MAS_OVERFLOW;
return NULL;
}
/*
@ -4651,17 +4693,20 @@ again:
*
* Set the @mas->node to the next entry and the range_start to
* the beginning value for the entry. Does not check beyond @limit.
* Sets @mas->index and @mas->last to the limit if it is hit.
* Sets @mas->index and @mas->last to the range, Does not update @mas->index and
* @mas->last on overflow.
* Restarts on dead nodes.
*
* Return: the next entry or %NULL.
*/
static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit)
{
if (mas->last >= limit)
if (mas->last >= limit) {
mas->node = MAS_OVERFLOW;
return NULL;
}
return mas_next_slot(mas, limit, false);
return mas_next_slot(mas, limit, false, true);
}
/*
@ -4837,7 +4882,7 @@ void *mas_walk(struct ma_state *mas)
{
void *entry;
if (mas_is_none(mas) || mas_is_paused(mas) || mas_is_ptr(mas))
if (!mas_is_active(mas) || !mas_is_start(mas))
mas->node = MAS_START;
retry:
entry = mas_state_walk(mas);
@ -5294,14 +5339,22 @@ static inline void mte_destroy_walk(struct maple_enode *enode,
static void mas_wr_store_setup(struct ma_wr_state *wr_mas)
{
if (mas_is_start(wr_mas->mas))
return;
if (!mas_is_active(wr_mas->mas)) {
if (mas_is_start(wr_mas->mas))
return;
if (unlikely(mas_is_paused(wr_mas->mas)))
goto reset;
if (unlikely(mas_is_paused(wr_mas->mas)))
goto reset;
if (unlikely(mas_is_none(wr_mas->mas)))
goto reset;
if (unlikely(mas_is_none(wr_mas->mas)))
goto reset;
if (unlikely(mas_is_overflow(wr_mas->mas)))
goto reset;
if (unlikely(mas_is_underflow(wr_mas->mas)))
goto reset;
}
/*
* A less strict version of mas_is_span_wr() where we allow spanning
@ -5595,8 +5648,25 @@ static inline bool mas_next_setup(struct ma_state *mas, unsigned long max,
{
bool was_none = mas_is_none(mas);
if (mas_is_none(mas) || mas_is_paused(mas))
if (unlikely(mas->last >= max)) {
mas->node = MAS_OVERFLOW;
return true;
}
if (mas_is_active(mas))
return false;
if (mas_is_none(mas) || mas_is_paused(mas)) {
mas->node = MAS_START;
} else if (mas_is_overflow(mas)) {
/* Overflowed before, but the max changed */
mas->node = MAS_START;
} else if (mas_is_underflow(mas)) {
mas->node = MAS_START;
*entry = mas_walk(mas);
if (*entry)
return true;
}
if (mas_is_start(mas))
*entry = mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
@ -5615,6 +5685,7 @@ static inline bool mas_next_setup(struct ma_state *mas, unsigned long max,
if (mas_is_none(mas))
return true;
return false;
}
@ -5637,7 +5708,7 @@ void *mas_next(struct ma_state *mas, unsigned long max)
return entry;
/* Retries on dead nodes handled by mas_next_slot */
return mas_next_slot(mas, max, false);
return mas_next_slot(mas, max, false, true);
}
EXPORT_SYMBOL_GPL(mas_next);
@ -5660,7 +5731,7 @@ void *mas_next_range(struct ma_state *mas, unsigned long max)
return entry;
/* Retries on dead nodes handled by mas_next_slot */
return mas_next_slot(mas, max, true);
return mas_next_slot(mas, max, true, true);
}
EXPORT_SYMBOL_GPL(mas_next_range);
@ -5691,18 +5762,31 @@ EXPORT_SYMBOL_GPL(mt_next);
static inline bool mas_prev_setup(struct ma_state *mas, unsigned long min,
void **entry)
{
if (mas->index <= min)
goto none;
if (mas_is_none(mas) || mas_is_paused(mas))
mas->node = MAS_START;
if (mas_is_start(mas)) {
mas_walk(mas);
if (!mas->index)
goto none;
if (unlikely(mas->index <= min)) {
mas->node = MAS_UNDERFLOW;
return true;
}
if (mas_is_active(mas))
return false;
if (mas_is_overflow(mas)) {
mas->node = MAS_START;
*entry = mas_walk(mas);
if (*entry)
return true;
}
if (mas_is_none(mas) || mas_is_paused(mas)) {
mas->node = MAS_START;
} else if (mas_is_underflow(mas)) {
/* underflowed before but the min changed */
mas->node = MAS_START;
}
if (mas_is_start(mas))
mas_walk(mas);
if (unlikely(mas_is_ptr(mas))) {
if (!mas->index)
goto none;
@ -5747,7 +5831,7 @@ void *mas_prev(struct ma_state *mas, unsigned long min)
if (mas_prev_setup(mas, min, &entry))
return entry;
return mas_prev_slot(mas, min, false);
return mas_prev_slot(mas, min, false, true);
}
EXPORT_SYMBOL_GPL(mas_prev);
@ -5770,7 +5854,7 @@ void *mas_prev_range(struct ma_state *mas, unsigned long min)
if (mas_prev_setup(mas, min, &entry))
return entry;
return mas_prev_slot(mas, min, true);
return mas_prev_slot(mas, min, true, true);
}
EXPORT_SYMBOL_GPL(mas_prev_range);
@ -5828,24 +5912,35 @@ EXPORT_SYMBOL_GPL(mas_pause);
static inline bool mas_find_setup(struct ma_state *mas, unsigned long max,
void **entry)
{
*entry = NULL;
if (mas_is_active(mas)) {
if (mas->last < max)
return false;
if (unlikely(mas_is_none(mas))) {
return true;
}
if (mas_is_paused(mas)) {
if (unlikely(mas->last >= max))
return true;
mas->index = ++mas->last;
mas->node = MAS_START;
} else if (mas_is_none(mas)) {
if (unlikely(mas->last >= max))
return true;
mas->index = mas->last;
mas->node = MAS_START;
} else if (unlikely(mas_is_paused(mas))) {
if (unlikely(mas->last >= max))
} else if (mas_is_overflow(mas) || mas_is_underflow(mas)) {
if (mas->index > max) {
mas->node = MAS_OVERFLOW;
return true;
}
mas->node = MAS_START;
mas->index = ++mas->last;
} else if (unlikely(mas_is_ptr(mas)))
goto ptr_out_of_range;
}
if (unlikely(mas_is_start(mas))) {
if (mas_is_start(mas)) {
/* First run or continue */
if (mas->index > max)
return true;
@ -5895,7 +5990,7 @@ void *mas_find(struct ma_state *mas, unsigned long max)
return entry;
/* Retries on dead nodes handled by mas_next_slot */
return mas_next_slot(mas, max, false);
return mas_next_slot(mas, max, false, false);
}
EXPORT_SYMBOL_GPL(mas_find);
@ -5913,13 +6008,13 @@ EXPORT_SYMBOL_GPL(mas_find);
*/
void *mas_find_range(struct ma_state *mas, unsigned long max)
{
void *entry;
void *entry = NULL;
if (mas_find_setup(mas, max, &entry))
return entry;
/* Retries on dead nodes handled by mas_next_slot */
return mas_next_slot(mas, max, true);
return mas_next_slot(mas, max, true, false);
}
EXPORT_SYMBOL_GPL(mas_find_range);
@ -5934,26 +6029,36 @@ EXPORT_SYMBOL_GPL(mas_find_range);
static inline bool mas_find_rev_setup(struct ma_state *mas, unsigned long min,
void **entry)
{
*entry = NULL;
if (mas_is_active(mas)) {
if (mas->index > min)
return false;
if (unlikely(mas_is_none(mas))) {
if (mas->index <= min)
goto none;
mas->last = mas->index;
mas->node = MAS_START;
return true;
}
if (unlikely(mas_is_paused(mas))) {
if (mas_is_paused(mas)) {
if (unlikely(mas->index <= min)) {
mas->node = MAS_NONE;
return true;
}
mas->node = MAS_START;
mas->last = --mas->index;
} else if (mas_is_none(mas)) {
if (mas->index <= min)
goto none;
mas->last = mas->index;
mas->node = MAS_START;
} else if (mas_is_underflow(mas) || mas_is_overflow(mas)) {
if (mas->last <= min) {
mas->node = MAS_UNDERFLOW;
return true;
}
mas->node = MAS_START;
}
if (unlikely(mas_is_start(mas))) {
if (mas_is_start(mas)) {
/* First run or continue */
if (mas->index < min)
return true;
@ -6004,13 +6109,13 @@ none:
*/
void *mas_find_rev(struct ma_state *mas, unsigned long min)
{
void *entry;
void *entry = NULL;
if (mas_find_rev_setup(mas, min, &entry))
return entry;
/* Retries on dead nodes handled by mas_prev_slot */
return mas_prev_slot(mas, min, false);
return mas_prev_slot(mas, min, false, false);
}
EXPORT_SYMBOL_GPL(mas_find_rev);
@ -6030,13 +6135,13 @@ EXPORT_SYMBOL_GPL(mas_find_rev);
*/
void *mas_find_range_rev(struct ma_state *mas, unsigned long min)
{
void *entry;
void *entry = NULL;
if (mas_find_rev_setup(mas, min, &entry))
return entry;
/* Retries on dead nodes handled by mas_prev_slot */
return mas_prev_slot(mas, min, true);
return mas_prev_slot(mas, min, true, false);
}
EXPORT_SYMBOL_GPL(mas_find_range_rev);

View File

@ -2166,7 +2166,7 @@ static noinline void __init next_prev_test(struct maple_tree *mt)
MT_BUG_ON(mt, val != NULL);
MT_BUG_ON(mt, mas.index != 0);
MT_BUG_ON(mt, mas.last != 5);
MT_BUG_ON(mt, mas.node != MAS_NONE);
MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW);
mas.index = 0;
mas.last = 5;
@ -2917,6 +2917,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt)
* exists MAS_NONE active range
* exists active active range
* DNE active active set to last range
* ERANGE active MAS_OVERFLOW last range
*
* Function ENTRY Start Result index & last
* mas_prev()
@ -2945,6 +2946,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt)
* any MAS_ROOT MAS_NONE 0
* exists active active range
* DNE active active last range
* ERANGE active MAS_UNDERFLOW last range
*
* Function ENTRY Start Result index & last
* mas_find()
@ -2955,7 +2957,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt)
* DNE MAS_START MAS_NONE 0
* DNE MAS_PAUSE MAS_NONE 0
* DNE MAS_ROOT MAS_NONE 0
* DNE MAS_NONE MAS_NONE 0
* DNE MAS_NONE MAS_NONE 1
* if index == 0
* exists MAS_START MAS_ROOT 0
* exists MAS_PAUSE MAS_ROOT 0
@ -2967,7 +2969,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt)
* DNE MAS_START active set to max
* exists MAS_PAUSE active range
* DNE MAS_PAUSE active set to max
* exists MAS_NONE active range
* exists MAS_NONE active range (start at last)
* exists active active range
* DNE active active last range (max < last)
*
@ -2992,7 +2994,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt)
* DNE MAS_START active set to min
* exists MAS_PAUSE active range
* DNE MAS_PAUSE active set to min
* exists MAS_NONE active range
* exists MAS_NONE active range (start at index)
* exists active active range
* DNE active active last range (min > index)
*
@ -3039,10 +3041,10 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
mtree_store_range(mt, 0, 0, ptr, GFP_KERNEL);
mas_lock(&mas);
/* prev: Start -> none */
/* prev: Start -> underflow*/
entry = mas_prev(&mas, 0);
MT_BUG_ON(mt, entry != NULL);
MT_BUG_ON(mt, mas.node != MAS_NONE);
MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW);
/* prev: Start -> root */
mas_set(&mas, 10);
@ -3069,7 +3071,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
MT_BUG_ON(mt, entry != NULL);
MT_BUG_ON(mt, mas.node != MAS_NONE);
/* next: start -> none */
/* next: start -> none*/
mas_set(&mas, 10);
entry = mas_next(&mas, ULONG_MAX);
MT_BUG_ON(mt, mas.index != 1);
@ -3268,27 +3270,48 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
MT_BUG_ON(mt, mas.last != 0x2500);
MT_BUG_ON(mt, !mas_active(mas));
/* next:active -> active out of range*/
/* next:active -> active beyond data */
entry = mas_next(&mas, 0x2999);
MT_BUG_ON(mt, entry != NULL);
MT_BUG_ON(mt, mas.index != 0x2501);
MT_BUG_ON(mt, mas.last != 0x2fff);
MT_BUG_ON(mt, !mas_active(mas));
/* Continue after out of range*/
/* Continue after last range ends after max */
entry = mas_next(&mas, ULONG_MAX);
MT_BUG_ON(mt, entry != ptr3);
MT_BUG_ON(mt, mas.index != 0x3000);
MT_BUG_ON(mt, mas.last != 0x3500);
MT_BUG_ON(mt, !mas_active(mas));
/* next:active -> active out of range*/
/* next:active -> active continued */
entry = mas_next(&mas, ULONG_MAX);
MT_BUG_ON(mt, entry != NULL);
MT_BUG_ON(mt, mas.index != 0x3501);
MT_BUG_ON(mt, mas.last != ULONG_MAX);
MT_BUG_ON(mt, !mas_active(mas));
/* next:active -> overflow */
entry = mas_next(&mas, ULONG_MAX);
MT_BUG_ON(mt, entry != NULL);
MT_BUG_ON(mt, mas.index != 0x3501);
MT_BUG_ON(mt, mas.last != ULONG_MAX);
MT_BUG_ON(mt, mas.node != MAS_OVERFLOW);
/* next:overflow -> overflow */
entry = mas_next(&mas, ULONG_MAX);
MT_BUG_ON(mt, entry != NULL);
MT_BUG_ON(mt, mas.index != 0x3501);
MT_BUG_ON(mt, mas.last != ULONG_MAX);
MT_BUG_ON(mt, mas.node != MAS_OVERFLOW);
/* prev:overflow -> active */
entry = mas_prev(&mas, 0);
MT_BUG_ON(mt, entry != ptr3);
MT_BUG_ON(mt, mas.index != 0x3000);
MT_BUG_ON(mt, mas.last != 0x3500);
MT_BUG_ON(mt, !mas_active(mas));
/* next: none -> active, skip value at location */
mas_set(&mas, 0);
entry = mas_next(&mas, ULONG_MAX);
@ -3307,11 +3330,46 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
MT_BUG_ON(mt, mas.last != 0x1500);
MT_BUG_ON(mt, !mas_active(mas));
/* prev:active -> active out of range*/
/* prev:active -> active spanning end range */
entry = mas_prev(&mas, 0x0100);
MT_BUG_ON(mt, entry != NULL);
MT_BUG_ON(mt, mas.index != 0);
MT_BUG_ON(mt, mas.last != 0x0FFF);
MT_BUG_ON(mt, !mas_active(mas));
/* prev:active -> underflow */
entry = mas_prev(&mas, 0);
MT_BUG_ON(mt, entry != NULL);
MT_BUG_ON(mt, mas.index != 0);
MT_BUG_ON(mt, mas.last != 0x0FFF);
MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW);
/* prev:underflow -> underflow */
entry = mas_prev(&mas, 0);
MT_BUG_ON(mt, entry != NULL);
MT_BUG_ON(mt, mas.index != 0);
MT_BUG_ON(mt, mas.last != 0x0FFF);
MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW);
/* next:underflow -> active */
entry = mas_next(&mas, ULONG_MAX);
MT_BUG_ON(mt, entry != ptr);
MT_BUG_ON(mt, mas.index != 0x1000);
MT_BUG_ON(mt, mas.last != 0x1500);
MT_BUG_ON(mt, !mas_active(mas));
/* prev:first value -> underflow */
entry = mas_prev(&mas, 0x1000);
MT_BUG_ON(mt, entry != NULL);
MT_BUG_ON(mt, mas.index != 0x1000);
MT_BUG_ON(mt, mas.last != 0x1500);
MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW);
/* find:underflow -> first value */
entry = mas_find(&mas, ULONG_MAX);
MT_BUG_ON(mt, entry != ptr);
MT_BUG_ON(mt, mas.index != 0x1000);
MT_BUG_ON(mt, mas.last != 0x1500);
MT_BUG_ON(mt, !mas_active(mas));
/* prev: pause ->active */
@ -3325,14 +3383,14 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
MT_BUG_ON(mt, mas.last != 0x2500);
MT_BUG_ON(mt, !mas_active(mas));
/* prev:active -> active out of range*/
/* prev:active -> active spanning min */
entry = mas_prev(&mas, 0x1600);
MT_BUG_ON(mt, entry != NULL);
MT_BUG_ON(mt, mas.index != 0x1501);
MT_BUG_ON(mt, mas.last != 0x1FFF);
MT_BUG_ON(mt, !mas_active(mas));
/* prev: active ->active, continue*/
/* prev: active ->active, continue */
entry = mas_prev(&mas, 0);
MT_BUG_ON(mt, entry != ptr);
MT_BUG_ON(mt, mas.index != 0x1000);
@ -3379,7 +3437,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
MT_BUG_ON(mt, mas.last != 0x2FFF);
MT_BUG_ON(mt, !mas_active(mas));
/* find: none ->active */
/* find: overflow ->active */
entry = mas_find(&mas, 0x5000);
MT_BUG_ON(mt, entry != ptr3);
MT_BUG_ON(mt, mas.index != 0x3000);
@ -3778,7 +3836,6 @@ static int __init maple_tree_seed(void)
check_empty_area_fill(&tree);
mtree_destroy(&tree);
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_state_handling(&tree);
mtree_destroy(&tree);

View File

@ -148,6 +148,8 @@ static void damon_do_test_apply_three_regions(struct kunit *test,
KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]);
KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]);
}
damon_destroy_target(t);
}
/*

View File

@ -341,13 +341,14 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
bool referenced = false;
pte_t entry = huge_ptep_get(pte);
struct folio *folio = pfn_folio(pte_pfn(entry));
unsigned long psize = huge_page_size(hstate_vma(vma));
folio_get(folio);
if (pte_young(entry)) {
referenced = true;
entry = pte_mkold(entry);
set_huge_pte_at(mm, addr, pte, entry);
set_huge_pte_at(mm, addr, pte, entry, psize);
}
#ifdef CONFIG_MMU_NOTIFIER

View File

@ -3503,7 +3503,7 @@ skip:
if (count) {
set_pte_range(vmf, folio, page, count, addr);
folio_ref_add(folio, count);
if (in_range(vmf->address, addr, count))
if (in_range(vmf->address, addr, count * PAGE_SIZE))
ret = VM_FAULT_NOPAGE;
}
@ -3517,7 +3517,7 @@ skip:
if (count) {
set_pte_range(vmf, folio, page, count, addr);
folio_ref_add(folio, count);
if (in_range(vmf->address, addr, count))
if (in_range(vmf->address, addr, count * PAGE_SIZE))
ret = VM_FAULT_NOPAGE;
}

View File

@ -4980,7 +4980,7 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
static void
hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
struct folio *new_folio, pte_t old)
struct folio *new_folio, pte_t old, unsigned long sz)
{
pte_t newpte = make_huge_pte(vma, &new_folio->page, 1);
@ -4988,7 +4988,7 @@ hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long add
hugepage_add_new_anon_rmap(new_folio, vma, addr);
if (userfaultfd_wp(vma) && huge_pte_uffd_wp(old))
newpte = huge_pte_mkuffd_wp(newpte);
set_huge_pte_at(vma->vm_mm, addr, ptep, newpte);
set_huge_pte_at(vma->vm_mm, addr, ptep, newpte, sz);
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
folio_set_hugetlb_migratable(new_folio);
}
@ -5065,7 +5065,7 @@ again:
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
set_huge_pte_at(dst, addr, dst_pte, entry, sz);
} else if (unlikely(is_hugetlb_entry_migration(entry))) {
swp_entry_t swp_entry = pte_to_swp_entry(entry);
bool uffd_wp = pte_swp_uffd_wp(entry);
@ -5080,18 +5080,18 @@ again:
entry = swp_entry_to_pte(swp_entry);
if (userfaultfd_wp(src_vma) && uffd_wp)
entry = pte_swp_mkuffd_wp(entry);
set_huge_pte_at(src, addr, src_pte, entry);
set_huge_pte_at(src, addr, src_pte, entry, sz);
}
if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
set_huge_pte_at(dst, addr, dst_pte, entry, sz);
} else if (unlikely(is_pte_marker(entry))) {
pte_marker marker = copy_pte_marker(
pte_to_swp_entry(entry), dst_vma);
if (marker)
set_huge_pte_at(dst, addr, dst_pte,
make_pte_marker(marker));
make_pte_marker(marker), sz);
} else {
entry = huge_ptep_get(src_pte);
pte_folio = page_folio(pte_page(entry));
@ -5145,7 +5145,7 @@ again:
goto again;
}
hugetlb_install_folio(dst_vma, dst_pte, addr,
new_folio, src_pte_old);
new_folio, src_pte_old, sz);
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
continue;
@ -5166,7 +5166,7 @@ again:
if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
set_huge_pte_at(dst, addr, dst_pte, entry, sz);
hugetlb_count_add(npages, dst);
}
spin_unlock(src_ptl);
@ -5184,7 +5184,8 @@ again:
}
static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte)
unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte,
unsigned long sz)
{
struct hstate *h = hstate_vma(vma);
struct mm_struct *mm = vma->vm_mm;
@ -5202,7 +5203,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
pte = huge_ptep_get_and_clear(mm, old_addr, src_pte);
set_huge_pte_at(mm, new_addr, dst_pte, pte);
set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
if (src_ptl != dst_ptl)
spin_unlock(src_ptl);
@ -5259,7 +5260,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
if (!dst_pte)
break;
move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte);
move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte, sz);
}
if (shared_pmd)
@ -5337,7 +5338,8 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
if (pte_swp_uffd_wp_any(pte) &&
!(zap_flags & ZAP_FLAG_DROP_MARKER))
set_huge_pte_at(mm, address, ptep,
make_pte_marker(PTE_MARKER_UFFD_WP));
make_pte_marker(PTE_MARKER_UFFD_WP),
sz);
else
huge_pte_clear(mm, address, ptep, sz);
spin_unlock(ptl);
@ -5371,7 +5373,8 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
if (huge_pte_uffd_wp(pte) &&
!(zap_flags & ZAP_FLAG_DROP_MARKER))
set_huge_pte_at(mm, address, ptep,
make_pte_marker(PTE_MARKER_UFFD_WP));
make_pte_marker(PTE_MARKER_UFFD_WP),
sz);
hugetlb_count_sub(pages_per_huge_page(h), mm);
page_remove_rmap(page, vma, true);
@ -5676,7 +5679,7 @@ retry_avoidcopy:
hugepage_add_new_anon_rmap(new_folio, vma, haddr);
if (huge_pte_uffd_wp(pte))
newpte = huge_pte_mkuffd_wp(newpte);
set_huge_pte_at(mm, haddr, ptep, newpte);
set_huge_pte_at(mm, haddr, ptep, newpte, huge_page_size(h));
folio_set_hugetlb_migratable(new_folio);
/* Make the old page be freed below */
new_folio = old_folio;
@ -5972,7 +5975,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
*/
if (unlikely(pte_marker_uffd_wp(old_pte)))
new_pte = huge_pte_mkuffd_wp(new_pte);
set_huge_pte_at(mm, haddr, ptep, new_pte);
set_huge_pte_at(mm, haddr, ptep, new_pte, huge_page_size(h));
hugetlb_count_add(pages_per_huge_page(h), mm);
if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
@ -6261,7 +6264,8 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
}
_dst_pte = make_pte_marker(PTE_MARKER_POISONED);
set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte,
huge_page_size(h));
/* No need to invalidate - it was non-present before */
update_mmu_cache(dst_vma, dst_addr, dst_pte);
@ -6412,7 +6416,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
if (wp_enabled)
_dst_pte = huge_pte_mkuffd_wp(_dst_pte);
set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, huge_page_size(h));
hugetlb_count_add(pages_per_huge_page(h), dst_mm);
@ -6598,7 +6602,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
else if (uffd_wp_resolve)
newpte = pte_swp_clear_uffd_wp(newpte);
if (!pte_same(pte, newpte))
set_huge_pte_at(mm, address, ptep, newpte);
set_huge_pte_at(mm, address, ptep, newpte, psize);
} else if (unlikely(is_pte_marker(pte))) {
/* No other markers apply for now. */
WARN_ON_ONCE(!pte_marker_uffd_wp(pte));
@ -6623,7 +6627,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
if (unlikely(uffd_wp))
/* Safe to modify directly (none->non-present). */
set_huge_pte_at(mm, address, ptep,
make_pte_marker(PTE_MARKER_UFFD_WP));
make_pte_marker(PTE_MARKER_UFFD_WP),
psize);
}
spin_unlock(ptl);
}

View File

@ -3867,6 +3867,13 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
case _MEMSWAP:
ret = mem_cgroup_resize_max(memcg, nr_pages, true);
break;
case _KMEM:
pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. "
"Writing any value to this file has no effect. "
"Please report your usecase to linux-mm@kvack.org if you "
"depend on this functionality.\n");
ret = 0;
break;
case _TCP:
ret = memcg_update_tcp_max(memcg, nr_pages);
break;
@ -5077,6 +5084,12 @@ static struct cftype mem_cgroup_legacy_files[] = {
.seq_show = memcg_numa_stat_show,
},
#endif
{
.name = "kmem.limit_in_bytes",
.private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
.write = mem_cgroup_write,
.read_u64 = mem_cgroup_read_u64,
},
{
.name = "kmem.usage_in_bytes",
.private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),

View File

@ -426,6 +426,7 @@ struct queue_pages {
unsigned long start;
unsigned long end;
struct vm_area_struct *first;
bool has_unmovable;
};
/*
@ -446,9 +447,8 @@ static inline bool queue_folio_required(struct folio *folio,
/*
* queue_folios_pmd() has three possible return values:
* 0 - folios are placed on the right node or queued successfully, or
* special page is met, i.e. huge zero page.
* 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
* specified.
* special page is met, i.e. zero page, or unmovable page is found
* but continue walking (indicated by queue_pages.has_unmovable).
* -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
* existing folio was already on a node that does not follow the
* policy.
@ -479,7 +479,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
if (!vma_migratable(walk->vma) ||
migrate_folio_add(folio, qp->pagelist, flags)) {
ret = 1;
qp->has_unmovable = true;
goto unlock;
}
} else
@ -495,9 +495,8 @@ unlock:
*
* queue_folios_pte_range() has three possible return values:
* 0 - folios are placed on the right node or queued successfully, or
* special page is met, i.e. zero page.
* 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
* specified.
* special page is met, i.e. zero page, or unmovable page is found
* but continue walking (indicated by queue_pages.has_unmovable).
* -EIO - only MPOL_MF_STRICT was specified and an existing folio was already
* on a node that does not follow the policy.
*/
@ -508,7 +507,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
struct folio *folio;
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
bool has_unmovable = false;
pte_t *pte, *mapped_pte;
pte_t ptent;
spinlock_t *ptl;
@ -538,11 +536,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
if (!queue_folio_required(folio, qp))
continue;
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
/* MPOL_MF_STRICT must be specified if we get here */
if (!vma_migratable(vma)) {
has_unmovable = true;
break;
}
/*
* MPOL_MF_STRICT must be specified if we get here.
* Continue walking vmas due to MPOL_MF_MOVE* flags.
*/
if (!vma_migratable(vma))
qp->has_unmovable = true;
/*
* Do not abort immediately since there may be
@ -550,16 +549,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
* need migrate other LRU pages.
*/
if (migrate_folio_add(folio, qp->pagelist, flags))
has_unmovable = true;
qp->has_unmovable = true;
} else
break;
}
pte_unmap_unlock(mapped_pte, ptl);
cond_resched();
if (has_unmovable)
return 1;
return addr != end ? -EIO : 0;
}
@ -599,7 +595,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask,
* Detecting misplaced folio but allow migrating folios which
* have been queued.
*/
ret = 1;
qp->has_unmovable = true;
goto unlock;
}
@ -620,7 +616,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask,
* Failed to isolate folio but allow migrating pages
* which have been queued.
*/
ret = 1;
qp->has_unmovable = true;
}
unlock:
spin_unlock(ptl);
@ -756,12 +752,15 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
.start = start,
.end = end,
.first = NULL,
.has_unmovable = false,
};
const struct mm_walk_ops *ops = lock_vma ?
&queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
err = walk_page_range(mm, start, end, ops, &qp);
if (qp.has_unmovable)
err = 1;
if (!qp.first)
/* whole range in hole */
err = -EFAULT;
@ -1358,7 +1357,7 @@ static long do_mbind(unsigned long start, unsigned long len,
putback_movable_pages(&pagelist);
}
if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT))
err = -EIO;
} else {
up_out:

View File

@ -243,7 +243,9 @@ static bool remove_migration_pte(struct folio *folio,
#ifdef CONFIG_HUGETLB_PAGE
if (folio_test_hugetlb(folio)) {
unsigned int shift = huge_page_shift(hstate_vma(vma));
struct hstate *h = hstate_vma(vma);
unsigned int shift = huge_page_shift(h);
unsigned long psize = huge_page_size(h);
pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
if (folio_test_anon(folio))
@ -251,7 +253,8 @@ static bool remove_migration_pte(struct folio *folio,
rmap_flags);
else
page_dup_file_rmap(new, true);
set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte,
psize);
} else
#endif
{

View File

@ -1480,6 +1480,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
unsigned long pfn;
unsigned long hsz = 0;
/*
* When racing against e.g. zap_pte_range() on another cpu,
@ -1511,6 +1512,9 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
*/
adjust_range_if_pmd_sharing_possible(vma, &range.start,
&range.end);
/* We need the huge page size for set_huge_pte_at() */
hsz = huge_page_size(hstate_vma(vma));
}
mmu_notifier_invalidate_range_start(&range);
@ -1628,7 +1632,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
if (folio_test_hugetlb(folio)) {
hugetlb_count_sub(folio_nr_pages(folio), mm);
set_huge_pte_at(mm, address, pvmw.pte, pteval);
set_huge_pte_at(mm, address, pvmw.pte, pteval,
hsz);
} else {
dec_mm_counter(mm, mm_counter(&folio->page));
set_pte_at(mm, address, pvmw.pte, pteval);
@ -1820,6 +1825,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
unsigned long pfn;
unsigned long hsz = 0;
/*
* When racing against e.g. zap_pte_range() on another cpu,
@ -1855,6 +1861,9 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
*/
adjust_range_if_pmd_sharing_possible(vma, &range.start,
&range.end);
/* We need the huge page size for set_huge_pte_at() */
hsz = huge_page_size(hstate_vma(vma));
}
mmu_notifier_invalidate_range_start(&range);
@ -2020,7 +2029,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
if (folio_test_hugetlb(folio)) {
hugetlb_count_sub(folio_nr_pages(folio), mm);
set_huge_pte_at(mm, address, pvmw.pte, pteval);
set_huge_pte_at(mm, address, pvmw.pte, pteval,
hsz);
} else {
dec_mm_counter(mm, mm_counter(&folio->page));
set_pte_at(mm, address, pvmw.pte, pteval);
@ -2044,7 +2054,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
if (arch_unmap_one(mm, vma, address, pteval) < 0) {
if (folio_test_hugetlb(folio))
set_huge_pte_at(mm, address, pvmw.pte, pteval);
set_huge_pte_at(mm, address, pvmw.pte,
pteval, hsz);
else
set_pte_at(mm, address, pvmw.pte, pteval);
ret = false;
@ -2058,7 +2069,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
if (anon_exclusive &&
page_try_share_anon_rmap(subpage)) {
if (folio_test_hugetlb(folio))
set_huge_pte_at(mm, address, pvmw.pte, pteval);
set_huge_pte_at(mm, address, pvmw.pte,
pteval, hsz);
else
set_pte_at(mm, address, pvmw.pte, pteval);
ret = false;
@ -2090,7 +2102,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
if (pte_uffd_wp(pteval))
swp_pte = pte_swp_mkuffd_wp(swp_pte);
if (folio_test_hugetlb(folio))
set_huge_pte_at(mm, address, pvmw.pte, swp_pte);
set_huge_pte_at(mm, address, pvmw.pte, swp_pte,
hsz);
else
set_pte_at(mm, address, pvmw.pte, swp_pte);
trace_set_migration_pte(address, pte_val(swp_pte),

View File

@ -111,7 +111,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t entry = pfn_pte(pfn, prot);
entry = arch_make_huge_pte(entry, ilog2(size), 0);
set_huge_pte_at(&init_mm, addr, pte, entry);
set_huge_pte_at(&init_mm, addr, pte, entry, size);
pfn += PFN_DOWN(size);
continue;
}

View File

@ -1218,6 +1218,19 @@ bool zswap_store(struct folio *folio)
if (!zswap_enabled || !tree)
return false;
/*
* If this is a duplicate, it must be removed before attempting to store
* it, otherwise, if the store fails the old page won't be removed from
* the tree, and it might be written back overriding the new data.
*/
spin_lock(&tree->lock);
dupentry = zswap_rb_search(&tree->rbroot, offset);
if (dupentry) {
zswap_duplicate_entry++;
zswap_invalidate_entry(tree, dupentry);
}
spin_unlock(&tree->lock);
/*
* XXX: zswap reclaim does not work with cgroups yet. Without a
* cgroup-aware entry LRU, we will push out entries system-wide based on
@ -1333,7 +1346,14 @@ insert_entry:
/* map */
spin_lock(&tree->lock);
/*
* A duplicate entry should have been removed at the beginning of this
* function. Since the swap entry should be pinned, if a duplicate is
* found again here it means that something went wrong in the swap
* cache.
*/
while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
WARN_ON(1);
zswap_duplicate_entry++;
zswap_invalidate_entry(tree, dupentry);
}

View File

@ -25,7 +25,7 @@ if [[ "$1" == "-cgroup-v2" ]]; then
fi
if [[ $cgroup2 ]]; then
cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
if [[ -z "$cgroup_path" ]]; then
cgroup_path=/dev/cgroup/memory
mount -t cgroup2 none $cgroup_path
@ -33,7 +33,7 @@ if [[ $cgroup2 ]]; then
fi
echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
else
cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
if [[ -z "$cgroup_path" ]]; then
cgroup_path=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $cgroup_path

View File

@ -20,7 +20,7 @@ fi
if [[ $cgroup2 ]]; then
CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}')
if [[ -z "$CGROUP_ROOT" ]]; then
CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup2 none $CGROUP_ROOT
@ -28,7 +28,7 @@ if [[ $cgroup2 ]]; then
fi
echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
else
CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
if [[ -z "$CGROUP_ROOT" ]]; then
CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $CGROUP_ROOT