15 hotfixes. 9 are cc:stable and the remainder address post-6.8 issues
or aren't considered suitable for backporting. There are a significant number of fixups for this cycle's page_owner changes (series "page_owner: print stacks and their outstanding allocations"). Apart from that, singleton changes all over, mainly in MM. -----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCZiGTewAKCRDdBJ7gKXxA jt1QAP9QxiU/+gUMVjkHyKaMBHSBMD/CWBFjDfRjx+BPqYx55gD+JWxUXwlyVkMo Z8fqtCGEgatev1VbwpCwByhvnH9bKgw= =YBZ9 -----END PGP SIGNATURE----- Merge tag 'mm-hotfixes-stable-2024-04-18-14-41' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Pull misc fixes from Andrew Morton: "15 hotfixes. 9 are cc:stable and the remainder address post-6.8 issues or aren't considered suitable for backporting. There are a significant number of fixups for this cycle's page_owner changes (series "page_owner: print stacks and their outstanding allocations"). Apart from that, singleton changes all over, mainly in MM" * tag 'mm-hotfixes-stable-2024-04-18-14-41' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: nilfs2: fix OOB in nilfs_set_de_type MAINTAINERS: update Naoya Horiguchi's email address fork: defer linking file vma until vma is fully initialized mm/shmem: inline shmem_is_huge() for disabled transparent hugepages mm,page_owner: defer enablement of static branch Squashfs: check the inode number is not the invalid value of zero mm,swapops: update check in is_pfn_swap_entry for hwpoison entries mm/memory-failure: fix deadlock when hugetlb_optimize_vmemmap is enabled mm/userfaultfd: allow hugetlb change protection upon poison entry mm,page_owner: fix printing of stack records mm,page_owner: fix accounting of pages when migrating mm,page_owner: fix refcount imbalance mm,page_owner: update metadata for tail pages userfaultfd: change src_folio after ensuring it's unpinned in UFFDIO_MOVE mm/madvise: make MADV_POPULATE_(READ|WRITE) handle VM_FAULT_RETRY properly
This commit is contained in:
commit
54c23548e0
3
.mailmap
3
.mailmap
|
@ -446,7 +446,8 @@ Mythri P K <mythripk@ti.com>
|
||||||
Nadav Amit <nadav.amit@gmail.com> <namit@vmware.com>
|
Nadav Amit <nadav.amit@gmail.com> <namit@vmware.com>
|
||||||
Nadav Amit <nadav.amit@gmail.com> <namit@cs.technion.ac.il>
|
Nadav Amit <nadav.amit@gmail.com> <namit@cs.technion.ac.il>
|
||||||
Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
|
Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
|
||||||
Naoya Horiguchi <naoya.horiguchi@nec.com> <n-horiguchi@ah.jp.nec.com>
|
Naoya Horiguchi <nao.horiguchi@gmail.com> <n-horiguchi@ah.jp.nec.com>
|
||||||
|
Naoya Horiguchi <nao.horiguchi@gmail.com> <naoya.horiguchi@nec.com>
|
||||||
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
|
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
|
||||||
Neeraj Upadhyay <quic_neeraju@quicinc.com> <neeraju@codeaurora.org>
|
Neeraj Upadhyay <quic_neeraju@quicinc.com> <neeraju@codeaurora.org>
|
||||||
Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
|
Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
|
||||||
|
|
|
@ -24,10 +24,10 @@ fragmentation statistics can be obtained through gfp flag information of
|
||||||
each page. It is already implemented and activated if page owner is
|
each page. It is already implemented and activated if page owner is
|
||||||
enabled. Other usages are more than welcome.
|
enabled. Other usages are more than welcome.
|
||||||
|
|
||||||
It can also be used to show all the stacks and their outstanding
|
It can also be used to show all the stacks and their current number of
|
||||||
allocations, which gives us a quick overview of where the memory is going
|
allocated base pages, which gives us a quick overview of where the memory
|
||||||
without the need to screen through all the pages and match the allocation
|
is going without the need to screen through all the pages and match the
|
||||||
and free operation.
|
allocation and free operation.
|
||||||
|
|
||||||
page owner is disabled by default. So, if you'd like to use it, you need
|
page owner is disabled by default. So, if you'd like to use it, you need
|
||||||
to add "page_owner=on" to your boot cmdline. If the kernel is built
|
to add "page_owner=on" to your boot cmdline. If the kernel is built
|
||||||
|
@ -75,42 +75,45 @@ Usage
|
||||||
|
|
||||||
cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt
|
cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt
|
||||||
cat stacks.txt
|
cat stacks.txt
|
||||||
prep_new_page+0xa9/0x120
|
post_alloc_hook+0x177/0x1a0
|
||||||
get_page_from_freelist+0x7e6/0x2140
|
get_page_from_freelist+0xd01/0xd80
|
||||||
__alloc_pages+0x18a/0x370
|
__alloc_pages+0x39e/0x7e0
|
||||||
new_slab+0xc8/0x580
|
allocate_slab+0xbc/0x3f0
|
||||||
___slab_alloc+0x1f2/0xaf0
|
___slab_alloc+0x528/0x8a0
|
||||||
__slab_alloc.isra.86+0x22/0x40
|
kmem_cache_alloc+0x224/0x3b0
|
||||||
kmem_cache_alloc+0x31b/0x350
|
sk_prot_alloc+0x58/0x1a0
|
||||||
__khugepaged_enter+0x39/0x100
|
sk_alloc+0x32/0x4f0
|
||||||
dup_mmap+0x1c7/0x5ce
|
inet_create+0x427/0xb50
|
||||||
copy_process+0x1afe/0x1c90
|
__sock_create+0x2e4/0x650
|
||||||
kernel_clone+0x9a/0x3c0
|
inet_ctl_sock_create+0x30/0x180
|
||||||
__do_sys_clone+0x66/0x90
|
igmp_net_init+0xc1/0x130
|
||||||
do_syscall_64+0x7f/0x160
|
ops_init+0x167/0x410
|
||||||
entry_SYSCALL_64_after_hwframe+0x6c/0x74
|
setup_net+0x304/0xa60
|
||||||
stack_count: 234
|
copy_net_ns+0x29b/0x4a0
|
||||||
|
create_new_namespaces+0x4a1/0x820
|
||||||
|
nr_base_pages: 16
|
||||||
...
|
...
|
||||||
...
|
...
|
||||||
echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold
|
echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold
|
||||||
cat /sys/kernel/debug/page_owner_stacks/show_stacks> stacks_7000.txt
|
cat /sys/kernel/debug/page_owner_stacks/show_stacks> stacks_7000.txt
|
||||||
cat stacks_7000.txt
|
cat stacks_7000.txt
|
||||||
prep_new_page+0xa9/0x120
|
post_alloc_hook+0x177/0x1a0
|
||||||
get_page_from_freelist+0x7e6/0x2140
|
get_page_from_freelist+0xd01/0xd80
|
||||||
__alloc_pages+0x18a/0x370
|
__alloc_pages+0x39e/0x7e0
|
||||||
alloc_pages_mpol+0xdf/0x1e0
|
alloc_pages_mpol+0x22e/0x490
|
||||||
folio_alloc+0x14/0x50
|
folio_alloc+0xd5/0x110
|
||||||
filemap_alloc_folio+0xb0/0x100
|
filemap_alloc_folio+0x78/0x230
|
||||||
page_cache_ra_unbounded+0x97/0x180
|
page_cache_ra_order+0x287/0x6f0
|
||||||
filemap_fault+0x4b4/0x1200
|
filemap_get_pages+0x517/0x1160
|
||||||
__do_fault+0x2d/0x110
|
filemap_read+0x304/0x9f0
|
||||||
do_pte_missing+0x4b0/0xa30
|
xfs_file_buffered_read+0xe6/0x1d0 [xfs]
|
||||||
__handle_mm_fault+0x7fa/0xb70
|
xfs_file_read_iter+0x1f0/0x380 [xfs]
|
||||||
handle_mm_fault+0x125/0x300
|
__kernel_read+0x3b9/0x730
|
||||||
do_user_addr_fault+0x3c9/0x840
|
kernel_read_file+0x309/0x4d0
|
||||||
exc_page_fault+0x68/0x150
|
__do_sys_finit_module+0x381/0x730
|
||||||
asm_exc_page_fault+0x22/0x30
|
do_syscall_64+0x8d/0x150
|
||||||
stack_count: 8248
|
entry_SYSCALL_64_after_hwframe+0x62/0x6a
|
||||||
|
nr_base_pages: 20824
|
||||||
...
|
...
|
||||||
|
|
||||||
cat /sys/kernel/debug/page_owner > page_owner_full.txt
|
cat /sys/kernel/debug/page_owner > page_owner_full.txt
|
||||||
|
|
|
@ -10024,7 +10024,7 @@ F: drivers/media/platform/st/sti/hva
|
||||||
|
|
||||||
HWPOISON MEMORY FAILURE HANDLING
|
HWPOISON MEMORY FAILURE HANDLING
|
||||||
M: Miaohe Lin <linmiaohe@huawei.com>
|
M: Miaohe Lin <linmiaohe@huawei.com>
|
||||||
R: Naoya Horiguchi <naoya.horiguchi@nec.com>
|
R: Naoya Horiguchi <nao.horiguchi@gmail.com>
|
||||||
L: linux-mm@kvack.org
|
L: linux-mm@kvack.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
F: mm/hwpoison-inject.c
|
F: mm/hwpoison-inject.c
|
||||||
|
|
|
@ -240,7 +240,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = {
|
||||||
|
|
||||||
#define S_SHIFT 12
|
#define S_SHIFT 12
|
||||||
static unsigned char
|
static unsigned char
|
||||||
nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
|
nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
|
||||||
[S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE,
|
[S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE,
|
||||||
[S_IFDIR >> S_SHIFT] = NILFS_FT_DIR,
|
[S_IFDIR >> S_SHIFT] = NILFS_FT_DIR,
|
||||||
[S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV,
|
[S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV,
|
||||||
|
|
|
@ -48,6 +48,10 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
|
||||||
gid_t i_gid;
|
gid_t i_gid;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
|
inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
|
||||||
|
if (inode->i_ino == 0)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid);
|
err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
@ -58,7 +62,6 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
|
||||||
|
|
||||||
i_uid_write(inode, i_uid);
|
i_uid_write(inode, i_uid);
|
||||||
i_gid_write(inode, i_gid);
|
i_gid_write(inode, i_gid);
|
||||||
inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
|
|
||||||
inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0);
|
inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0);
|
||||||
inode_set_atime(inode, inode_get_mtime_sec(inode), 0);
|
inode_set_atime(inode, inode_get_mtime_sec(inode), 0);
|
||||||
inode_set_ctime(inode, inode_get_mtime_sec(inode), 0);
|
inode_set_ctime(inode, inode_get_mtime_sec(inode), 0);
|
||||||
|
|
|
@ -110,8 +110,17 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
|
||||||
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
|
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
|
||||||
int shmem_unuse(unsigned int type);
|
int shmem_unuse(unsigned int type);
|
||||||
|
|
||||||
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||||
extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
|
extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
|
||||||
struct mm_struct *mm, unsigned long vm_flags);
|
struct mm_struct *mm, unsigned long vm_flags);
|
||||||
|
#else
|
||||||
|
static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
|
||||||
|
struct mm_struct *mm, unsigned long vm_flags)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_SHMEM
|
#ifdef CONFIG_SHMEM
|
||||||
extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
|
extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -390,6 +390,35 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_MIGRATION */
|
#endif /* CONFIG_MIGRATION */
|
||||||
|
|
||||||
|
#ifdef CONFIG_MEMORY_FAILURE
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Support for hardware poisoned pages
|
||||||
|
*/
|
||||||
|
static inline swp_entry_t make_hwpoison_entry(struct page *page)
|
||||||
|
{
|
||||||
|
BUG_ON(!PageLocked(page));
|
||||||
|
return swp_entry(SWP_HWPOISON, page_to_pfn(page));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int is_hwpoison_entry(swp_entry_t entry)
|
||||||
|
{
|
||||||
|
return swp_type(entry) == SWP_HWPOISON;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
static inline swp_entry_t make_hwpoison_entry(struct page *page)
|
||||||
|
{
|
||||||
|
return swp_entry(0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int is_hwpoison_entry(swp_entry_t swp)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef unsigned long pte_marker;
|
typedef unsigned long pte_marker;
|
||||||
|
|
||||||
#define PTE_MARKER_UFFD_WP BIT(0)
|
#define PTE_MARKER_UFFD_WP BIT(0)
|
||||||
|
@ -483,8 +512,9 @@ static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A pfn swap entry is a special type of swap entry that always has a pfn stored
|
* A pfn swap entry is a special type of swap entry that always has a pfn stored
|
||||||
* in the swap offset. They are used to represent unaddressable device memory
|
* in the swap offset. They can either be used to represent unaddressable device
|
||||||
* and to restrict access to a page undergoing migration.
|
* memory, to restrict access to a page undergoing migration or to represent a
|
||||||
|
* pfn which has been hwpoisoned and unmapped.
|
||||||
*/
|
*/
|
||||||
static inline bool is_pfn_swap_entry(swp_entry_t entry)
|
static inline bool is_pfn_swap_entry(swp_entry_t entry)
|
||||||
{
|
{
|
||||||
|
@ -492,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry)
|
||||||
BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
|
BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
|
||||||
|
|
||||||
return is_migration_entry(entry) || is_device_private_entry(entry) ||
|
return is_migration_entry(entry) || is_device_private_entry(entry) ||
|
||||||
is_device_exclusive_entry(entry);
|
is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct page_vma_mapped_walk;
|
struct page_vma_mapped_walk;
|
||||||
|
@ -561,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
|
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
|
||||||
|
|
||||||
#ifdef CONFIG_MEMORY_FAILURE
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Support for hardware poisoned pages
|
|
||||||
*/
|
|
||||||
static inline swp_entry_t make_hwpoison_entry(struct page *page)
|
|
||||||
{
|
|
||||||
BUG_ON(!PageLocked(page));
|
|
||||||
return swp_entry(SWP_HWPOISON, page_to_pfn(page));
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int is_hwpoison_entry(swp_entry_t entry)
|
|
||||||
{
|
|
||||||
return swp_type(entry) == SWP_HWPOISON;
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
static inline swp_entry_t make_hwpoison_entry(struct page *page)
|
|
||||||
{
|
|
||||||
return swp_entry(0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int is_hwpoison_entry(swp_entry_t swp)
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static inline int non_swap_entry(swp_entry_t entry)
|
static inline int non_swap_entry(swp_entry_t entry)
|
||||||
{
|
{
|
||||||
return swp_type(entry) >= MAX_SWAPFILES;
|
return swp_type(entry) >= MAX_SWAPFILES;
|
||||||
|
|
|
@ -714,6 +714,23 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
|
||||||
} else if (anon_vma_fork(tmp, mpnt))
|
} else if (anon_vma_fork(tmp, mpnt))
|
||||||
goto fail_nomem_anon_vma_fork;
|
goto fail_nomem_anon_vma_fork;
|
||||||
vm_flags_clear(tmp, VM_LOCKED_MASK);
|
vm_flags_clear(tmp, VM_LOCKED_MASK);
|
||||||
|
/*
|
||||||
|
* Copy/update hugetlb private vma information.
|
||||||
|
*/
|
||||||
|
if (is_vm_hugetlb_page(tmp))
|
||||||
|
hugetlb_dup_vma_private(tmp);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Link the vma into the MT. After using __mt_dup(), memory
|
||||||
|
* allocation is not necessary here, so it cannot fail.
|
||||||
|
*/
|
||||||
|
vma_iter_bulk_store(&vmi, tmp);
|
||||||
|
|
||||||
|
mm->map_count++;
|
||||||
|
|
||||||
|
if (tmp->vm_ops && tmp->vm_ops->open)
|
||||||
|
tmp->vm_ops->open(tmp);
|
||||||
|
|
||||||
file = tmp->vm_file;
|
file = tmp->vm_file;
|
||||||
if (file) {
|
if (file) {
|
||||||
struct address_space *mapping = file->f_mapping;
|
struct address_space *mapping = file->f_mapping;
|
||||||
|
@ -730,25 +747,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
|
||||||
i_mmap_unlock_write(mapping);
|
i_mmap_unlock_write(mapping);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Copy/update hugetlb private vma information.
|
|
||||||
*/
|
|
||||||
if (is_vm_hugetlb_page(tmp))
|
|
||||||
hugetlb_dup_vma_private(tmp);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Link the vma into the MT. After using __mt_dup(), memory
|
|
||||||
* allocation is not necessary here, so it cannot fail.
|
|
||||||
*/
|
|
||||||
vma_iter_bulk_store(&vmi, tmp);
|
|
||||||
|
|
||||||
mm->map_count++;
|
|
||||||
if (!(tmp->vm_flags & VM_WIPEONFORK))
|
if (!(tmp->vm_flags & VM_WIPEONFORK))
|
||||||
retval = copy_page_range(tmp, mpnt);
|
retval = copy_page_range(tmp, mpnt);
|
||||||
|
|
||||||
if (tmp->vm_ops && tmp->vm_ops->open)
|
|
||||||
tmp->vm_ops->open(tmp);
|
|
||||||
|
|
||||||
if (retval) {
|
if (retval) {
|
||||||
mpnt = vma_next(&vmi);
|
mpnt = vma_next(&vmi);
|
||||||
goto loop_out;
|
goto loop_out;
|
||||||
|
|
54
mm/gup.c
54
mm/gup.c
|
@ -1206,6 +1206,22 @@ static long __get_user_pages(struct mm_struct *mm,
|
||||||
|
|
||||||
/* first iteration or cross vma bound */
|
/* first iteration or cross vma bound */
|
||||||
if (!vma || start >= vma->vm_end) {
|
if (!vma || start >= vma->vm_end) {
|
||||||
|
/*
|
||||||
|
* MADV_POPULATE_(READ|WRITE) wants to handle VMA
|
||||||
|
* lookups+error reporting differently.
|
||||||
|
*/
|
||||||
|
if (gup_flags & FOLL_MADV_POPULATE) {
|
||||||
|
vma = vma_lookup(mm, start);
|
||||||
|
if (!vma) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
if (check_vma_flags(vma, gup_flags)) {
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
vma = gup_vma_lookup(mm, start);
|
vma = gup_vma_lookup(mm, start);
|
||||||
if (!vma && in_gate_area(mm, start)) {
|
if (!vma && in_gate_area(mm, start)) {
|
||||||
ret = get_gate_page(mm, start & PAGE_MASK,
|
ret = get_gate_page(mm, start & PAGE_MASK,
|
||||||
|
@ -1685,35 +1701,35 @@ long populate_vma_page_range(struct vm_area_struct *vma,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* faultin_vma_page_range() - populate (prefault) page tables inside the
|
* faultin_page_range() - populate (prefault) page tables inside the
|
||||||
* given VMA range readable/writable
|
* given range readable/writable
|
||||||
*
|
*
|
||||||
* This takes care of mlocking the pages, too, if VM_LOCKED is set.
|
* This takes care of mlocking the pages, too, if VM_LOCKED is set.
|
||||||
*
|
*
|
||||||
* @vma: target vma
|
* @mm: the mm to populate page tables in
|
||||||
* @start: start address
|
* @start: start address
|
||||||
* @end: end address
|
* @end: end address
|
||||||
* @write: whether to prefault readable or writable
|
* @write: whether to prefault readable or writable
|
||||||
* @locked: whether the mmap_lock is still held
|
* @locked: whether the mmap_lock is still held
|
||||||
*
|
*
|
||||||
* Returns either number of processed pages in the vma, or a negative error
|
* Returns either number of processed pages in the MM, or a negative error
|
||||||
* code on error (see __get_user_pages()).
|
* code on error (see __get_user_pages()). Note that this function reports
|
||||||
|
* errors related to VMAs, such as incompatible mappings, as expected by
|
||||||
|
* MADV_POPULATE_(READ|WRITE).
|
||||||
*
|
*
|
||||||
* vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
|
* The range must be page-aligned.
|
||||||
* covered by the VMA. If it's released, *@locked will be set to 0.
|
*
|
||||||
|
* mm->mmap_lock must be held. If it's released, *@locked will be set to 0.
|
||||||
*/
|
*/
|
||||||
long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
|
long faultin_page_range(struct mm_struct *mm, unsigned long start,
|
||||||
unsigned long end, bool write, int *locked)
|
unsigned long end, bool write, int *locked)
|
||||||
{
|
{
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
|
||||||
unsigned long nr_pages = (end - start) / PAGE_SIZE;
|
unsigned long nr_pages = (end - start) / PAGE_SIZE;
|
||||||
int gup_flags;
|
int gup_flags;
|
||||||
long ret;
|
long ret;
|
||||||
|
|
||||||
VM_BUG_ON(!PAGE_ALIGNED(start));
|
VM_BUG_ON(!PAGE_ALIGNED(start));
|
||||||
VM_BUG_ON(!PAGE_ALIGNED(end));
|
VM_BUG_ON(!PAGE_ALIGNED(end));
|
||||||
VM_BUG_ON_VMA(start < vma->vm_start, vma);
|
|
||||||
VM_BUG_ON_VMA(end > vma->vm_end, vma);
|
|
||||||
mmap_assert_locked(mm);
|
mmap_assert_locked(mm);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1725,19 +1741,13 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
|
||||||
* a poisoned page.
|
* a poisoned page.
|
||||||
* !FOLL_FORCE: Require proper access permissions.
|
* !FOLL_FORCE: Require proper access permissions.
|
||||||
*/
|
*/
|
||||||
gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE;
|
gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE |
|
||||||
|
FOLL_MADV_POPULATE;
|
||||||
if (write)
|
if (write)
|
||||||
gup_flags |= FOLL_WRITE;
|
gup_flags |= FOLL_WRITE;
|
||||||
|
|
||||||
/*
|
ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked,
|
||||||
* We want to report -EINVAL instead of -EFAULT for any permission
|
gup_flags);
|
||||||
* problems or incompatible mappings.
|
|
||||||
*/
|
|
||||||
if (check_vma_flags(vma, gup_flags))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
ret = __get_user_pages(mm, start, nr_pages, gup_flags,
|
|
||||||
NULL, locked);
|
|
||||||
lru_add_drain();
|
lru_add_drain();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2259,9 +2259,6 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
|
||||||
goto unlock_ptls;
|
goto unlock_ptls;
|
||||||
}
|
}
|
||||||
|
|
||||||
folio_move_anon_rmap(src_folio, dst_vma);
|
|
||||||
WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
|
|
||||||
|
|
||||||
src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd);
|
src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd);
|
||||||
/* Folio got pinned from under us. Put it back and fail the move. */
|
/* Folio got pinned from under us. Put it back and fail the move. */
|
||||||
if (folio_maybe_dma_pinned(src_folio)) {
|
if (folio_maybe_dma_pinned(src_folio)) {
|
||||||
|
@ -2270,6 +2267,9 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
|
||||||
goto unlock_ptls;
|
goto unlock_ptls;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
folio_move_anon_rmap(src_folio, dst_vma);
|
||||||
|
WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
|
||||||
|
|
||||||
_dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
|
_dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
|
||||||
/* Follow mremap() behavior and treat the entry dirty after the move */
|
/* Follow mremap() behavior and treat the entry dirty after the move */
|
||||||
_dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma);
|
_dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma);
|
||||||
|
|
10
mm/hugetlb.c
10
mm/hugetlb.c
|
@ -7044,9 +7044,13 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
|
||||||
if (!pte_same(pte, newpte))
|
if (!pte_same(pte, newpte))
|
||||||
set_huge_pte_at(mm, address, ptep, newpte, psize);
|
set_huge_pte_at(mm, address, ptep, newpte, psize);
|
||||||
} else if (unlikely(is_pte_marker(pte))) {
|
} else if (unlikely(is_pte_marker(pte))) {
|
||||||
/* No other markers apply for now. */
|
/*
|
||||||
WARN_ON_ONCE(!pte_marker_uffd_wp(pte));
|
* Do nothing on a poison marker; page is
|
||||||
if (uffd_wp_resolve)
|
* corrupted, permissions do not apply. Here
|
||||||
|
* pte_marker_uffd_wp()==true implies !poison
|
||||||
|
* because they're mutually exclusive.
|
||||||
|
*/
|
||||||
|
if (pte_marker_uffd_wp(pte) && uffd_wp_resolve)
|
||||||
/* Safe to modify directly (non-present->none). */
|
/* Safe to modify directly (non-present->none). */
|
||||||
huge_pte_clear(mm, address, ptep, psize);
|
huge_pte_clear(mm, address, ptep, psize);
|
||||||
} else if (!huge_pte_none(pte)) {
|
} else if (!huge_pte_none(pte)) {
|
||||||
|
|
|
@ -686,9 +686,8 @@ struct anon_vma *folio_anon_vma(struct folio *folio);
|
||||||
void unmap_mapping_folio(struct folio *folio);
|
void unmap_mapping_folio(struct folio *folio);
|
||||||
extern long populate_vma_page_range(struct vm_area_struct *vma,
|
extern long populate_vma_page_range(struct vm_area_struct *vma,
|
||||||
unsigned long start, unsigned long end, int *locked);
|
unsigned long start, unsigned long end, int *locked);
|
||||||
extern long faultin_vma_page_range(struct vm_area_struct *vma,
|
extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
|
||||||
unsigned long start, unsigned long end,
|
unsigned long end, bool write, int *locked);
|
||||||
bool write, int *locked);
|
|
||||||
extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
|
extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
|
||||||
unsigned long bytes);
|
unsigned long bytes);
|
||||||
|
|
||||||
|
@ -1127,10 +1126,13 @@ enum {
|
||||||
FOLL_FAST_ONLY = 1 << 20,
|
FOLL_FAST_ONLY = 1 << 20,
|
||||||
/* allow unlocking the mmap lock */
|
/* allow unlocking the mmap lock */
|
||||||
FOLL_UNLOCKABLE = 1 << 21,
|
FOLL_UNLOCKABLE = 1 << 21,
|
||||||
|
/* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */
|
||||||
|
FOLL_MADV_POPULATE = 1 << 22,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
|
#define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
|
||||||
FOLL_FAST_ONLY | FOLL_UNLOCKABLE)
|
FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \
|
||||||
|
FOLL_MADV_POPULATE)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Indicates for which pages that are write-protected in the page table,
|
* Indicates for which pages that are write-protected in the page table,
|
||||||
|
|
17
mm/madvise.c
17
mm/madvise.c
|
@ -908,27 +908,14 @@ static long madvise_populate(struct vm_area_struct *vma,
|
||||||
{
|
{
|
||||||
const bool write = behavior == MADV_POPULATE_WRITE;
|
const bool write = behavior == MADV_POPULATE_WRITE;
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
struct mm_struct *mm = vma->vm_mm;
|
||||||
unsigned long tmp_end;
|
|
||||||
int locked = 1;
|
int locked = 1;
|
||||||
long pages;
|
long pages;
|
||||||
|
|
||||||
*prev = vma;
|
*prev = vma;
|
||||||
|
|
||||||
while (start < end) {
|
while (start < end) {
|
||||||
/*
|
|
||||||
* We might have temporarily dropped the lock. For example,
|
|
||||||
* our VMA might have been split.
|
|
||||||
*/
|
|
||||||
if (!vma || start >= vma->vm_end) {
|
|
||||||
vma = vma_lookup(mm, start);
|
|
||||||
if (!vma)
|
|
||||||
return -ENOMEM;
|
|
||||||
}
|
|
||||||
|
|
||||||
tmp_end = min_t(unsigned long, end, vma->vm_end);
|
|
||||||
/* Populate (prefault) page tables readable/writable. */
|
/* Populate (prefault) page tables readable/writable. */
|
||||||
pages = faultin_vma_page_range(vma, start, tmp_end, write,
|
pages = faultin_page_range(mm, start, end, write, &locked);
|
||||||
&locked);
|
|
||||||
if (!locked) {
|
if (!locked) {
|
||||||
mmap_read_lock(mm);
|
mmap_read_lock(mm);
|
||||||
locked = 1;
|
locked = 1;
|
||||||
|
@ -949,7 +936,7 @@ static long madvise_populate(struct vm_area_struct *vma,
|
||||||
pr_warn_once("%s: unhandled return value: %ld\n",
|
pr_warn_once("%s: unhandled return value: %ld\n",
|
||||||
__func__, pages);
|
__func__, pages);
|
||||||
fallthrough;
|
fallthrough;
|
||||||
case -ENOMEM:
|
case -ENOMEM: /* No VMA or out of memory. */
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -154,11 +154,23 @@ static int __page_handle_poison(struct page *page)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
zone_pcp_disable(page_zone(page));
|
/*
|
||||||
|
* zone_pcp_disable() can't be used here. It will
|
||||||
|
* hold pcp_batch_high_lock and dissolve_free_huge_page() might hold
|
||||||
|
* cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap
|
||||||
|
* optimization is enabled. This will break current lock dependency
|
||||||
|
* chain and lead to deadlock.
|
||||||
|
* Disabling pcp before dissolving the page was a deterministic
|
||||||
|
* approach because we made sure that those pages cannot end up in any
|
||||||
|
* PCP list. Draining PCP lists expels those pages to the buddy system,
|
||||||
|
* but nothing guarantees that those pages do not get back to a PCP
|
||||||
|
* queue if we need to refill those.
|
||||||
|
*/
|
||||||
ret = dissolve_free_huge_page(page);
|
ret = dissolve_free_huge_page(page);
|
||||||
if (!ret)
|
if (!ret) {
|
||||||
|
drain_all_pages(page_zone(page));
|
||||||
ret = take_page_off_buddy(page);
|
ret = take_page_off_buddy(page);
|
||||||
zone_pcp_enable(page_zone(page));
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
190
mm/page_owner.c
190
mm/page_owner.c
|
@ -118,7 +118,6 @@ static __init void init_page_owner(void)
|
||||||
register_dummy_stack();
|
register_dummy_stack();
|
||||||
register_failure_stack();
|
register_failure_stack();
|
||||||
register_early_stack();
|
register_early_stack();
|
||||||
static_branch_enable(&page_owner_inited);
|
|
||||||
init_early_allocated_pages();
|
init_early_allocated_pages();
|
||||||
/* Initialize dummy and failure stacks and link them to stack_list */
|
/* Initialize dummy and failure stacks and link them to stack_list */
|
||||||
dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle);
|
dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle);
|
||||||
|
@ -129,6 +128,7 @@ static __init void init_page_owner(void)
|
||||||
refcount_set(&failure_stack.stack_record->count, 1);
|
refcount_set(&failure_stack.stack_record->count, 1);
|
||||||
dummy_stack.next = &failure_stack;
|
dummy_stack.next = &failure_stack;
|
||||||
stack_list = &dummy_stack;
|
stack_list = &dummy_stack;
|
||||||
|
static_branch_enable(&page_owner_inited);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct page_ext_operations page_owner_ops = {
|
struct page_ext_operations page_owner_ops = {
|
||||||
|
@ -196,7 +196,8 @@ static void add_stack_record_to_list(struct stack_record *stack_record,
|
||||||
spin_unlock_irqrestore(&stack_list_lock, flags);
|
spin_unlock_irqrestore(&stack_list_lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
|
static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask,
|
||||||
|
int nr_base_pages)
|
||||||
{
|
{
|
||||||
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
|
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
|
||||||
|
|
||||||
|
@ -217,20 +218,74 @@ static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
|
||||||
/* Add the new stack_record to our list */
|
/* Add the new stack_record to our list */
|
||||||
add_stack_record_to_list(stack_record, gfp_mask);
|
add_stack_record_to_list(stack_record, gfp_mask);
|
||||||
}
|
}
|
||||||
refcount_inc(&stack_record->count);
|
refcount_add(nr_base_pages, &stack_record->count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void dec_stack_record_count(depot_stack_handle_t handle)
|
static void dec_stack_record_count(depot_stack_handle_t handle,
|
||||||
|
int nr_base_pages)
|
||||||
{
|
{
|
||||||
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
|
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
|
||||||
|
|
||||||
if (stack_record)
|
if (!stack_record)
|
||||||
refcount_dec(&stack_record->count);
|
return;
|
||||||
|
|
||||||
|
if (refcount_sub_and_test(nr_base_pages, &stack_record->count))
|
||||||
|
pr_warn("%s: refcount went to 0 for %u handle\n", __func__,
|
||||||
|
handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void __update_page_owner_handle(struct page_ext *page_ext,
|
||||||
|
depot_stack_handle_t handle,
|
||||||
|
unsigned short order,
|
||||||
|
gfp_t gfp_mask,
|
||||||
|
short last_migrate_reason, u64 ts_nsec,
|
||||||
|
pid_t pid, pid_t tgid, char *comm)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct page_owner *page_owner;
|
||||||
|
|
||||||
|
for (i = 0; i < (1 << order); i++) {
|
||||||
|
page_owner = get_page_owner(page_ext);
|
||||||
|
page_owner->handle = handle;
|
||||||
|
page_owner->order = order;
|
||||||
|
page_owner->gfp_mask = gfp_mask;
|
||||||
|
page_owner->last_migrate_reason = last_migrate_reason;
|
||||||
|
page_owner->pid = pid;
|
||||||
|
page_owner->tgid = tgid;
|
||||||
|
page_owner->ts_nsec = ts_nsec;
|
||||||
|
strscpy(page_owner->comm, comm,
|
||||||
|
sizeof(page_owner->comm));
|
||||||
|
__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
|
||||||
|
__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
|
||||||
|
page_ext = page_ext_next(page_ext);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void __update_page_owner_free_handle(struct page_ext *page_ext,
|
||||||
|
depot_stack_handle_t handle,
|
||||||
|
unsigned short order,
|
||||||
|
pid_t pid, pid_t tgid,
|
||||||
|
u64 free_ts_nsec)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct page_owner *page_owner;
|
||||||
|
|
||||||
|
for (i = 0; i < (1 << order); i++) {
|
||||||
|
page_owner = get_page_owner(page_ext);
|
||||||
|
/* Only __reset_page_owner() wants to clear the bit */
|
||||||
|
if (handle) {
|
||||||
|
__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
|
||||||
|
page_owner->free_handle = handle;
|
||||||
|
}
|
||||||
|
page_owner->free_ts_nsec = free_ts_nsec;
|
||||||
|
page_owner->free_pid = current->pid;
|
||||||
|
page_owner->free_tgid = current->tgid;
|
||||||
|
page_ext = page_ext_next(page_ext);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void __reset_page_owner(struct page *page, unsigned short order)
|
void __reset_page_owner(struct page *page, unsigned short order)
|
||||||
{
|
{
|
||||||
int i;
|
|
||||||
struct page_ext *page_ext;
|
struct page_ext *page_ext;
|
||||||
depot_stack_handle_t handle;
|
depot_stack_handle_t handle;
|
||||||
depot_stack_handle_t alloc_handle;
|
depot_stack_handle_t alloc_handle;
|
||||||
|
@ -245,16 +300,10 @@ void __reset_page_owner(struct page *page, unsigned short order)
|
||||||
alloc_handle = page_owner->handle;
|
alloc_handle = page_owner->handle;
|
||||||
|
|
||||||
handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
|
handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
|
||||||
for (i = 0; i < (1 << order); i++) {
|
__update_page_owner_free_handle(page_ext, handle, order, current->pid,
|
||||||
__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
|
current->tgid, free_ts_nsec);
|
||||||
page_owner->free_handle = handle;
|
|
||||||
page_owner->free_ts_nsec = free_ts_nsec;
|
|
||||||
page_owner->free_pid = current->pid;
|
|
||||||
page_owner->free_tgid = current->tgid;
|
|
||||||
page_ext = page_ext_next(page_ext);
|
|
||||||
page_owner = get_page_owner(page_ext);
|
|
||||||
}
|
|
||||||
page_ext_put(page_ext);
|
page_ext_put(page_ext);
|
||||||
|
|
||||||
if (alloc_handle != early_handle)
|
if (alloc_handle != early_handle)
|
||||||
/*
|
/*
|
||||||
* early_handle is being set as a handle for all those
|
* early_handle is being set as a handle for all those
|
||||||
|
@ -263,39 +312,14 @@ void __reset_page_owner(struct page *page, unsigned short order)
|
||||||
* the machinery is not ready yet, we cannot decrement
|
* the machinery is not ready yet, we cannot decrement
|
||||||
* their refcount either.
|
* their refcount either.
|
||||||
*/
|
*/
|
||||||
dec_stack_record_count(alloc_handle);
|
dec_stack_record_count(alloc_handle, 1 << order);
|
||||||
}
|
|
||||||
|
|
||||||
static inline void __set_page_owner_handle(struct page_ext *page_ext,
|
|
||||||
depot_stack_handle_t handle,
|
|
||||||
unsigned short order, gfp_t gfp_mask)
|
|
||||||
{
|
|
||||||
struct page_owner *page_owner;
|
|
||||||
int i;
|
|
||||||
u64 ts_nsec = local_clock();
|
|
||||||
|
|
||||||
for (i = 0; i < (1 << order); i++) {
|
|
||||||
page_owner = get_page_owner(page_ext);
|
|
||||||
page_owner->handle = handle;
|
|
||||||
page_owner->order = order;
|
|
||||||
page_owner->gfp_mask = gfp_mask;
|
|
||||||
page_owner->last_migrate_reason = -1;
|
|
||||||
page_owner->pid = current->pid;
|
|
||||||
page_owner->tgid = current->tgid;
|
|
||||||
page_owner->ts_nsec = ts_nsec;
|
|
||||||
strscpy(page_owner->comm, current->comm,
|
|
||||||
sizeof(page_owner->comm));
|
|
||||||
__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
|
|
||||||
__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
|
|
||||||
|
|
||||||
page_ext = page_ext_next(page_ext);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
noinline void __set_page_owner(struct page *page, unsigned short order,
|
noinline void __set_page_owner(struct page *page, unsigned short order,
|
||||||
gfp_t gfp_mask)
|
gfp_t gfp_mask)
|
||||||
{
|
{
|
||||||
struct page_ext *page_ext;
|
struct page_ext *page_ext;
|
||||||
|
u64 ts_nsec = local_clock();
|
||||||
depot_stack_handle_t handle;
|
depot_stack_handle_t handle;
|
||||||
|
|
||||||
handle = save_stack(gfp_mask);
|
handle = save_stack(gfp_mask);
|
||||||
|
@ -303,9 +327,11 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
|
||||||
page_ext = page_ext_get(page);
|
page_ext = page_ext_get(page);
|
||||||
if (unlikely(!page_ext))
|
if (unlikely(!page_ext))
|
||||||
return;
|
return;
|
||||||
__set_page_owner_handle(page_ext, handle, order, gfp_mask);
|
__update_page_owner_handle(page_ext, handle, order, gfp_mask, -1,
|
||||||
|
current->pid, current->tgid, ts_nsec,
|
||||||
|
current->comm);
|
||||||
page_ext_put(page_ext);
|
page_ext_put(page_ext);
|
||||||
inc_stack_record_count(handle, gfp_mask);
|
inc_stack_record_count(handle, gfp_mask, 1 << order);
|
||||||
}
|
}
|
||||||
|
|
||||||
void __set_page_owner_migrate_reason(struct page *page, int reason)
|
void __set_page_owner_migrate_reason(struct page *page, int reason)
|
||||||
|
@ -340,9 +366,12 @@ void __split_page_owner(struct page *page, int old_order, int new_order)
|
||||||
|
|
||||||
void __folio_copy_owner(struct folio *newfolio, struct folio *old)
|
void __folio_copy_owner(struct folio *newfolio, struct folio *old)
|
||||||
{
|
{
|
||||||
|
int i;
|
||||||
struct page_ext *old_ext;
|
struct page_ext *old_ext;
|
||||||
struct page_ext *new_ext;
|
struct page_ext *new_ext;
|
||||||
struct page_owner *old_page_owner, *new_page_owner;
|
struct page_owner *old_page_owner;
|
||||||
|
struct page_owner *new_page_owner;
|
||||||
|
depot_stack_handle_t migrate_handle;
|
||||||
|
|
||||||
old_ext = page_ext_get(&old->page);
|
old_ext = page_ext_get(&old->page);
|
||||||
if (unlikely(!old_ext))
|
if (unlikely(!old_ext))
|
||||||
|
@ -356,30 +385,32 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
|
||||||
|
|
||||||
old_page_owner = get_page_owner(old_ext);
|
old_page_owner = get_page_owner(old_ext);
|
||||||
new_page_owner = get_page_owner(new_ext);
|
new_page_owner = get_page_owner(new_ext);
|
||||||
new_page_owner->order = old_page_owner->order;
|
migrate_handle = new_page_owner->handle;
|
||||||
new_page_owner->gfp_mask = old_page_owner->gfp_mask;
|
__update_page_owner_handle(new_ext, old_page_owner->handle,
|
||||||
new_page_owner->last_migrate_reason =
|
old_page_owner->order, old_page_owner->gfp_mask,
|
||||||
old_page_owner->last_migrate_reason;
|
old_page_owner->last_migrate_reason,
|
||||||
new_page_owner->handle = old_page_owner->handle;
|
old_page_owner->ts_nsec, old_page_owner->pid,
|
||||||
new_page_owner->pid = old_page_owner->pid;
|
old_page_owner->tgid, old_page_owner->comm);
|
||||||
new_page_owner->tgid = old_page_owner->tgid;
|
|
||||||
new_page_owner->free_pid = old_page_owner->free_pid;
|
|
||||||
new_page_owner->free_tgid = old_page_owner->free_tgid;
|
|
||||||
new_page_owner->ts_nsec = old_page_owner->ts_nsec;
|
|
||||||
new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
|
|
||||||
strcpy(new_page_owner->comm, old_page_owner->comm);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We don't clear the bit on the old folio as it's going to be freed
|
* Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio
|
||||||
* after migration. Until then, the info can be useful in case of
|
* will be freed after migration. Keep them until then as they may be
|
||||||
* a bug, and the overall stats will be off a bit only temporarily.
|
* useful.
|
||||||
* Also, migrate_misplaced_transhuge_page() can still fail the
|
|
||||||
* migration and then we want the old folio to retain the info. But
|
|
||||||
* in that case we also don't need to explicitly clear the info from
|
|
||||||
* the new page, which will be freed.
|
|
||||||
*/
|
*/
|
||||||
__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
|
__update_page_owner_free_handle(new_ext, 0, old_page_owner->order,
|
||||||
__set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
|
old_page_owner->free_pid,
|
||||||
|
old_page_owner->free_tgid,
|
||||||
|
old_page_owner->free_ts_nsec);
|
||||||
|
/*
|
||||||
|
* We linked the original stack to the new folio, we need to do the same
|
||||||
|
* for the new one and the old folio otherwise there will be an imbalance
|
||||||
|
* when subtracting those pages from the stack.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < (1 << new_page_owner->order); i++) {
|
||||||
|
old_page_owner->handle = migrate_handle;
|
||||||
|
old_ext = page_ext_next(old_ext);
|
||||||
|
old_page_owner = get_page_owner(old_ext);
|
||||||
|
}
|
||||||
|
|
||||||
page_ext_put(new_ext);
|
page_ext_put(new_ext);
|
||||||
page_ext_put(old_ext);
|
page_ext_put(old_ext);
|
||||||
}
|
}
|
||||||
|
@ -787,8 +818,9 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
|
||||||
goto ext_put_continue;
|
goto ext_put_continue;
|
||||||
|
|
||||||
/* Found early allocated page */
|
/* Found early allocated page */
|
||||||
__set_page_owner_handle(page_ext, early_handle,
|
__update_page_owner_handle(page_ext, early_handle, 0, 0,
|
||||||
0, 0);
|
-1, local_clock(), current->pid,
|
||||||
|
current->tgid, current->comm);
|
||||||
count++;
|
count++;
|
||||||
ext_put_continue:
|
ext_put_continue:
|
||||||
page_ext_put(page_ext);
|
page_ext_put(page_ext);
|
||||||
|
@ -840,13 +872,11 @@ static void *stack_start(struct seq_file *m, loff_t *ppos)
|
||||||
* value of stack_list.
|
* value of stack_list.
|
||||||
*/
|
*/
|
||||||
stack = smp_load_acquire(&stack_list);
|
stack = smp_load_acquire(&stack_list);
|
||||||
|
m->private = stack;
|
||||||
} else {
|
} else {
|
||||||
stack = m->private;
|
stack = m->private;
|
||||||
stack = stack->next;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
m->private = stack;
|
|
||||||
|
|
||||||
return stack;
|
return stack;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -861,11 +891,11 @@ static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
|
||||||
return stack;
|
return stack;
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned long page_owner_stack_threshold;
|
static unsigned long page_owner_pages_threshold;
|
||||||
|
|
||||||
static int stack_print(struct seq_file *m, void *v)
|
static int stack_print(struct seq_file *m, void *v)
|
||||||
{
|
{
|
||||||
int i, stack_count;
|
int i, nr_base_pages;
|
||||||
struct stack *stack = v;
|
struct stack *stack = v;
|
||||||
unsigned long *entries;
|
unsigned long *entries;
|
||||||
unsigned long nr_entries;
|
unsigned long nr_entries;
|
||||||
|
@ -876,14 +906,14 @@ static int stack_print(struct seq_file *m, void *v)
|
||||||
|
|
||||||
nr_entries = stack_record->size;
|
nr_entries = stack_record->size;
|
||||||
entries = stack_record->entries;
|
entries = stack_record->entries;
|
||||||
stack_count = refcount_read(&stack_record->count) - 1;
|
nr_base_pages = refcount_read(&stack_record->count) - 1;
|
||||||
|
|
||||||
if (stack_count < 1 || stack_count < page_owner_stack_threshold)
|
if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
for (i = 0; i < nr_entries; i++)
|
for (i = 0; i < nr_entries; i++)
|
||||||
seq_printf(m, " %pS\n", (void *)entries[i]);
|
seq_printf(m, " %pS\n", (void *)entries[i]);
|
||||||
seq_printf(m, "stack_count: %d\n\n", stack_count);
|
seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -913,13 +943,13 @@ static const struct file_operations page_owner_stack_operations = {
|
||||||
|
|
||||||
static int page_owner_threshold_get(void *data, u64 *val)
|
static int page_owner_threshold_get(void *data, u64 *val)
|
||||||
{
|
{
|
||||||
*val = READ_ONCE(page_owner_stack_threshold);
|
*val = READ_ONCE(page_owner_pages_threshold);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int page_owner_threshold_set(void *data, u64 val)
|
static int page_owner_threshold_set(void *data, u64 val)
|
||||||
{
|
{
|
||||||
WRITE_ONCE(page_owner_stack_threshold, val);
|
WRITE_ONCE(page_owner_pages_threshold, val);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -748,12 +748,6 @@ static long shmem_unused_huge_count(struct super_block *sb,
|
||||||
|
|
||||||
#define shmem_huge SHMEM_HUGE_DENY
|
#define shmem_huge SHMEM_HUGE_DENY
|
||||||
|
|
||||||
bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
|
|
||||||
struct mm_struct *mm, unsigned long vm_flags)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
|
static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
|
||||||
struct shrink_control *sc, unsigned long nr_to_split)
|
struct shrink_control *sc, unsigned long nr_to_split)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue