selftests/vm: add selftest to verify multi THP collapse

Add support to allocate and verify collapse of multiple hugepage-sized
regions into multiple THPs.

Add an "nr_hpages" argument to check_huge() that instructs it to check for
exactly "nr_hpages" THPs.  This has the added benefit of now being able to
check for exactly 0 THPs, and so callsites that previously checked the
negation of exactly 1 THP are now more correct.

The ->collapse hook of struct collapse_context has been expanded with an
"nr_hpages" argument to collapse "nr_hpages" hugepages.  The
collapse_full() test has been repurposed to collapse 4 THPs at once.  It
is expected that more tests will want to test multi THP collapse (e.g.
file/shmem).

This is of particular benefit to the madvise collapse context, given that
it may perform many THP collapses during a single syscall.

Link: https://lkml.kernel.org/r/20220706235936.2197195-19-zokeefe@google.com
Signed-off-by: Zach O'Keefe <zokeefe@google.com>
Cc: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Chris Kennelly <ckennelly@google.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Pavel Begunkov <asml.silence@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Rongwei Wang <rongwei.wang@linux.alibaba.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: "Souptick Joarder (HPE)" <jrdr.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Zach O'Keefe 2022-07-06 16:59:36 -07:00 committed by Andrew Morton
parent 1370a21fe4
commit 9d0d946840
1 changed files with 73 additions and 67 deletions

View File

@ -27,7 +27,7 @@ static int hpage_pmd_nr;
#define PID_SMAPS "/proc/self/smaps"
struct collapse_context {
void (*collapse)(const char *msg, char *p, bool expect);
void (*collapse)(const char *msg, char *p, int nr_hpages, bool expect);
bool enforce_pte_scan_limits;
};
@ -362,7 +362,7 @@ static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
return false;
}
static bool check_huge(void *addr)
static bool check_huge(void *addr, int nr_hpages)
{
bool thp = false;
int ret;
@ -387,7 +387,7 @@ static bool check_huge(void *addr)
goto err_out;
ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
hpage_pmd_size >> 10);
nr_hpages * (hpage_pmd_size >> 10));
if (ret >= MAX_LINE_LENGTH) {
printf("%s: Pattern is too long\n", __func__);
exit(EXIT_FAILURE);
@ -455,12 +455,12 @@ err_out:
return swap;
}
static void *alloc_mapping(void)
static void *alloc_mapping(int nr)
{
void *p;
p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (p != BASE_ADDR) {
printf("Failed to allocate VMA at %p\n", BASE_ADDR);
exit(EXIT_FAILURE);
@ -485,11 +485,11 @@ static void *alloc_hpage(void)
{
void *p;
p = alloc_mapping();
p = alloc_mapping(1);
printf("Allocate huge page...");
madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
fill_memory(p, 0, hpage_pmd_size);
if (check_huge(p))
if (check_huge(p, 1))
success("OK");
else
fail("Fail");
@ -509,14 +509,15 @@ static void validate_memory(int *p, unsigned long start, unsigned long end)
}
}
static void madvise_collapse(const char *msg, char *p, bool expect)
static void madvise_collapse(const char *msg, char *p, int nr_hpages,
bool expect)
{
int ret;
struct settings settings = *current_settings();
printf("%s...", msg);
/* Sanity check */
if (check_huge(p)) {
if (!check_huge(p, 0)) {
printf("Unexpected huge page\n");
exit(EXIT_FAILURE);
}
@ -529,11 +530,11 @@ static void madvise_collapse(const char *msg, char *p, bool expect)
push_settings(&settings);
/* Clear VM_NOHUGEPAGE */
madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
ret = madvise(p, hpage_pmd_size, MADV_COLLAPSE);
madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
ret = madvise(p, nr_hpages * hpage_pmd_size, MADV_COLLAPSE);
if (((bool)ret) == expect)
fail("Fail: Bad return value");
else if (check_huge(p) != expect)
else if (check_huge(p, nr_hpages) != expect)
fail("Fail: check_huge()");
else
success("OK");
@ -542,25 +543,25 @@ static void madvise_collapse(const char *msg, char *p, bool expect)
}
#define TICK 500000
static bool wait_for_scan(const char *msg, char *p)
static bool wait_for_scan(const char *msg, char *p, int nr_hpages)
{
int full_scans;
int timeout = 6; /* 3 seconds */
/* Sanity check */
if (check_huge(p)) {
if (!check_huge(p, 0)) {
printf("Unexpected huge page\n");
exit(EXIT_FAILURE);
}
madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
/* Wait until the second full_scan completed */
full_scans = read_num("khugepaged/full_scans") + 2;
printf("%s...", msg);
while (timeout--) {
if (check_huge(p))
if (check_huge(p, nr_hpages))
break;
if (read_num("khugepaged/full_scans") >= full_scans)
break;
@ -568,20 +569,21 @@ static bool wait_for_scan(const char *msg, char *p)
usleep(TICK);
}
madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE);
return timeout == -1;
}
static void khugepaged_collapse(const char *msg, char *p, bool expect)
static void khugepaged_collapse(const char *msg, char *p, int nr_hpages,
bool expect)
{
if (wait_for_scan(msg, p)) {
if (wait_for_scan(msg, p, nr_hpages)) {
if (expect)
fail("Timeout");
else
success("OK");
return;
} else if (check_huge(p) == expect) {
} else if (check_huge(p, nr_hpages) == expect) {
success("OK");
} else {
fail("Fail");
@ -596,10 +598,10 @@ static void alloc_at_fault(void)
settings.thp_enabled = THP_ALWAYS;
push_settings(&settings);
p = alloc_mapping();
p = alloc_mapping(1);
*p = 1;
printf("Allocate huge page on fault...");
if (check_huge(p))
if (check_huge(p, 1))
success("OK");
else
fail("Fail");
@ -608,7 +610,7 @@ static void alloc_at_fault(void)
madvise(p, page_size, MADV_DONTNEED);
printf("Split huge PMD on MADV_DONTNEED...");
if (!check_huge(p))
if (check_huge(p, 0))
success("OK");
else
fail("Fail");
@ -618,20 +620,23 @@ static void alloc_at_fault(void)
static void collapse_full(struct collapse_context *c)
{
void *p;
int nr_hpages = 4;
unsigned long size = nr_hpages * hpage_pmd_size;
p = alloc_mapping();
fill_memory(p, 0, hpage_pmd_size);
c->collapse("Collapse fully populated PTE table", p, true);
validate_memory(p, 0, hpage_pmd_size);
munmap(p, hpage_pmd_size);
p = alloc_mapping(nr_hpages);
fill_memory(p, 0, size);
c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages,
true);
validate_memory(p, 0, size);
munmap(p, size);
}
static void collapse_empty(struct collapse_context *c)
{
void *p;
p = alloc_mapping();
c->collapse("Do not collapse empty PTE table", p, false);
p = alloc_mapping(1);
c->collapse("Do not collapse empty PTE table", p, 1, false);
munmap(p, hpage_pmd_size);
}
@ -639,10 +644,10 @@ static void collapse_single_pte_entry(struct collapse_context *c)
{
void *p;
p = alloc_mapping();
p = alloc_mapping(1);
fill_memory(p, 0, page_size);
c->collapse("Collapse PTE table with single PTE entry present", p,
true);
1, true);
validate_memory(p, 0, page_size);
munmap(p, hpage_pmd_size);
}
@ -656,16 +661,17 @@ static void collapse_max_ptes_none(struct collapse_context *c)
settings.khugepaged.max_ptes_none = max_ptes_none;
push_settings(&settings);
p = alloc_mapping();
p = alloc_mapping(1);
fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
c->collapse("Maybe collapse with max_ptes_none exceeded", p,
c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1,
!c->enforce_pte_scan_limits);
validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
if (c->enforce_pte_scan_limits) {
fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
c->collapse("Collapse with max_ptes_none PTEs empty", p, true);
c->collapse("Collapse with max_ptes_none PTEs empty", p, 1,
true);
validate_memory(p, 0,
(hpage_pmd_nr - max_ptes_none) * page_size);
}
@ -677,7 +683,7 @@ static void collapse_max_ptes_none(struct collapse_context *c)
static void collapse_swapin_single_pte(struct collapse_context *c)
{
void *p;
p = alloc_mapping();
p = alloc_mapping(1);
fill_memory(p, 0, hpage_pmd_size);
printf("Swapout one page...");
@ -692,7 +698,7 @@ static void collapse_swapin_single_pte(struct collapse_context *c)
goto out;
}
c->collapse("Collapse with swapping in single PTE entry", p, true);
c->collapse("Collapse with swapping in single PTE entry", p, 1, true);
validate_memory(p, 0, hpage_pmd_size);
out:
munmap(p, hpage_pmd_size);
@ -703,7 +709,7 @@ static void collapse_max_ptes_swap(struct collapse_context *c)
int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
void *p;
p = alloc_mapping();
p = alloc_mapping(1);
fill_memory(p, 0, hpage_pmd_size);
printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
@ -718,7 +724,7 @@ static void collapse_max_ptes_swap(struct collapse_context *c)
goto out;
}
c->collapse("Maybe collapse with max_ptes_swap exceeded", p,
c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1,
!c->enforce_pte_scan_limits);
validate_memory(p, 0, hpage_pmd_size);
@ -738,7 +744,7 @@ static void collapse_max_ptes_swap(struct collapse_context *c)
}
c->collapse("Collapse with max_ptes_swap pages swapped out", p,
true);
1, true);
validate_memory(p, 0, hpage_pmd_size);
}
out:
@ -753,13 +759,13 @@ static void collapse_single_pte_entry_compound(struct collapse_context *c)
madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
printf("Split huge page leaving single PTE mapping compound page...");
madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
if (!check_huge(p))
if (check_huge(p, 0))
success("OK");
else
fail("Fail");
c->collapse("Collapse PTE table with single PTE mapping compound page",
p, true);
p, 1, true);
validate_memory(p, 0, page_size);
munmap(p, hpage_pmd_size);
}
@ -772,12 +778,12 @@ static void collapse_full_of_compound(struct collapse_context *c)
printf("Split huge page leaving single PTE page table full of compound pages...");
madvise(p, page_size, MADV_NOHUGEPAGE);
madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
if (!check_huge(p))
if (check_huge(p, 0))
success("OK");
else
fail("Fail");
c->collapse("Collapse PTE table full of compound pages", p, true);
c->collapse("Collapse PTE table full of compound pages", p, 1, true);
validate_memory(p, 0, hpage_pmd_size);
munmap(p, hpage_pmd_size);
}
@ -787,14 +793,14 @@ static void collapse_compound_extreme(struct collapse_context *c)
void *p;
int i;
p = alloc_mapping();
p = alloc_mapping(1);
for (i = 0; i < hpage_pmd_nr; i++) {
printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
i + 1, hpage_pmd_nr);
madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
fill_memory(BASE_ADDR, 0, hpage_pmd_size);
if (!check_huge(BASE_ADDR)) {
if (!check_huge(BASE_ADDR, 1)) {
printf("Failed to allocate huge page\n");
exit(EXIT_FAILURE);
}
@ -823,12 +829,12 @@ static void collapse_compound_extreme(struct collapse_context *c)
munmap(BASE_ADDR, hpage_pmd_size);
fill_memory(p, 0, hpage_pmd_size);
if (!check_huge(p))
if (check_huge(p, 0))
success("OK");
else
fail("Fail");
c->collapse("Collapse PTE table full of different compound pages", p,
c->collapse("Collapse PTE table full of different compound pages", p, 1,
true);
validate_memory(p, 0, hpage_pmd_size);
@ -840,11 +846,11 @@ static void collapse_fork(struct collapse_context *c)
int wstatus;
void *p;
p = alloc_mapping();
p = alloc_mapping(1);
printf("Allocate small page...");
fill_memory(p, 0, page_size);
if (!check_huge(p))
if (check_huge(p, 0))
success("OK");
else
fail("Fail");
@ -855,14 +861,14 @@ static void collapse_fork(struct collapse_context *c)
skip_settings_restore = true;
exit_status = 0;
if (!check_huge(p))
if (check_huge(p, 0))
success("OK");
else
fail("Fail");
fill_memory(p, page_size, 2 * page_size);
c->collapse("Collapse PTE table with single page shared with parent process",
p, true);
p, 1, true);
validate_memory(p, 0, page_size);
munmap(p, hpage_pmd_size);
@ -873,7 +879,7 @@ static void collapse_fork(struct collapse_context *c)
exit_status += WEXITSTATUS(wstatus);
printf("Check if parent still has small page...");
if (!check_huge(p))
if (check_huge(p, 0))
success("OK");
else
fail("Fail");
@ -893,7 +899,7 @@ static void collapse_fork_compound(struct collapse_context *c)
skip_settings_restore = true;
exit_status = 0;
if (check_huge(p))
if (check_huge(p, 1))
success("OK");
else
fail("Fail");
@ -901,7 +907,7 @@ static void collapse_fork_compound(struct collapse_context *c)
printf("Split huge page PMD in child process...");
madvise(p, page_size, MADV_NOHUGEPAGE);
madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
if (!check_huge(p))
if (check_huge(p, 0))
success("OK");
else
fail("Fail");
@ -909,7 +915,7 @@ static void collapse_fork_compound(struct collapse_context *c)
write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
c->collapse("Collapse PTE table full of compound pages in child",
p, true);
p, 1, true);
write_num("khugepaged/max_ptes_shared",
current_settings()->khugepaged.max_ptes_shared);
@ -922,7 +928,7 @@ static void collapse_fork_compound(struct collapse_context *c)
exit_status += WEXITSTATUS(wstatus);
printf("Check if parent still has huge page...");
if (check_huge(p))
if (check_huge(p, 1))
success("OK");
else
fail("Fail");
@ -943,7 +949,7 @@ static void collapse_max_ptes_shared(struct collapse_context *c)
skip_settings_restore = true;
exit_status = 0;
if (check_huge(p))
if (check_huge(p, 1))
success("OK");
else
fail("Fail");
@ -951,26 +957,26 @@ static void collapse_max_ptes_shared(struct collapse_context *c)
printf("Trigger CoW on page %d of %d...",
hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
if (!check_huge(p))
if (check_huge(p, 0))
success("OK");
else
fail("Fail");
c->collapse("Maybe collapse with max_ptes_shared exceeded", p,
!c->enforce_pte_scan_limits);
1, !c->enforce_pte_scan_limits);
if (c->enforce_pte_scan_limits) {
printf("Trigger CoW on page %d of %d...",
hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) *
page_size);
if (!check_huge(p))
if (check_huge(p, 0))
success("OK");
else
fail("Fail");
c->collapse("Collapse with max_ptes_shared PTEs shared",
p, true);
p, 1, true);
}
validate_memory(p, 0, hpage_pmd_size);
@ -982,7 +988,7 @@ static void collapse_max_ptes_shared(struct collapse_context *c)
exit_status += WEXITSTATUS(wstatus);
printf("Check if parent still has huge page...");
if (check_huge(p))
if (check_huge(p, 1))
success("OK");
else
fail("Fail");
@ -995,7 +1001,7 @@ static void madvise_collapse_existing_thps(void)
void *p;
int err;
p = alloc_mapping();
p = alloc_mapping(1);
fill_memory(p, 0, hpage_pmd_size);
printf("Collapse fully populated PTE table...");
@ -1005,11 +1011,11 @@ static void madvise_collapse_existing_thps(void)
* MADV_COLLAPSE in "madvise" mode.
*/
err = madvise(p, hpage_pmd_size, MADV_COLLAPSE);
if (err == 0 && check_huge(p)) {
if (err == 0 && check_huge(p, 1)) {
success("OK");
printf("Re-collapse PMD-mapped hugepage");
err = madvise(p, hpage_pmd_size, MADV_COLLAPSE);
if (err == 0 && check_huge(p))
if (err == 0 && check_huge(p, 1))
success("OK");
else
fail("Fail");