Go to file
David Hildenbrand 42b2af2c9b mm/userfaultfd: propagate uffd-wp bit when PTE-mapping the huge zeropage
Currently, we'd lose the userfaultfd-wp marker when PTE-mapping a huge
zeropage, resulting in the next write faults in the PMD range not
triggering uffd-wp events.

Various actions (partial MADV_DONTNEED, partial mremap, partial munmap,
partial mprotect) could trigger this.  However, most importantly,
un-protecting a single sub-page from the userfaultfd-wp handler when
processing a uffd-wp event will PTE-map the shared huge zeropage and lose
the uffd-wp bit for the remainder of the PMD.

Let's properly propagate the uffd-wp bit to the PMDs.

 #define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <inttypes.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <errno.h>
 #include <poll.h>
 #include <pthread.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
 #include <linux/userfaultfd.h>

 static size_t pagesize;
 static int uffd;
 static volatile bool uffd_triggered;

 #define barrier() __asm__ __volatile__("": : :"memory")

 static void uffd_wp_range(char *start, size_t size, bool wp)
 {
 	struct uffdio_writeprotect uffd_writeprotect;

 	uffd_writeprotect.range.start = (unsigned long) start;
 	uffd_writeprotect.range.len = size;
 	if (wp) {
 		uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP;
 	} else {
 		uffd_writeprotect.mode = 0;
 	}
 	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) {
 		fprintf(stderr, "UFFDIO_WRITEPROTECT failed: %d\n", errno);
 		exit(1);
 	}
 }

 static void *uffd_thread_fn(void *arg)
 {
 	static struct uffd_msg msg;
 	ssize_t nread;

 	while (1) {
 		struct pollfd pollfd;
 		int nready;

 		pollfd.fd = uffd;
 		pollfd.events = POLLIN;
 		nready = poll(&pollfd, 1, -1);
 		if (nready == -1) {
 			fprintf(stderr, "poll() failed: %d\n", errno);
 			exit(1);
 		}

 		nread = read(uffd, &msg, sizeof(msg));
 		if (nread <= 0)
 			continue;

 		if (msg.event != UFFD_EVENT_PAGEFAULT ||
 		    !(msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP)) {
 			printf("FAIL: wrong uffd-wp event fired\n");
 			exit(1);
 		}

 		/* un-protect the single page. */
 		uffd_triggered = true;
 		uffd_wp_range((char *)(uintptr_t)msg.arg.pagefault.address,
 			      pagesize, false);
 	}
 	return arg;
 }

 static int setup_uffd(char *map, size_t size)
 {
 	struct uffdio_api uffdio_api;
 	struct uffdio_register uffdio_register;
 	pthread_t thread;

 	uffd = syscall(__NR_userfaultfd,
 		       O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
 	if (uffd < 0) {
 		fprintf(stderr, "syscall() failed: %d\n", errno);
 		return -errno;
 	}

 	uffdio_api.api = UFFD_API;
 	uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
 		fprintf(stderr, "UFFDIO_API failed: %d\n", errno);
 		return -errno;
 	}

 	if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
 		fprintf(stderr, "UFFD_FEATURE_WRITEPROTECT missing\n");
 		return -ENOSYS;
 	}

 	uffdio_register.range.start = (unsigned long) map;
 	uffdio_register.range.len = size;
 	uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
 	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0) {
 		fprintf(stderr, "UFFDIO_REGISTER failed: %d\n", errno);
 		return -errno;
 	}

 	pthread_create(&thread, NULL, uffd_thread_fn, NULL);

 	return 0;
 }

 int main(void)
 {
 	const size_t size = 4 * 1024 * 1024ull;
 	char *map, *cur;

 	pagesize = getpagesize();

 	map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
 	if (map == MAP_FAILED) {
 		fprintf(stderr, "mmap() failed\n");
 		return -errno;
 	}

 	if (madvise(map, size, MADV_HUGEPAGE)) {
 		fprintf(stderr, "MADV_HUGEPAGE failed\n");
 		return -errno;
 	}

 	if (setup_uffd(map, size))
 		return 1;

 	/* Read the whole range, populating zeropages. */
 	madvise(map, size, MADV_POPULATE_READ);

 	/* Write-protect the whole range. */
 	uffd_wp_range(map, size, true);

 	/* Make sure uffd-wp triggers on each page. */
 	for (cur = map; cur < map + size; cur += pagesize) {
 		uffd_triggered = false;

 		barrier();
 		/* Trigger a write fault. */
 		*cur = 1;
 		barrier();

 		if (!uffd_triggered) {
 			printf("FAIL: uffd-wp did not trigger\n");
 			return 1;
 		}
 	}

 	printf("PASS: uffd-wp triggered\n");
 	return 0;
 }

Link: https://lkml.kernel.org/r/20230302175423.589164-1-david@redhat.com
Fixes: e06f1e1dd4 ("userfaultfd: wp: enabled write protection in userfaultfd API")
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Shaohua Li <shli@fb.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-03-07 17:04:53 -08:00
Documentation A small set of updates for x86: 2023-03-05 11:27:48 -08:00
LICENSES LICENSES: Add the copyleft-next-0.3.1 license 2022-11-08 15:44:01 +01:00
arch cpumask: re-introduce constant-sized cpumask optimizations 2023-03-05 14:30:34 -08:00
block block-6.3-2023-03-03 2023-03-03 10:21:39 -08:00
certs Kbuild updates for v6.3 2023-02-26 11:53:25 -08:00
crypto Networking changes for 6.3. 2023-02-21 18:24:12 -08:00
drivers This push fixes a regression in the caam driver. 2023-03-05 11:32:30 -08:00
fs 17 hotfixes. Eight are for MM and seven are for other parts of the 2023-03-04 13:32:50 -08:00
include cpumask: re-introduce constant-sized cpumask optimizations 2023-03-05 14:30:34 -08:00
init Kbuild updates for v6.3 2023-02-26 11:53:25 -08:00
io_uring io_uring-6.3-2023-03-03 2023-03-03 10:25:29 -08:00
ipc Merge branch 'work.namespace' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs 2023-02-24 19:20:07 -08:00
kernel A set of updates for the interrupt susbsystem: 2023-03-05 11:19:16 -08:00
lib cpumask: re-introduce constant-sized cpumask optimizations 2023-03-05 14:30:34 -08:00
mm mm/userfaultfd: propagate uffd-wp bit when PTE-mapping the huge zeropage 2023-03-07 17:04:53 -08:00
net nfsd-6.3 fixes: 2023-03-01 11:03:44 -08:00
rust Rust fixes for 6.3-rc1 2023-03-03 14:51:15 -08:00
samples LoongArch changes for v6.3 2023-03-01 09:27:00 -08:00
scripts Remove Intel compiler support 2023-03-05 10:49:37 -08:00
security capability: just use a 'u64' instead of a 'u32[2]' array 2023-03-01 10:01:22 -08:00
sound sound fixes for 6.3-rc1 2023-03-04 10:53:59 -08:00
tools Changes in this cycle were: 2023-03-02 09:45:34 -08:00
usr usr/gen_init_cpio.c: remove unnecessary -1 values from int file 2022-10-03 14:21:44 -07:00
virt KVM/riscv changes for 6.3 2023-02-15 12:33:28 -05:00
.clang-format cpumask: re-introduce constant-sized cpumask optimizations 2023-03-05 14:30:34 -08:00
.cocciconfig
.get_maintainer.ignore get_maintainer: add Alan to .get_maintainer.ignore 2022-08-20 15:17:44 -07:00
.gitattributes .gitattributes: use 'dts' diff driver for *.dtso files 2023-02-26 15:28:23 +09:00
.gitignore .gitignore: ignore *.cover and *.mbx 2023-02-05 18:51:22 +09:00
.mailmap mailmap: map Dikshita Agarwal's old address to his current one 2023-03-02 21:54:24 -08:00
.rustfmt.toml rust: add `.rustfmt.toml` 2022-09-28 09:02:20 +02:00
COPYING COPYING: state that all contributions really are covered by this file 2020-02-10 13:32:20 -08:00
CREDITS There is no particular theme here - mainly quick hits all over the tree. 2023-02-23 17:55:40 -08:00
Kbuild Kbuild updates for v6.1 2022-10-10 12:00:45 -07:00
Kconfig kbuild: ensure full rebuild when the compiler is updated 2020-05-12 13:28:33 +09:00
MAINTAINERS Adding Christian Brauner as VFS co-maintainer. 2023-03-05 11:11:52 -08:00
Makefile Linux 6.3-rc1 2023-03-05 14:52:03 -08:00
README Drop all 00-INDEX files from Documentation/ 2018-09-09 15:08:58 -06:00

README

Linux kernel
============

There are several guides for kernel developers and users. These guides can
be rendered in a number of formats, like HTML and PDF. Please read
Documentation/admin-guide/README.rst first.

In order to build the documentation, use ``make htmldocs`` or
``make pdfdocs``.  The formatted documentation can also be read online at:

    https://www.kernel.org/doc/html/latest/

There are various text files in the Documentation/ subdirectory,
several of them using the Restructured Text markup notation.

Please read the Documentation/process/changes.rst file, as it contains the
requirements for building and running the kernel, and information about
the problems which may result by upgrading your kernel.