diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module index 08886367d047..62addab47d0c 100644 --- a/Documentation/ABI/testing/sysfs-module +++ b/Documentation/ABI/testing/sysfs-module @@ -60,3 +60,14 @@ Description: Module taint flags: C staging driver module E unsigned module == ===================== + +What: /sys/module/grant_table/parameters/free_per_iteration +Date: July 2023 +KernelVersion: 6.5 but backported to all supported stable branches +Contact: Xen developer discussion +Description: Read and write number of grant entries to attempt to free per iteration. + + Note: Future versions of Xen and Linux may provide a better + interface for controlling the rate of deferred grant reclaim + or may not need it at all. +Users: Qubes OS (https://www.qubes-os.org) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index c7715f8bd452..3bdd5b59661d 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -112,6 +112,7 @@ struct irq_info { unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ u64 eoi_time; /* Time in jiffies when to EOI. */ raw_spinlock_t lock; + bool is_static; /* Is event channel static */ union { unsigned short virq; @@ -815,15 +816,6 @@ static void xen_free_irq(unsigned irq) irq_free_desc(irq); } -static void xen_evtchn_close(evtchn_port_t port) -{ - struct evtchn_close close; - - close.port = port; - if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) - BUG(); -} - /* Not called for lateeoi events. */ static void event_handler_exit(struct irq_info *info) { @@ -982,7 +974,8 @@ static void __unbind_from_irq(unsigned int irq) unsigned int cpu = cpu_from_irq(irq); struct xenbus_device *dev; - xen_evtchn_close(evtchn); + if (!info->is_static) + xen_evtchn_close(evtchn); switch (type_from_irq(irq)) { case IRQT_VIRQ: @@ -1574,7 +1567,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority) } EXPORT_SYMBOL_GPL(xen_set_irq_priority); -int evtchn_make_refcounted(evtchn_port_t evtchn) +int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static) { int irq = get_evtchn_to_irq(evtchn); struct irq_info *info; @@ -1590,6 +1583,7 @@ int evtchn_make_refcounted(evtchn_port_t evtchn) WARN_ON(info->refcnt != -1); info->refcnt = 1; + info->is_static = is_static; return 0; } diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index c99415a70051..9139a7364df5 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -366,10 +366,10 @@ static int evtchn_resize_ring(struct per_user_data *u) return 0; } -static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port) +static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port, + bool is_static) { struct user_evtchn *evtchn; - struct evtchn_close close; int rc = 0; /* @@ -402,14 +402,14 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port) if (rc < 0) goto err; - rc = evtchn_make_refcounted(port); + rc = evtchn_make_refcounted(port, is_static); return rc; err: /* bind failed, should close the port now */ - close.port = port; - if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) - BUG(); + if (!is_static) + xen_evtchn_close(port); + del_evtchn(u, evtchn); return rc; } @@ -456,7 +456,7 @@ static long evtchn_ioctl(struct file *file, if (rc != 0) break; - rc = evtchn_bind_to_user(u, bind_virq.port); + rc = evtchn_bind_to_user(u, bind_virq.port, false); if (rc == 0) rc = bind_virq.port; break; @@ -482,7 +482,7 @@ static long evtchn_ioctl(struct file *file, if (rc != 0) break; - rc = evtchn_bind_to_user(u, bind_interdomain.local_port); + rc = evtchn_bind_to_user(u, bind_interdomain.local_port, false); if (rc == 0) rc = bind_interdomain.local_port; break; @@ -507,7 +507,7 @@ static long evtchn_ioctl(struct file *file, if (rc != 0) break; - rc = evtchn_bind_to_user(u, alloc_unbound.port); + rc = evtchn_bind_to_user(u, alloc_unbound.port, false); if (rc == 0) rc = alloc_unbound.port; break; @@ -536,6 +536,23 @@ static long evtchn_ioctl(struct file *file, break; } + case IOCTL_EVTCHN_BIND_STATIC: { + struct ioctl_evtchn_bind bind; + struct user_evtchn *evtchn; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + rc = -EISCONN; + evtchn = find_evtchn(u, bind.port); + if (evtchn) + break; + + rc = evtchn_bind_to_user(u, bind.port, true); + break; + } + case IOCTL_EVTCHN_NOTIFY: { struct ioctl_evtchn_notify notify; struct user_evtchn *evtchn; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index e1ec725c2819..f13c3b76ad1e 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -498,14 +498,21 @@ static LIST_HEAD(deferred_list); static void gnttab_handle_deferred(struct timer_list *); static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred); +static atomic64_t deferred_count; +static atomic64_t leaked_count; +static unsigned int free_per_iteration = 10; +module_param(free_per_iteration, uint, 0600); + static void gnttab_handle_deferred(struct timer_list *unused) { - unsigned int nr = 10; + unsigned int nr = READ_ONCE(free_per_iteration); + const bool ignore_limit = nr == 0; struct deferred_entry *first = NULL; unsigned long flags; + size_t freed = 0; spin_lock_irqsave(&gnttab_list_lock, flags); - while (nr--) { + while ((ignore_limit || nr--) && !list_empty(&deferred_list)) { struct deferred_entry *entry = list_first_entry(&deferred_list, struct deferred_entry, list); @@ -515,10 +522,14 @@ static void gnttab_handle_deferred(struct timer_list *unused) list_del(&entry->list); spin_unlock_irqrestore(&gnttab_list_lock, flags); if (_gnttab_end_foreign_access_ref(entry->ref)) { + uint64_t ret = atomic64_dec_return(&deferred_count); + put_free_entry(entry->ref); - pr_debug("freeing g.e. %#x (pfn %#lx)\n", - entry->ref, page_to_pfn(entry->page)); + pr_debug("freeing g.e. %#x (pfn %#lx), %llu remaining\n", + entry->ref, page_to_pfn(entry->page), + (unsigned long long)ret); put_page(entry->page); + freed++; kfree(entry); entry = NULL; } else { @@ -530,21 +541,22 @@ static void gnttab_handle_deferred(struct timer_list *unused) spin_lock_irqsave(&gnttab_list_lock, flags); if (entry) list_add_tail(&entry->list, &deferred_list); - else if (list_empty(&deferred_list)) - break; } - if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) { + if (list_empty(&deferred_list)) + WARN_ON(atomic64_read(&deferred_count)); + else if (!timer_pending(&deferred_timer)) { deferred_timer.expires = jiffies + HZ; add_timer(&deferred_timer); } spin_unlock_irqrestore(&gnttab_list_lock, flags); + pr_debug("Freed %zu references", freed); } static void gnttab_add_deferred(grant_ref_t ref, struct page *page) { struct deferred_entry *entry; gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL; - const char *what = KERN_WARNING "leaking"; + uint64_t leaked, deferred; entry = kmalloc(sizeof(*entry), gfp); if (!page) { @@ -567,10 +579,16 @@ static void gnttab_add_deferred(grant_ref_t ref, struct page *page) add_timer(&deferred_timer); } spin_unlock_irqrestore(&gnttab_list_lock, flags); - what = KERN_DEBUG "deferring"; + deferred = atomic64_inc_return(&deferred_count); + leaked = atomic64_read(&leaked_count); + pr_debug("deferring g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n", + ref, page ? page_to_pfn(page) : -1, deferred, leaked); + } else { + deferred = atomic64_read(&deferred_count); + leaked = atomic64_inc_return(&leaked_count); + pr_warn("leaking g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n", + ref, page ? page_to_pfn(page) : -1, deferred, leaked); } - printk("%s g.e. %#x (pfn %#lx)\n", - what, ref, page ? page_to_pfn(page) : -1); } int gnttab_try_end_foreign_access(grant_ref_t ref) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 58b732dcbfb8..639bf628389b 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -811,6 +811,9 @@ static int xenbus_probe_thread(void *unused) static int __init xenbus_probe_initcall(void) { + if (!xen_domain()) + return -ENODEV; + /* * Probe XenBus here in the XS_PV case, and also XS_HVM unless we * need to wait for the platform PCI device to come up or diff --git a/include/uapi/xen/evtchn.h b/include/uapi/xen/evtchn.h index 7fbf732f168f..aef2b75f3413 100644 --- a/include/uapi/xen/evtchn.h +++ b/include/uapi/xen/evtchn.h @@ -101,4 +101,13 @@ struct ioctl_evtchn_restrict_domid { domid_t domid; }; +/* + * Bind statically allocated @port. + */ +#define IOCTL_EVTCHN_BIND_STATIC \ + _IOC(_IOC_NONE, 'E', 7, sizeof(struct ioctl_evtchn_bind)) +struct ioctl_evtchn_bind { + unsigned int port; +}; + #endif /* __LINUX_PUBLIC_EVTCHN_H__ */ diff --git a/include/xen/events.h b/include/xen/events.h index ac1281c5ead6..95970a2f7695 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -69,7 +69,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority); /* * Allow extra references to event channels exposed to userspace by evtchn */ -int evtchn_make_refcounted(evtchn_port_t evtchn); +int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static); int evtchn_get(evtchn_port_t evtchn); void evtchn_put(evtchn_port_t evtchn); @@ -141,4 +141,13 @@ void xen_init_IRQ(void); irqreturn_t xen_debug_interrupt(int irq, void *dev_id); +static inline void xen_evtchn_close(evtchn_port_t port) +{ + struct evtchn_close close; + + close.port = port; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + BUG(); +} + #endif /* _XEN_EVENTS_H */