hyperv-fixes for 5.18-rc2

-----BEGIN PGP SIGNATURE-----
 
 iQFHBAABCAAxFiEEIbPD0id6easf0xsudhRwX5BBoF4FAmJO+AATHHdlaS5saXVA
 a2VybmVsLm9yZwAKCRB2FHBfkEGgXi4VB/9NvwUuqgQWxGmaSrITVPLXtwDjGQc8
 Tt3shHWYp9qRuXbX6H7K/PDvyQreLytpStj7JL8rMUsLsccHaBPGTC1czN+oGuwx
 upxKxWzkRGB8DUMD5pXuP9C/XJxVAUGJJ5sJx40HMBblsNi/PSqVzd1bIvV168g4
 hFSPzGJXsbDJZfGloQux5y4NxkVl4k8g6v7EBV0Qxiu0oFhTxJjFzuK6Rau4/ajS
 cXKIpgtjuAXExfgpvORKTs/K98e6Um42sFt5JwUShm9Yocas8POqUo7q0Qw4obcL
 4K14j9t2uGOrrfOld4kGa5Emx0lnRjXMv0EiVaA3tns7GA9//06KQhOE
 =/mS+
 -----END PGP SIGNATURE-----

Merge tag 'hyperv-fixes-signed-20220407' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull hyperv fixes from Wei Liu:

 - Correctly propagate coherence information for VMbus devices (Michael
   Kelley)

 - Disable balloon and memory hot-add on ARM64 temporarily (Boqun Feng)

 - Use barrier to prevent reordering when reading ring buffer (Michael
   Kelley)

 - Use virt_store_mb in favour of smp_store_mb (Andrea Parri)

 - Fix VMbus device object initialization (Andrea Parri)

 - Deactivate sysctl_record_panic_msg on isolated guest (Andrea Parri)

 - Fix a crash when unloading VMbus module (Guilherme G. Piccoli)

* tag 'hyperv-fixes-signed-20220407' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux:
  Drivers: hv: vmbus: Replace smp_store_mb() with virt_store_mb()
  Drivers: hv: balloon: Disable balloon and hot-add accordingly
  Drivers: hv: balloon: Support status report for larger page sizes
  Drivers: hv: vmbus: Prevent load re-ordering when reading ring buffer
  PCI: hv: Propagate coherence from VMbus device to PCI device
  Drivers: hv: vmbus: Propagate VMbus coherence to each VMbus device
  Drivers: hv: vmbus: Fix potential crash on module unload
  Drivers: hv: vmbus: Fix initialization of device object in vmbus_device_register()
  Drivers: hv: vmbus: Deactivate sysctl_record_panic_msg by default in isolated guests
This commit is contained in:
Linus Torvalds 2022-04-07 06:35:34 -10:00
commit 42e7a03d3b
7 changed files with 132 additions and 20 deletions

View File

@ -380,7 +380,7 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel)
* execute:
*
* (a) In the "normal (i.e., not resuming from hibernation)" path,
* the full barrier in smp_store_mb() guarantees that the store
* the full barrier in virt_store_mb() guarantees that the store
* is propagated to all CPUs before the add_channel_work work
* is queued. In turn, add_channel_work is queued before the
* channel's ring buffer is allocated/initialized and the
@ -392,14 +392,14 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel)
* recv_int_page before retrieving the channel pointer from the
* array of channels.
*
* (b) In the "resuming from hibernation" path, the smp_store_mb()
* (b) In the "resuming from hibernation" path, the virt_store_mb()
* guarantees that the store is propagated to all CPUs before
* the VMBus connection is marked as ready for the resume event
* (cf. check_ready_for_resume_event()). The interrupt handler
* of the VMBus driver and vmbus_chan_sched() can not run before
* vmbus_bus_resume() has completed execution (cf. resume_noirq).
*/
smp_store_mb(
virt_store_mb(
vmbus_connection.channels[channel->offermsg.child_relid],
channel);
}

View File

@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/count_zeros.h>
#include <linux/memory_hotplug.h>
#include <linux/memory.h>
#include <linux/notifier.h>
@ -1130,6 +1131,7 @@ static void post_status(struct hv_dynmem_device *dm)
struct dm_status status;
unsigned long now = jiffies;
unsigned long last_post = last_post_time;
unsigned long num_pages_avail, num_pages_committed;
if (pressure_report_delay > 0) {
--pressure_report_delay;
@ -1154,16 +1156,21 @@ static void post_status(struct hv_dynmem_device *dm)
* num_pages_onlined) as committed to the host, otherwise it can try
* asking us to balloon them out.
*/
status.num_avail = si_mem_available();
status.num_committed = vm_memory_committed() +
num_pages_avail = si_mem_available();
num_pages_committed = vm_memory_committed() +
dm->num_pages_ballooned +
(dm->num_pages_added > dm->num_pages_onlined ?
dm->num_pages_added - dm->num_pages_onlined : 0) +
compute_balloon_floor();
trace_balloon_status(status.num_avail, status.num_committed,
trace_balloon_status(num_pages_avail, num_pages_committed,
vm_memory_committed(), dm->num_pages_ballooned,
dm->num_pages_added, dm->num_pages_onlined);
/* Convert numbers of pages into numbers of HV_HYP_PAGEs. */
status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE;
status.num_committed = num_pages_committed * NR_HV_HYP_PAGES_IN_PAGE;
/*
* If our transaction ID is no longer current, just don't
* send the status. This can happen if we were interrupted
@ -1653,6 +1660,38 @@ static void disable_page_reporting(void)
}
}
/*
 * ballooning_enabled - Decide whether the balloon capability is advertised.
 *
 * Return: 1 when the guest page size equals HV_HYP_PAGE_SIZE, 0 otherwise
 * (in which case an informational message is logged).
 */
static int ballooning_enabled(void)
{
	/* Matching page sizes: ballooning can be offered. */
	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		return 1;

	/*
	 * The guest page size differs from 4k (HV_HYP_PAGE_SIZE). It is
	 * currently unclear whether an unballoon request can guarantee that
	 * every page range is aligned to the guest page size, so ballooning
	 * stays off in this configuration.
	 */
	pr_info("Ballooning disabled because page size is not 4096 bytes\n");
	return 0;
}
/*
 * hot_add_enabled - Decide whether the memory hot-add capability is advertised.
 *
 * Return: 0 when built for ARM64 (an informational message is logged),
 * 1 otherwise.
 */
static int hot_add_enabled(void)
{
	/* Everything other than ARM64 keeps hot add available. */
	if (!IS_ENABLED(CONFIG_ARM64))
		return 1;

	/*
	 * Hot add currently depends on memory_add_physaddr_to_nid() to find
	 * the node id of a hot-added range. On ARM64 that helper always
	 * returns 0, and DM_MEM_HOT_ADD_REQUEST does not carry the NUMA node
	 * information needed for add_memory(), so hot add is turned off.
	 */
	pr_info("Memory hot add disabled on ARM64\n");
	return 0;
}
static int balloon_connect_vsp(struct hv_device *dev)
{
struct dm_version_request version_req;
@ -1724,8 +1763,8 @@ static int balloon_connect_vsp(struct hv_device *dev)
* currently still requires the bits to be set, so we have to add code
* to fail the host's hot-add and balloon up/down requests, if any.
*/
cap_msg.caps.cap_bits.balloon = 1;
cap_msg.caps.cap_bits.hot_add = 1;
cap_msg.caps.cap_bits.balloon = ballooning_enabled();
cap_msg.caps.cap_bits.hot_add = hot_add_enabled();
/*
* Specify our alignment requirements as it relates

View File

@ -20,6 +20,7 @@
#include <linux/panic_notifier.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/dma-map-ops.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
@ -218,6 +219,16 @@ bool hv_query_ext_cap(u64 cap_query)
}
EXPORT_SYMBOL_GPL(hv_query_ext_cap);
/*
 * hv_setup_dma_ops - Set up DMA operations for a device in a Hyper-V guest.
 * @dev: device handed through unchanged to arch_setup_dma_ops()
 * @coherent: coherence flag handed through unchanged to arch_setup_dma_ops()
 *
 * Thin wrapper around arch_setup_dma_ops() that fills in fixed values for
 * the IOMMU-related arguments.
 */
void hv_setup_dma_ops(struct device *dev, bool coherent)
{
/*
 * Hyper-V does not offer a vIOMMU in the guest
 * VM, so pass 0/NULL for the IOMMU settings
 */
arch_setup_dma_ops(dev, 0, 0, NULL, coherent);
}
EXPORT_SYMBOL_GPL(hv_setup_dma_ops);
bool hv_is_hibernation_supported(void)
{
return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);

View File

@ -439,7 +439,16 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
{
u32 priv_read_loc = rbi->priv_read_index;
u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index);
u32 write_loc;
/*
* The Hyper-V host writes the packet data, then uses
* store_release() to update the write_index. Use load_acquire()
* here to prevent loads of the packet data from being re-ordered
* before the read of the write_index and potentially getting
* stale data.
*/
write_loc = virt_load_acquire(&rbi->ring_buffer->write_index);
if (write_loc >= priv_read_loc)
return write_loc - priv_read_loc;

View File

@ -77,8 +77,8 @@ static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
/*
* Hyper-V should be notified only once about a panic. If we will be
* doing hyperv_report_panic_msg() later with kmsg data, don't do
* the notification here.
* doing hv_kmsg_dump() with kmsg data later, don't do the notification
* here.
*/
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE
&& hyperv_report_reg()) {
@ -100,8 +100,8 @@ static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
/*
* Hyper-V should be notified only once about a panic. If we will be
* doing hyperv_report_panic_msg() later with kmsg data, don't do
* the notification here.
* doing hv_kmsg_dump() with kmsg data later, don't do the notification
* here.
*/
if (hyperv_report_reg())
hyperv_report_panic(regs, val, true);
@ -920,6 +920,21 @@ static int vmbus_probe(struct device *child_device)
return ret;
}
/*
 * vmbus_dma_configure -- Configure DMA coherence for VMbus device
 *
 * @child_device: the VMbus child device being configured
 *
 * Always returns 0.
 */
static int vmbus_dma_configure(struct device *child_device)
{
	bool coherent;

	/*
	 * The child inherits the DMA coherence setting of the top level
	 * VMbus ACPI device. This matters on ARM64; on x86/x64 coherence is
	 * assumed and these calls have no effect.
	 */
	coherent = device_get_dma_attr(&hv_acpi_dev->dev) == DEV_DMA_COHERENT;
	hv_setup_dma_ops(child_device, coherent);

	return 0;
}
/*
* vmbus_remove - Remove a vmbus device
*/
@ -1040,6 +1055,7 @@ static struct bus_type hv_bus = {
.remove = vmbus_remove,
.probe = vmbus_probe,
.uevent = vmbus_uevent,
.dma_configure = vmbus_dma_configure,
.dev_groups = vmbus_dev_groups,
.drv_groups = vmbus_drv_groups,
.bus_groups = vmbus_bus_groups,
@ -1546,14 +1562,20 @@ static int vmbus_bus_init(void)
if (ret)
goto err_connect;
if (hv_is_isolation_supported())
sysctl_record_panic_msg = 0;
/*
* Only register if the crash MSRs are available
*/
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
u64 hyperv_crash_ctl;
/*
* Sysctl registration is not fatal, since by default
* reporting is enabled.
* Panic message recording (sysctl_record_panic_msg)
* is enabled by default in non-isolated guests and
* disabled by default in isolated guests; the panic
* message recording won't be available in isolated
* guests should the following registration fail.
*/
hv_ctl_table_hdr = register_sysctl_table(hv_root_table);
if (!hv_ctl_table_hdr)
@ -2097,6 +2119,10 @@ int vmbus_device_register(struct hv_device *child_device_obj)
child_device_obj->device.parent = &hv_acpi_dev->dev;
child_device_obj->device.release = vmbus_device_release;
child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
child_device_obj->device.dma_mask = &child_device_obj->dma_mask;
dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64));
/*
* Register with the LDM. This will kick off the driver/device
* binding...which will eventually call vmbus_match() and vmbus_probe()
@ -2122,9 +2148,6 @@ int vmbus_device_register(struct hv_device *child_device_obj)
}
hv_debug_add_dev_dir(child_device_obj);
child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
child_device_obj->device.dma_mask = &child_device_obj->dma_mask;
dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64));
return 0;
err_kset_unregister:
@ -2428,6 +2451,21 @@ static int vmbus_acpi_add(struct acpi_device *device)
hv_acpi_dev = device;
/*
* Older versions of Hyper-V for ARM64 fail to include the _CCA
* method on the top level VMbus device in the DSDT. But devices
* are hardware coherent in all current Hyper-V use cases, so fix
* up the ACPI device to behave as if _CCA is present and indicates
* hardware coherence.
*/
ACPI_COMPANION_SET(&device->dev, device);
if (IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) &&
device_get_dma_attr(&device->dev) == DEV_DMA_NOT_SUPPORTED) {
pr_info("No ACPI _CCA found; assuming coherent device I/O\n");
device->flags.cca_seen = true;
device->flags.coherent_dma = true;
}
result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
vmbus_walk_resources, NULL);
@ -2780,10 +2818,15 @@ static void __exit vmbus_exit(void)
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
kmsg_dump_unregister(&hv_kmsg_dumper);
unregister_die_notifier(&hyperv_die_block);
atomic_notifier_chain_unregister(&panic_notifier_list,
&hyperv_panic_block);
}
/*
* The panic notifier is always registered, hence we should
* also unconditionally unregister it here as well.
*/
atomic_notifier_chain_unregister(&panic_notifier_list,
&hyperv_panic_block);
free_page((unsigned long)hv_panic_page);
unregister_sysctl_table(hv_ctl_table_hdr);
hv_ctl_table_hdr = NULL;

View File

@ -3407,6 +3407,15 @@ static int hv_pci_probe(struct hv_device *hdev,
hbus->bridge->domain_nr = dom;
#ifdef CONFIG_X86
hbus->sysdata.domain = dom;
#elif defined(CONFIG_ARM64)
/*
* Set the PCI bus parent to be the corresponding VMbus
* device. Then the VMbus device will be assigned as the
* ACPI companion in pcibios_root_bridge_prepare() and
* pci_dma_configure() will propagate device coherence
* information to devices created on the bus.
*/
hbus->sysdata.parent = hdev->device.parent;
#endif
hbus->hdev = hdev;

View File

@ -269,6 +269,7 @@ bool hv_isolation_type_snp(void);
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
void hyperv_cleanup(void);
bool hv_query_ext_cap(u64 cap_query);
void hv_setup_dma_ops(struct device *dev, bool coherent);
void *hv_map_memory(void *addr, unsigned long size);
void hv_unmap_memory(void *addr);
#else /* CONFIG_HYPERV */