mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-30 22:26:55 +00:00
hyperv-fixes for 6.1-rc7
-----BEGIN PGP SIGNATURE----- iQFHBAABCAAxFiEEIbPD0id6easf0xsudhRwX5BBoF4FAmOA1C0THHdlaS5saXVA a2VybmVsLm9yZwAKCRB2FHBfkEGgXoofCADVaWCNcmktsiMxeNuMGJULbib5Jf/q 69axU1totvczkff0Cg9NuDQoqXIJKF9NB4HbO0atqI4VXwInk6Y8xxNFY/EzGAat 6Dr+y6lT2OL+qzjkk8yMB8CQM67XTfDNOVeo8tVSpTOnCohHyQw4QSJmlh/cO60l h33UbvWwzTkxuZCGJxULGOEsydw1ktoEUC/TS0hqWVG/vmqfPBGiEb2oWU+lPE/0 cARhsV+VpLQ4bX960pcrbRvkEgydEtJHCvkU5k8C5ZoPaStNPvY/6we96eB+r4i+ htb4LDN8n7M9EZS30/xm/DLmemawKk57bv5fZtVv+98srtQhgO3kc2iu =mp04 -----END PGP SIGNATURE----- Merge tag 'hyperv-fixes-signed-20221125' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux Pull hyperv fixes from Wei Liu: - Fix IRTE allocation in Hyper-V PCI controller (Dexuan Cui) - Fix handling of SCSI srb_status and capacity change events (Michael Kelley) - Restore VP assist page after CPU offlining and onlining (Vitaly Kuznetsov) - Fix some memory leak issues in VMBus (Yang Yingliang) * tag 'hyperv-fixes-signed-20221125' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: Drivers: hv: vmbus: fix possible memory leak in vmbus_device_register() Drivers: hv: vmbus: fix double free in the error path of vmbus_add_channel_work() PCI: hv: Only reuse existing IRTE allocation for Multi-MSI scsi: storvsc: Fix handling of srb_status and capacity change events x86/hyperv: Restore VP assist page after cpu offlining/onlining
This commit is contained in:
commit
081f359ef5
5 changed files with 141 additions and 79 deletions
|
@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void)
|
|||
static int hv_cpu_init(unsigned int cpu)
|
||||
{
|
||||
union hv_vp_assist_msr_contents msr = { 0 };
|
||||
struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
|
||||
struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu];
|
||||
int ret;
|
||||
|
||||
ret = hv_common_cpu_init(cpu);
|
||||
|
@ -87,34 +87,32 @@ static int hv_cpu_init(unsigned int cpu)
|
|||
if (!hv_vp_assist_page)
|
||||
return 0;
|
||||
|
||||
if (!*hvp) {
|
||||
if (hv_root_partition) {
|
||||
/*
|
||||
* For root partition we get the hypervisor provided VP assist
|
||||
* page, instead of allocating a new page.
|
||||
*/
|
||||
rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
|
||||
*hvp = memremap(msr.pfn <<
|
||||
HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
|
||||
PAGE_SIZE, MEMREMAP_WB);
|
||||
} else {
|
||||
/*
|
||||
* The VP assist page is an "overlay" page (see Hyper-V TLFS's
|
||||
* Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
|
||||
* out to make sure we always write the EOI MSR in
|
||||
* hv_apic_eoi_write() *after* the EOI optimization is disabled
|
||||
* in hv_cpu_die(), otherwise a CPU may not be stopped in the
|
||||
* case of CPU offlining and the VM will hang.
|
||||
*/
|
||||
if (hv_root_partition) {
|
||||
/*
|
||||
* For root partition we get the hypervisor provided VP assist
|
||||
* page, instead of allocating a new page.
|
||||
*/
|
||||
rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
|
||||
*hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
|
||||
PAGE_SIZE, MEMREMAP_WB);
|
||||
} else {
|
||||
/*
|
||||
* The VP assist page is an "overlay" page (see Hyper-V TLFS's
|
||||
* Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
|
||||
* out to make sure we always write the EOI MSR in
|
||||
* hv_apic_eoi_write() *after* the EOI optimization is disabled
|
||||
* in hv_cpu_die(), otherwise a CPU may not be stopped in the
|
||||
* case of CPU offlining and the VM will hang.
|
||||
*/
|
||||
if (!*hvp)
|
||||
*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
|
||||
if (*hvp)
|
||||
msr.pfn = vmalloc_to_pfn(*hvp);
|
||||
}
|
||||
WARN_ON(!(*hvp));
|
||||
if (*hvp) {
|
||||
msr.enable = 1;
|
||||
wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
|
||||
}
|
||||
if (*hvp)
|
||||
msr.pfn = vmalloc_to_pfn(*hvp);
|
||||
|
||||
}
|
||||
if (!WARN_ON(!(*hvp))) {
|
||||
msr.enable = 1;
|
||||
wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
|
||||
}
|
||||
|
||||
return hyperv_init_ghcb();
|
||||
|
|
|
@ -533,13 +533,17 @@ static void vmbus_add_channel_work(struct work_struct *work)
|
|||
* Add the new device to the bus. This will kick off device-driver
|
||||
* binding which eventually invokes the device driver's AddDevice()
|
||||
* method.
|
||||
*
|
||||
* If vmbus_device_register() fails, the 'device_obj' is freed in
|
||||
* vmbus_device_release() as called by device_unregister() in the
|
||||
* error path of vmbus_device_register(). In the outside error
|
||||
* path, there's no need to free it.
|
||||
*/
|
||||
ret = vmbus_device_register(newchannel->device_obj);
|
||||
|
||||
if (ret != 0) {
|
||||
pr_err("unable to add child device object (relid %d)\n",
|
||||
newchannel->offermsg.child_relid);
|
||||
kfree(newchannel->device_obj);
|
||||
goto err_deq_chan;
|
||||
}
|
||||
|
||||
|
|
|
@ -2082,6 +2082,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
|
|||
ret = device_register(&child_device_obj->device);
|
||||
if (ret) {
|
||||
pr_err("Unable to register child device\n");
|
||||
put_device(&child_device_obj->device);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -1613,7 +1613,7 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp,
|
|||
}
|
||||
|
||||
static u32 hv_compose_msi_req_v1(
|
||||
struct pci_create_interrupt *int_pkt, const struct cpumask *affinity,
|
||||
struct pci_create_interrupt *int_pkt,
|
||||
u32 slot, u8 vector, u16 vector_count)
|
||||
{
|
||||
int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
|
||||
|
@ -1631,6 +1631,35 @@ static u32 hv_compose_msi_req_v1(
|
|||
return sizeof(*int_pkt);
|
||||
}
|
||||
|
||||
/*
|
||||
* The vCPU selected by hv_compose_multi_msi_req_get_cpu() and
|
||||
* hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be
|
||||
* interrupted is specified later in hv_irq_unmask() and communicated to Hyper-V
|
||||
* via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is
|
||||
* not irrelevant because Hyper-V chooses the physical CPU to handle the
|
||||
* interrupts based on the vCPU specified in message sent to the vPCI VSP in
|
||||
* hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest,
|
||||
* but assigning too many vPCI device interrupts to the same pCPU can cause a
|
||||
* performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V
|
||||
* to spread out the pCPUs that it selects.
|
||||
*
|
||||
* For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu()
|
||||
* to always return the same dummy vCPU, because a second call to
|
||||
* hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a
|
||||
* new pCPU for the interrupt. But for the multi-MSI case, the second call to
|
||||
* hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the
|
||||
* original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that
|
||||
* the pCPUs are spread out. All interrupts for a multi-MSI device end up using
|
||||
* the same pCPU, even though the vCPUs will be spread out by later calls
|
||||
* to hv_irq_unmask(), but that is the best we can do now.
|
||||
*
|
||||
* With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does *not*
|
||||
* cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an
|
||||
* enhancement is planned for a future version. With that enhancement, the
|
||||
* dummy vCPU selection won't matter, and interrupts for the same multi-MSI
|
||||
* device will be spread across multiple pCPUs.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
|
||||
* by subsequent retarget in hv_irq_unmask().
|
||||
|
@ -1640,18 +1669,39 @@ static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity)
|
|||
return cpumask_first_and(affinity, cpu_online_mask);
|
||||
}
|
||||
|
||||
static u32 hv_compose_msi_req_v2(
|
||||
struct pci_create_interrupt2 *int_pkt, const struct cpumask *affinity,
|
||||
u32 slot, u8 vector, u16 vector_count)
|
||||
/*
|
||||
* Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0.
|
||||
*/
|
||||
static int hv_compose_multi_msi_req_get_cpu(void)
|
||||
{
|
||||
static DEFINE_SPINLOCK(multi_msi_cpu_lock);
|
||||
|
||||
/* -1 means starting with CPU 0 */
|
||||
static int cpu_next = -1;
|
||||
|
||||
unsigned long flags;
|
||||
int cpu;
|
||||
|
||||
spin_lock_irqsave(&multi_msi_cpu_lock, flags);
|
||||
|
||||
cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids,
|
||||
false);
|
||||
cpu = cpu_next;
|
||||
|
||||
spin_unlock_irqrestore(&multi_msi_cpu_lock, flags);
|
||||
|
||||
return cpu;
|
||||
}
|
||||
|
||||
static u32 hv_compose_msi_req_v2(
|
||||
struct pci_create_interrupt2 *int_pkt, int cpu,
|
||||
u32 slot, u8 vector, u16 vector_count)
|
||||
{
|
||||
int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2;
|
||||
int_pkt->wslot.slot = slot;
|
||||
int_pkt->int_desc.vector = vector;
|
||||
int_pkt->int_desc.vector_count = vector_count;
|
||||
int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
|
||||
cpu = hv_compose_msi_req_get_cpu(affinity);
|
||||
int_pkt->int_desc.processor_array[0] =
|
||||
hv_cpu_number_to_vp_number(cpu);
|
||||
int_pkt->int_desc.processor_count = 1;
|
||||
|
@ -1660,18 +1710,15 @@ static u32 hv_compose_msi_req_v2(
|
|||
}
|
||||
|
||||
static u32 hv_compose_msi_req_v3(
|
||||
struct pci_create_interrupt3 *int_pkt, const struct cpumask *affinity,
|
||||
struct pci_create_interrupt3 *int_pkt, int cpu,
|
||||
u32 slot, u32 vector, u16 vector_count)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3;
|
||||
int_pkt->wslot.slot = slot;
|
||||
int_pkt->int_desc.vector = vector;
|
||||
int_pkt->int_desc.reserved = 0;
|
||||
int_pkt->int_desc.vector_count = vector_count;
|
||||
int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
|
||||
cpu = hv_compose_msi_req_get_cpu(affinity);
|
||||
int_pkt->int_desc.processor_array[0] =
|
||||
hv_cpu_number_to_vp_number(cpu);
|
||||
int_pkt->int_desc.processor_count = 1;
|
||||
|
@ -1715,12 +1762,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
|
|||
struct pci_create_interrupt3 v3;
|
||||
} int_pkts;
|
||||
} __packed ctxt;
|
||||
bool multi_msi;
|
||||
u64 trans_id;
|
||||
u32 size;
|
||||
int ret;
|
||||
int cpu;
|
||||
|
||||
msi_desc = irq_data_get_msi_desc(data);
|
||||
multi_msi = !msi_desc->pci.msi_attrib.is_msix &&
|
||||
msi_desc->nvec_used > 1;
|
||||
|
||||
/* Reuse the previous allocation */
|
||||
if (data->chip_data) {
|
||||
if (data->chip_data && multi_msi) {
|
||||
int_desc = data->chip_data;
|
||||
msg->address_hi = int_desc->address >> 32;
|
||||
msg->address_lo = int_desc->address & 0xffffffff;
|
||||
|
@ -1728,7 +1781,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
|
|||
return;
|
||||
}
|
||||
|
||||
msi_desc = irq_data_get_msi_desc(data);
|
||||
pdev = msi_desc_to_pci_dev(msi_desc);
|
||||
dest = irq_data_get_effective_affinity_mask(data);
|
||||
pbus = pdev->bus;
|
||||
|
@ -1738,11 +1790,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
|
|||
if (!hpdev)
|
||||
goto return_null_message;
|
||||
|
||||
/* Free any previous message that might have already been composed. */
|
||||
if (data->chip_data && !multi_msi) {
|
||||
int_desc = data->chip_data;
|
||||
data->chip_data = NULL;
|
||||
hv_int_desc_free(hpdev, int_desc);
|
||||
}
|
||||
|
||||
int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC);
|
||||
if (!int_desc)
|
||||
goto drop_reference;
|
||||
|
||||
if (!msi_desc->pci.msi_attrib.is_msix && msi_desc->nvec_used > 1) {
|
||||
if (multi_msi) {
|
||||
/*
|
||||
* If this is not the first MSI of Multi MSI, we already have
|
||||
* a mapping. Can exit early.
|
||||
|
@ -1767,9 +1826,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
|
|||
*/
|
||||
vector = 32;
|
||||
vector_count = msi_desc->nvec_used;
|
||||
cpu = hv_compose_multi_msi_req_get_cpu();
|
||||
} else {
|
||||
vector = hv_msi_get_int_vector(data);
|
||||
vector_count = 1;
|
||||
cpu = hv_compose_msi_req_get_cpu(dest);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1785,7 +1846,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
|
|||
switch (hbus->protocol_version) {
|
||||
case PCI_PROTOCOL_VERSION_1_1:
|
||||
size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
|
||||
dest,
|
||||
hpdev->desc.win_slot.slot,
|
||||
(u8)vector,
|
||||
vector_count);
|
||||
|
@ -1794,7 +1854,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
|
|||
case PCI_PROTOCOL_VERSION_1_2:
|
||||
case PCI_PROTOCOL_VERSION_1_3:
|
||||
size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
|
||||
dest,
|
||||
cpu,
|
||||
hpdev->desc.win_slot.slot,
|
||||
(u8)vector,
|
||||
vector_count);
|
||||
|
@ -1802,7 +1862,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
|
|||
|
||||
case PCI_PROTOCOL_VERSION_1_4:
|
||||
size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
|
||||
dest,
|
||||
cpu,
|
||||
hpdev->desc.win_slot.slot,
|
||||
vector,
|
||||
vector_count);
|
||||
|
|
|
@ -303,16 +303,21 @@ enum storvsc_request_type {
|
|||
};
|
||||
|
||||
/*
|
||||
* SRB status codes and masks; a subset of the codes used here.
|
||||
* SRB status codes and masks. In the 8-bit field, the two high order bits
|
||||
* are flags, while the remaining 6 bits are an integer status code. The
|
||||
* definitions here include only the subset of the integer status codes that
|
||||
* are tested for in this driver.
|
||||
*/
|
||||
|
||||
#define SRB_STATUS_AUTOSENSE_VALID 0x80
|
||||
#define SRB_STATUS_QUEUE_FROZEN 0x40
|
||||
#define SRB_STATUS_INVALID_LUN 0x20
|
||||
#define SRB_STATUS_SUCCESS 0x01
|
||||
#define SRB_STATUS_ABORTED 0x02
|
||||
#define SRB_STATUS_ERROR 0x04
|
||||
#define SRB_STATUS_DATA_OVERRUN 0x12
|
||||
|
||||
/* SRB status integer codes */
|
||||
#define SRB_STATUS_SUCCESS 0x01
|
||||
#define SRB_STATUS_ABORTED 0x02
|
||||
#define SRB_STATUS_ERROR 0x04
|
||||
#define SRB_STATUS_INVALID_REQUEST 0x06
|
||||
#define SRB_STATUS_DATA_OVERRUN 0x12
|
||||
#define SRB_STATUS_INVALID_LUN 0x20
|
||||
|
||||
/*
 * Extract the integer status code from an srb_status value by masking off
 * the two flag bits (SRB_STATUS_AUTOSENSE_VALID and SRB_STATUS_QUEUE_FROZEN).
 *
 * The argument is parenthesized so that a compound expression passed as
 * 'status' (e.g. "a | b") is masked as a whole instead of being split by
 * operator precedence.
 */
#define SRB_STATUS(status) \
	((status) & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))
|
||||
|
@ -969,38 +974,25 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
|
|||
void (*process_err_fn)(struct work_struct *work);
|
||||
struct hv_host_device *host_dev = shost_priv(host);
|
||||
|
||||
/*
|
||||
* In some situations, Hyper-V sets multiple bits in the
|
||||
* srb_status, such as ABORTED and ERROR. So process them
|
||||
* individually, with the most specific bits first.
|
||||
*/
|
||||
switch (SRB_STATUS(vm_srb->srb_status)) {
|
||||
case SRB_STATUS_ERROR:
|
||||
case SRB_STATUS_ABORTED:
|
||||
case SRB_STATUS_INVALID_REQUEST:
|
||||
if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) {
|
||||
/* Check for capacity change */
|
||||
if ((asc == 0x2a) && (ascq == 0x9)) {
|
||||
process_err_fn = storvsc_device_scan;
|
||||
/* Retry the I/O that triggered this. */
|
||||
set_host_byte(scmnd, DID_REQUEUE);
|
||||
goto do_work;
|
||||
}
|
||||
|
||||
if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) {
|
||||
set_host_byte(scmnd, DID_NO_CONNECT);
|
||||
process_err_fn = storvsc_remove_lun;
|
||||
goto do_work;
|
||||
}
|
||||
|
||||
if (vm_srb->srb_status & SRB_STATUS_ABORTED) {
|
||||
if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
|
||||
/* Capacity data has changed */
|
||||
(asc == 0x2a) && (ascq == 0x9)) {
|
||||
process_err_fn = storvsc_device_scan;
|
||||
/*
|
||||
* Retry the I/O that triggered this.
|
||||
* Otherwise, let upper layer deal with the
|
||||
* error when sense message is present
|
||||
*/
|
||||
set_host_byte(scmnd, DID_REQUEUE);
|
||||
goto do_work;
|
||||
}
|
||||
}
|
||||
|
||||
if (vm_srb->srb_status & SRB_STATUS_ERROR) {
|
||||
/*
|
||||
* Let upper layer deal with error when
|
||||
* sense message is present.
|
||||
*/
|
||||
if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is an error; offline the device since all
|
||||
|
@ -1023,6 +1015,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
|
|||
default:
|
||||
set_host_byte(scmnd, DID_ERROR);
|
||||
}
|
||||
return;
|
||||
|
||||
case SRB_STATUS_INVALID_LUN:
|
||||
set_host_byte(scmnd, DID_NO_CONNECT);
|
||||
process_err_fn = storvsc_remove_lun;
|
||||
goto do_work;
|
||||
|
||||
}
|
||||
return;
|
||||
|
||||
|
|
Loading…
Reference in a new issue