diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 2faf38eab785..50cdefe3f6d3 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -447,7 +447,6 @@ struct hv_pcibus_device { spinlock_t device_list_lock; /* Protect lists below */ void __iomem *cfg_addr; - struct semaphore enum_sem; struct list_head resources_for_children; struct list_head children; @@ -461,6 +460,8 @@ struct hv_pcibus_device { struct retarget_msi_interrupt retarget_msi_interrupt_params; spinlock_t retarget_msi_interrupt_lock; + + struct workqueue_struct *wq; }; /* @@ -520,6 +521,8 @@ struct hv_pci_compl { s32 completion_status; }; +static void hv_pci_onchannelcallback(void *context); + /** * hv_pci_generic_compl() - Invoked for a completion packet * @context: Set up by the sender of the packet. @@ -653,7 +656,7 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, break; } /* - * Make sure the write was done before we release the spinlock + * Make sure the read was done before we release the spinlock * allowing consecutive reads/writes. */ mb(); @@ -664,6 +667,31 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, } } +static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev) +{ + u16 ret; + unsigned long flags; + void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + + PCI_VENDOR_ID; + + spin_lock_irqsave(&hpdev->hbus->config_lock, flags); + + /* Choose the function to be read. (See comment above) */ + writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr); + /* Make sure the function was chosen before we start reading. */ + mb(); + /* Read from that function's config space. */ + ret = readw(addr); + /* + * mb() is not required here, because the spin_unlock_irqrestore() + * is a barrier. + */ + + spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags); + + return ret; +} + /** * _hv_pcifront_write_config() - Internal PCI config write * @hpdev: The PCI driver's representation of the device @@ -1106,8 +1134,37 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) * Since this function is called with IRQ locks held, can't * do normal wait for completion; instead poll. */ - while (!try_wait_for_completion(&comp.comp_pkt.host_event)) + while (!try_wait_for_completion(&comp.comp_pkt.host_event)) { + /* 0xFFFF means an invalid PCI VENDOR ID. */ + if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) { + dev_err_once(&hbus->hdev->device, + "the device has gone\n"); + goto free_int_desc; + } + + /* + * When the higher level interrupt code calls us with + * interrupt disabled, we must poll the channel by calling + * the channel callback directly when channel->target_cpu is + * the current CPU. When the higher level interrupt code + * calls us with interrupt enabled, let's add the + * local_bh_disable()/enable() to avoid race. + */ + local_bh_disable(); + + if (hbus->hdev->channel->target_cpu == smp_processor_id()) + hv_pci_onchannelcallback(hbus); + + local_bh_enable(); + + if (hpdev->state == hv_pcichild_ejecting) { + dev_err_once(&hbus->hdev->device, + "the device is being ejected\n"); + goto free_int_desc; + } + udelay(100); + } if (comp.comp_pkt.completion_status < 0) { dev_err(&hbus->hdev->device, @@ -1590,12 +1647,8 @@ static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus, * It must also treat the omission of a previously observed device as * notification that the device no longer exists. * - * Note that this function is a work item, and it may not be - * invoked in the order that it was queued. Back to back - * updates of the list of present devices may involve queuing - * multiple work items, and this one may run before ones that - * were sent later. As such, this function only does something - * if is the last one in the queue. + * Note that this function is serialized with hv_eject_device_work(), + * because both are pushed to the ordered workqueue hbus->wq. */ static void pci_devices_present_work(struct work_struct *work) { @@ -1616,11 +1669,6 @@ static void pci_devices_present_work(struct work_struct *work) INIT_LIST_HEAD(&removed); - if (down_interruptible(&hbus->enum_sem)) { - put_hvpcibus(hbus); - return; - } - /* Pull this off the queue and process it if it was the last one. */ spin_lock_irqsave(&hbus->device_list_lock, flags); while (!list_empty(&hbus->dr_list)) { @@ -1637,7 +1685,6 @@ static void pci_devices_present_work(struct work_struct *work) spin_unlock_irqrestore(&hbus->device_list_lock, flags); if (!dr) { - up(&hbus->enum_sem); put_hvpcibus(hbus); return; } @@ -1724,7 +1771,6 @@ static void pci_devices_present_work(struct work_struct *work) break; } - up(&hbus->enum_sem); put_hvpcibus(hbus); kfree(dr); } @@ -1743,6 +1789,7 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus, struct hv_dr_state *dr; struct hv_dr_work *dr_wrk; unsigned long flags; + bool pending_dr; dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT); if (!dr_wrk) @@ -1766,11 +1813,21 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus, } spin_lock_irqsave(&hbus->device_list_lock, flags); + /* + * If pending_dr is true, we have already queued a work, + * which will see the new dr. Otherwise, we need to + * queue a new work. + */ + pending_dr = !list_empty(&hbus->dr_list); list_add_tail(&dr->list_entry, &hbus->dr_list); spin_unlock_irqrestore(&hbus->device_list_lock, flags); - get_hvpcibus(hbus); - schedule_work(&dr_wrk->wrk); + if (pending_dr) { + kfree(dr_wrk); + } else { + get_hvpcibus(hbus); + queue_work(hbus->wq, &dr_wrk->wrk); + } } /** @@ -1796,10 +1853,7 @@ static void hv_eject_device_work(struct work_struct *work) hpdev = container_of(work, struct hv_pci_dev, wrk); - if (hpdev->state != hv_pcichild_ejecting) { - put_pcichild(hpdev, hv_pcidev_ref_pnp); - return; - } + WARN_ON(hpdev->state != hv_pcichild_ejecting); /* * Ejection can come before or after the PCI bus has been set up, so @@ -1848,7 +1902,7 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev) get_pcichild(hpdev, hv_pcidev_ref_pnp); INIT_WORK(&hpdev->wrk, hv_eject_device_work); get_hvpcibus(hpdev->hbus); - schedule_work(&hpdev->wrk); + queue_work(hpdev->hbus->wq, &hpdev->wrk); } /** @@ -2461,13 +2515,18 @@ static int hv_pci_probe(struct hv_device *hdev, spin_lock_init(&hbus->config_lock); spin_lock_init(&hbus->device_list_lock); spin_lock_init(&hbus->retarget_msi_interrupt_lock); - sema_init(&hbus->enum_sem, 1); init_completion(&hbus->remove_event); + hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0, + hbus->sysdata.domain); + if (!hbus->wq) { + ret = -ENOMEM; + goto free_bus; + } ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0, hv_pci_onchannelcallback, hbus); if (ret) - goto free_bus; + goto destroy_wq; hv_set_drvdata(hdev, hbus); @@ -2536,6 +2595,8 @@ static int hv_pci_probe(struct hv_device *hdev, hv_free_config_window(hbus); close: vmbus_close(hdev->channel); +destroy_wq: + destroy_workqueue(hbus->wq); free_bus: free_page((unsigned long)hbus); return ret; @@ -2615,6 +2676,7 @@ static int hv_pci_remove(struct hv_device *hdev) irq_domain_free_fwnode(hbus->sysdata.fwnode); put_hvpcibus(hbus); wait_for_completion(&hbus->remove_event); + destroy_workqueue(hbus->wq); free_page((unsigned long)hbus); return 0; }