IOMMU Fixes for Linux v6.7-rc3

Including:
 
 	- Fix race conditions in device probe path
 
 	- Handle ERR_PTR() returns in __iommu_domain_alloc() path
 
 	- Update MAINTAINERS entry for Qualcom IOMMUs
 
 	- Printk argument fix in device tree specific code
 
 	- Several Intel VT-d fixes from Lu Baolu:
 	  - Do not support enforcing cache coherency for non-empty domains
 	  - Avoid devTLB invalidation if iommu is off
 	  - Disable PCI ATS in legacy passthrough mode
 	  - Support non-PCI devices when clearing context
 	  - Fix incorrect cache invalidation for mm notification
 	  - Add MTL to quirk list to skip TE disabling
 	  - Set variable intel_dirty_ops to static
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEr9jSbILcajRFYWYyK/BELZcBGuMFAmVpq1UACgkQK/BELZcB
 GuOcUA/+Kko7SEbNCxdRlGZWYUtDrF4Aa6UOwFAFnqJikEpf3IOI6lGl/aNF/zZ/
 NMcAL53Jk9UNvKXlbwdVAOlEOco1LRGPbZFFJUGcS33iG9JhmHrMpmGuOoQe67zU
 Po5uxLigSXRlFBNqst61G8juOlyR0r6Kwel4LNoyfJ7RJuJ76j9gPL2CaIajrfB0
 mkKWKP8Qw11yA6A9sWLyFUOdLdmg4oG1rtjZRrcYGTvyKZrFxod8aQB12xcl2wd3
 RZYel2OReGML3tSczrwAHJis6XawNn9ifm2Z7jxyN10zIUH60BSl8NJrToxkIkfi
 eMNyGb89ZUDxZfH5oRHDo9ZRg3r/6AZnkvK1rA24onUJQtqlWKfnHzW4i9Hn4sdV
 ecckcyIRkRm54q9vsRh2Ke074OzOREVAeL7bUGmh8zsfc2mQSGofgZDxB12cL7re
 k3+GaPE2TTUpE9VSWttQN87d5wyds7Dfd0RiDu4T7mbP62b3UMhP//scTiWTQ4M5
 iYTHhJwrTebSjjEQB/0h09grYEFS2jsAimc6uj6MDnGTUGSlzd1VgiBZCn+ea8KL
 z0d0yi6rcVMBDUpU/GLUsaYji2trTxDV4popxz+gpDZx1wLBgaDbX8QJ4K3lH6a3
 51REakYTL4xb7m9aNGcvx+sYyB/OXlvGhc2jUv0kdJd4DmR8Un4=
 =O/Sr
 -----END PGP SIGNATURE-----

Merge tag 'iommu-fixes-v6.7-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull iommu fixes from Joerg Roedel:

 - Fix race conditions in device probe path

 - Handle ERR_PTR() returns in __iommu_domain_alloc() path

 - Update MAINTAINERS entry for Qualcom IOMMUs

 - Printk argument fix in device tree specific code

 - Several Intel VT-d fixes from Lu Baolu:
     - Do not support enforcing cache coherency for non-empty domains
     - Avoid devTLB invalidation if iommu is off
     - Disable PCI ATS in legacy passthrough mode
     - Support non-PCI devices when clearing context
     - Fix incorrect cache invalidation for mm notification
     - Add MTL to quirk list to skip TE disabling
     - Set variable intel_dirty_ops to static

* tag 'iommu-fixes-v6.7-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu:
  iommu: Fix printk arg in of_iommu_get_resv_regions()
  iommu/vt-d: Set variable intel_dirty_ops to static
  iommu/vt-d: Fix incorrect cache invalidation for mm notification
  iommu/vt-d: Add MTL to quirk list to skip TE disabling
  iommu/vt-d: Make context clearing consistent with context mapping
  iommu/vt-d: Disable PCI ATS in legacy passthrough mode
  iommu/vt-d: Omit devTLB invalidation requests when TES=0
  iommu/vt-d: Support enforce_cache_coherency only for empty domains
  iommu: Avoid more races around device probe
  MAINTAINERS: list all Qualcomm IOMMU drivers in the QUALCOMM IOMMU entry
  iommu: Flow ERR_PTR out from __iommu_domain_alloc()
This commit is contained in:
Linus Torvalds 2023-12-02 08:42:39 +09:00
commit 1a2b418566
9 changed files with 126 additions and 42 deletions

View File

@ -17946,6 +17946,8 @@ L: iommu@lists.linux.dev
L: linux-arm-msm@vger.kernel.org
S: Maintained
F: drivers/iommu/arm/arm-smmu/qcom_iommu.c
F: drivers/iommu/arm/arm-smmu/arm-smmu-qcom*
F: drivers/iommu/msm_iommu*
QUALCOMM IPC ROUTER (QRTR) DRIVER
M: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>

View File

@ -1568,17 +1568,22 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
int err;
const struct iommu_ops *ops;
/* Serialise to make dev->iommu stable under our potential fwspec */
mutex_lock(&iommu_probe_device_lock);
/*
* If we already translated the fwspec there is nothing left to do,
* return the iommu_ops.
*/
ops = acpi_iommu_fwspec_ops(dev);
if (ops)
if (ops) {
mutex_unlock(&iommu_probe_device_lock);
return ops;
}
err = iort_iommu_configure_id(dev, id_in);
if (err && err != -EPROBE_DEFER)
err = viot_iommu_configure(dev);
mutex_unlock(&iommu_probe_device_lock);
/*
* If we have reason to believe the IOMMU driver missed the initial

View File

@ -1522,6 +1522,15 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
{
struct qi_desc desc;
/*
* VT-d spec, section 4.3:
*
* Software is recommended to not submit any Device-TLB invalidation
* requests while address remapping hardware is disabled.
*/
if (!(iommu->gcmd & DMA_GCMD_TE))
return;
if (mask) {
addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
@ -1587,6 +1596,15 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
/*
* VT-d spec, section 4.3:
*
* Software is recommended to not submit any Device-TLB invalidation
* requests while address remapping hardware is disabled.
*/
if (!(iommu->gcmd & DMA_GCMD_TE))
return;
desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
QI_DEV_IOTLB_PFSID(pfsid);

View File

@ -299,7 +299,7 @@ static int iommu_skip_te_disable;
#define IDENTMAP_AZALIA 4
const struct iommu_ops intel_iommu_ops;
const struct iommu_dirty_ops intel_dirty_ops;
static const struct iommu_dirty_ops intel_dirty_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
{
@ -2207,6 +2207,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
attr |= DMA_FL_PTE_DIRTY;
}
domain->has_mappings = true;
pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
while (nr_pages > 0) {
@ -2490,7 +2492,8 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
return ret;
}
iommu_enable_pci_caps(info);
if (sm_supported(info->iommu) || !domain_type_is_si(info->domain))
iommu_enable_pci_caps(info);
return 0;
}
@ -3925,8 +3928,8 @@ static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *op
*/
static void domain_context_clear(struct device_domain_info *info)
{
if (!info->iommu || !info->dev || !dev_is_pci(info->dev))
return;
if (!dev_is_pci(info->dev))
domain_context_clear_one(info, info->bus, info->devfn);
pci_for_each_dma_alias(to_pci_dev(info->dev),
&domain_context_clear_one_cb, info);
@ -4360,7 +4363,8 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
return true;
spin_lock_irqsave(&dmar_domain->lock, flags);
if (!domain_support_force_snooping(dmar_domain)) {
if (!domain_support_force_snooping(dmar_domain) ||
(!dmar_domain->use_first_level && dmar_domain->has_mappings)) {
spin_unlock_irqrestore(&dmar_domain->lock, flags);
return false;
}
@ -4925,7 +4929,7 @@ static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
return 0;
}
const struct iommu_dirty_ops intel_dirty_ops = {
static const struct iommu_dirty_ops intel_dirty_ops = {
.set_dirty_tracking = intel_iommu_set_dirty_tracking,
.read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
};
@ -5073,7 +5077,7 @@ static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
ver = (dev->device >> 8) & 0xff;
if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
ver != 0x4e && ver != 0x8a && ver != 0x98 &&
ver != 0x9a && ver != 0xa7)
ver != 0x9a && ver != 0xa7 && ver != 0x7d)
return;
if (risky_device(dev))

View File

@ -602,6 +602,9 @@ struct dmar_domain {
*/
u8 dirty_tracking:1; /* Dirty tracking is enabled */
u8 nested_parent:1; /* Has other domains nested on it */
u8 has_mappings:1; /* Has mappings configured through
* iommu_map() interface.
*/
spinlock_t lock; /* Protect device tracking lists */
struct list_head devices; /* all devices' list */

View File

@ -216,6 +216,27 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
rcu_read_unlock();
}
static void intel_flush_svm_all(struct intel_svm *svm)
{
struct device_domain_info *info;
struct intel_svm_dev *sdev;
rcu_read_lock();
list_for_each_entry_rcu(sdev, &svm->devs, list) {
info = dev_iommu_priv_get(sdev->dev);
qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0);
if (info->ats_enabled) {
qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
svm->pasid, sdev->qdep,
0, 64 - VTD_PAGE_SHIFT);
quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT,
svm->pasid, sdev->qdep);
}
}
rcu_read_unlock();
}
/* Pages have been freed at this point */
static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
struct mm_struct *mm,
@ -223,6 +244,11 @@ static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
{
struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
if (start == 0 && end == -1UL) {
intel_flush_svm_all(svm);
return;
}
intel_flush_svm_range(svm, start,
(end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

View File

@ -485,11 +485,12 @@ static void iommu_deinit_device(struct device *dev)
dev_iommu_free(dev);
}
DEFINE_MUTEX(iommu_probe_device_lock);
static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
{
const struct iommu_ops *ops = dev->bus->iommu_ops;
struct iommu_group *group;
static DEFINE_MUTEX(iommu_probe_device_lock);
struct group_device *gdev;
int ret;
@ -502,17 +503,15 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
* probably be able to use device_lock() here to minimise the scope,
* but for now enforcing a simple global ordering is fine.
*/
mutex_lock(&iommu_probe_device_lock);
lockdep_assert_held(&iommu_probe_device_lock);
/* Device is probed already if in a group */
if (dev->iommu_group) {
ret = 0;
goto out_unlock;
}
if (dev->iommu_group)
return 0;
ret = iommu_init_device(dev, ops);
if (ret)
goto out_unlock;
return ret;
group = dev->iommu_group;
gdev = iommu_group_alloc_device(group, dev);
@ -548,7 +547,6 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
list_add_tail(&group->entry, group_list);
}
mutex_unlock(&group->mutex);
mutex_unlock(&iommu_probe_device_lock);
if (dev_is_pci(dev))
iommu_dma_set_pci_32bit_workaround(dev);
@ -562,8 +560,6 @@ err_put_group:
iommu_deinit_device(dev);
mutex_unlock(&group->mutex);
iommu_group_put(group);
out_unlock:
mutex_unlock(&iommu_probe_device_lock);
return ret;
}
@ -573,7 +569,9 @@ int iommu_probe_device(struct device *dev)
const struct iommu_ops *ops;
int ret;
mutex_lock(&iommu_probe_device_lock);
ret = __iommu_probe_device(dev, NULL);
mutex_unlock(&iommu_probe_device_lock);
if (ret)
return ret;
@ -1788,7 +1786,7 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
*/
if (ops->default_domain) {
if (req_type)
return NULL;
return ERR_PTR(-EINVAL);
return ops->default_domain;
}
@ -1797,15 +1795,15 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
/* The driver gave no guidance on what type to use, try the default */
dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type);
if (dom)
if (!IS_ERR(dom))
return dom;
/* Otherwise IDENTITY and DMA_FQ defaults will try DMA */
if (iommu_def_domain_type == IOMMU_DOMAIN_DMA)
return NULL;
return ERR_PTR(-EINVAL);
dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA);
if (!dom)
return NULL;
if (IS_ERR(dom))
return dom;
pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
iommu_def_domain_type, group->name);
@ -1822,7 +1820,9 @@ static int probe_iommu_group(struct device *dev, void *data)
struct list_head *group_list = data;
int ret;
mutex_lock(&iommu_probe_device_lock);
ret = __iommu_probe_device(dev, group_list);
mutex_unlock(&iommu_probe_device_lock);
if (ret == -ENODEV)
ret = 0;
@ -2094,10 +2094,17 @@ static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops,
else if (ops->domain_alloc)
domain = ops->domain_alloc(alloc_type);
else
return NULL;
return ERR_PTR(-EOPNOTSUPP);
/*
* Many domain_alloc ops now return ERR_PTR, make things easier for the
* driver by accepting ERR_PTR from all domain_alloc ops instead of
* having two rules.
*/
if (IS_ERR(domain))
return domain;
if (!domain)
return NULL;
return ERR_PTR(-ENOMEM);
domain->type = type;
/*
@ -2110,9 +2117,14 @@ static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops,
if (!domain->ops)
domain->ops = ops->default_domain_ops;
if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) {
iommu_domain_free(domain);
domain = NULL;
if (iommu_is_dma_domain(domain)) {
int rc;
rc = iommu_get_dma_cookie(domain);
if (rc) {
iommu_domain_free(domain);
return ERR_PTR(rc);
}
}
return domain;
}
@ -2129,10 +2141,15 @@ __iommu_group_domain_alloc(struct iommu_group *group, unsigned int type)
struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
{
struct iommu_domain *domain;
if (bus == NULL || bus->iommu_ops == NULL)
return NULL;
return __iommu_domain_alloc(bus->iommu_ops, NULL,
domain = __iommu_domain_alloc(bus->iommu_ops, NULL,
IOMMU_DOMAIN_UNMANAGED);
if (IS_ERR(domain))
return NULL;
return domain;
}
EXPORT_SYMBOL_GPL(iommu_domain_alloc);
@ -3041,8 +3058,8 @@ static int iommu_setup_default_domain(struct iommu_group *group,
return -EINVAL;
dom = iommu_group_alloc_default_domain(group, req_type);
if (!dom)
return -ENODEV;
if (IS_ERR(dom))
return PTR_ERR(dom);
if (group->default_domain == dom)
return 0;
@ -3243,21 +3260,23 @@ void iommu_device_unuse_default_domain(struct device *dev)
static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
{
struct iommu_domain *domain;
if (group->blocking_domain)
return 0;
group->blocking_domain =
__iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED);
if (!group->blocking_domain) {
domain = __iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED);
if (IS_ERR(domain)) {
/*
* For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED
* create an empty domain instead.
*/
group->blocking_domain = __iommu_group_domain_alloc(
group, IOMMU_DOMAIN_UNMANAGED);
if (!group->blocking_domain)
return -EINVAL;
domain = __iommu_group_domain_alloc(group,
IOMMU_DOMAIN_UNMANAGED);
if (IS_ERR(domain))
return PTR_ERR(domain);
}
group->blocking_domain = domain;
return 0;
}

View File

@ -112,16 +112,20 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
const u32 *id)
{
const struct iommu_ops *ops = NULL;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct iommu_fwspec *fwspec;
int err = NO_IOMMU;
if (!master_np)
return NULL;
/* Serialise to make dev->iommu stable under our potential fwspec */
mutex_lock(&iommu_probe_device_lock);
fwspec = dev_iommu_fwspec_get(dev);
if (fwspec) {
if (fwspec->ops)
if (fwspec->ops) {
mutex_unlock(&iommu_probe_device_lock);
return fwspec->ops;
}
/* In the deferred case, start again from scratch */
iommu_fwspec_free(dev);
}
@ -155,6 +159,8 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
fwspec = dev_iommu_fwspec_get(dev);
ops = fwspec->ops;
}
mutex_unlock(&iommu_probe_device_lock);
/*
* If we have reason to believe the IOMMU driver missed the initial
* probe for dev, replay it to get things in order.
@ -191,7 +197,7 @@ iommu_resv_region_get_type(struct device *dev,
if (start == phys->start && end == phys->end)
return IOMMU_RESV_DIRECT;
dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", &phys,
dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", phys,
&start, &end);
return IOMMU_RESV_RESERVED;
}

View File

@ -845,6 +845,7 @@ static inline void dev_iommu_priv_set(struct device *dev, void *priv)
dev->iommu->priv = priv;
}
extern struct mutex iommu_probe_device_lock;
int iommu_probe_device(struct device *dev);
int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);