From 933fcd01e97e2ba29880dd5f1239365e40094950 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 4 Feb 2021 09:43:58 +0800 Subject: [PATCH] iommu/vt-d: Add iotlb_sync_map callback Some Intel VT-d hardware implementations don't support memory coherency for page table walk (presented by the Page-Walk-coherency bit in the ecap register), so that software must flush the corresponding CPU cache lines explicitly after each page table entry update. The iommu_map_sg() code iterates through the given scatter-gather list and invokes iommu_map() for each element in the scatter-gather list, which calls into the vendor IOMMU driver through iommu_ops callback. As the result, a single sg mapping may lead to multiple cache line flushes, which leads to the degradation of I/O performance after the commit ("iommu/vt-d: Convert intel iommu driver to the iommu ops"). Fix this by adding iotlb_sync_map callback and centralizing the clflush operations after all sg mappings. Fixes: c588072bba6b5 ("iommu/vt-d: Convert intel iommu driver to the iommu ops") Reported-by: Chuck Lever Link: https://lore.kernel.org/linux-iommu/D81314ED-5673-44A6-B597-090E3CB83EB0@oracle.com/ Signed-off-by: Lu Baolu Cc: Robin Murphy [ cel: removed @first_pte, which is no longer used ] Signed-off-by: Chuck Lever Link: https://lore.kernel.org/linux-iommu/161177763962.1311.15577661784296014186.stgit@manet.1015granger.net Link: https://lore.kernel.org/r/20210204014401.2846425-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 90 ++++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7dfe681acc3d..761c1b251c1e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2283,9 +2283,9 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long phys_pfn, unsigned long nr_pages, int prot) { - struct dma_pte *first_pte = NULL, *pte = NULL; unsigned int largepage_lvl = 0; unsigned long lvl_pages = 0; + struct dma_pte *pte = NULL; phys_addr_t pteval; u64 attr; @@ -2314,7 +2314,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, nr_pages); - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); + pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); if (!pte) return -ENOMEM; /* It is large page*/ @@ -2375,34 +2375,14 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, * recalculate 'pte' and switch back to smaller pages for the * end of the mapping, if the trailing size is not enough to * use another superpage (i.e. nr_pages < lvl_pages). + * + * We leave clflush for the leaf pte changes to iotlb_sync_map() + * callback. */ pte++; if (!nr_pages || first_pte_in_page(pte) || - (largepage_lvl > 1 && nr_pages < lvl_pages)) { - domain_flush_cache(domain, first_pte, - (void *)pte - (void *)first_pte); + (largepage_lvl > 1 && nr_pages < lvl_pages)) pte = NULL; - } - } - - return 0; -} - -static int -domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - unsigned long phys_pfn, unsigned long nr_pages, int prot) -{ - int iommu_id, ret; - struct intel_iommu *iommu; - - /* Do the real mapping first */ - ret = __domain_mapping(domain, iov_pfn, phys_pfn, nr_pages, prot); - if (ret) - return ret; - - for_each_domain_iommu(iommu_id, domain) { - iommu = g_iommus[iommu_id]; - __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); } return 0; @@ -4943,7 +4923,6 @@ static int intel_iommu_map(struct iommu_domain *domain, struct dmar_domain *dmar_domain = to_dmar_domain(domain); u64 max_addr; int prot = 0; - int ret; if (iommu_prot & IOMMU_READ) prot |= DMA_PTE_READ; @@ -4969,9 +4948,8 @@ static int intel_iommu_map(struct iommu_domain *domain, /* Round up size to next multiple of PAGE_SIZE, if it and the low bits of hpa would take us onto the next page */ size = aligned_nrpages(hpa, size); - ret = domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, - hpa >> VTD_PAGE_SHIFT, size, prot); - return ret; + return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, + hpa >> VTD_PAGE_SHIFT, size, prot); } static size_t intel_iommu_unmap(struct iommu_domain *domain, @@ -5424,6 +5402,57 @@ static bool risky_device(struct pci_dev *pdev) return false; } +static void clflush_sync_map(struct dmar_domain *domain, unsigned long clf_pfn, + unsigned long clf_pages) +{ + struct dma_pte *first_pte = NULL, *pte = NULL; + unsigned long lvl_pages = 0; + int level = 0; + + while (clf_pages > 0) { + if (!pte) { + level = 0; + pte = pfn_to_dma_pte(domain, clf_pfn, &level); + if (WARN_ON(!pte)) + return; + first_pte = pte; + lvl_pages = lvl_to_nr_pages(level); + } + + if (WARN_ON(!lvl_pages || clf_pages < lvl_pages)) + return; + + clf_pages -= lvl_pages; + clf_pfn += lvl_pages; + pte++; + + if (!clf_pages || first_pte_in_page(pte) || + (level > 1 && clf_pages < lvl_pages)) { + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + pte = NULL; + } + } +} + +static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + unsigned long pages = aligned_nrpages(iova, size); + unsigned long pfn = iova >> VTD_PAGE_SHIFT; + struct intel_iommu *iommu; + int iommu_id; + + if (!dmar_domain->iommu_coherency) + clflush_sync_map(dmar_domain, pfn, pages); + + for_each_domain_iommu(iommu_id, dmar_domain) { + iommu = g_iommus[iommu_id]; + __mapping_notify_one(iommu, dmar_domain, pfn, pages); + } +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -5436,6 +5465,7 @@ const struct iommu_ops intel_iommu_ops = { .aux_detach_dev = intel_iommu_aux_detach_device, .aux_get_pasid = intel_iommu_aux_get_pasid, .map = intel_iommu_map, + .iotlb_sync_map = intel_iommu_iotlb_sync_map, .unmap = intel_iommu_unmap, .flush_iotlb_all = intel_flush_iotlb_all, .iotlb_sync = intel_iommu_tlb_sync,