diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index cee91cdf0016..fb5c40715d10 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1267,15 +1267,52 @@ static void __domain_flush_pages(struct protection_domain *domain,
 }
 
 static void domain_flush_pages(struct protection_domain *domain,
-			       u64 address, size_t size)
+			       u64 address, size_t size, int pde)
 {
-	__domain_flush_pages(domain, address, size, 0);
+	if (likely(!amd_iommu_np_cache)) {
+		__domain_flush_pages(domain, address, size, pde);
+		return;
+	}
+
+	/*
+	 * When NpCache is on, we infer that we run in a VM and use a vIOMMU.
+	 * In such setups it is best to avoid flushes of ranges which are not
+	 * naturally aligned, since it would lead to flushes of unmodified
+	 * PTEs. Such flushes would require the hypervisor to do more work than
+	 * necessary. Therefore, perform repeated flushes of aligned ranges
+	 * until you cover the range. Each iteration flushes the smaller
+	 * between the natural alignment of the address that we flush and the
+	 * greatest naturally aligned region that fits in the range.
+	 */
+	while (size != 0) {
+		int addr_alignment = __ffs(address);
+		int size_alignment = __fls(size);
+		int min_alignment;
+		size_t flush_size;
+
+		/*
+		 * size is always non-zero, but address might be zero, causing
+		 * addr_alignment to be negative. As the casting of the
+		 * argument in __ffs(address) to long might trim the high bits
+		 * of the address on x86-32, cast to long when doing the check.
+		 */
+		if (likely((unsigned long)address != 0))
+			min_alignment = min(addr_alignment, size_alignment);
+		else
+			min_alignment = size_alignment;
+
+		flush_size = 1ul << min_alignment;
+
+		__domain_flush_pages(domain, address, flush_size, pde);
+		address += flush_size;
+		size -= flush_size;
+	}
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
 void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
 {
-	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
+	domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
 }
 
 void amd_iommu_domain_flush_complete(struct protection_domain *domain)
@@ -1302,7 +1339,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
 	unsigned long flags;
 
 	spin_lock_irqsave(&domain->lock, flags);
-	domain_flush_pages(domain, iova, size);
+	domain_flush_pages(domain, iova, size, 1);
 	amd_iommu_domain_flush_complete(domain);
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -2206,7 +2243,7 @@ static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
 	unsigned long flags;
 
 	spin_lock_irqsave(&dom->lock, flags);
-	__domain_flush_pages(dom, gather->start, gather->end - gather->start, 1);
+	domain_flush_pages(dom, gather->start, gather->end - gather->start, 1);
 	amd_iommu_domain_flush_complete(dom);
 	spin_unlock_irqrestore(&dom->lock, flags);
 }
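
For illustration, below is a stand-alone user-space sketch of the chunking
loop that domain_flush_pages() performs after this patch when
amd_iommu_np_cache is set. It is not kernel code: the kernel's
__ffs()/__fls() are approximated with GCC builtins, and
__domain_flush_pages() is replaced by a printf() stub (flush_stub() is a
name invented for this sketch), so the way an unaligned range decomposes
into naturally aligned chunks can be observed directly.

/*
 * Stand-alone sketch of the patch's chunking loop, for illustration only.
 * Assumes a GCC/Clang-compatible compiler for the bit-scan builtins.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Stub standing in for __domain_flush_pages(); just reports each chunk. */
static void flush_stub(uint64_t address, size_t size)
{
	printf("flush at 0x%llx, size 0x%zx\n",
	       (unsigned long long)address, size);
}

static void flush_aligned_chunks(uint64_t address, size_t size)
{
	while (size != 0) {
		/* Like __ffs(address): index of the lowest set bit. */
		int addr_alignment = __builtin_ctzll(address);
		/* Like __fls(size): index of the highest set bit. */
		int size_alignment = 63 - __builtin_clzll(size);
		int min_alignment;
		size_t flush_size;

		/* address == 0 would make __builtin_ctzll() undefined. */
		if (address != 0)
			min_alignment = addr_alignment < size_alignment ?
					addr_alignment : size_alignment;
		else
			min_alignment = size_alignment;

		flush_size = (size_t)1 << min_alignment;

		flush_stub(address, flush_size);
		address += flush_size;
		size -= flush_size;
	}
}

int main(void)
{
	/* e.g. an unaligned 12-page range starting at page 3 */
	flush_aligned_chunks(0x3000, 0xc000);
	return 0;
}

Running the sketch, the 0xc000-byte range at 0x3000 splits into chunks of
0x1000, 0x4000, 0x4000, 0x2000 and 0x1000, each aligned to its own size,
which is the behavior the comment in the patch describes: a vIOMMU only
ever sees invalidations of naturally aligned regions, rather than one
oversized flush that would cover unmodified PTEs.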