From 9113785c3e918187b6b0c084c60e0344a2f1685c Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:29:00 +0100 Subject: [PATCH 01/16] iommu/tegra-smmu: Fix iova_to_phys() method iova_to_phys() has several problems: (a) iova_to_phys() is supposed to return 0 if there is no entry present for the iova. (b) if as_get_pte() fails, we oops the kernel by dereferencing a NULL pointer. Really, we should not even be trying to allocate a page table at all, but should only be returning the presence of the 2nd level page table. This will be fixed in a subsequent patch. Treat both of these conditions as "no mapping" conditions. Signed-off-by: Russell King Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index c1f2e521dc52..083354903a1a 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -592,6 +592,9 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain, u32 *pte; pte = as_get_pte(as, iova, &page); + if (!pte || !*pte) + return 0; + pfn = *pte & as->smmu->pfn_mask; return PFN_PHYS(pfn); From b98e34f0c6f1c4ac7af41afecc4a26f5f2ebe68d Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:29:05 +0100 Subject: [PATCH 02/16] iommu/tegra-smmu: Fix unmap() method The Tegra SMMU unmap path has several problems: 1. as_pte_put() can perform a write-after-free 2. tegra_smmu_unmap() can perform cache maintenance on a page we have just freed. 3. when a page table is unmapped, there is no CPU cache maintenance of the write clearing the page directory entry, nor is there any maintenance of the IOMMU to ensure that it sees the page table has gone. Fix this by getting rid of as_pte_put(), and instead coding the PTE unmap separately from the PDE unmap, placing the PDE unmap after the PTE unmap has been completed. 
Signed-off-by: Russell King Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 083354903a1a..a7a7645fb268 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -509,29 +509,35 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, return &pt[pte]; } -static void as_put_pte(struct tegra_smmu_as *as, dma_addr_t iova) +static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) { + struct tegra_smmu *smmu = as->smmu; u32 pde = (iova >> SMMU_PDE_SHIFT) & 0x3ff; - u32 pte = (iova >> SMMU_PTE_SHIFT) & 0x3ff; u32 *count = page_address(as->count); - u32 *pd = page_address(as->pd), *pt; + u32 *pd = page_address(as->pd); struct page *page; - page = pfn_to_page(pd[pde] & as->smmu->pfn_mask); - pt = page_address(page); + page = pfn_to_page(pd[pde] & smmu->pfn_mask); /* * When no entries in this page table are used anymore, return the * memory page to the system. 
*/ - if (pt[pte] != 0) { - if (--count[pde] == 0) { - ClearPageReserved(page); - __free_page(page); - pd[pde] = 0; - } + if (--count[pde] == 0) { + unsigned int offset = pde * sizeof(*pd); - pt[pte] = 0; + /* Clear the page directory entry first */ + pd[pde] = 0; + + /* Flush the page directory entry */ + smmu->soc->ops->flush_dcache(as->pd, offset, sizeof(*pd)); + smmu_flush_ptc(smmu, as->pd, offset); + smmu_flush_tlb_section(smmu, as->id, iova); + smmu_flush(smmu); + + /* Finally, free the page */ + ClearPageReserved(page); + __free_page(page); } } @@ -569,17 +575,20 @@ static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova, u32 *pte; pte = as_get_pte(as, iova, &page); - if (!pte) + if (!pte || !*pte) return 0; + *pte = 0; + offset = offset_in_page(pte); - as_put_pte(as, iova); smmu->soc->ops->flush_dcache(page, offset, 4); smmu_flush_ptc(smmu, page, offset); smmu_flush_tlb_group(smmu, as->id, iova); smmu_flush(smmu); + tegra_smmu_pte_put_use(as, iova); + return size; } From 8482ee5ea1097445f6498ee522965f5311667763 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:29:10 +0100 Subject: [PATCH 03/16] iommu/tegra-smmu: Factor out common PTE setting Factor out the common PTE setting code into a separate function. 
Signed-off-by: Russell King Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index a7a7645fb268..53d0f15dac6f 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -541,12 +541,24 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) } } +static void tegra_smmu_set_pte(struct tegra_smmu_as *as, unsigned long iova, + u32 *pte, struct page *pte_page, u32 val) +{ + struct tegra_smmu *smmu = as->smmu; + unsigned long offset = offset_in_page(pte); + + *pte = val; + + smmu->soc->ops->flush_dcache(pte_page, offset, 4); + smmu_flush_ptc(smmu, pte_page, offset); + smmu_flush_tlb_group(smmu, as->id, iova); + smmu_flush(smmu); +} + static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { struct tegra_smmu_as *as = to_smmu_as(domain); - struct tegra_smmu *smmu = as->smmu; - unsigned long offset; struct page *page; u32 *pte; @@ -554,13 +566,8 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, if (!pte) return -ENOMEM; - *pte = __phys_to_pfn(paddr) | SMMU_PTE_ATTR; - offset = offset_in_page(pte); - - smmu->soc->ops->flush_dcache(page, offset, 4); - smmu_flush_ptc(smmu, page, offset); - smmu_flush_tlb_group(smmu, as->id, iova); - smmu_flush(smmu); + tegra_smmu_set_pte(as, iova, pte, page, + __phys_to_pfn(paddr) | SMMU_PTE_ATTR); return 0; } @@ -569,8 +576,6 @@ static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct tegra_smmu_as *as = to_smmu_as(domain); - struct tegra_smmu *smmu = as->smmu; - unsigned long offset; struct page *page; u32 *pte; @@ -578,15 +583,7 @@ static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova, if (!pte || !*pte) return 0; - *pte = 0; - - offset = offset_in_page(pte); - - 
smmu->soc->ops->flush_dcache(page, offset, 4); - smmu_flush_ptc(smmu, page, offset); - smmu_flush_tlb_group(smmu, as->id, iova); - smmu_flush(smmu); - + tegra_smmu_set_pte(as, iova, pte, page, 0); tegra_smmu_pte_put_use(as, iova); return size; From 34d35f8cbe51bf93faf3214ee5b5d6f8ae7df4c1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:29:16 +0100 Subject: [PATCH 04/16] iommu/tegra-smmu: Add iova_pd_index() and iova_pt_index() helpers Add a pair of helpers to get the page directory and page table indexes. Signed-off-by: Russell King Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 53d0f15dac6f..4c4bc7966046 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -134,6 +134,16 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) #define SMMU_PTE_ATTR (SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \ SMMU_PTE_NONSECURE) +static unsigned int iova_pd_index(unsigned long iova) +{ + return (iova >> SMMU_PDE_SHIFT) & (SMMU_NUM_PDE - 1); +} + +static unsigned int iova_pt_index(unsigned long iova) +{ + return (iova >> SMMU_PTE_SHIFT) & (SMMU_NUM_PTE - 1); +} + static inline void smmu_flush_ptc(struct tegra_smmu *smmu, struct page *page, unsigned long offset) { @@ -469,8 +479,8 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, struct page **pagep) { u32 *pd = page_address(as->pd), *pt, *count; - u32 pde = (iova >> SMMU_PDE_SHIFT) & 0x3ff; - u32 pte = (iova >> SMMU_PTE_SHIFT) & 0x3ff; + unsigned int pde = iova_pd_index(iova); + unsigned int pte = iova_pt_index(iova); struct tegra_smmu *smmu = as->smmu; struct page *page; unsigned int i; @@ -512,7 +522,7 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) { struct tegra_smmu *smmu = as->smmu; - 
u32 pde = (iova >> SMMU_PDE_SHIFT) & 0x3ff; + unsigned int pde = iova_pd_index(iova); u32 *count = page_address(as->count); u32 *pd = page_address(as->pd); struct page *page; From 0b42c7c1132f331fba263f0d2ca23544770584b7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:29:21 +0100 Subject: [PATCH 05/16] iommu/tegra-smmu: Fix page table lookup in unmap/iova_to_phys methods Fix the page table lookup in the unmap and iova_to_phys methods. Neither of these methods should allocate a page table; a missing page table should be treated the same as no mapping present. More importantly, using as_get_pte() for an IOVA corresponding with a non-present page table entry increments the use-count for the page table, on the assumption that the caller of as_get_pte() is going to setup a mapping. This is an incorrect assumption. Fix both of these bugs by providing a separate helper which only looks up the page table, but never allocates it. This is akin to pte_offset() for CPU page tables. 
Signed-off-by: Russell King Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 43 ++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 4c4bc7966046..bbff5b647183 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -475,12 +475,36 @@ static void tegra_smmu_detach_dev(struct iommu_domain *domain, struct device *de } } +static u32 *tegra_smmu_pte_offset(struct page *pt_page, unsigned long iova) +{ + u32 *pt = page_address(pt_page); + + return pt + iova_pt_index(iova); +} + +static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, + struct page **pagep) +{ + unsigned int pd_index = iova_pd_index(iova); + struct page *pt_page; + u32 *pd; + + pd = page_address(as->pd); + + if (!pd[pd_index]) + return NULL; + + pt_page = pfn_to_page(pd[pd_index] & as->smmu->pfn_mask); + *pagep = pt_page; + + return tegra_smmu_pte_offset(pt_page, iova); +} + static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, struct page **pagep) { u32 *pd = page_address(as->pd), *pt, *count; unsigned int pde = iova_pd_index(iova); - unsigned int pte = iova_pt_index(iova); struct tegra_smmu *smmu = as->smmu; struct page *page; unsigned int i; @@ -506,17 +530,18 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, smmu_flush(smmu); } else { page = pfn_to_page(pd[pde] & smmu->pfn_mask); - pt = page_address(page); } *pagep = page; + pt = page_address(page); + /* Keep track of entries in this page table. 
*/ count = page_address(as->count); - if (pt[pte] == 0) + if (pt[iova_pt_index(iova)] == 0) count[pde]++; - return &pt[pte]; + return tegra_smmu_pte_offset(page, iova); } static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) @@ -586,14 +611,14 @@ static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct tegra_smmu_as *as = to_smmu_as(domain); - struct page *page; + struct page *pte_page; u32 *pte; - pte = as_get_pte(as, iova, &page); + pte = tegra_smmu_pte_lookup(as, iova, &pte_page); if (!pte || !*pte) return 0; - tegra_smmu_set_pte(as, iova, pte, page, 0); + tegra_smmu_set_pte(as, iova, pte, pte_page, 0); tegra_smmu_pte_put_use(as, iova); return size; @@ -603,11 +628,11 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { struct tegra_smmu_as *as = to_smmu_as(domain); - struct page *page; + struct page *pte_page; unsigned long pfn; u32 *pte; - pte = as_get_pte(as, iova, &page); + pte = tegra_smmu_pte_lookup(as, iova, &pte_page); if (!pte || !*pte) return 0; From 853520fa96511e4a49942d2cba34a329528c7e41 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:29:26 +0100 Subject: [PATCH 06/16] iommu/tegra-smmu: Store struct page pointer for page tables Store the struct page pointer for the second level page tables, rather than working back from the page directory entry. This is necessary as we want to eliminate the use of physical addresses used with arch-private functions, switching instead to use the streaming DMA API. 
Signed-off-by: Russell King Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index bbff5b647183..8ec5ac45caab 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -41,6 +41,7 @@ struct tegra_smmu_as { struct tegra_smmu *smmu; unsigned int use_count; struct page *count; + struct page **pts; struct page *pd; unsigned id; u32 attr; @@ -271,6 +272,14 @@ static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) return NULL; } + as->pts = kcalloc(SMMU_NUM_PDE, sizeof(*as->pts), GFP_KERNEL); + if (!as->pts) { + __free_page(as->count); + __free_page(as->pd); + kfree(as); + return NULL; + } + /* clear PDEs */ pd = page_address(as->pd); SetPageReserved(as->pd); @@ -487,14 +496,11 @@ static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, { unsigned int pd_index = iova_pd_index(iova); struct page *pt_page; - u32 *pd; - pd = page_address(as->pd); - - if (!pd[pd_index]) + pt_page = as->pts[pd_index]; + if (!pt_page) return NULL; - pt_page = pfn_to_page(pd[pd_index] & as->smmu->pfn_mask); *pagep = pt_page; return tegra_smmu_pte_offset(pt_page, iova); @@ -509,7 +515,7 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, struct page *page; unsigned int i; - if (pd[pde] == 0) { + if (!as->pts[pde]) { page = alloc_page(GFP_KERNEL | __GFP_DMA); if (!page) return NULL; @@ -520,6 +526,8 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, for (i = 0; i < SMMU_NUM_PTE; i++) pt[i] = 0; + as->pts[pde] = page; + smmu->soc->ops->flush_dcache(page, 0, SMMU_SIZE_PT); pd[pde] = SMMU_MK_PDE(page, SMMU_PDE_ATTR | SMMU_PDE_NEXT); @@ -529,7 +537,7 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, smmu_flush_tlb_section(smmu, as->id, iova); smmu_flush(smmu); } else { - page = pfn_to_page(pd[pde] & smmu->pfn_mask); + page = 
as->pts[pde]; } *pagep = page; @@ -550,9 +558,7 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) unsigned int pde = iova_pd_index(iova); u32 *count = page_address(as->count); u32 *pd = page_address(as->pd); - struct page *page; - - page = pfn_to_page(pd[pde] & smmu->pfn_mask); + struct page *page = as->pts[pde]; /* * When no entries in this page table are used anymore, return the @@ -573,6 +579,7 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) /* Finally, free the page */ ClearPageReserved(page); __free_page(page); + as->pts[pde] = NULL; } } From 32924c76b0cbc67aa4cf0741f7bc6c37f097aaf3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:29:31 +0100 Subject: [PATCH 07/16] iommu/tegra-smmu: Use kcalloc() to allocate counter array Use kcalloc() to allocate the use-counter array for the page directory entries/page tables. Using kcalloc() allows us to be provided with zero-initialised memory from the allocators, rather than initialising it ourselves. 
Signed-off-by: Russell King Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 8ec5ac45caab..d649b06cc4ca 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -40,7 +40,7 @@ struct tegra_smmu_as { struct iommu_domain domain; struct tegra_smmu *smmu; unsigned int use_count; - struct page *count; + u32 *count; struct page **pts; struct page *pd; unsigned id; @@ -265,7 +265,7 @@ static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) return NULL; } - as->count = alloc_page(GFP_KERNEL); + as->count = kcalloc(SMMU_NUM_PDE, sizeof(u32), GFP_KERNEL); if (!as->count) { __free_page(as->pd); kfree(as); @@ -274,7 +274,7 @@ static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) as->pts = kcalloc(SMMU_NUM_PDE, sizeof(*as->pts), GFP_KERNEL); if (!as->pts) { - __free_page(as->count); + kfree(as->count); __free_page(as->pd); kfree(as); return NULL; @@ -284,13 +284,6 @@ static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) pd = page_address(as->pd); SetPageReserved(as->pd); - for (i = 0; i < SMMU_NUM_PDE; i++) - pd[i] = 0; - - /* clear PDE usage counters */ - pd = page_address(as->count); - SetPageReserved(as->count); - for (i = 0; i < SMMU_NUM_PDE; i++) pd[i] = 0; @@ -509,7 +502,7 @@ static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, struct page **pagep) { - u32 *pd = page_address(as->pd), *pt, *count; + u32 *pd = page_address(as->pd), *pt; unsigned int pde = iova_pd_index(iova); struct tegra_smmu *smmu = as->smmu; struct page *page; @@ -545,9 +538,8 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, pt = page_address(page); /* Keep track of entries in this page table. 
*/ - count = page_address(as->count); if (pt[iova_pt_index(iova)] == 0) - count[pde]++; + as->count[pde]++; return tegra_smmu_pte_offset(page, iova); } @@ -556,7 +548,6 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) { struct tegra_smmu *smmu = as->smmu; unsigned int pde = iova_pd_index(iova); - u32 *count = page_address(as->count); u32 *pd = page_address(as->pd); struct page *page = as->pts[pde]; @@ -564,7 +555,7 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) * When no entries in this page table are used anymore, return the * memory page to the system. */ - if (--count[pde] == 0) { + if (--as->count[pde] == 0) { unsigned int offset = pde * sizeof(*pd); /* Clear the page directory entry first */ From 4b3c7d10765403ab19628fb7d530b8ce1c50b81d Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:29:36 +0100 Subject: [PATCH 08/16] iommu/tegra-smmu: Move flush_dcache to tegra-smmu.c Drivers should not be using __cpuc_* functions nor outer_cache_flush() directly. This change partly cleans up tegra-smmu.c. The only difference between cache handling of the tegra variants is Denver, which omits the call to outer_cache_flush(). This is due to Denver being an ARM64 CPU, and the ARM64 architecture does not provide this function. (This, in itself, is a good reason why these should not be used.) 
Signed-off-by: Russell King [treding@nvidia.com: fix build failure on 64-bit ARM] Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 30 +++++++++++++++++++++++++----- drivers/memory/tegra/tegra114.c | 17 ----------------- drivers/memory/tegra/tegra124.c | 30 ------------------------------ drivers/memory/tegra/tegra30.c | 17 ----------------- include/soc/tegra/mc.h | 7 ------- 5 files changed, 25 insertions(+), 76 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index d649b06cc4ca..42b13c07aeef 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -16,6 +16,8 @@ #include #include +#include + #include #include @@ -145,6 +147,24 @@ static unsigned int iova_pt_index(unsigned long iova) return (iova >> SMMU_PTE_SHIFT) & (SMMU_NUM_PTE - 1); } +static void smmu_flush_dcache(struct page *page, unsigned long offset, + size_t size) +{ +#ifdef CONFIG_ARM + phys_addr_t phys = page_to_phys(page) + offset; +#endif + void *virt = page_address(page) + offset; + +#ifdef CONFIG_ARM + __cpuc_flush_dcache_area(virt, size); + outer_flush_range(phys, phys + size); +#endif + +#ifdef CONFIG_ARM64 + __flush_dcache_area(virt, size); +#endif +} + static inline void smmu_flush_ptc(struct tegra_smmu *smmu, struct page *page, unsigned long offset) { @@ -392,7 +412,7 @@ static int tegra_smmu_as_prepare(struct tegra_smmu *smmu, if (err < 0) return err; - smmu->soc->ops->flush_dcache(as->pd, 0, SMMU_SIZE_PD); + smmu_flush_dcache(as->pd, 0, SMMU_SIZE_PD); smmu_flush_ptc(smmu, as->pd, 0); smmu_flush_tlb_asid(smmu, as->id); @@ -521,11 +541,11 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, as->pts[pde] = page; - smmu->soc->ops->flush_dcache(page, 0, SMMU_SIZE_PT); + smmu_flush_dcache(page, 0, SMMU_SIZE_PT); pd[pde] = SMMU_MK_PDE(page, SMMU_PDE_ATTR | SMMU_PDE_NEXT); - smmu->soc->ops->flush_dcache(as->pd, pde << 2, 4); + smmu_flush_dcache(as->pd, pde << 2, 4); smmu_flush_ptc(smmu, as->pd, pde << 2); 
smmu_flush_tlb_section(smmu, as->id, iova); smmu_flush(smmu); @@ -562,7 +582,7 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) pd[pde] = 0; /* Flush the page directory entry */ - smmu->soc->ops->flush_dcache(as->pd, offset, sizeof(*pd)); + smmu_flush_dcache(as->pd, offset, sizeof(*pd)); smmu_flush_ptc(smmu, as->pd, offset); smmu_flush_tlb_section(smmu, as->id, iova); smmu_flush(smmu); @@ -582,7 +602,7 @@ static void tegra_smmu_set_pte(struct tegra_smmu_as *as, unsigned long iova, *pte = val; - smmu->soc->ops->flush_dcache(pte_page, offset, 4); + smmu_flush_dcache(pte_page, offset, 4); smmu_flush_ptc(smmu, pte_page, offset); smmu_flush_tlb_group(smmu, as->id, iova); smmu_flush(smmu); diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c index 9f579589e800..7122f39be9cc 100644 --- a/drivers/memory/tegra/tegra114.c +++ b/drivers/memory/tegra/tegra114.c @@ -9,8 +9,6 @@ #include #include -#include - #include #include "mc.h" @@ -914,20 +912,6 @@ static const struct tegra_smmu_swgroup tegra114_swgroups[] = { { .name = "tsec", .swgroup = TEGRA_SWGROUP_TSEC, .reg = 0x294 }, }; -static void tegra114_flush_dcache(struct page *page, unsigned long offset, - size_t size) -{ - phys_addr_t phys = page_to_phys(page) + offset; - void *virt = page_address(page) + offset; - - __cpuc_flush_dcache_area(virt, size); - outer_flush_range(phys, phys + size); -} - -static const struct tegra_smmu_ops tegra114_smmu_ops = { - .flush_dcache = tegra114_flush_dcache, -}; - static const struct tegra_smmu_soc tegra114_smmu_soc = { .clients = tegra114_mc_clients, .num_clients = ARRAY_SIZE(tegra114_mc_clients), @@ -936,7 +920,6 @@ static const struct tegra_smmu_soc tegra114_smmu_soc = { .supports_round_robin_arbitration = false, .supports_request_limit = false, .num_asids = 4, - .ops = &tegra114_smmu_ops, }; const struct tegra_mc_soc tegra114_mc_soc = { diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c index 
966e1557e6f4..ebda63283853 100644 --- a/drivers/memory/tegra/tegra124.c +++ b/drivers/memory/tegra/tegra124.c @@ -9,8 +9,6 @@ #include #include -#include - #include #include "mc.h" @@ -1002,20 +1000,6 @@ static const struct tegra_smmu_swgroup tegra124_swgroups[] = { }; #ifdef CONFIG_ARCH_TEGRA_124_SOC -static void tegra124_flush_dcache(struct page *page, unsigned long offset, - size_t size) -{ - phys_addr_t phys = page_to_phys(page) + offset; - void *virt = page_address(page) + offset; - - __cpuc_flush_dcache_area(virt, size); - outer_flush_range(phys, phys + size); -} - -static const struct tegra_smmu_ops tegra124_smmu_ops = { - .flush_dcache = tegra124_flush_dcache, -}; - static const struct tegra_smmu_soc tegra124_smmu_soc = { .clients = tegra124_mc_clients, .num_clients = ARRAY_SIZE(tegra124_mc_clients), @@ -1024,7 +1008,6 @@ static const struct tegra_smmu_soc tegra124_smmu_soc = { .supports_round_robin_arbitration = true, .supports_request_limit = true, .num_asids = 128, - .ops = &tegra124_smmu_ops, }; const struct tegra_mc_soc tegra124_mc_soc = { @@ -1039,18 +1022,6 @@ const struct tegra_mc_soc tegra124_mc_soc = { #endif /* CONFIG_ARCH_TEGRA_124_SOC */ #ifdef CONFIG_ARCH_TEGRA_132_SOC -static void tegra132_flush_dcache(struct page *page, unsigned long offset, - size_t size) -{ - void *virt = page_address(page) + offset; - - __flush_dcache_area(virt, size); -} - -static const struct tegra_smmu_ops tegra132_smmu_ops = { - .flush_dcache = tegra132_flush_dcache, -}; - static const struct tegra_smmu_soc tegra132_smmu_soc = { .clients = tegra124_mc_clients, .num_clients = ARRAY_SIZE(tegra124_mc_clients), @@ -1059,7 +1030,6 @@ static const struct tegra_smmu_soc tegra132_smmu_soc = { .supports_round_robin_arbitration = true, .supports_request_limit = true, .num_asids = 128, - .ops = &tegra132_smmu_ops, }; const struct tegra_mc_soc tegra132_mc_soc = { diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c index 1abcd8f6f3ba..3cb30b69d95b 100644 
--- a/drivers/memory/tegra/tegra30.c +++ b/drivers/memory/tegra/tegra30.c @@ -9,8 +9,6 @@ #include #include -#include - #include #include "mc.h" @@ -936,20 +934,6 @@ static const struct tegra_smmu_swgroup tegra30_swgroups[] = { { .name = "isp", .swgroup = TEGRA_SWGROUP_ISP, .reg = 0x258 }, }; -static void tegra30_flush_dcache(struct page *page, unsigned long offset, - size_t size) -{ - phys_addr_t phys = page_to_phys(page) + offset; - void *virt = page_address(page) + offset; - - __cpuc_flush_dcache_area(virt, size); - outer_flush_range(phys, phys + size); -} - -static const struct tegra_smmu_ops tegra30_smmu_ops = { - .flush_dcache = tegra30_flush_dcache, -}; - static const struct tegra_smmu_soc tegra30_smmu_soc = { .clients = tegra30_mc_clients, .num_clients = ARRAY_SIZE(tegra30_mc_clients), @@ -958,7 +942,6 @@ static const struct tegra_smmu_soc tegra30_smmu_soc = { .supports_round_robin_arbitration = false, .supports_request_limit = false, .num_asids = 4, - .ops = &tegra30_smmu_ops, }; const struct tegra_mc_soc tegra30_mc_soc = { diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index 1ab2813273cd..d6c3190ec852 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -51,11 +51,6 @@ struct tegra_smmu_swgroup { unsigned int reg; }; -struct tegra_smmu_ops { - void (*flush_dcache)(struct page *page, unsigned long offset, - size_t size); -}; - struct tegra_smmu_soc { const struct tegra_mc_client *clients; unsigned int num_clients; @@ -67,8 +62,6 @@ struct tegra_smmu_soc { bool supports_request_limit; unsigned int num_asids; - - const struct tegra_smmu_ops *ops; }; struct tegra_mc; From b8fe03827b192a23d04e99c40d72e6b938fa6576 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:29:41 +0100 Subject: [PATCH 09/16] iommu/tegra-smmu: Split smmu_flush_ptc() smmu_flush_ptc() is used in two modes: one is to flush an individual entry, the other is to flush all entries. We know at the call site which we require. 
Split the function into smmu_flush_ptc_all() and smmu_flush_ptc(). Signed-off-by: Russell King Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 42b13c07aeef..5c775b70ef8c 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -165,29 +165,29 @@ static void smmu_flush_dcache(struct page *page, unsigned long offset, #endif } +static void smmu_flush_ptc_all(struct tegra_smmu *smmu) +{ + smmu_writel(smmu, SMMU_PTC_FLUSH_TYPE_ALL, SMMU_PTC_FLUSH); +} + static inline void smmu_flush_ptc(struct tegra_smmu *smmu, struct page *page, unsigned long offset) { - phys_addr_t phys = page ? page_to_phys(page) : 0; + phys_addr_t phys = page_to_phys(page); u32 value; - if (page) { - offset &= ~(smmu->mc->soc->atom_size - 1); + offset &= ~(smmu->mc->soc->atom_size - 1); - if (smmu->mc->soc->num_address_bits > 32) { + if (smmu->mc->soc->num_address_bits > 32) { #ifdef CONFIG_PHYS_ADDR_T_64BIT - value = (phys >> 32) & SMMU_PTC_FLUSH_HI_MASK; + value = (phys >> 32) & SMMU_PTC_FLUSH_HI_MASK; #else - value = 0; + value = 0; #endif - smmu_writel(smmu, value, SMMU_PTC_FLUSH_HI); - } - - value = (phys + offset) | SMMU_PTC_FLUSH_TYPE_ADR; - } else { - value = SMMU_PTC_FLUSH_TYPE_ALL; + smmu_writel(smmu, value, SMMU_PTC_FLUSH_HI); } + value = (phys + offset) | SMMU_PTC_FLUSH_TYPE_ADR; smmu_writel(smmu, value, SMMU_PTC_FLUSH); } @@ -894,7 +894,7 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, smmu_writel(smmu, value, SMMU_TLB_CONFIG); - smmu_flush_ptc(smmu, NULL, 0); + smmu_flush_ptc_all(smmu); smmu_flush_tlb(smmu); smmu_writel(smmu, SMMU_CONFIG_ENABLE, SMMU_CONFIG); smmu_flush(smmu); From d62c7a886c2bc9f9258164814245dc0678b9a52e Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:29:46 +0100 Subject: [PATCH 10/16] iommu/tegra-smmu: smmu_flush_ptc() wants device addresses Pass 
smmu_flush_ptc() the device address rather than struct page pointer. Signed-off-by: Russell King Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 5c775b70ef8c..f420d8718535 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -170,10 +170,9 @@ static void smmu_flush_ptc_all(struct tegra_smmu *smmu) smmu_writel(smmu, SMMU_PTC_FLUSH_TYPE_ALL, SMMU_PTC_FLUSH); } -static inline void smmu_flush_ptc(struct tegra_smmu *smmu, struct page *page, +static inline void smmu_flush_ptc(struct tegra_smmu *smmu, phys_addr_t phys, unsigned long offset) { - phys_addr_t phys = page_to_phys(page); u32 value; offset &= ~(smmu->mc->soc->atom_size - 1); @@ -413,7 +412,7 @@ static int tegra_smmu_as_prepare(struct tegra_smmu *smmu, return err; smmu_flush_dcache(as->pd, 0, SMMU_SIZE_PD); - smmu_flush_ptc(smmu, as->pd, 0); + smmu_flush_ptc(smmu, page_to_phys(as->pd), 0); smmu_flush_tlb_asid(smmu, as->id); smmu_writel(smmu, as->id & 0x7f, SMMU_PTB_ASID); @@ -546,7 +545,7 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, pd[pde] = SMMU_MK_PDE(page, SMMU_PDE_ATTR | SMMU_PDE_NEXT); smmu_flush_dcache(as->pd, pde << 2, 4); - smmu_flush_ptc(smmu, as->pd, pde << 2); + smmu_flush_ptc(smmu, page_to_phys(as->pd), pde << 2); smmu_flush_tlb_section(smmu, as->id, iova); smmu_flush(smmu); } else { @@ -583,7 +582,7 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) /* Flush the page directory entry */ smmu_flush_dcache(as->pd, offset, sizeof(*pd)); - smmu_flush_ptc(smmu, as->pd, offset); + smmu_flush_ptc(smmu, page_to_phys(as->pd), offset); smmu_flush_tlb_section(smmu, as->id, iova); smmu_flush(smmu); @@ -603,7 +602,7 @@ static void tegra_smmu_set_pte(struct tegra_smmu_as *as, unsigned long iova, *pte = val; smmu_flush_dcache(pte_page, offset, 4); - smmu_flush_ptc(smmu, pte_page, 
offset); + smmu_flush_ptc(smmu, page_to_phys(pte_page), offset); smmu_flush_tlb_group(smmu, as->id, iova); smmu_flush(smmu); } From e3c971960fd41fc55235ba05b95e053355cb0e73 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:29:52 +0100 Subject: [PATCH 11/16] iommu/tegra-smmu: Convert to use DMA API Use the DMA API instead of calling architecture internal functions in the Tegra SMMU driver. Signed-off-by: Russell King Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 139 +++++++++++++++++++++++-------------- 1 file changed, 85 insertions(+), 54 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index f420d8718535..43b69c8cbe46 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -16,8 +16,6 @@ #include #include -#include - #include #include @@ -45,6 +43,7 @@ struct tegra_smmu_as { u32 *count; struct page **pts; struct page *pd; + dma_addr_t pd_dma; unsigned id; u32 attr; }; @@ -82,9 +81,9 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) #define SMMU_PTB_ASID_VALUE(x) ((x) & 0x7f) #define SMMU_PTB_DATA 0x020 -#define SMMU_PTB_DATA_VALUE(page, attr) (page_to_phys(page) >> 12 | (attr)) +#define SMMU_PTB_DATA_VALUE(dma, attr) ((dma) >> 12 | (attr)) -#define SMMU_MK_PDE(page, attr) (page_to_phys(page) >> SMMU_PTE_SHIFT | (attr)) +#define SMMU_MK_PDE(dma, attr) ((dma) >> SMMU_PTE_SHIFT | (attr)) #define SMMU_TLB_FLUSH 0x030 #define SMMU_TLB_FLUSH_VA_MATCH_ALL (0 << 0) @@ -147,22 +146,15 @@ static unsigned int iova_pt_index(unsigned long iova) return (iova >> SMMU_PTE_SHIFT) & (SMMU_NUM_PTE - 1); } -static void smmu_flush_dcache(struct page *page, unsigned long offset, - size_t size) +static bool smmu_dma_addr_valid(struct tegra_smmu *smmu, dma_addr_t addr) { -#ifdef CONFIG_ARM - phys_addr_t phys = page_to_phys(page) + offset; -#endif - void *virt = page_address(page) + offset; + addr >>= 12; + return (addr & smmu->pfn_mask) == addr; +} -#ifdef CONFIG_ARM - 
__cpuc_flush_dcache_area(virt, size); - outer_flush_range(phys, phys + size); -#endif - -#ifdef CONFIG_ARM64 - __flush_dcache_area(virt, size); -#endif +static dma_addr_t smmu_pde_to_dma(u32 pde) +{ + return pde << 12; } static void smmu_flush_ptc_all(struct tegra_smmu *smmu) @@ -170,7 +162,7 @@ static void smmu_flush_ptc_all(struct tegra_smmu *smmu) smmu_writel(smmu, SMMU_PTC_FLUSH_TYPE_ALL, SMMU_PTC_FLUSH); } -static inline void smmu_flush_ptc(struct tegra_smmu *smmu, phys_addr_t phys, +static inline void smmu_flush_ptc(struct tegra_smmu *smmu, dma_addr_t dma, unsigned long offset) { u32 value; @@ -178,15 +170,15 @@ static inline void smmu_flush_ptc(struct tegra_smmu *smmu, phys_addr_t phys, offset &= ~(smmu->mc->soc->atom_size - 1); if (smmu->mc->soc->num_address_bits > 32) { -#ifdef CONFIG_PHYS_ADDR_T_64BIT - value = (phys >> 32) & SMMU_PTC_FLUSH_HI_MASK; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + value = (dma >> 32) & SMMU_PTC_FLUSH_HI_MASK; #else value = 0; #endif smmu_writel(smmu, value, SMMU_PTC_FLUSH_HI); } - value = (phys + offset) | SMMU_PTC_FLUSH_TYPE_ADR; + value = (dma + offset) | SMMU_PTC_FLUSH_TYPE_ADR; smmu_writel(smmu, value, SMMU_PTC_FLUSH); } @@ -407,16 +399,26 @@ static int tegra_smmu_as_prepare(struct tegra_smmu *smmu, return 0; } + as->pd_dma = dma_map_page(smmu->dev, as->pd, 0, SMMU_SIZE_PD, + DMA_TO_DEVICE); + if (dma_mapping_error(smmu->dev, as->pd_dma)) + return -ENOMEM; + + /* We can't handle 64-bit DMA addresses */ + if (!smmu_dma_addr_valid(smmu, as->pd_dma)) { + err = -ENOMEM; + goto err_unmap; + } + err = tegra_smmu_alloc_asid(smmu, &as->id); if (err < 0) - return err; + goto err_unmap; - smmu_flush_dcache(as->pd, 0, SMMU_SIZE_PD); - smmu_flush_ptc(smmu, page_to_phys(as->pd), 0); + smmu_flush_ptc(smmu, as->pd_dma, 0); smmu_flush_tlb_asid(smmu, as->id); smmu_writel(smmu, as->id & 0x7f, SMMU_PTB_ASID); - value = SMMU_PTB_DATA_VALUE(as->pd, as->attr); + value = SMMU_PTB_DATA_VALUE(as->pd_dma, as->attr); smmu_writel(smmu, value, 
SMMU_PTB_DATA); smmu_flush(smmu); @@ -424,6 +426,10 @@ static int tegra_smmu_as_prepare(struct tegra_smmu *smmu, as->use_count++; return 0; + +err_unmap: + dma_unmap_page(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE); + return err; } static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu, @@ -433,6 +439,9 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu, return; tegra_smmu_free_asid(smmu, as->id); + + dma_unmap_page(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE); + as->smmu = NULL; } @@ -504,63 +513,81 @@ static u32 *tegra_smmu_pte_offset(struct page *pt_page, unsigned long iova) } static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, - struct page **pagep) + dma_addr_t *dmap) { unsigned int pd_index = iova_pd_index(iova); struct page *pt_page; + u32 *pd; pt_page = as->pts[pd_index]; if (!pt_page) return NULL; - *pagep = pt_page; + pd = page_address(as->pd); + *dmap = smmu_pde_to_dma(pd[pd_index]); return tegra_smmu_pte_offset(pt_page, iova); } static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, - struct page **pagep) + dma_addr_t *dmap) { u32 *pd = page_address(as->pd), *pt; unsigned int pde = iova_pd_index(iova); struct tegra_smmu *smmu = as->smmu; - struct page *page; unsigned int i; if (!as->pts[pde]) { + struct page *page; + dma_addr_t dma; + page = alloc_page(GFP_KERNEL | __GFP_DMA); if (!page) return NULL; pt = page_address(page); - SetPageReserved(page); for (i = 0; i < SMMU_NUM_PTE; i++) pt[i] = 0; + dma = dma_map_page(smmu->dev, page, 0, SMMU_SIZE_PT, + DMA_TO_DEVICE); + if (dma_mapping_error(smmu->dev, dma)) { + __free_page(page); + return NULL; + } + + if (!smmu_dma_addr_valid(smmu, dma)) { + dma_unmap_page(smmu->dev, dma, SMMU_SIZE_PT, + DMA_TO_DEVICE); + __free_page(page); + return NULL; + } + as->pts[pde] = page; - smmu_flush_dcache(page, 0, SMMU_SIZE_PT); + SetPageReserved(page); - pd[pde] = SMMU_MK_PDE(page, SMMU_PDE_ATTR | SMMU_PDE_NEXT); + pd[pde] = SMMU_MK_PDE(dma, 
SMMU_PDE_ATTR | SMMU_PDE_NEXT); - smmu_flush_dcache(as->pd, pde << 2, 4); - smmu_flush_ptc(smmu, page_to_phys(as->pd), pde << 2); + dma_sync_single_range_for_device(smmu->dev, as->pd_dma, + pde << 2, 4, DMA_TO_DEVICE); + smmu_flush_ptc(smmu, as->pd_dma, pde << 2); smmu_flush_tlb_section(smmu, as->id, iova); smmu_flush(smmu); + + *dmap = dma; } else { - page = as->pts[pde]; + *dmap = smmu_pde_to_dma(pd[pde]); } - *pagep = page; - - pt = page_address(page); + pt = tegra_smmu_pte_offset(as->pts[pde], iova); /* Keep track of entries in this page table. */ - if (pt[iova_pt_index(iova)] == 0) + if (*pt == 0) as->count[pde]++; - return tegra_smmu_pte_offset(page, iova); + return pt; } static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) @@ -576,17 +603,20 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) */ if (--as->count[pde] == 0) { unsigned int offset = pde * sizeof(*pd); + dma_addr_t pte_dma = smmu_pde_to_dma(pd[pde]); /* Clear the page directory entry first */ pd[pde] = 0; /* Flush the page directory entry */ - smmu_flush_dcache(as->pd, offset, sizeof(*pd)); - smmu_flush_ptc(smmu, page_to_phys(as->pd), offset); + dma_sync_single_range_for_device(smmu->dev, as->pd_dma, offset, + sizeof(*pd), DMA_TO_DEVICE); + smmu_flush_ptc(smmu, as->pd_dma, offset); smmu_flush_tlb_section(smmu, as->id, iova); smmu_flush(smmu); /* Finally, free the page */ + dma_unmap_page(smmu->dev, pte_dma, SMMU_SIZE_PT, DMA_TO_DEVICE); ClearPageReserved(page); __free_page(page); as->pts[pde] = NULL; @@ -594,15 +624,16 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) } static void tegra_smmu_set_pte(struct tegra_smmu_as *as, unsigned long iova, - u32 *pte, struct page *pte_page, u32 val) + u32 *pte, dma_addr_t pte_dma, u32 val) { struct tegra_smmu *smmu = as->smmu; unsigned long offset = offset_in_page(pte); *pte = val; - smmu_flush_dcache(pte_page, offset, 4); - smmu_flush_ptc(smmu, 
page_to_phys(pte_page), offset); + dma_sync_single_range_for_device(smmu->dev, pte_dma, offset, + 4, DMA_TO_DEVICE); + smmu_flush_ptc(smmu, pte_dma, offset); smmu_flush_tlb_group(smmu, as->id, iova); smmu_flush(smmu); } @@ -611,14 +642,14 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { struct tegra_smmu_as *as = to_smmu_as(domain); - struct page *page; + dma_addr_t pte_dma; u32 *pte; - pte = as_get_pte(as, iova, &page); + pte = as_get_pte(as, iova, &pte_dma); if (!pte) return -ENOMEM; - tegra_smmu_set_pte(as, iova, pte, page, + tegra_smmu_set_pte(as, iova, pte, pte_dma, __phys_to_pfn(paddr) | SMMU_PTE_ATTR); return 0; @@ -628,14 +659,14 @@ static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct tegra_smmu_as *as = to_smmu_as(domain); - struct page *pte_page; + dma_addr_t pte_dma; u32 *pte; - pte = tegra_smmu_pte_lookup(as, iova, &pte_page); + pte = tegra_smmu_pte_lookup(as, iova, &pte_dma); if (!pte || !*pte) return 0; - tegra_smmu_set_pte(as, iova, pte, pte_page, 0); + tegra_smmu_set_pte(as, iova, pte, pte_dma, 0); tegra_smmu_pte_put_use(as, iova); return size; @@ -645,11 +676,11 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { struct tegra_smmu_as *as = to_smmu_as(domain); - struct page *pte_page; unsigned long pfn; + dma_addr_t pte_dma; u32 *pte; - pte = tegra_smmu_pte_lookup(as, iova, &pte_page); + pte = tegra_smmu_pte_lookup(as, iova, &pte_dma); if (!pte || !*pte) return 0; From 05a65f06f69fa6c487c2933f2971d9ec4e33eb0d Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:29:57 +0100 Subject: [PATCH 12/16] iommu/tegra-smmu: Remove PageReserved manipulation Remove the unnecessary manipulation of the PageReserved flags in the Tegra SMMU driver. None of this is required as the page(s) remain private to the SMMU driver. 
Signed-off-by: Russell King Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 43b69c8cbe46..eb9f6068fe2e 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -293,7 +293,6 @@ static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) /* clear PDEs */ pd = page_address(as->pd); - SetPageReserved(as->pd); for (i = 0; i < SMMU_NUM_PDE; i++) pd[i] = 0; @@ -311,7 +310,6 @@ static void tegra_smmu_domain_free(struct iommu_domain *domain) struct tegra_smmu_as *as = to_smmu_as(domain); /* TODO: free page directory and page tables */ - ClearPageReserved(as->pd); kfree(as); } @@ -566,8 +564,6 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, as->pts[pde] = page; - SetPageReserved(page); - pd[pde] = SMMU_MK_PDE(dma, SMMU_PDE_ATTR | SMMU_PDE_NEXT); dma_sync_single_range_for_device(smmu->dev, as->pd_dma, @@ -617,7 +613,6 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) /* Finally, free the page */ dma_unmap_page(smmu->dev, pte_dma, SMMU_SIZE_PT, DMA_TO_DEVICE); - ClearPageReserved(page); __free_page(page); as->pts[pde] = NULL; } From 707917cbc6ac0c0ea968b5eb635722ea84808286 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:30:02 +0100 Subject: [PATCH 13/16] iommu/tegra-smmu: Use __GFP_ZERO to allocate zeroed pages Rather than explicitly zeroing pages allocated via alloc_page(), add __GFP_ZERO to the gfp mask to ask the allocator for zeroed pages. 
Signed-off-by: Russell King Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index eb9f6068fe2e..27d31f62a822 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -258,8 +258,6 @@ static bool tegra_smmu_capable(enum iommu_cap cap) static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) { struct tegra_smmu_as *as; - unsigned int i; - uint32_t *pd; if (type != IOMMU_DOMAIN_UNMANAGED) return NULL; @@ -270,7 +268,7 @@ static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) as->attr = SMMU_PD_READABLE | SMMU_PD_WRITABLE | SMMU_PD_NONSECURE; - as->pd = alloc_page(GFP_KERNEL | __GFP_DMA); + as->pd = alloc_page(GFP_KERNEL | __GFP_DMA | __GFP_ZERO); if (!as->pd) { kfree(as); return NULL; @@ -291,12 +289,6 @@ static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) return NULL; } - /* clear PDEs */ - pd = page_address(as->pd); - - for (i = 0; i < SMMU_NUM_PDE; i++) - pd[i] = 0; - /* setup aperture */ as->domain.geometry.aperture_start = 0; as->domain.geometry.aperture_end = 0xffffffff; @@ -533,21 +525,15 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, u32 *pd = page_address(as->pd), *pt; unsigned int pde = iova_pd_index(iova); struct tegra_smmu *smmu = as->smmu; - unsigned int i; if (!as->pts[pde]) { struct page *page; dma_addr_t dma; - page = alloc_page(GFP_KERNEL | __GFP_DMA); + page = alloc_page(GFP_KERNEL | __GFP_DMA | __GFP_ZERO); if (!page) return NULL; - pt = page_address(page); - - for (i = 0; i < SMMU_NUM_PTE; i++) - pt[i] = 0; - dma = dma_map_page(smmu->dev, page, 0, SMMU_SIZE_PT, DMA_TO_DEVICE); if (dma_mapping_error(smmu->dev, dma)) { From 7ffc6f066eb73b07a0ef7c94d05107aef271ac21 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 6 Aug 2015 14:56:39 +0200 Subject: [PATCH 14/16] iommu/tegra-smmu: Extract tegra_smmu_pte_get_use() 
Extract the use count reference accounting into a separate function and separate it from allocating the PTE. Signed-off-by: Russell King [treding@nvidia.com: extract and write commit message] Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 27d31f62a822..74ad1f43265a 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -522,7 +522,7 @@ static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, dma_addr_t *dmap) { - u32 *pd = page_address(as->pd), *pt; + u32 *pd = page_address(as->pd); unsigned int pde = iova_pd_index(iova); struct tegra_smmu *smmu = as->smmu; @@ -563,13 +563,14 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, *dmap = smmu_pde_to_dma(pd[pde]); } - pt = tegra_smmu_pte_offset(as->pts[pde], iova); + return tegra_smmu_pte_offset(as->pts[pde], iova); +} - /* Keep track of entries in this page table. 
*/ - if (*pt == 0) - as->count[pde]++; +static void tegra_smmu_pte_get_use(struct tegra_smmu_as *as, unsigned long iova) +{ + unsigned int pd_index = iova_pd_index(iova); - return pt; + as->count[pd_index]++; } static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) @@ -630,6 +631,10 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, if (!pte) return -ENOMEM; + /* If we aren't overwriting a pre-existing entry, increment use */ + if (*pte == 0) + tegra_smmu_pte_get_use(as, iova); + tegra_smmu_set_pte(as, iova, pte, pte_dma, __phys_to_pfn(paddr) | SMMU_PTE_ATTR); From 4080e99b8341f81c4ed1e17d8ef44d171c473a1b Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2015 13:30:12 +0100 Subject: [PATCH 15/16] iommu/tegra-smmu: Factor out tegra_smmu_set_pde() This code is used both when creating a new page directory entry and when tearing it down, with only the PDE value changing between both cases. Factor the code out so that it can be reused. 
Signed-off-by: Russell King [treding@nvidia.com: make commit message more accurate] Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 49 ++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 74ad1f43265a..2f1481ad4aa5 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -495,6 +495,27 @@ static void tegra_smmu_detach_dev(struct iommu_domain *domain, struct device *de } } +static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova, + u32 value) +{ + unsigned int pd_index = iova_pd_index(iova); + struct tegra_smmu *smmu = as->smmu; + u32 *pd = page_address(as->pd); + unsigned long offset = pd_index * sizeof(*pd); + + /* Set the page directory entry first */ + pd[pd_index] = value; + + /* Then flush the page directory entry from caches */ + dma_sync_single_range_for_device(smmu->dev, as->pd_dma, offset, + sizeof(*pd), DMA_TO_DEVICE); + + /* And flush the iommu */ + smmu_flush_ptc(smmu, as->pd_dma, offset); + smmu_flush_tlb_section(smmu, as->id, iova); + smmu_flush(smmu); +} + static u32 *tegra_smmu_pte_offset(struct page *pt_page, unsigned long iova) { u32 *pt = page_address(pt_page); @@ -522,7 +543,6 @@ static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, dma_addr_t *dmap) { - u32 *pd = page_address(as->pd); unsigned int pde = iova_pd_index(iova); struct tegra_smmu *smmu = as->smmu; @@ -550,16 +570,13 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, as->pts[pde] = page; - pd[pde] = SMMU_MK_PDE(dma, SMMU_PDE_ATTR | SMMU_PDE_NEXT); - - dma_sync_single_range_for_device(smmu->dev, as->pd_dma, - pde << 2, 4, DMA_TO_DEVICE); - smmu_flush_ptc(smmu, as->pd_dma, pde << 2); - smmu_flush_tlb_section(smmu, as->id, iova); - smmu_flush(smmu); + tegra_smmu_set_pde(as, iova, SMMU_MK_PDE(dma, SMMU_PDE_ATTR | + 
SMMU_PDE_NEXT)); *dmap = dma; } else { + u32 *pd = page_address(as->pd); + *dmap = smmu_pde_to_dma(pd[pde]); } @@ -575,9 +592,7 @@ static void tegra_smmu_pte_get_use(struct tegra_smmu_as *as, unsigned long iova) static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) { - struct tegra_smmu *smmu = as->smmu; unsigned int pde = iova_pd_index(iova); - u32 *pd = page_address(as->pd); struct page *page = as->pts[pde]; /* @@ -585,20 +600,12 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) * memory page to the system. */ if (--as->count[pde] == 0) { - unsigned int offset = pde * sizeof(*pd); + struct tegra_smmu *smmu = as->smmu; + u32 *pd = page_address(as->pd); dma_addr_t pte_dma = smmu_pde_to_dma(pd[pde]); - /* Clear the page directory entry first */ - pd[pde] = 0; + tegra_smmu_set_pde(as, iova, 0); - /* Flush the page directory entry */ - dma_sync_single_range_for_device(smmu->dev, as->pd_dma, offset, - sizeof(*pd), DMA_TO_DEVICE); - smmu_flush_ptc(smmu, as->pd_dma, offset); - smmu_flush_tlb_section(smmu, as->id, iova); - smmu_flush(smmu); - - /* Finally, free the page */ dma_unmap_page(smmu->dev, pte_dma, SMMU_SIZE_PT, DMA_TO_DEVICE); __free_page(page); as->pts[pde] = NULL; From 11cec15bf3fb498206ef63b1fa26c27689e02d0e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 6 Aug 2015 14:20:31 +0200 Subject: [PATCH 16/16] iommu/tegra-smmu: Parameterize number of TLB lines The number of TLB lines was increased from 16 on Tegra30 to 32 on Tegra114 and later. Parameterize the value so that the initial default can be set accordingly. On Tegra30, initializing the value to 32 would effectively disable the TLB and hence cause massive latencies for memory accesses translated through the SMMU. This is especially noticeable for isochronous clients such as display, whose FIFOs would continuously underrun. 
Fixes: 891846516317 ("memory: Add NVIDIA Tegra memory controller support") Signed-off-by: Thierry Reding --- drivers/iommu/tegra-smmu.c | 9 +++++++-- drivers/memory/tegra/tegra114.c | 1 + drivers/memory/tegra/tegra124.c | 1 + drivers/memory/tegra/tegra30.c | 1 + include/soc/tegra/mc.h | 1 + 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 2f1481ad4aa5..9305964250ac 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -27,6 +27,7 @@ struct tegra_smmu { const struct tegra_smmu_soc *soc; unsigned long pfn_mask; + unsigned long tlb_mask; unsigned long *asids; struct mutex lock; @@ -70,7 +71,8 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) #define SMMU_TLB_CONFIG 0x14 #define SMMU_TLB_CONFIG_HIT_UNDER_MISS (1 << 29) #define SMMU_TLB_CONFIG_ROUND_ROBIN_ARBITRATION (1 << 28) -#define SMMU_TLB_CONFIG_ACTIVE_LINES(x) ((x) & 0x3f) +#define SMMU_TLB_CONFIG_ACTIVE_LINES(smmu) \ + ((smmu)->soc->num_tlb_lines & (smmu)->tlb_mask) #define SMMU_PTC_CONFIG 0x18 #define SMMU_PTC_CONFIG_ENABLE (1 << 29) @@ -901,6 +903,9 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, smmu->pfn_mask = BIT_MASK(mc->soc->num_address_bits - PAGE_SHIFT) - 1; dev_dbg(dev, "address bits: %u, PFN mask: %#lx\n", mc->soc->num_address_bits, smmu->pfn_mask); + smmu->tlb_mask = (smmu->soc->num_tlb_lines << 1) - 1; + dev_dbg(dev, "TLB lines: %u, mask: %#lx\n", smmu->soc->num_tlb_lines, + smmu->tlb_mask); value = SMMU_PTC_CONFIG_ENABLE | SMMU_PTC_CONFIG_INDEX_MAP(0x3f); @@ -910,7 +915,7 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, smmu_writel(smmu, value, SMMU_PTC_CONFIG); value = SMMU_TLB_CONFIG_HIT_UNDER_MISS | - SMMU_TLB_CONFIG_ACTIVE_LINES(0x20); + SMMU_TLB_CONFIG_ACTIVE_LINES(smmu); if (soc->supports_round_robin_arbitration) value |= SMMU_TLB_CONFIG_ROUND_ROBIN_ARBITRATION; diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c index 
7122f39be9cc..8053f70dbfd1 100644 --- a/drivers/memory/tegra/tegra114.c +++ b/drivers/memory/tegra/tegra114.c @@ -919,6 +919,7 @@ static const struct tegra_smmu_soc tegra114_smmu_soc = { .num_swgroups = ARRAY_SIZE(tegra114_swgroups), .supports_round_robin_arbitration = false, .supports_request_limit = false, + .num_tlb_lines = 32, .num_asids = 4, }; diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c index ebda63283853..7d734befe0ed 100644 --- a/drivers/memory/tegra/tegra124.c +++ b/drivers/memory/tegra/tegra124.c @@ -1029,6 +1029,7 @@ static const struct tegra_smmu_soc tegra132_smmu_soc = { .num_swgroups = ARRAY_SIZE(tegra124_swgroups), .supports_round_robin_arbitration = true, .supports_request_limit = true, + .num_tlb_lines = 32, .num_asids = 128, }; diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c index 3cb30b69d95b..7e0694d80edb 100644 --- a/drivers/memory/tegra/tegra30.c +++ b/drivers/memory/tegra/tegra30.c @@ -941,6 +941,7 @@ static const struct tegra_smmu_soc tegra30_smmu_soc = { .num_swgroups = ARRAY_SIZE(tegra30_swgroups), .supports_round_robin_arbitration = false, .supports_request_limit = false, + .num_tlb_lines = 16, .num_asids = 4, }; diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index d6c3190ec852..8cb3a7ecd6f8 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -61,6 +61,7 @@ struct tegra_smmu_soc { bool supports_round_robin_arbitration; bool supports_request_limit; + unsigned int num_tlb_lines; unsigned int num_asids; };