From f05810c9962bba3e809f07619bda1bfdebbfbfb9 Mon Sep 17 00:00:00 2001
From: Suresh Siddha
Date: Thu, 16 Oct 2008 16:31:54 -0700
Subject: [PATCH 1/9] dmar: use spin_lock_irqsave() in qi_submit_sync()

Next patch in the series will use the queued invalidation interface
qi_submit_sync() for DMA-remapping as well, which can be called from
interrupt context. So use spin_lock_irqsave() instead of spin_lock()
in qi_submit_sync().

Signed-off-by: Suresh Siddha
Signed-off-by: Youquan Song
Signed-off-by: David Woodhouse
---
 drivers/pci/dmar.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index e842e756308a..b64cec190542 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -580,11 +580,11 @@ void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
 
 	hw = qi->desc;
 
-	spin_lock(&qi->q_lock);
+	spin_lock_irqsave(&qi->q_lock, flags);
 	while (qi->free_cnt < 3) {
-		spin_unlock(&qi->q_lock);
+		spin_unlock_irqrestore(&qi->q_lock, flags);
 		cpu_relax();
-		spin_lock(&qi->q_lock);
+		spin_lock_irqsave(&qi->q_lock, flags);
 	}
 
 	index = qi->free_head;
@@ -605,15 +605,22 @@ void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
 	qi->free_head = (qi->free_head + 2) % QI_LENGTH;
 	qi->free_cnt -= 2;
 
-	spin_lock_irqsave(&iommu->register_lock, flags);
+	spin_lock(&iommu->register_lock);
 	/*
 	 * update the HW tail register indicating the presence of
 	 * new descriptors.
 	 */
 	writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
-	spin_unlock_irqrestore(&iommu->register_lock, flags);
+	spin_unlock(&iommu->register_lock);
 
 	while (qi->desc_status[wait_index] != QI_DONE) {
+		/*
+		 * We will leave the interrupts disabled, to prevent interrupt
+		 * context to queue another cmd while a cmd is already submitted
+		 * and waiting for completion on this cpu. This is to avoid
+		 * a deadlock where the interrupt context can wait indefinitely
+		 * for free slots in the queue.
+		 */
 		spin_unlock(&qi->q_lock);
 		cpu_relax();
 		spin_lock(&qi->q_lock);
@@ -622,7 +629,7 @@ void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
 	qi->desc_status[index] = QI_DONE;
 
 	reclaim_free_desc(qi);
-	spin_unlock(&qi->q_lock);
+	spin_unlock_irqrestore(&qi->q_lock, flags);
 }
 
 /*

From 3481f21097cb560392c411377893b5109fbde557 Mon Sep 17 00:00:00 2001
From: Youquan Song
Date: Thu, 16 Oct 2008 16:31:55 -0700
Subject: [PATCH 2/9] dmar: context cache and IOTLB invalidation using queued invalidation

Implement context cache invalidate and IOTLB invalidation using the
queued invalidation interface. This interface will be used by
DMA remapping, when queued invalidation is supported.
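For reference, the new qi_flush_context()/qi_flush_iotlb() helpers only bit-pack the two
64-bit halves of a queued-invalidation descriptor and hand it to qi_submit_sync(). Below is
a minimal standalone sketch of the IOTLB encoding in plain user-space C, not kernel code:
the QI_IOTLB_* macros and DMA_TLB_DSI_FLUSH are copied from this patch series, PAGE_MASK_4K
is replaced by a literal 4KiB mask, the type code 0x2 is the IOTLB-invalidate descriptor
type from the VT-d spec, and the domain id is purely illustrative.

```c
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;
typedef uint16_t u16;

#define DMA_TLB_FLUSH_GRANU_OFFSET  60
#define DMA_TLB_DSI_FLUSH           (((u64)2) << 60)

#define QI_IOTLB_TYPE       0x2	/* IOTLB invalidate descriptor (VT-d spec) */
#define QI_IOTLB_DID(did)   (((u64)(did)) << 16)
#define QI_IOTLB_DR(dr)     (((u64)(dr)) << 7)
#define QI_IOTLB_DW(dw)     (((u64)(dw)) << 6)
#define QI_IOTLB_GRAN(gran) (((u64)(gran)) >> (DMA_TLB_FLUSH_GRANU_OFFSET-4))
#define QI_IOTLB_ADDR(addr) (((u64)(addr)) & ~0xfffULL)	/* stands in for PAGE_MASK_4K */
#define QI_IOTLB_IH(ih)     (((u64)(ih)) << 6)
#define QI_IOTLB_AM(am)     ((uint8_t)(am))

struct qi_desc { u64 low, high; };

int main(void)
{
	u16 did = 3;			/* hypothetical domain id */
	u64 addr = 0x12345000ULL;	/* page-aligned IOVA (ignored for DSI) */
	struct qi_desc desc;

	/* Domain-selective IOTLB flush: no read/write drain, ih = 0, am = 0 */
	desc.low  = QI_IOTLB_DID(did) | QI_IOTLB_DR(0) | QI_IOTLB_DW(0)
		  | QI_IOTLB_GRAN(DMA_TLB_DSI_FLUSH) | QI_IOTLB_TYPE;
	desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(0) | QI_IOTLB_AM(0);

	/* In the kernel this pair would be passed to qi_submit_sync(&desc, iommu) */
	printf("desc.low  = 0x%016llx\n", (unsigned long long)desc.low);
	printf("desc.high = 0x%016llx\n", (unsigned long long)desc.high);
	return 0;
}
```

The granularity macro shifts the register-style encoding (bits 63:60 of the IOTLB register)
down into bits 5:4 of the descriptor, which is why qi_flush_iotlb() can reuse the existing
DMA_TLB_*_FLUSH constants unchanged.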
Signed-off-by: Youquan Song Signed-off-by: Suresh Siddha Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 56 +++++++++++++++++++++++++++++++++++++ include/linux/intel-iommu.h | 21 ++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index b64cec190542..0f409e23631e 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -645,6 +645,62 @@ void qi_global_iec(struct intel_iommu *iommu) qi_submit_sync(&desc, iommu); } +int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, + u64 type, int non_present_entry_flush) +{ + + struct qi_desc desc; + + if (non_present_entry_flush) { + if (!cap_caching_mode(iommu->cap)) + return 1; + else + did = 0; + } + + desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) + | QI_CC_GRAN(type) | QI_CC_TYPE; + desc.high = 0; + + qi_submit_sync(&desc, iommu); + + return 0; + +} + +int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type, + int non_present_entry_flush) +{ + u8 dw = 0, dr = 0; + + struct qi_desc desc; + int ih = 0; + + if (non_present_entry_flush) { + if (!cap_caching_mode(iommu->cap)) + return 1; + else + did = 0; + } + + if (cap_write_drain(iommu->cap)) + dw = 1; + + if (cap_read_drain(iommu->cap)) + dr = 1; + + desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) + | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; + desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) + | QI_IOTLB_AM(size_order); + + qi_submit_sync(&desc, iommu); + + return 0; + +} + /* * Enable Queued Invalidation interface. This is a must to support * interrupt-remapping. Also used by DMA-remapping, which replaces diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 2e117f30a76c..0c5f5e49107b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -127,6 +127,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* IOTLB_REG */ +#define DMA_TLB_FLUSH_GRANU_OFFSET 60 #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) #define DMA_TLB_DSI_FLUSH (((u64)2) << 60) #define DMA_TLB_PSI_FLUSH (((u64)3) << 60) @@ -140,6 +141,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_TLB_MAX_SIZE (0x3f) /* INVALID_DESC */ +#define DMA_CCMD_INVL_GRANU_OFFSET 61 #define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) #define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) #define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) @@ -238,6 +240,19 @@ enum { #define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) #define QI_IWD_STATUS_WRITE (((u64)1) << 5) +#define QI_IOTLB_DID(did) (((u64)did) << 16) +#define QI_IOTLB_DR(dr) (((u64)dr) << 7) +#define QI_IOTLB_DW(dw) (((u64)dw) << 6) +#define QI_IOTLB_GRAN(gran) (((u64)gran) >> (DMA_TLB_FLUSH_GRANU_OFFSET-4)) +#define QI_IOTLB_ADDR(addr) (((u64)addr) & PAGE_MASK_4K) +#define QI_IOTLB_IH(ih) (((u64)ih) << 6) +#define QI_IOTLB_AM(am) (((u8)am)) + +#define QI_CC_FM(fm) (((u64)fm) << 48) +#define QI_CC_SID(sid) (((u64)sid) << 32) +#define QI_CC_DID(did) (((u64)did) << 16) +#define QI_CC_GRAN(gran) (((u64)gran) >> (DMA_CCMD_INVL_GRANU_OFFSET-4)) + struct qi_desc { u64 low, high; }; @@ -303,6 +318,12 @@ extern void free_iommu(struct intel_iommu *iommu); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void qi_global_iec(struct intel_iommu *iommu); +extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, + u8 fm, u64 type, int non_present_entry_flush); +extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type, + int 
non_present_entry_flush); + extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); void intel_iommu_domain_exit(struct dmar_domain *domain); From a77b67d4023770805141014b8fa9eb5467457817 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Thu, 16 Oct 2008 16:31:56 -0700 Subject: [PATCH 3/9] dmar: Use queued invalidation interface for IOTLB and context invalidation If queued invalidation interface is available and enabled, queued invalidation interface will be used instead of the register based interface. According to Vt-d2 specification, when queued invalidation is enabled, invalidation command submit works only through invalidation queue and not through the command registers interface. Signed-off-by: Youquan Song Signed-off-by: Suresh Siddha Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 95 ++++++++++++++++++------------------- include/linux/intel-iommu.h | 8 ++++ 2 files changed, 53 insertions(+), 50 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index fc5f2dbf5323..509470419130 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -567,27 +567,6 @@ static int __iommu_flush_context(struct intel_iommu *iommu, return 0; } -static int inline iommu_flush_context_global(struct intel_iommu *iommu, - int non_present_entry_flush) -{ - return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, - non_present_entry_flush); -} - -static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did, - int non_present_entry_flush) -{ - return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL, - non_present_entry_flush); -} - -static int inline iommu_flush_context_device(struct intel_iommu *iommu, - u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush) -{ - return __iommu_flush_context(iommu, did, source_id, function_mask, - DMA_CCMD_DEVICE_INVL, non_present_entry_flush); -} - /* return value determine if we need a write buffer flush */ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type, @@ -660,20 +639,6 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, return 0; } -static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu, - int non_present_entry_flush) -{ - return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, - non_present_entry_flush); -} - -static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did, - int non_present_entry_flush) -{ - return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, - non_present_entry_flush); -} - static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int pages, int non_present_entry_flush) { @@ -684,8 +649,9 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, /* Fallback to domain selective flush if no PSI support */ if (!cap_pgsel_inv(iommu->cap)) - return iommu_flush_iotlb_dsi(iommu, did, - non_present_entry_flush); + return iommu->flush.flush_iotlb(iommu, did, 0, 0, + DMA_TLB_DSI_FLUSH, + non_present_entry_flush); /* * PSI requires page size to be 2 ^ x, and the base address is naturally @@ -694,11 +660,12 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, mask = ilog2(__roundup_pow_of_two(pages)); /* Fallback to domain selective flush if size is too big */ if (mask > cap_max_amask_val(iommu->cap)) - return iommu_flush_iotlb_dsi(iommu, did, - non_present_entry_flush); + return iommu->flush.flush_iotlb(iommu, did, 0, 0, + 
DMA_TLB_DSI_FLUSH, non_present_entry_flush); - return __iommu_flush_iotlb(iommu, did, addr, mask, - DMA_TLB_PSI_FLUSH, non_present_entry_flush); + return iommu->flush.flush_iotlb(iommu, did, addr, mask, + DMA_TLB_PSI_FLUSH, + non_present_entry_flush); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) @@ -1204,11 +1171,13 @@ static int domain_context_mapping_one(struct dmar_domain *domain, __iommu_flush_cache(iommu, context, sizeof(*context)); /* it's a non-present to present mapping */ - if (iommu_flush_context_device(iommu, domain->id, - (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1)) + if (iommu->flush.flush_context(iommu, domain->id, + (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL, 1)) iommu_flush_write_buffer(iommu); else - iommu_flush_iotlb_dsi(iommu, 0, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); + spin_unlock_irqrestore(&iommu->lock, flags); return 0; } @@ -1310,8 +1279,10 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn) { clear_context_table(domain->iommu, bus, devfn); - iommu_flush_context_global(domain->iommu, 0); - iommu_flush_iotlb_global(domain->iommu, 0); + domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0, + DMA_CCMD_GLOBAL_INVL, 0); + domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0, + DMA_TLB_GLOBAL_FLUSH, 0); } static void domain_remove_dev_info(struct dmar_domain *domain) @@ -1662,6 +1633,28 @@ int __init init_dmars(void) } } + for_each_drhd_unit(drhd) { + if (drhd->ignored) + continue; + + iommu = drhd->iommu; + if (dmar_enable_qi(iommu)) { + /* + * Queued Invalidate not enabled, use Register Based + * Invalidate + */ + iommu->flush.flush_context = __iommu_flush_context; + iommu->flush.flush_iotlb = __iommu_flush_iotlb; + printk(KERN_INFO "IOMMU 0x%Lx: using Register based " + "invalidation\n", drhd->reg_base_addr); + } else { + iommu->flush.flush_context = qi_flush_context; + iommu->flush.flush_iotlb = qi_flush_iotlb; + printk(KERN_INFO "IOMMU 0x%Lx: using Queued " + "invalidation\n", drhd->reg_base_addr); + } + } + /* * For each rmrr * for each dev attached to rmrr @@ -1714,9 +1707,10 @@ int __init init_dmars(void) iommu_set_root_entry(iommu); - iommu_flush_context_global(iommu, 0); - iommu_flush_iotlb_global(iommu, 0); - + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, + 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, + 0); iommu_disable_protect_mem_regions(iommu); ret = iommu_enable_translation(iommu); @@ -1891,7 +1885,8 @@ static void flush_unmaps(void) struct intel_iommu *iommu = deferred_flush[i].domain[0]->iommu; - iommu_flush_iotlb_global(iommu, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, + DMA_TLB_GLOBAL_FLUSH, 0); for (j = 0; j < deferred_flush[i].next; j++) { __free_iova(&deferred_flush[i].domain[j]->iovad, deferred_flush[i].iova[j]); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0c5f5e49107b..afb0d2a5b7cd 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -278,6 +278,13 @@ struct ir_table { }; #endif +struct iommu_flush { + int (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, + u64 type, int non_present_entry_flush); + int (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type, int non_present_entry_flush); +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; @@ 
-297,6 +304,7 @@ struct intel_iommu { unsigned char name[7]; /* Device Name */ struct msi_msg saved_msg; struct sys_device sysdev; + struct iommu_flush flush; #endif struct q_inval *qi; /* Queued invalidation info */ #ifdef CONFIG_INTR_REMAP From cacd4213d8ffed83676f38d5d8e93c673e0f1af7 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Thu, 16 Oct 2008 16:31:57 -0700 Subject: [PATCH 4/9] dmar: remove the quirk which disables dma-remapping when intr-remapping enabled Now that we have DMA-remapping support for queued invalidation, we can enable both DMA-remapping and interrupt-remapping at the same time. Signed-off-by: Youquan Song Signed-off-by: Suresh Siddha Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 0f409e23631e..44d6c7081b8f 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -455,8 +455,8 @@ void __init detect_intel_iommu(void) ret = early_dmar_detect(); -#ifdef CONFIG_DMAR { +#ifdef CONFIG_INTR_REMAP struct acpi_table_dmar *dmar; /* * for now we will disable dma-remapping when interrupt @@ -465,28 +465,18 @@ void __init detect_intel_iommu(void) * is added, we will not need this any more. */ dmar = (struct acpi_table_dmar *) dmar_tbl; - if (ret && cpu_has_x2apic && dmar->flags & 0x1) { + if (ret && cpu_has_x2apic && dmar->flags & 0x1) printk(KERN_INFO "Queued invalidation will be enabled to support " "x2apic and Intr-remapping.\n"); - printk(KERN_INFO - "Disabling IOMMU detection, because of missing " - "queued invalidation support for IOTLB " - "invalidation\n"); - printk(KERN_INFO - "Use \"nox2apic\", if you want to use Intel " - " IOMMU for DMA-remapping and don't care about " - " x2apic support\n"); - - dmar_disabled = 1; - return; - } +#endif +#ifdef CONFIG_DMAR if (ret && !no_iommu && !iommu_detected && !swiotlb && !dmar_disabled) iommu_detected = 1; - } #endif + } } From 5b6985ce8ec7127b4d60ad450b64ca8b82748a3b Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 16 Oct 2008 18:02:32 -0700 Subject: [PATCH 5/9] intel-iommu: IA64 support The current Intel IOMMU code assumes that both host page size and Intel IOMMU page size are 4KiB. The first patch supports variable page size. This provides support for IA64 which has multiple page sizes. This patch also adds some other code hooks for IA64 platform including DMAR_OPERATION_TIMEOUT definition. [dwmw2: some cleanup] Signed-off-by: Fenghua Yu Signed-off-by: Tony Luck Signed-off-by: David Woodhouse --- arch/x86/kernel/pci-dma.c | 16 ----- drivers/pci/dmar.c | 19 ++--- drivers/pci/intel-iommu.c | 128 ++++++++++++++++++---------------- drivers/pci/quirks.c | 14 ++++ include/asm-x86/iommu.h | 4 ++ include/linux/dma_remapping.h | 27 ++++--- include/linux/intel-iommu.h | 37 ++++++---- 7 files changed, 130 insertions(+), 115 deletions(-) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 192624820217..1972266e8ba5 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -9,8 +9,6 @@ #include #include -static int forbid_dac __read_mostly; - struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); @@ -293,17 +291,3 @@ void pci_iommu_shutdown(void) } /* Must execute after PCI subsystem */ fs_initcall(pci_iommu_init); - -#ifdef CONFIG_PCI -/* Many VIA bridges seem to corrupt data for DAC. 
Disable it here */ - -static __devinit void via_no_dac(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected." - "Disabling DAC.\n"); - forbid_dac = 1; - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); -#endif diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 44d6c7081b8f..b65173828bc2 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -277,14 +277,15 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) drhd = (struct acpi_dmar_hardware_unit *)header; printk (KERN_INFO PREFIX "DRHD (flags: 0x%08x)base: 0x%016Lx\n", - drhd->flags, drhd->address); + drhd->flags, (unsigned long long)drhd->address); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: rmrr = (struct acpi_dmar_reserved_memory *)header; printk (KERN_INFO PREFIX "RMRR base: 0x%016Lx end: 0x%016Lx\n", - rmrr->base_address, rmrr->end_address); + (unsigned long long)rmrr->base_address, + (unsigned long long)rmrr->end_address); break; } } @@ -304,7 +305,7 @@ parse_dmar_table(void) if (!dmar) return -ENODEV; - if (dmar->width < PAGE_SHIFT_4K - 1) { + if (dmar->width < PAGE_SHIFT - 1) { printk(KERN_WARNING PREFIX "Invalid DMAR haw\n"); return -EINVAL; } @@ -493,7 +494,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->seq_id = iommu_allocated++; - iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); + iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE); if (!iommu->reg) { printk(KERN_ERR "IOMMU: can't map the region\n"); goto error; @@ -504,8 +505,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), cap_max_fault_reg_offset(iommu->cap)); - map_size = PAGE_ALIGN_4K(map_size); - if (map_size > PAGE_SIZE_4K) { + map_size = VTD_PAGE_ALIGN(map_size); + if (map_size > VTD_PAGE_SIZE) { iounmap(iommu->reg); iommu->reg = ioremap(drhd->reg_base_addr, map_size); if (!iommu->reg) { @@ -516,8 +517,10 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) ver = readl(iommu->reg + DMAR_VER_REG); pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", - drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), - iommu->cap, iommu->ecap); + (unsigned long long)drhd->reg_base_addr, + DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), + (unsigned long long)iommu->cap, + (unsigned long long)iommu->ecap); spin_lock_init(&iommu->register_lock); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 509470419130..2bf96babbc4f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -18,6 +18,7 @@ * Author: Ashok Raj * Author: Shaohua Li * Author: Anil S Keshavamurthy + * Author: Fenghua Yu */ #include @@ -35,11 +36,13 @@ #include #include #include -#include /* force_iommu in this header in x86-64*/ #include #include #include "pci.h" +#define ROOT_SIZE VTD_PAGE_SIZE +#define CONTEXT_SIZE VTD_PAGE_SIZE + #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) @@ -199,7 +202,7 @@ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, spin_unlock_irqrestore(&iommu->lock, flags); return NULL; } - __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K); + __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE); phy_addr = virt_to_phys((void *)context); set_root_value(root, phy_addr); set_root_present(root); @@ -345,7 +348,7 @@ static struct dma_pte * addr_to_dma_pte(struct 
dmar_domain *domain, u64 addr) return NULL; } __iommu_flush_cache(domain->iommu, tmp_page, - PAGE_SIZE_4K); + PAGE_SIZE); dma_set_pte_addr(*pte, virt_to_phys(tmp_page)); /* * high level table always sets r/w, last level page @@ -408,13 +411,13 @@ static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) start &= (((u64)1) << addr_width) - 1; end &= (((u64)1) << addr_width) - 1; /* in case it's partial page */ - start = PAGE_ALIGN_4K(start); - end &= PAGE_MASK_4K; + start = PAGE_ALIGN(start); + end &= PAGE_MASK; /* we don't need lock here, nobody else touches the iova range */ while (start < end) { dma_pte_clear_one(domain, start); - start += PAGE_SIZE_4K; + start += VTD_PAGE_SIZE; } } @@ -468,7 +471,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) if (!root) return -ENOMEM; - __iommu_flush_cache(iommu, root, PAGE_SIZE_4K); + __iommu_flush_cache(iommu, root, ROOT_SIZE); spin_lock_irqsave(&iommu->lock, flags); iommu->root_entry = root; @@ -634,7 +637,8 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, printk(KERN_ERR"IOMMU: flush IOTLB failed\n"); if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", - DMA_TLB_IIRG(type), DMA_TLB_IAIG(val)); + (unsigned long long)DMA_TLB_IIRG(type), + (unsigned long long)DMA_TLB_IAIG(val)); /* flush context entry will implictly flush write buffer */ return 0; } @@ -644,7 +648,7 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, { unsigned int mask; - BUG_ON(addr & (~PAGE_MASK_4K)); + BUG_ON(addr & (~VTD_PAGE_MASK)); BUG_ON(pages == 0); /* Fallback to domain selective flush if no PSI support */ @@ -798,7 +802,7 @@ void dmar_msi_read(int irq, struct msi_msg *msg) } static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type, - u8 fault_reason, u16 source_id, u64 addr) + u8 fault_reason, u16 source_id, unsigned long long addr) { const char *reason; @@ -1051,9 +1055,9 @@ static void dmar_init_reserved_ranges(void) if (!r->flags || !(r->flags & IORESOURCE_MEM)) continue; addr = r->start; - addr &= PAGE_MASK_4K; + addr &= PAGE_MASK; size = r->end - addr; - size = PAGE_ALIGN_4K(size); + size = PAGE_ALIGN(size); iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr), IOVA_PFN(size + addr) - 1); if (!iova) @@ -1115,7 +1119,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) domain->pgd = (struct dma_pte *)alloc_pgtable_page(); if (!domain->pgd) return -ENOMEM; - __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K); + __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); return 0; } @@ -1131,7 +1135,7 @@ static void domain_exit(struct dmar_domain *domain) /* destroy iovas */ put_iova_domain(&domain->iovad); end = DOMAIN_MAX_ADDR(domain->gaw); - end = end & (~PAGE_MASK_4K); + end = end & (~PAGE_MASK); /* clear ptes */ dma_pte_clear_range(domain, 0, end); @@ -1252,22 +1256,25 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, u64 start_pfn, end_pfn; struct dma_pte *pte; int index; + int addr_width = agaw_to_width(domain->agaw); + + hpa &= (((u64)1) << addr_width) - 1; if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; - iova &= PAGE_MASK_4K; - start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K; - end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K; + iova &= PAGE_MASK; + start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT; + end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; index = 0; while (start_pfn < end_pfn) { - pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index); + pte = 
addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index); if (!pte) return -ENOMEM; /* We don't need lock here, nobody else * touches the iova range */ BUG_ON(dma_pte_addr(*pte)); - dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K); + dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT); dma_set_pte_prot(*pte, prot); __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); start_pfn++; @@ -1445,11 +1452,13 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) return find_domain(pdev); } -static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end) +static int iommu_prepare_identity_map(struct pci_dev *pdev, + unsigned long long start, + unsigned long long end) { struct dmar_domain *domain; unsigned long size; - u64 base; + unsigned long long base; int ret; printk(KERN_INFO @@ -1461,9 +1470,9 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end) return -ENOMEM; /* The address might not be aligned */ - base = start & PAGE_MASK_4K; + base = start & PAGE_MASK; size = end - base; - size = PAGE_ALIGN_4K(size); + size = PAGE_ALIGN(size); if (!reserve_iova(&domain->iovad, IOVA_PFN(base), IOVA_PFN(base + size) - 1)) { printk(KERN_ERR "IOMMU: reserve iova failed\n"); @@ -1732,8 +1741,8 @@ int __init init_dmars(void) static inline u64 aligned_size(u64 host_addr, size_t size) { u64 addr; - addr = (host_addr & (~PAGE_MASK_4K)) + size; - return PAGE_ALIGN_4K(addr); + addr = (host_addr & (~PAGE_MASK)) + size; + return PAGE_ALIGN(addr); } struct iova * @@ -1747,7 +1756,7 @@ iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end) return NULL; piova = alloc_iova(&domain->iovad, - size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1); + size >> PAGE_SHIFT, IOVA_PFN(end), 1); return piova; } @@ -1807,12 +1816,12 @@ get_valid_domain_for_dev(struct pci_dev *pdev) return domain; } -static dma_addr_t +dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) { struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; - unsigned long start_paddr; + phys_addr_t start_paddr; struct iova *iova; int prot = 0; int ret; @@ -1831,7 +1840,7 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) if (!iova) goto error; - start_paddr = iova->pfn_lo << PAGE_SHIFT_4K; + start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; /* * Check if DMAR supports zero-length reads on write only @@ -1849,27 +1858,23 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) * is not a big problem */ ret = domain_page_mapping(domain, start_paddr, - ((u64)paddr) & PAGE_MASK_4K, size, prot); + ((u64)paddr) & PAGE_MASK, size, prot); if (ret) goto error; - pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n", - pci_name(pdev), size, (u64)paddr, - size, (u64)start_paddr, dir); - /* it's a non-present to present mapping */ ret = iommu_flush_iotlb_psi(domain->iommu, domain->id, - start_paddr, size >> PAGE_SHIFT_4K, 1); + start_paddr, size >> VTD_PAGE_SHIFT, 1); if (ret) iommu_flush_write_buffer(domain->iommu); - return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K))); + return start_paddr + ((u64)paddr & (~PAGE_MASK)); error: if (iova) __free_iova(&domain->iovad, iova); printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n", - pci_name(pdev), size, (u64)paddr, dir); + pci_name(pdev), size, (unsigned long long)paddr, dir); return 0; } @@ -1931,8 +1936,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova) 
spin_unlock_irqrestore(&async_umap_flush_lock, flags); } -static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, - size_t size, int dir) +void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, + int dir) { struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; @@ -1948,11 +1953,11 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, if (!iova) return; - start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + start_addr = iova->pfn_lo << PAGE_SHIFT; size = aligned_size((u64)dev_addr, size); pr_debug("Device %s unmapping: %lx@%llx\n", - pci_name(pdev), size, (u64)start_addr); + pci_name(pdev), size, (unsigned long long)start_addr); /* clear the whole page */ dma_pte_clear_range(domain, start_addr, start_addr + size); @@ -1960,7 +1965,7 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, dma_pte_free_pagetable(domain, start_addr, start_addr + size); if (intel_iommu_strict) { if (iommu_flush_iotlb_psi(domain->iommu, - domain->id, start_addr, size >> PAGE_SHIFT_4K, 0)) + domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) iommu_flush_write_buffer(domain->iommu); /* free iova */ __free_iova(&domain->iovad, iova); @@ -1973,13 +1978,13 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, } } -static void * intel_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) +void *intel_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) { void *vaddr; int order; - size = PAGE_ALIGN_4K(size); + size = PAGE_ALIGN(size); order = get_order(size); flags &= ~(GFP_DMA | GFP_DMA32); @@ -1995,12 +2000,12 @@ static void * intel_alloc_coherent(struct device *hwdev, size_t size, return NULL; } -static void intel_free_coherent(struct device *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle) +void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, + dma_addr_t dma_handle) { int order; - size = PAGE_ALIGN_4K(size); + size = PAGE_ALIGN(size); order = get_order(size); intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL); @@ -2008,8 +2013,9 @@ static void intel_free_coherent(struct device *hwdev, size_t size, } #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) -static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, - int nelems, int dir) + +void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, + int nelems, int dir) { int i; struct pci_dev *pdev = to_pci_dev(hwdev); @@ -2033,7 +2039,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, size += aligned_size((u64)addr, sg->length); } - start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + start_addr = iova->pfn_lo << PAGE_SHIFT; /* clear the whole page */ dma_pte_clear_range(domain, start_addr, start_addr + size); @@ -2041,7 +2047,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, dma_pte_free_pagetable(domain, start_addr, start_addr + size); if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, - size >> PAGE_SHIFT_4K, 0)) + size >> VTD_PAGE_SHIFT, 0)) iommu_flush_write_buffer(domain->iommu); /* free iova */ @@ -2062,8 +2068,8 @@ static int intel_nontranslate_map_sg(struct device *hddev, return nelems; } -static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, - int nelems, int dir) +int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, + int dir) { void *addr; int i; @@ -2107,14 +2113,14 @@ static int intel_map_sg(struct 
device *hwdev, struct scatterlist *sglist, if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) prot |= DMA_PTE_WRITE; - start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + start_addr = iova->pfn_lo << PAGE_SHIFT; offset = 0; for_each_sg(sglist, sg, nelems, i) { addr = SG_ENT_VIRT_ADDRESS(sg); addr = (void *)virt_to_phys(addr); size = aligned_size((u64)addr, sg->length); ret = domain_page_mapping(domain, start_addr + offset, - ((u64)addr) & PAGE_MASK_4K, + ((u64)addr) & PAGE_MASK, size, prot); if (ret) { /* clear the page */ @@ -2128,14 +2134,14 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, return 0; } sg->dma_address = start_addr + offset + - ((u64)addr & (~PAGE_MASK_4K)); + ((u64)addr & (~PAGE_MASK)); sg->dma_length = sg->length; offset += size; } /* it's a non-present to present mapping */ if (iommu_flush_iotlb_psi(domain->iommu, domain->id, - start_addr, offset >> PAGE_SHIFT_4K, 1)) + start_addr, offset >> VTD_PAGE_SHIFT, 1)) iommu_flush_write_buffer(domain->iommu); return nelems; } @@ -2175,7 +2181,6 @@ static inline int iommu_devinfo_cache_init(void) sizeof(struct device_domain_info), 0, SLAB_HWCACHE_ALIGN, - NULL); if (!iommu_devinfo_cache) { printk(KERN_ERR "Couldn't create devinfo cache\n"); @@ -2193,7 +2198,6 @@ static inline int iommu_iova_cache_init(void) sizeof(struct iova), 0, SLAB_HWCACHE_ALIGN, - NULL); if (!iommu_iova_cache) { printk(KERN_ERR "Couldn't create iova cache\n"); @@ -2322,7 +2326,7 @@ void intel_iommu_domain_exit(struct dmar_domain *domain) return; end = DOMAIN_MAX_ADDR(domain->gaw); - end = end & (~PAGE_MASK_4K); + end = end & (~VTD_PAGE_MASK); /* clear ptes */ dma_pte_clear_range(domain, 0, end); @@ -2418,6 +2422,6 @@ u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova) if (pte) pfn = dma_pte_addr(*pte); - return pfn >> PAGE_SHIFT_4K; + return pfn >> VTD_PAGE_SHIFT; } EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e872ac925b4b..832175d9ca25 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -35,6 +35,20 @@ static void __devinit quirk_mellanox_tavor(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR,quirk_mellanox_tavor); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,quirk_mellanox_tavor); +/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ +int forbid_dac __read_mostly; +EXPORT_SYMBOL(forbid_dac); + +static __devinit void via_no_dac(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { + dev_info(&dev->dev, + "VIA PCI bridge detected. 
Disabling DAC.\n"); + forbid_dac = 1; + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); + /* Deal with broken BIOS'es that neglect to enable passive release, which can cause problems in combination with the 82441FX/PPro MTRRs */ static void quirk_passive_release(struct pci_dev *dev) diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index 961e746da977..2daaffcda52f 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -7,9 +7,13 @@ extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int dmar_disabled; +extern int forbid_dac; extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); +/* 10 seconds */ +#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) + #ifdef CONFIG_GART_IOMMU extern int gart_iommu_aperture; extern int gart_iommu_aperture_allowed; diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index bff5c65f81dc..952df39c989d 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -2,15 +2,14 @@ #define _DMA_REMAPPING_H /* - * We need a fixed PAGE_SIZE of 4K irrespective of - * arch PAGE_SIZE for IOMMU page tables. + * VT-d hardware uses 4KiB page size regardless of host page size. */ -#define PAGE_SHIFT_4K (12) -#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) -#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) -#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) +#define VTD_PAGE_SHIFT (12) +#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT) +#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) +#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) @@ -25,7 +24,7 @@ struct root_entry { u64 val; u64 rsvd1; }; -#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) +#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) static inline bool root_present(struct root_entry *root) { return (root->val & 1); @@ -36,7 +35,7 @@ static inline void set_root_present(struct root_entry *root) } static inline void set_root_value(struct root_entry *root, unsigned long value) { - root->val |= value & PAGE_MASK_4K; + root->val |= value & VTD_PAGE_MASK; } struct context_entry; @@ -45,7 +44,7 @@ get_context_addr_from_root(struct root_entry *root) { return (struct context_entry *) (root_present(root)?phys_to_virt( - root->val & PAGE_MASK_4K): + root->val & VTD_PAGE_MASK) : NULL); } @@ -67,7 +66,7 @@ struct context_entry { #define context_present(c) ((c).lo & 1) #define context_fault_disable(c) (((c).lo >> 1) & 1) #define context_translation_type(c) (((c).lo >> 2) & 3) -#define context_address_root(c) ((c).lo & PAGE_MASK_4K) +#define context_address_root(c) ((c).lo & VTD_PAGE_MASK) #define context_address_width(c) ((c).hi & 7) #define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) @@ -81,7 +80,7 @@ struct context_entry { } while (0) #define CONTEXT_TT_MULTI_LEVEL 0 #define context_set_address_root(c, val) \ - do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) + do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0) #define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) #define context_set_domain_id(c, val) \ do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) @@ -107,9 +106,9 @@ struct dma_pte { #define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} 
while (0) #define dma_set_pte_prot(p, prot) \ do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) -#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK) #define dma_set_pte_addr(p, addr) do {\ - (p).val |= ((addr) & PAGE_MASK_4K); } while (0) + (p).val |= ((addr) & VTD_PAGE_MASK); } while (0) #define dma_pte_present(p) (((p).val & 3) != 0) struct intel_iommu; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index afb0d2a5b7cd..3d017cfd245b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -29,6 +29,7 @@ #include #include #include +#include /* * Intel IOMMU register specification per version 1.0 public spec. @@ -202,22 +203,21 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define dma_frcd_type(d) ((d >> 30) & 1) #define dma_frcd_fault_reason(c) (c & 0xff) #define dma_frcd_source_id(c) (c & 0xffff) -#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ +/* low 64 bit */ +#define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT)) -#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ - -#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ -{\ - cycles_t start_time = get_cycles();\ - while (1) {\ - sts = op (iommu->reg + offset);\ - if (cond)\ - break;\ +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ +do { \ + cycles_t start_time = get_cycles(); \ + while (1) { \ + sts = op(iommu->reg + offset); \ + if (cond) \ + break; \ if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ - panic("DMAR hardware is malfunctioning\n");\ - cpu_relax();\ - }\ -} + panic("DMAR hardware is malfunctioning\n"); \ + cpu_relax(); \ + } \ +} while (0) #define QI_LENGTH 256 /* queue length */ @@ -244,7 +244,7 @@ enum { #define QI_IOTLB_DR(dr) (((u64)dr) << 7) #define QI_IOTLB_DW(dw) (((u64)dw) << 6) #define QI_IOTLB_GRAN(gran) (((u64)gran) >> (DMA_TLB_FLUSH_GRANU_OFFSET-4)) -#define QI_IOTLB_ADDR(addr) (((u64)addr) & PAGE_MASK_4K) +#define QI_IOTLB_ADDR(addr) (((u64)addr) & VTD_PAGE_MASK) #define QI_IOTLB_IH(ih) (((u64)ih) << 6) #define QI_IOTLB_AM(am) (((u8)am)) @@ -353,4 +353,11 @@ static inline int intel_iommu_found(void) } #endif /* CONFIG_DMAR */ +extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); +extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t); +extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int); +extern void intel_unmap_single(struct device *, dma_addr_t, size_t, int); +extern int intel_map_sg(struct device *, struct scatterlist *, int, int); +extern void intel_unmap_sg(struct device *, struct scatterlist *, int, int); + #endif From f609891f428e1c20e270e7c350daf8c93cc459d7 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 16 Oct 2008 16:27:36 +0200 Subject: [PATCH 6/9] amd_iommu: fix nasty bug that caused ILLEGAL_DEVICE_TABLE_ENTRY errors We are on 64-bit so better use u64 instead of u32 to deal with addresses: static void __init iommu_set_device_table(struct amd_iommu *iommu) { u64 entry; ... entry = virt_to_phys(amd_iommu_dev_table); ... (I am wondering why gcc 4.2.x did not warn about the assignment between u32 and unsigned long.) 
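A quick user-space sketch of the failure mode fixed here (illustrative only, not the kernel
code): storing a 64-bit physical address in a u32 silently drops the upper half, so a device
table located above 4GiB gets programmed with a bogus address. As for the parenthetical
question, gcc stays quiet about this implicit narrowing unless -Wconversion is enabled,
which the kernel build does not use.

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical device-table physical address above 4GiB */
	uint64_t dev_table_phys = 0x123456789000ULL;

	uint32_t entry32 = dev_table_phys;	/* what the old 'u32 entry' did */
	uint64_t entry64 = dev_table_phys;	/* what the fix does */

	printf("real address : 0x%llx\n", (unsigned long long)dev_table_phys);
	printf("u32 entry    : 0x%llx  (upper bits lost)\n",
	       (unsigned long long)entry32);
	printf("u64 entry    : 0x%llx\n", (unsigned long long)entry64);
	return 0;
}
```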
Cc: iommu@lists.linux-foundation.org Cc: stable@kernel.org Signed-off-by: Andreas Herrmann Signed-off-by: Joerg Roedel Signed-off-by: David Woodhouse --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 4cd8083c58be..0cdcda35a05f 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -212,7 +212,7 @@ static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) /* Programs the physical address of the device table into the IOMMU hardware */ static void __init iommu_set_device_table(struct amd_iommu *iommu) { - u32 entry; + u64 entry; BUG_ON(iommu->mmio_base == NULL); From bb9e6d65078da2f38cfe1067cfd31a896ca867c0 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 15 Oct 2008 16:08:28 +0900 Subject: [PATCH 7/9] intel-iommu: use coherent_dma_mask in alloc_coherent This patch fixes intel-iommu to use dev->coherent_dma_mask in alloc_coherent. Currently, intel-iommu uses dev->dma_mask in alloc_coherent but alloc_coherent is supposed to use coherent_dma_mask. It could break drivers that uses smaller coherent_dma_mask than dma_mask (though the current code works for the majority that use the same mask for coherent_dma_mask and dma_mask). [dwmw2: dma_mask can be bigger than 'unsigned long'] Signed-off-by: FUJITA Tomonori Reviewed-by: Grant Grundler Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 2bf96babbc4f..d315e413fae0 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1762,14 +1762,14 @@ iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end) static struct iova * __intel_alloc_iova(struct device *dev, struct dmar_domain *domain, - size_t size) + size_t size, u64 dma_mask) { struct pci_dev *pdev = to_pci_dev(dev); struct iova *iova = NULL; - if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) { - iova = iommu_alloc_iova(domain, size, pdev->dma_mask); - } else { + if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac) + iova = iommu_alloc_iova(domain, size, dma_mask); + else { /* * First try to allocate an io virtual address in * DMA_32BIT_MASK and if that fails then try allocating @@ -1777,7 +1777,7 @@ __intel_alloc_iova(struct device *dev, struct dmar_domain *domain, */ iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK); if (!iova) - iova = iommu_alloc_iova(domain, size, pdev->dma_mask); + iova = iommu_alloc_iova(domain, size, dma_mask); } if (!iova) { @@ -1816,8 +1816,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev) return domain; } -dma_addr_t -intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) +static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, + size_t size, int dir, u64 dma_mask) { struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; @@ -1836,7 +1836,7 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) size = aligned_size((u64)paddr, size); - iova = __intel_alloc_iova(hwdev, domain, size); + iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); if (!iova) goto error; @@ -1878,6 +1878,13 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) return 0; } +dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr, + size_t size, int dir) +{ + return 
__intel_map_single(hwdev, paddr, size, dir, + to_pci_dev(hwdev)->dma_mask); +} + static void flush_unmaps(void) { int i, j; @@ -1993,7 +2000,9 @@ void *intel_alloc_coherent(struct device *hwdev, size_t size, return NULL; memset(vaddr, 0, size); - *dma_handle = intel_map_single(hwdev, virt_to_bus(vaddr), size, DMA_BIDIRECTIONAL); + *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size, + DMA_BIDIRECTIONAL, + hwdev->coherent_dma_mask); if (*dma_handle) return vaddr; free_pages((unsigned long)vaddr, order); @@ -2097,7 +2106,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, size += aligned_size((u64)addr, sg->length); } - iova = __intel_alloc_iova(hwdev, domain, size); + iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); if (!iova) { sglist->dma_length = 0; return 0; From f82851a8a480a26611175f064f54e17f5f7b01ae Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 18 Oct 2008 15:43:14 +0100 Subject: [PATCH 8/9] dmar: fix uninitialised 'ret' variable in dmar_parse_dev() This was introduced by commit 1886e8a90a580f3ad343f2065c84c1b9e1dac9ef ("x64, x2apic/intr-remap: code re-structuring, to be used by both DMA and Interrupt remapping"). It was causing bogus results to be returned from dmar_parse_dev() when the first unit with the INCLUDE_ALL flag was processed. Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index b65173828bc2..7b3751136e63 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -188,12 +188,11 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) return 0; } -static int __init -dmar_parse_dev(struct dmar_drhd_unit *dmaru) +static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru) { struct acpi_dmar_hardware_unit *drhd; static int include_all; - int ret; + int ret = 0; drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr; From 6c8909b42fee1be67647bcd2518161a0fa8ca533 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 18 Oct 2008 16:12:17 +0100 Subject: [PATCH 9/9] Admit to maintaining VT-d, for my sins. Signed-off-by: David Woodhouse --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 52702b057c02..3ae1cac9afd7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2176,6 +2176,13 @@ M: maciej.sosnowski@intel.com L: linux-kernel@vger.kernel.org S: Supported +INTEL IOMMU (VT-d) +P: David Woodhouse +M: dwmw2@infradead.org +L: iommu@lists.linux-foundation.org +T: git://git.infradead.org/iommu-2.6.git +S: Supported + INTEL IOP-ADMA DMA DRIVER P: Dan Williams M: dan.j.williams@intel.com
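To make the alloc_coherent change in patch 7 concrete: __intel_alloc_iova() now takes the
DMA mask as a parameter, so the coherent path can pass dev->coherent_dma_mask while the
streaming paths keep passing pdev->dma_mask. The sketch below is a compressed user-space
rendering of that allocation policy; the helper names and the always-succeeding allocator
are illustrative stand-ins, not the driver's actual code.

```c
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define DMA_32BIT_MASK 0xffffffffULL

/* Stand-in for iommu_alloc_iova(): pretend an IOVA just below 'limit' is free. */
static bool alloc_iova_below(uint64_t limit, uint64_t *iova)
{
	*iova = limit - 0x1000;
	return true;
}

/* Mirrors the decision logic of __intel_alloc_iova() after patch 7:
 * the mask is a parameter instead of always being pdev->dma_mask. */
static bool intel_alloc_iova_sketch(uint64_t dma_mask, bool forcedac,
				    uint64_t *iova)
{
	if (dma_mask <= DMA_32BIT_MASK || forcedac)
		return alloc_iova_below(dma_mask, iova);

	/* Prefer the 32-bit range first... */
	if (alloc_iova_below(DMA_32BIT_MASK, iova))
		return true;
	/* ...and fall back to the device's full mask only if that range is exhausted. */
	return alloc_iova_below(dma_mask, iova);
}

int main(void)
{
	uint64_t iova;

	/* alloc_coherent path: device with a 32-bit coherent_dma_mask */
	if (intel_alloc_iova_sketch(DMA_32BIT_MASK, false, &iova))
		printf("coherent buffer IOVA: 0x%llx\n", (unsigned long long)iova);

	/* streaming path: 64-bit capable dma_mask */
	if (intel_alloc_iova_sketch(~0ULL, false, &iova))
		printf("streaming IOVA:       0x%llx\n", (unsigned long long)iova);
	return 0;
}
```

Preferring the 32-bit range keeps devices off dual-address-cycle addressing unless they
genuinely need IOVAs above 4GiB, which is also why dmar_forcedac exists as an override.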