From cd12909cb576d37311fe35868780e82d5007d0c8 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 29 Sep 2011 16:53:32 +0100 Subject: [PATCH 1/7] xen: map foreign pages for shared rings by updating the PTEs directly When mapping a foreign page with xenbus_map_ring_valloc() with the GNTTABOP_map_grant_ref hypercall, set the GNTMAP_contains_pte flag and pass a pointer to the PTE (in init_mm). After the page is mapped, the usual fault mechanism can be used to update additional MMs. This allows the vmalloc_sync_all() to be removed from alloc_vm_area(). Signed-off-by: David Vrabel Acked-by: Andrew Morton [v1: Squashed fix by Michal for no-mmu case] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Michal Simek --- arch/x86/xen/grant-table.c | 2 +- drivers/xen/xenbus/xenbus_client.c | 11 ++++++++--- include/linux/vmalloc.h | 2 +- mm/nommu.c | 2 +- mm/vmalloc.c | 27 +++++++++++++-------------- 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 6bbfd7ac5e81..5a40d24ba331 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -71,7 +71,7 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, if (shared == NULL) { struct vm_struct *area = - alloc_vm_area(PAGE_SIZE * max_nr_gframes); + alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); BUG_ON(area == NULL); shared = area->addr; *__shared = shared; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 81c3ce6b8bbe..1906125eab49 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -436,19 +437,20 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) { struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map, + .flags = GNTMAP_host_map | GNTMAP_contains_pte, .ref = gnt_ref, .dom = dev->otherend_id, }; struct vm_struct *area; + pte_t *pte; *vaddr = NULL; - area = alloc_vm_area(PAGE_SIZE); + area = alloc_vm_area(PAGE_SIZE, &pte); if (!area) return -ENOMEM; - op.host_addr = (unsigned long)area->addr; + op.host_addr = arbitrary_virt_to_machine(pte).maddr; if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) BUG(); @@ -527,6 +529,7 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) struct gnttab_unmap_grant_ref op = { .host_addr = (unsigned long)vaddr, }; + unsigned int level; /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) * method so that we don't have to muck with vmalloc internals here. @@ -548,6 +551,8 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) } op.handle = (grant_handle_t)area->phys_addr; + op.host_addr = arbitrary_virt_to_machine( + lookup_address((unsigned long)vaddr, &level)).maddr; if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) BUG(); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 687fb11e2010..4bde182fcf93 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -119,7 +119,7 @@ unmap_kernel_range(unsigned long addr, unsigned long size) #endif /* Allocate/destroy a 'vmalloc' VM area. */ -extern struct vm_struct *alloc_vm_area(size_t size); +extern struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes); extern void free_vm_area(struct vm_struct *area); /* for /dev/kmem */ diff --git a/mm/nommu.c b/mm/nommu.c index 73419c55eda6..b982290fd962 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -454,7 +454,7 @@ void __attribute__((weak)) vmalloc_sync_all(void) * between processes, it syncs the pagetable across all * processes. */ -struct vm_struct *alloc_vm_area(size_t size) +struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) { BUG(); return NULL; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b669aa6f6caf..3231bf332878 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2141,23 +2141,30 @@ void __attribute__((weak)) vmalloc_sync_all(void) static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) { - /* apply_to_page_range() does all the hard work. */ + pte_t ***p = data; + + if (p) { + *(*p) = pte; + (*p)++; + } return 0; } /** * alloc_vm_area - allocate a range of kernel address space * @size: size of the area + * @ptes: returns the PTEs for the address space * * Returns: NULL on failure, vm_struct on success * * This function reserves a range of kernel address space, and * allocates pagetables to map that range. No actual mappings - * are created. If the kernel address space is not shared - * between processes, it syncs the pagetable across all - * processes. + * are created. + * + * If @ptes is non-NULL, pointers to the PTEs (in init_mm) + * allocated for the VM area are returned. */ -struct vm_struct *alloc_vm_area(size_t size) +struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) { struct vm_struct *area; @@ -2171,19 +2178,11 @@ struct vm_struct *alloc_vm_area(size_t size) * of kernel virtual address space and mapped into init_mm. */ if (apply_to_page_range(&init_mm, (unsigned long)area->addr, - area->size, f, NULL)) { + size, f, ptes ? &ptes : NULL)) { free_vm_area(area); return NULL; } - /* - * If the allocated address space is passed to a hypercall - * before being used then we cannot rely on a page fault to - * trigger an update of the page tables. So sync all the page - * tables here. - */ - vmalloc_sync_all(); - return area; } EXPORT_SYMBOL_GPL(alloc_vm_area); From 5f76d7078ce784916d55fc4e1bb0a42985f085a6 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Wed, 19 Oct 2011 18:05:27 -0400 Subject: [PATCH 2/7] xen: Remove hanging references to CONFIG_XEN_PLATFORM_PCI In 5fbdc10395cd500d6ff844825a918c4e6f38de37 the XEN_PLATFORM_PCI config option was removed, but references in header files remained. Clean up those references. Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- include/xen/platform_pci.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index a785a3b0c8c7..438c256c274b 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -29,8 +29,7 @@ static inline int xen_must_unplug_nics(void) { #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ defined(CONFIG_XEN_NETDEV_FRONTEND_MODULE)) && \ - (defined(CONFIG_XEN_PLATFORM_PCI) || \ - defined(CONFIG_XEN_PLATFORM_PCI_MODULE)) + defined(CONFIG_XEN_PVHVM) return 1; #else return 0; @@ -40,8 +39,7 @@ static inline int xen_must_unplug_nics(void) { static inline int xen_must_unplug_disks(void) { #if (defined(CONFIG_XEN_BLKDEV_FRONTEND) || \ defined(CONFIG_XEN_BLKDEV_FRONTEND_MODULE)) && \ - (defined(CONFIG_XEN_PLATFORM_PCI) || \ - defined(CONFIG_XEN_PLATFORM_PCI_MODULE)) + defined(CONFIG_XEN_PVHVM) return 1; #else return 0; From 72e9cf2ab12ef3e050577ffebdb0c88a28df821d Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Wed, 19 Oct 2011 17:59:37 -0400 Subject: [PATCH 3/7] xen/balloon: Avoid OOM when requesting highmem If highmem pages are requested from the balloon on a system without highmem, the implementation of alloc_xenballooned_pages will allocate all available memory trying to find highmem pages to return. Allow low memory to be returned when highmem pages are requested to avoid this loop. Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/balloon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index a767884a6c7a..31ab82fda38a 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -501,7 +501,7 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target); * alloc_xenballooned_pages - get pages that have been ballooned out * @nr_pages: Number of pages to get * @pages: pages returned - * @highmem: highmem or lowmem pages + * @highmem: allow highmem pages * @return 0 on success, error otherwise */ int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem) @@ -511,7 +511,7 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem) mutex_lock(&balloon_mutex); while (pgno < nr_pages) { page = balloon_retrieve(highmem); - if (page && PageHighMem(page) == highmem) { + if (page && (highmem || !PageHighMem(page))) { pages[pgno++] = page; } else { enum bp_state st; From 90d4f5534d14815bd94c10e8ceccc57287657ecc Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 27 Oct 2011 22:28:59 -0700 Subject: [PATCH 4/7] xen:pvhvm: enable PVHVM VCPU placement when using more than 32 CPUs. PVHVM running with more than 32 vcpus and pv_irq/pv_time enabled need VCPU placement to work, or else it will softlockup. CC: stable@kernel.org Acked-by: Stefano Stabellini Signed-off-by: Zhenzhong Duan Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index da8afd576a6b..1f928659c338 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1356,7 +1356,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, int cpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: - per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + xen_vcpu_setup(cpu); if (xen_have_vector_callback) xen_init_lock_cpu(cpu); break; @@ -1386,7 +1386,6 @@ static void __init xen_hvm_guest_init(void) xen_hvm_smp_init(); register_cpu_notifier(&xen_hvm_cpu_notifier); xen_unplug_emulated_devices(); - have_vcpu_info_placement = 0; x86_init.irqs.intr_init = xen_init_IRQ; xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); From fc6e0c3b909157748ce1c0c0f2a9935a5ee3c812 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 4 Nov 2011 21:23:32 +0300 Subject: [PATCH 5/7] xen-gntdev: integer overflow in gntdev_alloc_map() The multiplications here can overflow resulting in smaller buffer sizes than expected. "count" comes from a copy_from_user(). Signed-off-by: Dan Carpenter Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntdev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 39871326afa2..afca14d9042e 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -114,11 +114,11 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) if (NULL == add) return NULL; - add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL); - add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL); - add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL); - add->kmap_ops = kzalloc(sizeof(add->kmap_ops[0]) * count, GFP_KERNEL); - add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL); + add->grants = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL); + add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL); + add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); + add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL); + add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || From 21643e69a4c06f7ef155fbc70e3fba13fba4a756 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 4 Nov 2011 21:24:08 +0300 Subject: [PATCH 6/7] xen-gntalloc: integer overflow in gntalloc_ioctl_alloc() On 32 bit systems a high value of op.count could lead to an integer overflow in the kzalloc() and gref_ids would be smaller than expected. If the you triggered another integer overflow in "if (gref_size + op.count > limit)" then you'd probably get memory corruption inside add_grefs(). CC: stable@kernel.org Signed-off-by: Dan Carpenter Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index f6832f46aea4..23c60cf4313e 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -280,7 +280,7 @@ static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, goto out; } - gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY); + gref_ids = kcalloc(op.count, sizeof(gref_ids[0]), GFP_TEMPORARY); if (!gref_ids) { rc = -ENOMEM; goto out; From 99cb2ddcc617f43917e94a4147aa3ccdb2bcd77e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 4 Nov 2011 21:24:36 +0300 Subject: [PATCH 7/7] xen-gntalloc: signedness bug in add_grefs() gref->gref_id is unsigned so the error handling didn't work. gnttab_grant_foreign_access() returns an int type, so we can add a cast here, and it doesn't cause any problems. gnttab_grant_foreign_access() can return a variety of errors including -ENOSPC, -ENOSYS and -ENOMEM. CC: stable@kernel.org Signed-off-by: Dan Carpenter Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 23c60cf4313e..e1c4c6e5b469 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -135,7 +135,7 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, /* Grant foreign access to the page. */ gref->gref_id = gnttab_grant_foreign_access(op->domid, pfn_to_mfn(page_to_pfn(gref->page)), readonly); - if (gref->gref_id < 0) { + if ((int)gref->gref_id < 0) { rc = gref->gref_id; goto undo; }