diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index ba7f4c8f5c3e..8073625371f5 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -89,6 +89,17 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); +/* Not used by XENFEAT_auto_translated guests. */ +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned int domid, struct page **pages) +{ + return -ENOSYS; +} +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); + static void xen_read_wallclock(struct timespec64 *ts) { u32 version; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 2d76106788a3..96fc2f0fdbfe 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -63,37 +63,44 @@ static noinline void xen_flush_tlb_all(void) #define REMAP_BATCH_SIZE 16 struct remap_data { - xen_pfn_t *mfn; + xen_pfn_t *pfn; bool contiguous; + bool no_translate; pgprot_t prot; struct mmu_update *mmu_update; }; -static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, +static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, void *data) { struct remap_data *rmd = data; - pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot)); + pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot)); - /* If we have a contiguous range, just update the mfn itself, - else update pointer to be "next mfn". */ + /* + * If we have a contiguous range, just update the pfn itself, + * else update pointer to be "next pfn". + */ if (rmd->contiguous) - (*rmd->mfn)++; + (*rmd->pfn)++; else - rmd->mfn++; + rmd->pfn++; - rmd->mmu_update->ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; + rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; + rmd->mmu_update->ptr |= rmd->no_translate ? + MMU_PT_UPDATE_NO_TRANSLATE : + MMU_NORMAL_PT_UPDATE; rmd->mmu_update->val = pte_val_ma(pte); rmd->mmu_update++; return 0; } -static int do_remap_gfn(struct vm_area_struct *vma, +static int do_remap_pfn(struct vm_area_struct *vma, unsigned long addr, - xen_pfn_t *gfn, int nr, + xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, - unsigned domid, + unsigned int domid, + bool no_translate, struct page **pages) { int err = 0; @@ -104,11 +111,14 @@ static int do_remap_gfn(struct vm_area_struct *vma, BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); - rmd.mfn = gfn; + rmd.pfn = pfn; rmd.prot = prot; - /* We use the err_ptr to indicate if there we are doing a contiguous - * mapping or a discontigious mapping. */ + /* + * We use the err_ptr to indicate if there we are doing a contiguous + * mapping or a discontigious mapping. + */ rmd.contiguous = !err_ptr; + rmd.no_translate = no_translate; while (nr) { int index = 0; @@ -119,7 +129,7 @@ static int do_remap_gfn(struct vm_area_struct *vma, rmd.mmu_update = mmu_update; err = apply_to_page_range(vma->vm_mm, addr, range, - remap_area_mfn_pte_fn, &rmd); + remap_area_pfn_pte_fn, &rmd); if (err) goto out; @@ -173,7 +183,8 @@ int xen_remap_domain_gfn_range(struct vm_area_struct *vma, if (xen_feature(XENFEAT_auto_translated_physmap)) return -EOPNOTSUPP; - return do_remap_gfn(vma, addr, &gfn, nr, NULL, prot, domid, pages); + return do_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false, + pages); } EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range); @@ -192,10 +203,25 @@ int xen_remap_domain_gfn_array(struct vm_area_struct *vma, * cause of "wrong memory was mapped in". */ BUG_ON(err_ptr == NULL); - return do_remap_gfn(vma, addr, gfn, nr, err_ptr, prot, domid, pages); + return do_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid, + false, pages); } EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array); +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned int domid, struct page **pages) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -EOPNOTSUPP; + + return do_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid, + true, pages); +} +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); + /* Returns: 0 success */ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, int nr, struct page **pages) diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S index e1a5fbeae08d..ca2d3b2bf2af 100644 --- a/arch/x86/xen/xen-pvh.S +++ b/arch/x86/xen/xen-pvh.S @@ -54,12 +54,19 @@ * charge of setting up it's own stack, GDT and IDT. */ +#define PVH_GDT_ENTRY_CS 1 +#define PVH_GDT_ENTRY_DS 2 +#define PVH_GDT_ENTRY_CANARY 3 +#define PVH_CS_SEL (PVH_GDT_ENTRY_CS * 8) +#define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8) +#define PVH_CANARY_SEL (PVH_GDT_ENTRY_CANARY * 8) + ENTRY(pvh_start_xen) cld lgdt (_pa(gdt)) - mov $(__BOOT_DS),%eax + mov $PVH_DS_SEL,%eax mov %eax,%ds mov %eax,%es mov %eax,%ss @@ -93,11 +100,17 @@ ENTRY(pvh_start_xen) mov %eax, %cr0 /* Jump to 64-bit mode. */ - ljmp $__KERNEL_CS, $_pa(1f) + ljmp $PVH_CS_SEL, $_pa(1f) /* 64-bit entry point. */ .code64 1: + /* Set base address in stack canary descriptor. */ + mov $MSR_GS_BASE,%ecx + mov $_pa(canary), %eax + xor %edx, %edx + wrmsr + call xen_prepare_pvh /* startup_64 expects boot_params in %rsi. */ @@ -107,6 +120,17 @@ ENTRY(pvh_start_xen) #else /* CONFIG_X86_64 */ + /* Set base address in stack canary descriptor. */ + movl $_pa(gdt_start),%eax + movl $_pa(canary),%ecx + movw %cx, (PVH_GDT_ENTRY_CANARY * 8) + 2(%eax) + shrl $16, %ecx + movb %cl, (PVH_GDT_ENTRY_CANARY * 8) + 4(%eax) + movb %ch, (PVH_GDT_ENTRY_CANARY * 8) + 7(%eax) + + mov $PVH_CANARY_SEL,%eax + mov %eax,%gs + call mk_early_pgtbl_32 mov $_pa(initial_page_table), %eax @@ -116,13 +140,13 @@ ENTRY(pvh_start_xen) or $(X86_CR0_PG | X86_CR0_PE), %eax mov %eax, %cr0 - ljmp $__BOOT_CS, $1f + ljmp $PVH_CS_SEL, $1f 1: call xen_prepare_pvh mov $_pa(pvh_bootparams), %esi /* startup_32 doesn't expect paging and PAE to be on. */ - ljmp $__BOOT_CS, $_pa(2f) + ljmp $PVH_CS_SEL, $_pa(2f) 2: mov %cr0, %eax and $~X86_CR0_PG, %eax @@ -131,7 +155,7 @@ ENTRY(pvh_start_xen) and $~X86_CR4_PAE, %eax mov %eax, %cr4 - ljmp $__BOOT_CS, $_pa(startup_32) + ljmp $PVH_CS_SEL, $_pa(startup_32) #endif END(pvh_start_xen) @@ -143,16 +167,19 @@ gdt: .word 0 gdt_start: .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* reserved */ #ifdef CONFIG_X86_64 - .quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* __KERNEL_CS */ + .quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* PVH_CS_SEL */ #else - .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* __KERNEL_CS */ + .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* PVH_CS_SEL */ #endif - .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* __KERNEL_DS */ + .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* PVH_DS_SEL */ + .quad GDT_ENTRY(0x4090, 0, 0x18) /* PVH_CANARY_SEL */ gdt_end: - .balign 4 + .balign 16 +canary: + .fill 48, 1, 0 + early_stack: .fill 256, 1, 0 early_stack_end: diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 4dd0668003e7..679da1abd73c 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -564,7 +564,7 @@ static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb, #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1) -static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats); diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 1c909183c42a..8ae0349d9f0a 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -722,6 +723,134 @@ static long privcmd_ioctl_restrict(struct file *file, void __user *udata) return 0; } +struct remap_pfn { + struct mm_struct *mm; + struct page **pages; + pgprot_t prot; + unsigned long i; +}; + +static int remap_pfn_fn(pte_t *ptep, pgtable_t token, unsigned long addr, + void *data) +{ + struct remap_pfn *r = data; + struct page *page = r->pages[r->i]; + pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), r->prot)); + + set_pte_at(r->mm, addr, ptep, pte); + r->i++; + + return 0; +} + +static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) +{ + struct privcmd_data *data = file->private_data; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct privcmd_mmap_resource kdata; + xen_pfn_t *pfns = NULL; + struct xen_mem_acquire_resource xdata; + int rc; + + if (copy_from_user(&kdata, udata, sizeof(kdata))) + return -EFAULT; + + /* If restriction is in place, check the domid matches */ + if (data->domid != DOMID_INVALID && data->domid != kdata.dom) + return -EPERM; + + down_write(&mm->mmap_sem); + + vma = find_vma(mm, kdata.addr); + if (!vma || vma->vm_ops != &privcmd_vm_ops) { + rc = -EINVAL; + goto out; + } + + pfns = kcalloc(kdata.num, sizeof(*pfns), GFP_KERNEL); + if (!pfns) { + rc = -ENOMEM; + goto out; + } + + if (xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned int nr = DIV_ROUND_UP(kdata.num, XEN_PFN_PER_PAGE); + struct page **pages; + unsigned int i; + + rc = alloc_empty_pages(vma, nr); + if (rc < 0) + goto out; + + pages = vma->vm_private_data; + for (i = 0; i < kdata.num; i++) { + xen_pfn_t pfn = + page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]); + + pfns[i] = pfn + (i % XEN_PFN_PER_PAGE); + } + } else + vma->vm_private_data = PRIV_VMA_LOCKED; + + memset(&xdata, 0, sizeof(xdata)); + xdata.domid = kdata.dom; + xdata.type = kdata.type; + xdata.id = kdata.id; + xdata.frame = kdata.idx; + xdata.nr_frames = kdata.num; + set_xen_guest_handle(xdata.frame_list, pfns); + + xen_preemptible_hcall_begin(); + rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata); + xen_preemptible_hcall_end(); + + if (rc) + goto out; + + if (xen_feature(XENFEAT_auto_translated_physmap)) { + struct remap_pfn r = { + .mm = vma->vm_mm, + .pages = vma->vm_private_data, + .prot = vma->vm_page_prot, + }; + + rc = apply_to_page_range(r.mm, kdata.addr, + kdata.num << PAGE_SHIFT, + remap_pfn_fn, &r); + } else { + unsigned int domid = + (xdata.flags & XENMEM_rsrc_acq_caller_owned) ? + DOMID_SELF : kdata.dom; + int num; + + num = xen_remap_domain_mfn_array(vma, + kdata.addr & PAGE_MASK, + pfns, kdata.num, (int *)pfns, + vma->vm_page_prot, + domid, + vma->vm_private_data); + if (num < 0) + rc = num; + else if (num != kdata.num) { + unsigned int i; + + for (i = 0; i < num; i++) { + rc = pfns[i]; + if (rc < 0) + break; + } + } else + rc = 0; + } + +out: + up_write(&mm->mmap_sem); + kfree(pfns); + + return rc; +} + static long privcmd_ioctl(struct file *file, unsigned int cmd, unsigned long data) { @@ -753,6 +882,10 @@ static long privcmd_ioctl(struct file *file, ret = privcmd_ioctl_restrict(file, udata); break; + case IOCTL_PRIVCMD_MMAP_RESOURCE: + ret = privcmd_ioctl_mmap_resource(file, udata); + break; + default: break; } @@ -801,7 +934,7 @@ static void privcmd_close(struct vm_area_struct *vma) kfree(pages); } -static int privcmd_fault(struct vm_fault *vmf) +static vm_fault_t privcmd_fault(struct vm_fault *vmf) { printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end, diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index ec9eb4fba59c..f2088838f690 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -710,7 +710,7 @@ static int __init xenstored_local_init(void) if (!page) goto out_err; - xen_store_gfn = xen_start_info->store_mfn = virt_to_gfn((void *)page); + xen_store_gfn = virt_to_gfn((void *)page); /* Next allocate a local port which xenstored can bind to */ alloc_unbound.dom = DOMID_SELF; @@ -722,8 +722,7 @@ static int __init xenstored_local_init(void) goto out_err; BUG_ON(err); - xen_store_evtchn = xen_start_info->store_evtchn = - alloc_unbound.port; + xen_store_evtchn = alloc_unbound.port; return 0; diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index 39d3e7b8e993..d2029556083e 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -89,6 +89,15 @@ struct privcmd_dm_op { const struct privcmd_dm_op_buf __user *ubufs; }; +struct privcmd_mmap_resource { + domid_t dom; + __u32 type; + __u32 id; + __u32 idx; + __u64 num; + __u64 addr; +}; + /* * @cmd: IOCTL_PRIVCMD_HYPERCALL * @arg: &privcmd_hypercall_t @@ -114,5 +123,7 @@ struct privcmd_dm_op { _IOC(_IOC_NONE, 'P', 5, sizeof(struct privcmd_dm_op)) #define IOCTL_PRIVCMD_RESTRICT \ _IOC(_IOC_NONE, 'P', 6, sizeof(domid_t)) +#define IOCTL_PRIVCMD_MMAP_RESOURCE \ + _IOC(_IOC_NONE, 'P', 7, sizeof(struct privcmd_mmap_resource)) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index 583dd93b3016..4c5751c26f87 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -265,4 +265,70 @@ struct xen_remove_from_physmap { }; DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); +/* + * Get the pages for a particular guest resource, so that they can be + * mapped directly by a tools domain. + */ +#define XENMEM_acquire_resource 28 +struct xen_mem_acquire_resource { + /* IN - The domain whose resource is to be mapped */ + domid_t domid; + /* IN - the type of resource */ + uint16_t type; + +#define XENMEM_resource_ioreq_server 0 +#define XENMEM_resource_grant_table 1 + + /* + * IN - a type-specific resource identifier, which must be zero + * unless stated otherwise. + * + * type == XENMEM_resource_ioreq_server -> id == ioreq server id + * type == XENMEM_resource_grant_table -> id defined below + */ + uint32_t id; + +#define XENMEM_resource_grant_table_id_shared 0 +#define XENMEM_resource_grant_table_id_status 1 + + /* IN/OUT - As an IN parameter number of frames of the resource + * to be mapped. However, if the specified value is 0 and + * frame_list is NULL then this field will be set to the + * maximum value supported by the implementation on return. + */ + uint32_t nr_frames; + /* + * OUT - Must be zero on entry. On return this may contain a bitwise + * OR of the following values. + */ + uint32_t flags; + + /* The resource pages have been assigned to the calling domain */ +#define _XENMEM_rsrc_acq_caller_owned 0 +#define XENMEM_rsrc_acq_caller_owned (1u << _XENMEM_rsrc_acq_caller_owned) + + /* + * IN - the index of the initial frame to be mapped. This parameter + * is ignored if nr_frames is 0. + */ + uint64_t frame; + +#define XENMEM_resource_ioreq_server_frame_bufioreq 0 +#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n)) + + /* + * IN/OUT - If the tools domain is PV then, upon return, frame_list + * will be populated with the MFNs of the resource. + * If the tools domain is HVM then it is expected that, on + * entry, frame_list will be populated with a list of GFNs + * that will be mapped to the MFNs of the resource. + * If -EIO is returned then the frame_list has only been + * partially mapped and it is up to the caller to unmap all + * the GFNs. + * This parameter may be NULL if nr_frames is 0. + */ + GUEST_HANDLE(xen_pfn_t) frame_list; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_mem_acquire_resource); + #endif /* __XEN_PUBLIC_MEMORY_H__ */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 4f4830ef8f93..8bfb242f433e 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -265,9 +265,10 @@ * * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. */ -#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ -#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ -#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ +#define MMU_PT_UPDATE_NO_TRANSLATE 3 /* checked '*ptr = val'. ptr is MA. */ /* * MMU EXTENDED OPERATIONS diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index fd23e42c6024..fd18c974a619 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -63,7 +63,7 @@ static inline void xen_destroy_contiguous_region(phys_addr_t pstart, struct vm_area_struct; /* - * xen_remap_domain_gfn_array() - map an array of foreign frames + * xen_remap_domain_gfn_array() - map an array of foreign frames by gfn * @vma: VMA to map the pages into * @addr: Address at which to map the pages * @gfn: Array of GFNs to map @@ -86,6 +86,28 @@ int xen_remap_domain_gfn_array(struct vm_area_struct *vma, unsigned domid, struct page **pages); +/* + * xen_remap_domain_mfn_array() - map an array of foreign frames by mfn + * @vma: VMA to map the pages into + * @addr: Address at which to map the pages + * @mfn: Array of MFNs to map + * @nr: Number entries in the MFN array + * @err_ptr: Returns per-MFN error status. + * @prot: page protection mask + * @domid: Domain owning the pages + * @pages: Array of pages if this domain has an auto-translated physmap + * + * @mfn and @err_ptr may point to the same buffer, the MFNs will be + * overwritten by the error codes after they are mapped. + * + * Returns the number of successfully mapped frames, or a -ve error + * code. + */ +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned int domid, struct page **pages); + /* xen_remap_domain_gfn_range() - map a range of foreign frames * @vma: VMA to map the pages into * @addr: Address at which to map the pages