Merge branch 'for-rc1/xen/core' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen

* 'for-rc1/xen/core' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen:
  xen: add FIX_TEXT_POKE to fixmap
  xen: honour VCPU availability on boot
  xen: clean up gate trap/interrupt constants
  xen: set _PAGE_NX in __supported_pte_mask before pagetable construction
  xen: resume interrupts before system devices.
  xen/mmu: weaken flush_tlb_other test
  xen/mmu: some early pagetable cleanups
  Xen: Add virt_to_pfn helper function
  x86-64: remove PGE from must-have feature list
  xen: mask XSAVE from cpuid
  NULL noise: arch/x86/xen/smp.c
  xen: remove xen_load_gdt debug
  xen: make xen_load_gdt simpler
  xen: clean up xen_load_gdt
  xen: split construction of p2m mfn tables from registration
  xen: separate p2m allocation from setting
  xen: disable preempt for leave_lazy_mmu
This commit is contained in:
Linus Torvalds 2009-04-13 15:30:20 -07:00
commit 2e1c63b7ed
9 changed files with 198 additions and 76 deletions

View file

@ -50,7 +50,7 @@
#ifdef CONFIG_X86_64
#define NEED_PSE 0
#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
#define NEED_PGE (1<<(X86_FEATURE_PGE & 31))
#define NEED_PGE 0
#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))

View file

@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
/* VIRT <-> MACHINE conversion */
#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v))))
#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v))))
#define virt_to_pfn(v) (PFN_DOWN(__pa(v)))
#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v)))
#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
static inline unsigned long pte_mfn(pte_t pte)

View file

@ -42,6 +42,7 @@
#include <asm/xen/hypervisor.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/msr-index.h>
#include <asm/setup.h>
#include <asm/desc.h>
@ -168,21 +169,23 @@ static void __init xen_banner(void)
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
}
static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
static void xen_cpuid(unsigned int *ax, unsigned int *bx,
unsigned int *cx, unsigned int *dx)
{
unsigned maskecx = ~0;
unsigned maskedx = ~0;
/*
* Mask out inconvenient features, to try and disable as many
* unsupported kernel subsystems as possible.
*/
if (*ax == 1)
maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
(1 << X86_FEATURE_ACPI) | /* disable ACPI */
(1 << X86_FEATURE_MCE) | /* disable MCE */
(1 << X86_FEATURE_MCA) | /* disable MCA */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
if (*ax == 1) {
maskecx = cpuid_leaf1_ecx_mask;
maskedx = cpuid_leaf1_edx_mask;
}
asm(XEN_EMULATE_PREFIX "cpuid"
: "=a" (*ax),
@ -190,9 +193,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
"=c" (*cx),
"=d" (*dx)
: "0" (*ax), "2" (*cx));
*cx &= maskecx;
*dx &= maskedx;
}
static __init void xen_init_cpuid_mask(void)
{
unsigned int ax, bx, cx, dx;
cpuid_leaf1_edx_mask =
~((1 << X86_FEATURE_MCE) | /* disable MCE */
(1 << X86_FEATURE_MCA) | /* disable MCA */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
if (!xen_initial_domain())
cpuid_leaf1_edx_mask &=
~((1 << X86_FEATURE_APIC) | /* disable local APIC */
(1 << X86_FEATURE_ACPI)); /* disable ACPI */
ax = 1;
xen_cpuid(&ax, &bx, &cx, &dx);
/* cpuid claims we support xsave; try enabling it to see what happens */
if (cx & (1 << (X86_FEATURE_XSAVE % 32))) {
unsigned long cr4;
set_in_cr4(X86_CR4_OSXSAVE);
cr4 = read_cr4();
if ((cr4 & X86_CR4_OSXSAVE) == 0)
cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32));
clear_in_cr4(X86_CR4_OSXSAVE);
}
}
static void xen_set_debugreg(int reg, unsigned long val)
{
HYPERVISOR_set_debugreg(reg, val);
@ -284,12 +321,11 @@ static void xen_set_ldt(const void *addr, unsigned entries)
static void xen_load_gdt(const struct desc_ptr *dtr)
{
unsigned long *frames;
unsigned long va = dtr->address;
unsigned int size = dtr->size + 1;
unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
unsigned long frames[pages];
int f;
struct multicall_space mcs;
/* A GDT can be up to 64k in size, which corresponds to 8192
8-byte entries, or 16 4k pages.. */
@ -297,19 +333,26 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
BUG_ON(size > 65536);
BUG_ON(va & ~PAGE_MASK);
mcs = xen_mc_entry(sizeof(*frames) * pages);
frames = mcs.args;
for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
frames[f] = arbitrary_virt_to_mfn((void *)va);
int level;
pte_t *ptep = lookup_address(va, &level);
unsigned long pfn, mfn;
void *virt;
BUG_ON(ptep == NULL);
pfn = pte_pfn(*ptep);
mfn = pfn_to_mfn(pfn);
virt = __va(PFN_PHYS(pfn));
frames[f] = mfn;
make_lowmem_page_readonly((void *)va);
make_lowmem_page_readonly(mfn_to_virt(frames[f]));
make_lowmem_page_readonly(virt);
}
MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
xen_mc_issue(PARAVIRT_LAZY_CPU);
if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
BUG();
}
static void load_TLS_descriptor(struct thread_struct *t,
@ -385,7 +428,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
static int cvt_gate_to_trap(int vector, const gate_desc *val,
struct trap_info *info)
{
if (val->type != 0xf && val->type != 0xe)
if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
return 0;
info->vector = vector;
@ -393,8 +436,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
info->cs = gate_segment(*val);
info->flags = val->dpl;
/* interrupt gates clear IF */
if (val->type == 0xe)
info->flags |= 4;
if (val->type == GATE_INTERRUPT)
info->flags |= 1 << 2;
return 1;
}
@ -872,7 +915,6 @@ static const struct machine_ops __initdata xen_machine_ops = {
.emergency_restart = xen_emergency_restart,
};
/* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void)
{
@ -897,6 +939,8 @@ asmlinkage void __init xen_start_kernel(void)
xen_init_irq_ops();
xen_init_cpuid_mask();
#ifdef CONFIG_X86_LOCAL_APIC
/*
* set up the basic apic ops.
@ -938,6 +982,11 @@ asmlinkage void __init xen_start_kernel(void)
if (!xen_initial_domain())
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
#ifdef CONFIG_X86_64
/* Work out if we support NX */
check_efer();
#endif
/* Don't do the full vcpu_info placement stuff until we have a
possible map and a non-dummy shared_info. */
per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];

View file

@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn)
}
/* Build the parallel p2m_top_mfn structures */
void xen_setup_mfn_list_list(void)
static void __init xen_build_mfn_list_list(void)
{
unsigned pfn, idx;
@ -198,7 +198,10 @@ void xen_setup_mfn_list_list(void)
unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
}
}
void xen_setup_mfn_list_list(void)
{
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
@ -218,6 +221,8 @@ void __init xen_build_dynamic_phys_to_machine(void)
p2m_top[topidx] = &mfn_list[pfn];
}
xen_build_mfn_list_list();
}
unsigned long get_phys_to_machine(unsigned long pfn)
@ -233,47 +238,74 @@ unsigned long get_phys_to_machine(unsigned long pfn)
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);
static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
/* install a new p2m_top page */
bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
{
unsigned long *p;
unsigned topidx = p2m_top_index(pfn);
unsigned long **pfnp, *mfnp;
unsigned i;
p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
BUG_ON(p == NULL);
pfnp = &p2m_top[topidx];
mfnp = &p2m_top_mfn[topidx];
for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
p[i] = INVALID_P2M_ENTRY;
if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
free_page((unsigned long)p);
else
if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
*mfnp = virt_to_mfn(p);
return true;
}
return false;
}
static void alloc_p2m(unsigned long pfn)
{
unsigned long *p;
p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
BUG_ON(p == NULL);
if (!install_p2mtop_page(pfn, p))
free_page((unsigned long)p);
}
/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
unsigned topidx, idx;
if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
return true;
}
topidx = p2m_top_index(pfn);
if (p2m_top[topidx] == p2m_missing) {
if (mfn == INVALID_P2M_ENTRY)
return true;
return false;
}
idx = p2m_index(pfn);
p2m_top[topidx][idx] = mfn;
return true;
}
void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
unsigned topidx, idx;
if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
return;
}
if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
return;
}
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
alloc_p2m(pfn);
topidx = p2m_top_index(pfn);
if (p2m_top[topidx] == p2m_missing) {
/* no need to allocate a page to store an invalid entry */
if (mfn == INVALID_P2M_ENTRY)
return;
alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
if (!__set_phys_to_machine(pfn, mfn))
BUG();
}
idx = p2m_index(pfn);
p2m_top[topidx][idx] = mfn;
}
unsigned long arbitrary_virt_to_mfn(void *vaddr)
@ -987,7 +1019,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
return 0;
}
void __init xen_mark_init_mm_pinned(void)
static void __init xen_mark_init_mm_pinned(void)
{
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
}
@ -1270,8 +1302,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
} *args;
struct multicall_space mcs;
BUG_ON(cpumask_empty(cpus));
BUG_ON(!mm);
if (cpumask_empty(cpus))
return; /* nothing to do */
mcs = xen_mc_entry(sizeof(*args));
args = mcs.args;
@ -1438,10 +1470,29 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
}
#endif
static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
{
struct mmuext_op op;
op.cmd = cmd;
op.arg1.mfn = pfn_to_mfn(pfn);
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
BUG();
}
/* Early in boot, while setting up the initial pagetable, assume
everything is pinned. */
static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
{
#ifdef CONFIG_FLATMEM
BUG_ON(mem_map); /* should only be used early */
#endif
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
}
/* Used for pmd and pud */
static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
{
#ifdef CONFIG_FLATMEM
BUG_ON(mem_map); /* should only be used early */
#endif
@ -1450,18 +1501,15 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
/* Early release_pte assumes that all pts are pinned, since there's
only init_mm and anything attached to that is pinned. */
static void xen_release_pte_init(unsigned long pfn)
static __init void xen_release_pte_init(unsigned long pfn)
{
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
static __init void xen_release_pmd_init(unsigned long pfn)
{
struct mmuext_op op;
op.cmd = cmd;
op.arg1.mfn = pfn_to_mfn(pfn);
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
BUG();
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
/* This needs to make sure the new pte page is pinned iff its being
@ -1773,6 +1821,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#ifdef CONFIG_X86_LOCAL_APIC
case FIX_APIC_BASE: /* maps dummy local APIC */
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
/* All local page mappings */
pte = pfn_pte(phys, prot);
break;
@ -1819,7 +1870,6 @@ __init void xen_post_allocator_init(void)
xen_mark_init_mm_pinned();
}
const struct pv_mmu_ops xen_mmu_ops __initdata = {
.pagetable_setup_start = xen_pagetable_setup_start,
.pagetable_setup_done = xen_pagetable_setup_done,
@ -1843,9 +1893,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
.alloc_pte = xen_alloc_pte_init,
.release_pte = xen_release_pte_init,
.alloc_pmd = xen_alloc_pte_init,
.alloc_pmd = xen_alloc_pmd_init,
.alloc_pmd_clone = paravirt_nop,
.release_pmd = xen_release_pte_init,
.release_pmd = xen_release_pmd_init,
#ifdef CONFIG_HIGHPTE
.kmap_atomic_pte = xen_kmap_atomic_pte,
@ -1883,8 +1933,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
.make_pud = PV_CALLEE_SAVE(xen_make_pud),
.set_pgd = xen_set_pgd_hyper,
.alloc_pud = xen_alloc_pte_init,
.release_pud = xen_release_pte_init,
.alloc_pud = xen_alloc_pmd_init,
.release_pud = xen_release_pmd_init,
#endif /* PAGETABLE_LEVELS == 4 */
.activate_mm = xen_activate_mm,

View file

@ -11,6 +11,9 @@ enum pt_level {
};
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
bool install_p2mtop_page(unsigned long pfn, unsigned long *p);
void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);

View file

@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
BUG_ON(rc);
while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
HYPERVISOR_sched_op(SCHEDOP_yield, 0);
HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
barrier();
}
@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
/* Make sure other vcpus get a chance to run if they need to. */
for_each_cpu(cpu, mask) {
if (xen_vcpu_stolen(cpu)) {
HYPERVISOR_sched_op(SCHEDOP_yield, 0);
HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
break;
}
}

View file

@ -57,8 +57,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
bool xen_vcpu_stolen(int vcpu);
void xen_mark_init_mm_pinned(void);
void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP

View file

@ -21,29 +21,41 @@ static void disable_hotplug_cpu(int cpu)
set_cpu_present(cpu, false);
}
static void vcpu_hotplug(unsigned int cpu)
static int vcpu_online(unsigned int cpu)
{
int err;
char dir[32], state[32];
if (!cpu_possible(cpu))
return;
sprintf(dir, "cpu/%u", cpu);
err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
if (err != 1) {
printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
return;
return err;
}
if (strcmp(state, "online") == 0) {
if (strcmp(state, "online") == 0)
return 1;
else if (strcmp(state, "offline") == 0)
return 0;
printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", state, cpu);
return -EINVAL;
}
static void vcpu_hotplug(unsigned int cpu)
{
if (!cpu_possible(cpu))
return;
switch (vcpu_online(cpu)) {
case 1:
enable_hotplug_cpu(cpu);
} else if (strcmp(state, "offline") == 0) {
break;
case 0:
(void)cpu_down(cpu);
disable_hotplug_cpu(cpu);
} else {
printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
state, cpu);
break;
default:
break;
}
}
@ -64,12 +76,20 @@ static void handle_vcpu_hotplug_event(struct xenbus_watch *watch,
static int setup_cpu_watcher(struct notifier_block *notifier,
unsigned long event, void *data)
{
int cpu;
static struct xenbus_watch cpu_watch = {
.node = "cpu",
.callback = handle_vcpu_hotplug_event};
(void)register_xenbus_watch(&cpu_watch);
for_each_possible_cpu(cpu) {
if (vcpu_online(cpu) == 0) {
(void)cpu_down(cpu);
cpu_clear(cpu, cpu_present_map);
}
}
return NOTIFY_DONE;
}

View file

@ -62,14 +62,15 @@ static int xen_suspend(void *data)
gnttab_resume();
xen_mm_unpin_all();
sysdev_resume();
if (!*cancelled) {
xen_irq_resume();
xen_console_resume();
xen_timer_resume();
}
sysdev_resume();
device_power_up(PMSG_RESUME);
return 0;
}