KVM: PPC: Improve split mode
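
This combined view appears to fold several patches from the Book3S
PPC32 enablement series into one diff against book3s.c.  What the
hunks below change:

 - Compatibility defines (MSR_USER32/64, HW_PAGE_SIZE) so the file
   builds on 32-bit Book3S, where the shadow vcpu lives in
   current->thread instead of the PACA; the SLB shadow copies are
   guarded by CONFIG_PPC_BOOK3S_64, and direct vcpu->arch.pc/ctr/lr
   and fault-register accesses go through the kvmppc_get_*() /
   to_svcpu() accessors.

 - Split mode (MSR_IR != MSR_DR) gets its own vpage space: the
   VSID_REAL* markers move up above SID_SHIFT and the VSID is
   resolved through vcpu->arch.mmu.esid_to_vsid(), so real-mode and
   split-mode translations no longer collide.

 - dcbz patching kmaps the faulting 4k sub-page with kmap_atomic()
   instead of bouncing a vmalloc'd page through copy_{from,to}_user().

 - kvmppc_ld()/kvmppc_st() now return the translated address plus
   EMULATE_DONE / EMULATE_DO_MMIO / -ENOENT / -EPERM, so callers can
   tell MMIO accesses apart from missing PTEs.

 - The FPU is preloaded whenever the guest MSR has MSR_FP set, OSI
   hypercalls (magic values in r3/r4) bounce to user space as
   KVM_EXIT_OSI, alignment interrupts are forwarded with a computed
   DSISR/DAR, and the irqprio delivery loop is bounded by
   BOOK3S_IRQPRIO_MAX.

 - The vcpu is vmalloc'ed with the shadow vcpu kzalloc'ed separately,
   per-vcpu VSID setup moves behind kvmppc_mmu_init(), and
   get_regs/set_regs wrap themselves in vcpu_load()/vcpu_put().
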
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b18415f..397701d 100644
@@ -29,6 +29,7 @@
 #include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
+#include <linux/highmem.h>
 
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 /* #define EXIT_DEBUG_SIMPLE */
 /* #define DEBUG_EXT */
 
-static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
                             ulong msr);
 
+/* Some compatibility defines */
+#ifdef CONFIG_PPC_BOOK3S_32
+#define MSR_USER32 MSR_USER
+#define MSR_USER64 MSR_USER
+#define HW_PAGE_SIZE PAGE_SIZE
+#endif
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "exits",       VCPU_STAT(sum_exits) },
        { "mmio",        VCPU_STAT(mmio_exits) },
@@ -71,18 +78,26 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-       memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb));
-       memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu,
+#ifdef CONFIG_PPC_BOOK3S_64
+       memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
+       memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
               sizeof(get_paca()->shadow_vcpu));
-       get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max;
+       to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+       current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
+#endif
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
-       memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb));
-       memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
+#ifdef CONFIG_PPC_BOOK3S_64
+       memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
+       memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
               sizeof(get_paca()->shadow_vcpu));
-       to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max;
+       to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
+#endif
 
        kvmppc_giveup_ext(vcpu, MSR_FP);
        kvmppc_giveup_ext(vcpu, MSR_VEC);
@@ -133,18 +148,22 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
                }
        }
 
-       if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) ||
-           (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) {
+       if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) !=
+                  (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
                kvmppc_mmu_flush_segments(vcpu);
-               kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc);
+               kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
        }
+
+       /* Preload FPU if it's enabled */
+       if (vcpu->arch.msr & MSR_FP)
+               kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 }
 
 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
 {
-       vcpu->arch.srr0 = vcpu->arch.pc;
+       vcpu->arch.srr0 = kvmppc_get_pc(vcpu);
        vcpu->arch.srr1 = vcpu->arch.msr | flags;
-       vcpu->arch.pc = to_book3s(vcpu)->hior + vec;
+       kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec);
        vcpu->arch.mmu.reset_msr(vcpu);
 }
 
@@ -220,6 +239,12 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
        kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 }
 
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
+                                  struct kvm_interrupt *irq)
+{
+       kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+}
+
 int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
 {
        int deliver = 1;
@@ -304,7 +329,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
                printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
 #endif
        priority = __ffs(*pending);
-       while (priority <= (sizeof(unsigned int) * 8)) {
+       while (priority < BOOK3S_IRQPRIO_MAX) {
                if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
                    (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
                        /* DEC interrupts get cleared by mtdec */
@@ -322,11 +347,14 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
 {
        vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
        vcpu->arch.pvr = pvr;
+#ifdef CONFIG_PPC_BOOK3S_64
        if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
                kvmppc_mmu_book3s_64_init(vcpu);
                to_book3s(vcpu)->hior = 0xfff00000;
                to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
-       } else {
+       } else
+#endif
+       {
                kvmppc_mmu_book3s_32_init(vcpu);
                to_book3s(vcpu)->hior = 0;
                to_book3s(vcpu)->msr_mask = 0xffffffffULL;
@@ -339,6 +367,15 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
            !strcmp(cur_cpu_spec->platform, "ppc970"))
                vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
 
+       /* Cell performs badly if MSR_FEx are set. So let's hope nobody
+          really needs them in a VM on Cell and force disable them. */
+       if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
+               to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
+
+#ifdef CONFIG_PPC_BOOK3S_32
+       /* 32 bit Book3S always has 32 byte dcbz */
+       vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+#endif
 }
 
 /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
@@ -352,34 +389,29 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
  */
 static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 {
-       bool touched = false;
-       hva_t hpage;
+       struct page *hpage;
+       u64 hpage_offset;
        u32 *page;
        int i;
 
-       hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
-       if (kvm_is_error_hva(hpage))
+       hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
+       if (is_error_page(hpage))
                return;
 
-       hpage |= pte->raddr & ~PAGE_MASK;
-       hpage &= ~0xFFFULL;
-
-       page = vmalloc(HW_PAGE_SIZE);
-
-       if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE))
-               goto out;
+       hpage_offset = pte->raddr & ~PAGE_MASK;
+       hpage_offset &= ~0xFFFULL;
+       hpage_offset /= 4;
 
-       for (i=0; i < HW_PAGE_SIZE / 4; i++)
-               if ((page[i] & 0xff0007ff) == INS_DCBZ) {
-                       page[i] &= 0xfffffff7; // reserved instruction, so we trap
-                       touched = true;
-               }
+       get_page(hpage);
+       page = kmap_atomic(hpage, KM_USER0);
 
-       if (touched)
-               copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE);
+       /* patch dcbz into reserved instruction, so we trap */
+       for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
+               if ((page[i] & 0xff0007ff) == INS_DCBZ)
+                       page[i] &= 0xfffffff7;
 
-out:
-       vfree(page);
+       kunmap_atomic(page, KM_USER0);
+       put_page(hpage);
 }
 
 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
@@ -393,15 +425,7 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
        } else {
                pte->eaddr = eaddr;
                pte->raddr = eaddr & 0xffffffff;
-               pte->vpage = eaddr >> 12;
-               switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
-               case 0:
-                       pte->vpage |= VSID_REAL;
-               case MSR_DR:
-                       pte->vpage |= VSID_REAL_DR;
-               case MSR_IR:
-                       pte->vpage |= VSID_REAL_IR;
-               }
+               pte->vpage = VSID_REAL | eaddr >> 12;
                pte->may_read = true;
                pte->may_write = true;
                pte->may_execute = true;
@@ -436,55 +460,55 @@ err:
        return kvmppc_bad_hva();
 }
 
-int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr)
+int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
+             bool data)
 {
        struct kvmppc_pte pte;
-       hva_t hva = eaddr;
 
        vcpu->stat.st++;
 
-       if (kvmppc_xlate(vcpu, eaddr, false, &pte))
-               goto err;
+       if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+               return -ENOENT;
 
-       hva = kvmppc_pte_to_hva(vcpu, &pte, false);
-       if (kvm_is_error_hva(hva))
-               goto err;
+       *eaddr = pte.raddr;
 
-       if (copy_to_user((void __user *)hva, ptr, size)) {
-               printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva);
-               goto err;
-       }
+       if (!pte.may_write)
+               return -EPERM;
 
-       return 0;
+       if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size))
+               return EMULATE_DO_MMIO;
 
-err:
-       return -ENOENT;
+       return EMULATE_DONE;
 }
 
-int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr,
+int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
                      bool data)
 {
        struct kvmppc_pte pte;
-       hva_t hva = eaddr;
+       hva_t hva = *eaddr;
 
        vcpu->stat.ld++;
 
-       if (kvmppc_xlate(vcpu, eaddr, data, &pte))
-               goto err;
+       if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+               goto nopte;
+
+       *eaddr = pte.raddr;
 
        hva = kvmppc_pte_to_hva(vcpu, &pte, true);
        if (kvm_is_error_hva(hva))
-               goto err;
+               goto mmio;
 
        if (copy_from_user(ptr, (void __user *)hva, size)) {
                printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva);
-               goto err;
+               goto mmio;
        }
 
-       return 0;
+       return EMULATE_DONE;
 
-err:
+nopte:
        return -ENOENT;
+mmio:
+       return EMULATE_DO_MMIO;
 }
 
 static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
@@ -501,12 +525,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        int page_found = 0;
        struct kvmppc_pte pte;
        bool is_mmio = false;
+       bool dr = (vcpu->arch.msr & MSR_DR) ? true : false;
+       bool ir = (vcpu->arch.msr & MSR_IR) ? true : false;
+       u64 vsid;
 
-       if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) {
-               relocated = (vcpu->arch.msr & MSR_DR);
-       } else {
-               relocated = (vcpu->arch.msr & MSR_IR);
-       }
+       relocated = data ? dr : ir;
 
        /* Resolve real address if translation turned on */
        if (relocated) {
@@ -518,14 +541,25 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                pte.raddr = eaddr & 0xffffffff;
                pte.eaddr = eaddr;
                pte.vpage = eaddr >> 12;
-               switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
-               case 0:
-                       pte.vpage |= VSID_REAL;
-               case MSR_DR:
-                       pte.vpage |= VSID_REAL_DR;
-               case MSR_IR:
-                       pte.vpage |= VSID_REAL_IR;
-               }
+       }
+
+       switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
+       case 0:
+               pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
+               break;
+       case MSR_DR:
+       case MSR_IR:
+               vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
+
+               if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR)
+                       pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
+               else
+                       pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
+               pte.vpage |= vsid;
+
+               if (vsid == -1)
+                       page_found = -EINVAL;
+               break;
        }
 
        if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -540,20 +574,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        if (page_found == -ENOENT) {
                /* Page not found in guest PTE entries */
-               vcpu->arch.dear = vcpu->arch.fault_dear;
-               to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
-               vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
+               vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
+               to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr;
+               vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EPERM) {
                /* Storage protection */
-               vcpu->arch.dear = vcpu->arch.fault_dear;
-               to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
+               vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
+               to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
                to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
-               vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
+               vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EINVAL) {
                /* Page not found in guest SLB */
-               vcpu->arch.dear = vcpu->arch.fault_dear;
+               vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
                kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
        } else if (!is_mmio &&
                   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
@@ -585,11 +619,13 @@ static inline int get_fpr_index(int i)
 }
 
 /* Give up external provider (FPU, Altivec, VSX) */
-static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
+void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 {
        struct thread_struct *t = &current->thread;
        u64 *vcpu_fpr = vcpu->arch.fpr;
+#ifdef CONFIG_VSX
        u64 *vcpu_vsx = vcpu->arch.vsr;
+#endif
        u64 *thread_fpr = (u64*)t->fpr;
        int i;
 
@@ -631,26 +667,35 @@ static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
        kvmppc_recalc_shadow_msr(vcpu);
 }
 
-static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
 {
-       ulong srr0 = vcpu->arch.pc;
+       ulong srr0 = kvmppc_get_pc(vcpu);
+       u32 last_inst = kvmppc_get_last_inst(vcpu);
        int ret;
 
-       /* Need to do paired single emulation? */
-       if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
-               return EMULATE_DONE;
-
-       /* Read out the instruction */
-       ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &vcpu->arch.last_inst, false);
+       ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
        if (ret == -ENOENT) {
                vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1);
                vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0);
                vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0);
                kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
-       } else if(ret == EMULATE_DONE) {
+               return EMULATE_AGAIN;
+       }
+
+       return EMULATE_DONE;
+}
+
+static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+{
+
+       /* Need to do paired single emulation? */
+       if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
+               return EMULATE_DONE;
+
+       /* Read out the instruction */
+       if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
                /* Need to emulate */
                return EMULATE_FAIL;
-       }
 
        return EMULATE_AGAIN;
 }
@@ -661,7 +706,9 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 {
        struct thread_struct *t = &current->thread;
        u64 *vcpu_fpr = vcpu->arch.fpr;
+#ifdef CONFIG_VSX
        u64 *vcpu_vsx = vcpu->arch.vsr;
+#endif
        u64 *thread_fpr = (u64*)t->fpr;
        int i;
 
@@ -674,6 +721,11 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
                return RESUME_GUEST;
        }
 
+       /* We already own the ext */
+       if (vcpu->arch.guest_owned_ext & msr) {
+               return RESUME_GUEST;
+       }
+
 #ifdef DEBUG_EXT
        printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
 #endif
@@ -726,21 +778,33 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        run->ready_for_interrupt_injection = 1;
 #ifdef EXIT_DEBUG
        printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n",
-               exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear,
-               kvmppc_get_dec(vcpu), vcpu->arch.msr);
+               exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu),
+               kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1);
 #elif defined (EXIT_DEBUG_SIMPLE)
        if ((exit_nr != 0x900) && (exit_nr != 0x500))
                printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n",
-                       exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear,
+                       exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu),
                        vcpu->arch.msr);
 #endif
        kvm_resched(vcpu);
        switch (exit_nr) {
        case BOOK3S_INTERRUPT_INST_STORAGE:
                vcpu->stat.pf_instruc++;
+
+#ifdef CONFIG_PPC_BOOK3S_32
+               /* We set segments as unused segments when invalidating them. So
+                * treat the respective fault as segment fault. */
+               if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
+                   == SR_INVALID) {
+                       kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+                       r = RESUME_GUEST;
+                       break;
+               }
+#endif
+
                /* only care about PTEG not found errors, but leave NX alone */
-               if (vcpu->arch.shadow_srr1 & 0x40000000) {
-                       r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr);
+               if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
+                       r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
                        vcpu->stat.sp_instruc++;
                } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
                          (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
@@ -749,37 +813,52 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                         *     so we can't use the NX bit inside the guest. Let's cross our fingers,
                         *     that no guest that needs the dcbz hack does NX.
                         */
-                       kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
+                       kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
+                       r = RESUME_GUEST;
                } else {
-                       vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000;
+                       vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
-                       kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
+                       kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
                        r = RESUME_GUEST;
                }
                break;
        case BOOK3S_INTERRUPT_DATA_STORAGE:
+       {
+               ulong dar = kvmppc_get_fault_dar(vcpu);
                vcpu->stat.pf_storage++;
+
+#ifdef CONFIG_PPC_BOOK3S_32
+               /* We set segments as unused segments when invalidating them. So
+                * treat the respective fault as segment fault. */
+               if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
+                       kvmppc_mmu_map_segment(vcpu, dar);
+                       r = RESUME_GUEST;
+                       break;
+               }
+#endif
+
                /* The only case we need to handle is missing shadow PTEs */
-               if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) {
-                       r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.fault_dear, exit_nr);
+               if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
+                       r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
                } else {
-                       vcpu->arch.dear = vcpu->arch.fault_dear;
-                       to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
+                       vcpu->arch.dear = dar;
+                       to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr;
                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
-                       kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL);
+                       kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL);
                        r = RESUME_GUEST;
                }
                break;
+       }
        case BOOK3S_INTERRUPT_DATA_SEGMENT:
-               if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear) < 0) {
-                       vcpu->arch.dear = vcpu->arch.fault_dear;
+               if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
+                       vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
                        kvmppc_book3s_queue_irqprio(vcpu,
                                BOOK3S_INTERRUPT_DATA_SEGMENT);
                }
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_INST_SEGMENT:
-               if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc) < 0) {
+               if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
                        kvmppc_book3s_queue_irqprio(vcpu,
                                BOOK3S_INTERRUPT_INST_SEGMENT);
                }
@@ -794,19 +873,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                vcpu->stat.ext_intr_exits++;
                r = RESUME_GUEST;
                break;
+       case BOOK3S_INTERRUPT_PERFMON:
+               r = RESUME_GUEST;
+               break;
        case BOOK3S_INTERRUPT_PROGRAM:
        {
                enum emulation_result er;
                ulong flags;
 
 program_interrupt:
-               flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
+               flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
 
                if (vcpu->arch.msr & MSR_PR) {
 #ifdef EXIT_DEBUG
-                       printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst);
+                       printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
 #endif
-                       if ((vcpu->arch.last_inst & 0xff0007ff) !=
+                       if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
                            (INS_DCBZ & 0xfffffff7)) {
                                kvmppc_core_queue_program(vcpu, flags);
                                r = RESUME_GUEST;
@@ -825,22 +907,38 @@ program_interrupt:
                        break;
                case EMULATE_FAIL:
                        printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
-                              __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+                              __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
                        kvmppc_core_queue_program(vcpu, flags);
                        r = RESUME_GUEST;
                        break;
+               case EMULATE_DO_MMIO:
+                       run->exit_reason = KVM_EXIT_MMIO;
+                       r = RESUME_HOST_NV;
+                       break;
                default:
                        BUG();
                }
                break;
        }
        case BOOK3S_INTERRUPT_SYSCALL:
-#ifdef EXIT_DEBUG
-               printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0));
-#endif
-               vcpu->stat.syscall_exits++;
-               kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
-               r = RESUME_GUEST;
+               // XXX make user settable
+               if (vcpu->arch.osi_enabled &&
+                   (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
+                   (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
+                       u64 *gprs = run->osi.gprs;
+                       int i;
+
+                       run->exit_reason = KVM_EXIT_OSI;
+                       for (i = 0; i < 32; i++)
+                               gprs[i] = kvmppc_get_gpr(vcpu, i);
+                       vcpu->arch.osi_needed = 1;
+                       r = RESUME_HOST_NV;
+
+               } else {
+                       vcpu->stat.syscall_exits++;
+                       kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+                       r = RESUME_GUEST;
+               }
                break;
        case BOOK3S_INTERRUPT_FP_UNAVAIL:
        case BOOK3S_INTERRUPT_ALTIVEC:
@@ -869,6 +967,16 @@ program_interrupt:
                }
                break;
        }
+       case BOOK3S_INTERRUPT_ALIGNMENT:
+               if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
+                       to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu,
+                               kvmppc_get_last_inst(vcpu));
+                       vcpu->arch.dear = kvmppc_alignment_dar(vcpu,
+                               kvmppc_get_last_inst(vcpu));
+                       kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+               }
+               r = RESUME_GUEST;
+               break;
        case BOOK3S_INTERRUPT_MACHINE_CHECK:
        case BOOK3S_INTERRUPT_TRACE:
                kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
@@ -877,7 +985,7 @@ program_interrupt:
        default:
                /* Ugh - bork here! What did we get? */
                printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
-                       exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1);
+                       exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
                r = RESUME_HOST;
                BUG();
                break;
@@ -904,7 +1012,7 @@ program_interrupt:
        }
 
 #ifdef EXIT_DEBUG
-       printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r);
+       printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r);
 #endif
 
        return r;
@@ -919,10 +1027,12 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
        int i;
 
-       regs->pc = vcpu->arch.pc;
+       vcpu_load(vcpu);
+
+       regs->pc = kvmppc_get_pc(vcpu);
        regs->cr = kvmppc_get_cr(vcpu);
-       regs->ctr = vcpu->arch.ctr;
-       regs->lr = vcpu->arch.lr;
+       regs->ctr = kvmppc_get_ctr(vcpu);
+       regs->lr = kvmppc_get_lr(vcpu);
        regs->xer = kvmppc_get_xer(vcpu);
        regs->msr = vcpu->arch.msr;
        regs->srr0 = vcpu->arch.srr0;
@@ -939,6 +1049,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
                regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
 
+       vcpu_put(vcpu);
+
        return 0;
 }
 
@@ -946,10 +1058,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
        int i;
 
-       vcpu->arch.pc = regs->pc;
+       vcpu_load(vcpu);
+
+       kvmppc_set_pc(vcpu, regs->pc);
        kvmppc_set_cr(vcpu, regs->cr);
-       vcpu->arch.ctr = regs->ctr;
-       vcpu->arch.lr = regs->lr;
+       kvmppc_set_ctr(vcpu, regs->ctr);
+       kvmppc_set_lr(vcpu, regs->lr);
        kvmppc_set_xer(vcpu, regs->xer);
        kvmppc_set_msr(vcpu, regs->msr);
        vcpu->arch.srr0 = regs->srr0;
@@ -965,6 +1079,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
                kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
 
+       vcpu_put(vcpu);
+
        return 0;
 }
 
@@ -1094,24 +1210,33 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
        struct kvmppc_vcpu_book3s *vcpu_book3s;
        struct kvm_vcpu *vcpu;
-       int err;
+       int err = -ENOMEM;
 
-       vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO,
-                       get_order(sizeof(struct kvmppc_vcpu_book3s)));
-       if (!vcpu_book3s) {
-               err = -ENOMEM;
+       vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s));
+       if (!vcpu_book3s)
                goto out;
-       }
+
+       memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s));
+
+       vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *)
+               kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
+       if (!vcpu_book3s->shadow_vcpu)
+               goto free_vcpu;
 
        vcpu = &vcpu_book3s->vcpu;
        err = kvm_vcpu_init(vcpu, kvm, id);
        if (err)
-               goto free_vcpu;
+               goto free_shadow_vcpu;
 
        vcpu->arch.host_retip = kvm_return_point;
        vcpu->arch.host_msr = mfmsr();
+#ifdef CONFIG_PPC_BOOK3S_64
        /* default to book3s_64 (970fx) */
        vcpu->arch.pvr = 0x3C0301;
+#else
+       /* default to book3s_32 (750) */
+       vcpu->arch.pvr = 0x84202;
+#endif
        kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
        vcpu_book3s->slb_nr = 64;
 
@@ -1119,23 +1244,24 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
        vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
        vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
        vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
+#ifdef CONFIG_PPC_BOOK3S_64
        vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
+#else
+       vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
+#endif
 
        vcpu->arch.shadow_msr = MSR_USER64;
 
-       err = __init_new_context();
+       err = kvmppc_mmu_init(vcpu);
        if (err < 0)
-               goto free_vcpu;
-       vcpu_book3s->context_id = err;
-
-       vcpu_book3s->vsid_max = ((vcpu_book3s->context_id + 1) << USER_ESID_BITS) - 1;
-       vcpu_book3s->vsid_first = vcpu_book3s->context_id << USER_ESID_BITS;
-       vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
+               goto free_shadow_vcpu;
 
        return vcpu;
 
+free_shadow_vcpu:
+       kfree(vcpu_book3s->shadow_vcpu);
 free_vcpu:
-       free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s)));
+       vfree(vcpu_book3s);
 out:
        return ERR_PTR(err);
 }
@@ -1144,9 +1270,9 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 
-       __destroy_context(vcpu_book3s->context_id);
        kvm_vcpu_uninit(vcpu);
-       free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s)));
+       kfree(vcpu_book3s->shadow_vcpu);
+       vfree(vcpu_book3s);
 }
 
 extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -1154,8 +1280,12 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
        int ret;
        struct thread_struct ext_bkp;
+#ifdef CONFIG_ALTIVEC
        bool save_vec = current->thread.used_vr;
+#endif
+#ifdef CONFIG_VSX
        bool save_vsx = current->thread.used_vsr;
+#endif
        ulong ext_msr;
 
        /* No need to go into the guest when all we do is going out */
@@ -1196,6 +1326,10 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        /* XXX we get called with irq disabled - change that! */
        local_irq_enable();
 
+       /* Preload FPU if it's enabled */
+       if (vcpu->arch.msr & MSR_FP)
+               kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+
        ret = __kvmppc_vcpu_entry(kvm_run, vcpu);
 
        local_irq_disable();
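
A note on the new KVM_EXIT_OSI path above, since it changes the
userspace ABI: on exit the kernel has already copied all 32 guest GPRs
into run->osi.gprs and set vcpu->arch.osi_needed; presumably the
matching powerpc.c change (not part of this diff) reloads the GPRs on
the next KVM_RUN.  A minimal sketch of the userspace side; vcpu_fd,
the kvm_run mapping and handle_osi_call() are illustrative
assumptions, not part of the patch:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Emulate one OSI call.  Results patched back into gprs[] are loaded
 * into the guest on the next KVM_RUN.  Stubbed for illustration. */
static void handle_osi_call(__u64 gprs[32])
{
	/* decode the call from gprs[], write return values back */
}

static void vcpu_run_once(int vcpu_fd, struct kvm_run *run)
{
	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
		return;

	switch (run->exit_reason) {
	case KVM_EXIT_OSI:
		handle_osi_call(run->osi.gprs);
		break;
	default:
		/* mmio, halt, ... */
		break;
	}
}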