X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=arch%2Fpowerpc%2Fkvm%2Fbook3s.c;h=397701d39ae7571ab04b903191aa800bcd26fbd6;hb=f7bc74e1c306636a659a04805474b2f8fcbd1f7e;hp=09ba8dbaababf0d8750e74a80ea1440791d2cebb;hpb=992b5b29b5ae254c416c62faf98d59a6cf970027;p=safe%2Fjmp%2Flinux-2.6 diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 09ba8db..397701d 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -26,13 +26,26 @@ #include #include #include +#include #include #include +#include #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU /* #define EXIT_DEBUG */ /* #define EXIT_DEBUG_SIMPLE */ +/* #define DEBUG_EXT */ + +static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, + ulong msr); + +/* Some compatibility defines */ +#ifdef CONFIG_PPC_BOOK3S_32 +#define MSR_USER32 MSR_USER +#define MSR_USER64 MSR_USER +#define HW_PAGE_SIZE PAGE_SIZE +#endif struct kvm_stats_debugfs_item debugfs_entries[] = { { "exits", VCPU_STAT(sum_exits) }, @@ -65,14 +78,30 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb)); - get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max; +#ifdef CONFIG_PPC_BOOK3S_64 + memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb)); + memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, + sizeof(get_paca()->shadow_vcpu)); + to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max; +#endif + +#ifdef CONFIG_PPC_BOOK3S_32 + current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; +#endif } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { - memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb)); - to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max; +#ifdef CONFIG_PPC_BOOK3S_64 + memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb)); + memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, + sizeof(get_paca()->shadow_vcpu)); + to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max; +#endif + + kvmppc_giveup_ext(vcpu, MSR_FP); + kvmppc_giveup_ext(vcpu, MSR_VEC); + kvmppc_giveup_ext(vcpu, MSR_VSX); } #if defined(EXIT_DEBUG) @@ -83,6 +112,23 @@ static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) } #endif +static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.shadow_msr = vcpu->arch.msr; + /* Guest MSR values */ + vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | + MSR_BE | MSR_DE; + /* Process MSR values */ + vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | + MSR_EE; + /* External providers the guest reserved */ + vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext); + /* 64-bit Process MSR values */ +#ifdef CONFIG_PPC_BOOK3S_64 + vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV; +#endif +} + void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) { ulong old_msr = vcpu->arch.msr; @@ -90,12 +136,10 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) #ifdef EXIT_DEBUG printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); #endif + msr &= to_book3s(vcpu)->msr_mask; vcpu->arch.msr = msr; - vcpu->arch.shadow_msr = msr | MSR_USER32; - vcpu->arch.shadow_msr &= ( MSR_VEC | MSR_VSX | MSR_FP | MSR_FE0 | - MSR_USER64 | MSR_SE | MSR_BE | MSR_DE | - MSR_FE1); + kvmppc_recalc_shadow_msr(vcpu); if (msr & (MSR_WE|MSR_POW)) { if (!vcpu->arch.pending_exceptions) { @@ -104,18 +148,22 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) } } - if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) || - (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) { + if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) != + (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { kvmppc_mmu_flush_segments(vcpu); - kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); } + + /* Preload FPU if it's enabled */ + if (vcpu->arch.msr & MSR_FP) + kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); } void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { - vcpu->arch.srr0 = vcpu->arch.pc; + vcpu->arch.srr0 = kvmppc_get_pc(vcpu); vcpu->arch.srr1 = vcpu->arch.msr | flags; - vcpu->arch.pc = to_book3s(vcpu)->hior + vec; + kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec); vcpu->arch.mmu.reset_msr(vcpu); } @@ -164,8 +212,9 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) } -void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) +void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) { + to_book3s(vcpu)->prog_flags = flags; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM); } @@ -190,10 +239,17 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); } +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) +{ + kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); +} + int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int deliver = 1; int vec = 0; + ulong flags = 0ULL; switch (priority) { case BOOK3S_IRQPRIO_DECREMENTER: @@ -227,6 +283,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) break; case BOOK3S_IRQPRIO_PROGRAM: vec = BOOK3S_INTERRUPT_PROGRAM; + flags = to_book3s(vcpu)->prog_flags; break; case BOOK3S_IRQPRIO_VSX: vec = BOOK3S_INTERRUPT_VSX; @@ -257,7 +314,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) #endif if (deliver) - kvmppc_inject_interrupt(vcpu, vec, 0ULL); + kvmppc_inject_interrupt(vcpu, vec, flags); return deliver; } @@ -272,7 +329,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); #endif priority = __ffs(*pending); - while (priority <= (sizeof(unsigned int) * 8)) { + while (priority < BOOK3S_IRQPRIO_MAX) { if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && (priority != BOOK3S_IRQPRIO_DECREMENTER)) { /* DEC interrupts get cleared by mtdec */ @@ -290,11 +347,14 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) { vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; vcpu->arch.pvr = pvr; +#ifdef CONFIG_PPC_BOOK3S_64 if ((pvr >= 0x330000) && (pvr < 0x70330000)) { kvmppc_mmu_book3s_64_init(vcpu); to_book3s(vcpu)->hior = 0xfff00000; to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; - } else { + } else +#endif + { kvmppc_mmu_book3s_32_init(vcpu); to_book3s(vcpu)->hior = 0; to_book3s(vcpu)->msr_mask = 0xffffffffULL; @@ -307,6 +367,15 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) !strcmp(cur_cpu_spec->platform, "ppc970")) vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; + /* Cell performs badly if MSR_FEx are set. So let's hope nobody + really needs them in a VM on Cell and force disable them. */ + if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) + to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); + +#ifdef CONFIG_PPC_BOOK3S_32 + /* 32 bit Book3S always has 32 byte dcbz */ + vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; +#endif } /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To @@ -320,34 +389,29 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) */ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) { - bool touched = false; - hva_t hpage; + struct page *hpage; + u64 hpage_offset; u32 *page; int i; - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpage)) + hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); + if (is_error_page(hpage)) return; - hpage |= pte->raddr & ~PAGE_MASK; - hpage &= ~0xFFFULL; + hpage_offset = pte->raddr & ~PAGE_MASK; + hpage_offset &= ~0xFFFULL; + hpage_offset /= 4; - page = vmalloc(HW_PAGE_SIZE); + get_page(hpage); + page = kmap_atomic(hpage, KM_USER0); - if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE)) - goto out; - - for (i=0; i < HW_PAGE_SIZE / 4; i++) - if ((page[i] & 0xff0007ff) == INS_DCBZ) { - page[i] &= 0xfffffff7; // reserved instruction, so we trap - touched = true; - } - - if (touched) - copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE); + /* patch dcbz into reserved instruction, so we trap */ + for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) + if ((page[i] & 0xff0007ff) == INS_DCBZ) + page[i] &= 0xfffffff7; -out: - vfree(page); + kunmap_atomic(page, KM_USER0); + put_page(hpage); } static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, @@ -361,15 +425,7 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, } else { pte->eaddr = eaddr; pte->raddr = eaddr & 0xffffffff; - pte->vpage = eaddr >> 12; - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { - case 0: - pte->vpage |= VSID_REAL; - case MSR_DR: - pte->vpage |= VSID_REAL_DR; - case MSR_IR: - pte->vpage |= VSID_REAL_IR; - } + pte->vpage = VSID_REAL | eaddr >> 12; pte->may_read = true; pte->may_write = true; pte->may_execute = true; @@ -404,55 +460,55 @@ err: return kvmppc_bad_hva(); } -int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr) +int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) { struct kvmppc_pte pte; - hva_t hva = eaddr; vcpu->stat.st++; - if (kvmppc_xlate(vcpu, eaddr, false, &pte)) - goto err; + if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) + return -ENOENT; - hva = kvmppc_pte_to_hva(vcpu, &pte, false); - if (kvm_is_error_hva(hva)) - goto err; + *eaddr = pte.raddr; - if (copy_to_user((void __user *)hva, ptr, size)) { - printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva); - goto err; - } + if (!pte.may_write) + return -EPERM; - return 0; + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; -err: - return -ENOENT; + return EMULATE_DONE; } -int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, +int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { struct kvmppc_pte pte; - hva_t hva = eaddr; + hva_t hva = *eaddr; vcpu->stat.ld++; - if (kvmppc_xlate(vcpu, eaddr, data, &pte)) - goto err; + if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) + goto nopte; + + *eaddr = pte.raddr; hva = kvmppc_pte_to_hva(vcpu, &pte, true); if (kvm_is_error_hva(hva)) - goto err; + goto mmio; if (copy_from_user(ptr, (void __user *)hva, size)) { printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); - goto err; + goto mmio; } - return 0; + return EMULATE_DONE; -err: +nopte: return -ENOENT; +mmio: + return EMULATE_DO_MMIO; } static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) @@ -469,12 +525,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, int page_found = 0; struct kvmppc_pte pte; bool is_mmio = false; + bool dr = (vcpu->arch.msr & MSR_DR) ? true : false; + bool ir = (vcpu->arch.msr & MSR_IR) ? true : false; + u64 vsid; - if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) { - relocated = (vcpu->arch.msr & MSR_DR); - } else { - relocated = (vcpu->arch.msr & MSR_IR); - } + relocated = data ? dr : ir; /* Resolve real address if translation turned on */ if (relocated) { @@ -486,14 +541,25 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.raddr = eaddr & 0xffffffff; pte.eaddr = eaddr; pte.vpage = eaddr >> 12; - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { - case 0: - pte.vpage |= VSID_REAL; - case MSR_DR: - pte.vpage |= VSID_REAL_DR; - case MSR_IR: - pte.vpage |= VSID_REAL_IR; - } + } + + switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + case 0: + pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); + break; + case MSR_DR: + case MSR_IR: + vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); + + if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR) + pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); + else + pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); + pte.vpage |= vsid; + + if (vsid == -1) + page_found = -EINVAL; + break; } if (vcpu->arch.mmu.is_dcbz32(vcpu) && @@ -508,20 +574,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (page_found == -ENOENT) { /* Page not found in guest PTE entries */ - vcpu->arch.dear = vcpu->arch.fault_dear; - to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; - vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL); + vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); + to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; + vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EPERM) { /* Storage protection */ - vcpu->arch.dear = vcpu->arch.fault_dear; - to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; + vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); + to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; - vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL); + vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EINVAL) { /* Page not found in guest SLB */ - vcpu->arch.dear = vcpu->arch.fault_dear; + vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); } else if (!is_mmio && kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { @@ -544,6 +610,163 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } +static inline int get_fpr_index(int i) +{ +#ifdef CONFIG_VSX + i *= 2; +#endif + return i; +} + +/* Give up external provider (FPU, Altivec, VSX) */ +void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) +{ + struct thread_struct *t = ¤t->thread; + u64 *vcpu_fpr = vcpu->arch.fpr; +#ifdef CONFIG_VSX + u64 *vcpu_vsx = vcpu->arch.vsr; +#endif + u64 *thread_fpr = (u64*)t->fpr; + int i; + + if (!(vcpu->arch.guest_owned_ext & msr)) + return; + +#ifdef DEBUG_EXT + printk(KERN_INFO "Giving up ext 0x%lx\n", msr); +#endif + + switch (msr) { + case MSR_FP: + giveup_fpu(current); + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) + vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; + + vcpu->arch.fpscr = t->fpscr.val; + break; + case MSR_VEC: +#ifdef CONFIG_ALTIVEC + giveup_altivec(current); + memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); + vcpu->arch.vscr = t->vscr; +#endif + break; + case MSR_VSX: +#ifdef CONFIG_VSX + __giveup_vsx(current); + for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) + vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; +#endif + break; + default: + BUG(); + } + + vcpu->arch.guest_owned_ext &= ~msr; + current->thread.regs->msr &= ~msr; + kvmppc_recalc_shadow_msr(vcpu); +} + +static int kvmppc_read_inst(struct kvm_vcpu *vcpu) +{ + ulong srr0 = kvmppc_get_pc(vcpu); + u32 last_inst = kvmppc_get_last_inst(vcpu); + int ret; + + ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); + if (ret == -ENOENT) { + vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1); + vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0); + vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); + return EMULATE_AGAIN; + } + + return EMULATE_DONE; +} + +static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) +{ + + /* Need to do paired single emulation? */ + if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) + return EMULATE_DONE; + + /* Read out the instruction */ + if (kvmppc_read_inst(vcpu) == EMULATE_DONE) + /* Need to emulate */ + return EMULATE_FAIL; + + return EMULATE_AGAIN; +} + +/* Handle external providers (FPU, Altivec, VSX) */ +static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, + ulong msr) +{ + struct thread_struct *t = ¤t->thread; + u64 *vcpu_fpr = vcpu->arch.fpr; +#ifdef CONFIG_VSX + u64 *vcpu_vsx = vcpu->arch.vsr; +#endif + u64 *thread_fpr = (u64*)t->fpr; + int i; + + /* When we have paired singles, we emulate in software */ + if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) + return RESUME_GUEST; + + if (!(vcpu->arch.msr & msr)) { + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + return RESUME_GUEST; + } + + /* We already own the ext */ + if (vcpu->arch.guest_owned_ext & msr) { + return RESUME_GUEST; + } + +#ifdef DEBUG_EXT + printk(KERN_INFO "Loading up ext 0x%lx\n", msr); +#endif + + current->thread.regs->msr |= msr; + + switch (msr) { + case MSR_FP: + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) + thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; + + t->fpscr.val = vcpu->arch.fpscr; + t->fpexc_mode = 0; + kvmppc_load_up_fpu(); + break; + case MSR_VEC: +#ifdef CONFIG_ALTIVEC + memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); + t->vscr = vcpu->arch.vscr; + t->vrsave = -1; + kvmppc_load_up_altivec(); +#endif + break; + case MSR_VSX: +#ifdef CONFIG_VSX + for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) + thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; + kvmppc_load_up_vsx(); +#endif + break; + default: + BUG(); + } + + vcpu->arch.guest_owned_ext |= msr; + + kvmppc_recalc_shadow_msr(vcpu); + + return RESUME_GUEST; +} + int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { @@ -555,21 +778,33 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->ready_for_interrupt_injection = 1; #ifdef EXIT_DEBUG printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", - exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, - kvmppc_get_dec(vcpu), vcpu->arch.msr); + exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), + kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1); #elif defined (EXIT_DEBUG_SIMPLE) if ((exit_nr != 0x900) && (exit_nr != 0x500)) printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", - exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, + exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), vcpu->arch.msr); #endif kvm_resched(vcpu); switch (exit_nr) { case BOOK3S_INTERRUPT_INST_STORAGE: vcpu->stat.pf_instruc++; + +#ifdef CONFIG_PPC_BOOK3S_32 + /* We set segments as unused segments when invalidating them. So + * treat the respective fault as segment fault. */ + if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] + == SR_INVALID) { + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); + r = RESUME_GUEST; + break; + } +#endif + /* only care about PTEG not found errors, but leave NX alone */ - if (vcpu->arch.shadow_msr & 0x40000000) { - r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr); + if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) { + r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); vcpu->stat.sp_instruc++; } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { @@ -578,37 +813,52 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * so we can't use the NX bit inside the guest. Let's cross our fingers, * that no guest that needs the dcbz hack does NX. */ - kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); + kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); + r = RESUME_GUEST; } else { - vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x58000000); + vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); + kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); r = RESUME_GUEST; } break; case BOOK3S_INTERRUPT_DATA_STORAGE: + { + ulong dar = kvmppc_get_fault_dar(vcpu); vcpu->stat.pf_storage++; + +#ifdef CONFIG_PPC_BOOK3S_32 + /* We set segments as unused segments when invalidating them. So + * treat the respective fault as segment fault. */ + if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) { + kvmppc_mmu_map_segment(vcpu, dar); + r = RESUME_GUEST; + break; + } +#endif + /* The only case we need to handle is missing shadow PTEs */ - if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) { - r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.fault_dear, exit_nr); + if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) { + r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); } else { - vcpu->arch.dear = vcpu->arch.fault_dear; - to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; + vcpu->arch.dear = dar; + to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL); + kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL); r = RESUME_GUEST; } break; + } case BOOK3S_INTERRUPT_DATA_SEGMENT: - if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear) < 0) { - vcpu->arch.dear = vcpu->arch.fault_dear; + if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { + vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_SEGMENT); } r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_INST_SEGMENT: - if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc) < 0) { + if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) { kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_SEGMENT); } @@ -623,17 +873,24 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; + case BOOK3S_INTERRUPT_PERFMON: + r = RESUME_GUEST; + break; case BOOK3S_INTERRUPT_PROGRAM: { enum emulation_result er; + ulong flags; + +program_interrupt: + flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull; if (vcpu->arch.msr & MSR_PR) { #ifdef EXIT_DEBUG - printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst); + printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); #endif - if ((vcpu->arch.last_inst & 0xff0007ff) != + if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) { - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; break; } @@ -645,36 +902,90 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case EMULATE_DONE: r = RESUME_GUEST_NV; break; + case EMULATE_AGAIN: + r = RESUME_GUEST; + break; case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, vcpu->arch.pc, vcpu->arch.last_inst); - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; break; + case EMULATE_DO_MMIO: + run->exit_reason = KVM_EXIT_MMIO; + r = RESUME_HOST_NV; + break; default: BUG(); } break; } case BOOK3S_INTERRUPT_SYSCALL: -#ifdef EXIT_DEBUG - printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0)); -#endif - vcpu->stat.syscall_exits++; - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - r = RESUME_GUEST; + // XXX make user settable + if (vcpu->arch.osi_enabled && + (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && + (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { + u64 *gprs = run->osi.gprs; + int i; + + run->exit_reason = KVM_EXIT_OSI; + for (i = 0; i < 32; i++) + gprs[i] = kvmppc_get_gpr(vcpu, i); + vcpu->arch.osi_needed = 1; + r = RESUME_HOST_NV; + + } else { + vcpu->stat.syscall_exits++; + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + r = RESUME_GUEST; + } break; - case BOOK3S_INTERRUPT_MACHINE_CHECK: case BOOK3S_INTERRUPT_FP_UNAVAIL: - case BOOK3S_INTERRUPT_TRACE: case BOOK3S_INTERRUPT_ALTIVEC: case BOOK3S_INTERRUPT_VSX: + { + int ext_msr = 0; + + switch (exit_nr) { + case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; + case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; + case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; + } + + switch (kvmppc_check_ext(vcpu, exit_nr)) { + case EMULATE_DONE: + /* everything ok - let's enable the ext */ + r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); + break; + case EMULATE_FAIL: + /* we need to emulate this instruction */ + goto program_interrupt; + break; + default: + /* nothing to worry about - go again */ + break; + } + break; + } + case BOOK3S_INTERRUPT_ALIGNMENT: + if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { + to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu, + kvmppc_get_last_inst(vcpu)); + vcpu->arch.dear = kvmppc_alignment_dar(vcpu, + kvmppc_get_last_inst(vcpu)); + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + } + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_MACHINE_CHECK: + case BOOK3S_INTERRUPT_TRACE: kvmppc_book3s_queue_irqprio(vcpu, exit_nr); r = RESUME_GUEST; break; default: /* Ugh - bork here! What did we get? */ - printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, vcpu->arch.pc, vcpu->arch.shadow_msr); + printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", + exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1); r = RESUME_HOST; BUG(); break; @@ -701,7 +1012,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } #ifdef EXIT_DEBUG - printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r); + printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r); #endif return r; @@ -716,10 +1027,12 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; - regs->pc = vcpu->arch.pc; + vcpu_load(vcpu); + + regs->pc = kvmppc_get_pc(vcpu); regs->cr = kvmppc_get_cr(vcpu); - regs->ctr = vcpu->arch.ctr; - regs->lr = vcpu->arch.lr; + regs->ctr = kvmppc_get_ctr(vcpu); + regs->lr = kvmppc_get_lr(vcpu); regs->xer = kvmppc_get_xer(vcpu); regs->msr = vcpu->arch.msr; regs->srr0 = vcpu->arch.srr0; @@ -736,6 +1049,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); + vcpu_put(vcpu); + return 0; } @@ -743,10 +1058,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; - vcpu->arch.pc = regs->pc; + vcpu_load(vcpu); + + kvmppc_set_pc(vcpu, regs->pc); kvmppc_set_cr(vcpu, regs->cr); - vcpu->arch.ctr = regs->ctr; - vcpu->arch.lr = regs->lr; + kvmppc_set_ctr(vcpu, regs->ctr); + kvmppc_set_lr(vcpu, regs->lr); kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.srr0 = regs->srr0; @@ -762,6 +1079,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); + vcpu_put(vcpu); + return 0; } @@ -853,7 +1172,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_vcpu *vcpu; ulong ga, ga_end; int is_dirty = 0; - int r, n; + int r; + unsigned long n; mutex_lock(&kvm->slots_lock); @@ -871,7 +1191,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, kvm_for_each_vcpu(n, vcpu, kvm) kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); } @@ -890,24 +1210,33 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_book3s *vcpu_book3s; struct kvm_vcpu *vcpu; - int err; + int err = -ENOMEM; - vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, - get_order(sizeof(struct kvmppc_vcpu_book3s))); - if (!vcpu_book3s) { - err = -ENOMEM; + vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s)); + if (!vcpu_book3s) goto out; - } + + memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s)); + + vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *) + kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); + if (!vcpu_book3s->shadow_vcpu) + goto free_vcpu; vcpu = &vcpu_book3s->vcpu; err = kvm_vcpu_init(vcpu, kvm, id); if (err) - goto free_vcpu; + goto free_shadow_vcpu; vcpu->arch.host_retip = kvm_return_point; vcpu->arch.host_msr = mfmsr(); +#ifdef CONFIG_PPC_BOOK3S_64 /* default to book3s_64 (970fx) */ vcpu->arch.pvr = 0x3C0301; +#else + /* default to book3s_32 (750) */ + vcpu->arch.pvr = 0x84202; +#endif kvmppc_set_pvr(vcpu, vcpu->arch.pvr); vcpu_book3s->slb_nr = 64; @@ -915,22 +1244,24 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; +#ifdef CONFIG_PPC_BOOK3S_64 + vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; +#else + vcpu->arch.rmcall = (ulong)kvmppc_rmcall; +#endif vcpu->arch.shadow_msr = MSR_USER64; - err = __init_new_context(); + err = kvmppc_mmu_init(vcpu); if (err < 0) - goto free_vcpu; - vcpu_book3s->context_id = err; - - vcpu_book3s->vsid_max = ((vcpu_book3s->context_id + 1) << USER_ESID_BITS) - 1; - vcpu_book3s->vsid_first = vcpu_book3s->context_id << USER_ESID_BITS; - vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; + goto free_shadow_vcpu; return vcpu; +free_shadow_vcpu: + kfree(vcpu_book3s->shadow_vcpu); free_vcpu: - free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); + vfree(vcpu_book3s); out: return ERR_PTR(err); } @@ -939,15 +1270,23 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); - __destroy_context(vcpu_book3s->context_id); kvm_vcpu_uninit(vcpu); - free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); + kfree(vcpu_book3s->shadow_vcpu); + vfree(vcpu_book3s); } extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret; + struct thread_struct ext_bkp; +#ifdef CONFIG_ALTIVEC + bool save_vec = current->thread.used_vr; +#endif +#ifdef CONFIG_VSX + bool save_vsx = current->thread.used_vsr; +#endif + ulong ext_msr; /* No need to go into the guest when all we do is going out */ if (signal_pending(current)) { @@ -955,13 +1294,72 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return -EINTR; } + /* Save FPU state in stack */ + if (current->thread.regs->msr & MSR_FP) + giveup_fpu(current); + memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr)); + ext_bkp.fpscr = current->thread.fpscr; + ext_bkp.fpexc_mode = current->thread.fpexc_mode; + +#ifdef CONFIG_ALTIVEC + /* Save Altivec state in stack */ + if (save_vec) { + if (current->thread.regs->msr & MSR_VEC) + giveup_altivec(current); + memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr)); + ext_bkp.vscr = current->thread.vscr; + ext_bkp.vrsave = current->thread.vrsave; + } + ext_bkp.used_vr = current->thread.used_vr; +#endif + +#ifdef CONFIG_VSX + /* Save VSX state in stack */ + if (save_vsx && (current->thread.regs->msr & MSR_VSX)) + __giveup_vsx(current); + ext_bkp.used_vsr = current->thread.used_vsr; +#endif + + /* Remember the MSR with disabled extensions */ + ext_msr = current->thread.regs->msr; + /* XXX we get called with irq disabled - change that! */ local_irq_enable(); + /* Preload FPU if it's enabled */ + if (vcpu->arch.msr & MSR_FP) + kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); + ret = __kvmppc_vcpu_entry(kvm_run, vcpu); local_irq_disable(); + current->thread.regs->msr = ext_msr; + + /* Make sure we save the guest FPU/Altivec/VSX state */ + kvmppc_giveup_ext(vcpu, MSR_FP); + kvmppc_giveup_ext(vcpu, MSR_VEC); + kvmppc_giveup_ext(vcpu, MSR_VSX); + + /* Restore FPU state from stack */ + memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr)); + current->thread.fpscr = ext_bkp.fpscr; + current->thread.fpexc_mode = ext_bkp.fpexc_mode; + +#ifdef CONFIG_ALTIVEC + /* Restore Altivec state from stack */ + if (save_vec && current->thread.used_vr) { + memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr)); + current->thread.vscr = ext_bkp.vscr; + current->thread.vrsave= ext_bkp.vrsave; + } + current->thread.used_vr = ext_bkp.used_vr; +#endif + +#ifdef CONFIG_VSX + current->thread.used_vsr = ext_bkp.used_vsr; +#endif + return ret; }