KVM: ppc: directly insert shadow mappings into the hardware TLB
[safe/jmp/linux-2.6] / arch / powerpc / kvm / booke.c
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License, version 2, as
4  * published by the Free Software Foundation.
5  *
6  * This program is distributed in the hope that it will be useful,
7  * but WITHOUT ANY WARRANTY; without even the implied warranty of
8  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9  * GNU General Public License for more details.
10  *
11  * You should have received a copy of the GNU General Public License
12  * along with this program; if not, write to the Free Software
13  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
14  *
15  * Copyright IBM Corp. 2007
16  *
17  * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18  *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
19  */
20
21 #include <linux/errno.h>
22 #include <linux/err.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/vmalloc.h>
26 #include <linux/fs.h>
27
28 #include <asm/cputable.h>
29 #include <asm/uaccess.h>
30 #include <asm/kvm_ppc.h>
31 #include <asm/cacheflush.h>
32 #include <asm/kvm_44x.h>
33
34 #include "booke.h"
35 #include "44x_tlb.h"
36
37 unsigned long kvmppc_booke_handlers;
38
39 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
40 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
41
42 struct kvm_stats_debugfs_item debugfs_entries[] = {
43         { "mmio",       VCPU_STAT(mmio_exits) },
44         { "dcr",        VCPU_STAT(dcr_exits) },
45         { "sig",        VCPU_STAT(signal_exits) },
46         { "itlb_r",     VCPU_STAT(itlb_real_miss_exits) },
47         { "itlb_v",     VCPU_STAT(itlb_virt_miss_exits) },
48         { "dtlb_r",     VCPU_STAT(dtlb_real_miss_exits) },
49         { "dtlb_v",     VCPU_STAT(dtlb_virt_miss_exits) },
50         { "sysc",       VCPU_STAT(syscall_exits) },
51         { "isi",        VCPU_STAT(isi_exits) },
52         { "dsi",        VCPU_STAT(dsi_exits) },
53         { "inst_emu",   VCPU_STAT(emulated_inst_exits) },
54         { "dec",        VCPU_STAT(dec_exits) },
55         { "ext_intr",   VCPU_STAT(ext_intr_exits) },
56         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
57         { NULL }
58 };
59
60 /* TODO: use vcpu_printf() */
61 void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
62 {
63         int i;
64
65         printk("pc:   %08lx msr:  %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
66         printk("lr:   %08lx ctr:  %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
67         printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
68
69         printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
70
71         for (i = 0; i < 32; i += 4) {
72                 printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
73                        vcpu->arch.gpr[i],
74                        vcpu->arch.gpr[i+1],
75                        vcpu->arch.gpr[i+2],
76                        vcpu->arch.gpr[i+3]);
77         }
78 }
79
80 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
81                                        unsigned int priority)
82 {
83         set_bit(priority, &vcpu->arch.pending_exceptions);
84 }
85
86 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
87 {
88         kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
89 }
90
91 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
92 {
93         kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
94 }
95
96 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
97 {
98         return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
99 }
100
101 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
102                                 struct kvm_interrupt *irq)
103 {
104         kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
105 }
106
107 /* Deliver the interrupt of the corresponding priority, if possible. */
108 static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
109                                         unsigned int priority)
110 {
111         int allowed = 0;
112         ulong msr_mask;
113
114         switch (priority) {
115         case BOOKE_IRQPRIO_PROGRAM:
116         case BOOKE_IRQPRIO_DTLB_MISS:
117         case BOOKE_IRQPRIO_ITLB_MISS:
118         case BOOKE_IRQPRIO_SYSCALL:
119         case BOOKE_IRQPRIO_DATA_STORAGE:
120         case BOOKE_IRQPRIO_INST_STORAGE:
121         case BOOKE_IRQPRIO_FP_UNAVAIL:
122         case BOOKE_IRQPRIO_AP_UNAVAIL:
123         case BOOKE_IRQPRIO_ALIGNMENT:
124                 allowed = 1;
125                 msr_mask = MSR_CE|MSR_ME|MSR_DE;
126                 break;
127         case BOOKE_IRQPRIO_CRITICAL:
128         case BOOKE_IRQPRIO_WATCHDOG:
129                 allowed = vcpu->arch.msr & MSR_CE;
130                 msr_mask = MSR_ME;
131                 break;
132         case BOOKE_IRQPRIO_MACHINE_CHECK:
133                 allowed = vcpu->arch.msr & MSR_ME;
134                 msr_mask = 0;
135                 break;
136         case BOOKE_IRQPRIO_EXTERNAL:
137         case BOOKE_IRQPRIO_DECREMENTER:
138         case BOOKE_IRQPRIO_FIT:
139                 allowed = vcpu->arch.msr & MSR_EE;
140                 msr_mask = MSR_CE|MSR_ME|MSR_DE;
141                 break;
142         case BOOKE_IRQPRIO_DEBUG:
143                 allowed = vcpu->arch.msr & MSR_DE;
144                 msr_mask = MSR_ME;
145                 break;
146         }
147
148         if (allowed) {
149                 vcpu->arch.srr0 = vcpu->arch.pc;
150                 vcpu->arch.srr1 = vcpu->arch.msr;
151                 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
152                 kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
153
154                 clear_bit(priority, &vcpu->arch.pending_exceptions);
155         }
156
157         return allowed;
158 }
159
160 /* Check pending exceptions and deliver one, if possible. */
161 void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
162 {
163         unsigned long *pending = &vcpu->arch.pending_exceptions;
164         unsigned int priority;
165
166         priority = __ffs(*pending);
167         while (priority <= BOOKE_MAX_INTERRUPT) {
168                 if (kvmppc_booke_irqprio_deliver(vcpu, priority))
169                         break;
170
171                 priority = find_next_bit(pending,
172                                          BITS_PER_BYTE * sizeof(*pending),
173                                          priority + 1);
174         }
175 }
176
177 /**
178  * kvmppc_handle_exit
179  *
180  * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
181  */
182 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
183                        unsigned int exit_nr)
184 {
185         enum emulation_result er;
186         int r = RESUME_HOST;
187
188         local_irq_enable();
189
190         run->exit_reason = KVM_EXIT_UNKNOWN;
191         run->ready_for_interrupt_injection = 1;
192
193         switch (exit_nr) {
194         case BOOKE_INTERRUPT_MACHINE_CHECK:
195                 printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
196                 kvmppc_dump_vcpu(vcpu);
197                 r = RESUME_HOST;
198                 break;
199
200         case BOOKE_INTERRUPT_EXTERNAL:
201                 vcpu->stat.ext_intr_exits++;
202                 if (need_resched())
203                         cond_resched();
204                 r = RESUME_GUEST;
205                 break;
206
207         case BOOKE_INTERRUPT_DECREMENTER:
208                 /* Since we switched IVPR back to the host's value, the host
209                  * handled this interrupt the moment we enabled interrupts.
210                  * Now we just offer it a chance to reschedule the guest. */
211
212                 vcpu->stat.dec_exits++;
213                 if (need_resched())
214                         cond_resched();
215                 r = RESUME_GUEST;
216                 break;
217
218         case BOOKE_INTERRUPT_PROGRAM:
219                 if (vcpu->arch.msr & MSR_PR) {
220                         /* Program traps generated by user-level software must be handled
221                          * by the guest kernel. */
222                         vcpu->arch.esr = vcpu->arch.fault_esr;
223                         kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
224                         r = RESUME_GUEST;
225                         break;
226                 }
227
228                 er = kvmppc_emulate_instruction(run, vcpu);
229                 switch (er) {
230                 case EMULATE_DONE:
231                         /* Future optimization: only reload non-volatiles if
232                          * they were actually modified by emulation. */
233                         vcpu->stat.emulated_inst_exits++;
234                         r = RESUME_GUEST_NV;
235                         break;
236                 case EMULATE_DO_DCR:
237                         run->exit_reason = KVM_EXIT_DCR;
238                         vcpu->stat.dcr_exits++;
239                         r = RESUME_HOST;
240                         break;
241                 case EMULATE_FAIL:
242                         /* XXX Deliver Program interrupt to guest. */
243                         printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
244                                __func__, vcpu->arch.pc, vcpu->arch.last_inst);
245                         /* For debugging, encode the failing instruction and
246                          * report it to userspace. */
247                         run->hw.hardware_exit_reason = ~0ULL << 32;
248                         run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
249                         r = RESUME_HOST;
250                         break;
251                 default:
252                         BUG();
253                 }
254                 break;
255
256         case BOOKE_INTERRUPT_FP_UNAVAIL:
257                 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
258                 r = RESUME_GUEST;
259                 break;
260
261         case BOOKE_INTERRUPT_DATA_STORAGE:
262                 vcpu->arch.dear = vcpu->arch.fault_dear;
263                 vcpu->arch.esr = vcpu->arch.fault_esr;
264                 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
265                 vcpu->stat.dsi_exits++;
266                 r = RESUME_GUEST;
267                 break;
268
269         case BOOKE_INTERRUPT_INST_STORAGE:
270                 vcpu->arch.esr = vcpu->arch.fault_esr;
271                 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
272                 vcpu->stat.isi_exits++;
273                 r = RESUME_GUEST;
274                 break;
275
276         case BOOKE_INTERRUPT_SYSCALL:
277                 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
278                 vcpu->stat.syscall_exits++;
279                 r = RESUME_GUEST;
280                 break;
281
282         /* XXX move to a 440-specific file. */
283         case BOOKE_INTERRUPT_DTLB_MISS: {
284                 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
285                 struct kvmppc_44x_tlbe *gtlbe;
286                 unsigned long eaddr = vcpu->arch.fault_dear;
287                 int gtlb_index;
288                 gfn_t gfn;
289
290                 /* Check the guest TLB. */
291                 gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
292                 if (gtlb_index < 0) {
293                         /* The guest didn't have a mapping for it. */
294                         kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
295                         vcpu->arch.dear = vcpu->arch.fault_dear;
296                         vcpu->arch.esr = vcpu->arch.fault_esr;
297                         vcpu->stat.dtlb_real_miss_exits++;
298                         r = RESUME_GUEST;
299                         break;
300                 }
301
302                 gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
303                 vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
304                 gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
305
306                 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
307                         /* The guest TLB had a mapping, but the shadow TLB
308                          * didn't, and it is RAM. This could be because:
309                          * a) the entry is mapping the host kernel, or
310                          * b) the guest used a large mapping which we're faking
311                          * Either way, we need to satisfy the fault without
312                          * invoking the guest. */
313                         kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
314                                        gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
315                         vcpu->stat.dtlb_virt_miss_exits++;
316                         r = RESUME_GUEST;
317                 } else {
318                         /* Guest has mapped and accessed a page which is not
319                          * actually RAM. */
320                         r = kvmppc_emulate_mmio(run, vcpu);
321                         vcpu->stat.mmio_exits++;
322                 }
323
324                 break;
325         }
326
327         /* XXX move to a 440-specific file. */
328         case BOOKE_INTERRUPT_ITLB_MISS: {
329                 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
330                 struct kvmppc_44x_tlbe *gtlbe;
331                 unsigned long eaddr = vcpu->arch.pc;
332                 gpa_t gpaddr;
333                 gfn_t gfn;
334                 int gtlb_index;
335
336                 r = RESUME_GUEST;
337
338                 /* Check the guest TLB. */
339                 gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
340                 if (gtlb_index < 0) {
341                         /* The guest didn't have a mapping for it. */
342                         kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
343                         vcpu->stat.itlb_real_miss_exits++;
344                         break;
345                 }
346
347                 vcpu->stat.itlb_virt_miss_exits++;
348
349                 gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
350                 gpaddr = tlb_xlate(gtlbe, eaddr);
351                 gfn = gpaddr >> PAGE_SHIFT;
352
353                 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
354                         /* The guest TLB had a mapping, but the shadow TLB
355                          * didn't. This could be because:
356                          * a) the entry is mapping the host kernel, or
357                          * b) the guest used a large mapping which we're faking
358                          * Either way, we need to satisfy the fault without
359                          * invoking the guest. */
360                         kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
361                                        gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
362                 } else {
363                         /* Guest mapped and leaped at non-RAM! */
364                         kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
365                 }
366
367                 break;
368         }
369
370         case BOOKE_INTERRUPT_DEBUG: {
371                 u32 dbsr;
372
373                 vcpu->arch.pc = mfspr(SPRN_CSRR0);
374
375                 /* clear IAC events in DBSR register */
376                 dbsr = mfspr(SPRN_DBSR);
377                 dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
378                 mtspr(SPRN_DBSR, dbsr);
379
380                 run->exit_reason = KVM_EXIT_DEBUG;
381                 r = RESUME_HOST;
382                 break;
383         }
384
385         default:
386                 printk(KERN_EMERG "exit_nr %d\n", exit_nr);
387                 BUG();
388         }
389
390         local_irq_disable();
391
392         kvmppc_core_deliver_interrupts(vcpu);
393
394         if (!(r & RESUME_HOST)) {
395                 /* To avoid clobbering exit_reason, only check for signals if
396                  * we aren't already exiting to userspace for some other
397                  * reason. */
398                 if (signal_pending(current)) {
399                         run->exit_reason = KVM_EXIT_INTR;
400                         r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
401                         vcpu->stat.signal_exits++;
402                 }
403         }
404
405         return r;
406 }
407
408 /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
409 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
410 {
411         vcpu->arch.pc = 0;
412         vcpu->arch.msr = 0;
413         vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
414
415         vcpu->arch.shadow_pid = 1;
416
417         /* Eye-catching number so we know if the guest takes an interrupt
418          * before it's programmed its own IVPR. */
419         vcpu->arch.ivpr = 0x55550000;
420
421         return kvmppc_core_vcpu_setup(vcpu);
422 }
423
424 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
425 {
426         int i;
427
428         regs->pc = vcpu->arch.pc;
429         regs->cr = vcpu->arch.cr;
430         regs->ctr = vcpu->arch.ctr;
431         regs->lr = vcpu->arch.lr;
432         regs->xer = vcpu->arch.xer;
433         regs->msr = vcpu->arch.msr;
434         regs->srr0 = vcpu->arch.srr0;
435         regs->srr1 = vcpu->arch.srr1;
436         regs->pid = vcpu->arch.pid;
437         regs->sprg0 = vcpu->arch.sprg0;
438         regs->sprg1 = vcpu->arch.sprg1;
439         regs->sprg2 = vcpu->arch.sprg2;
440         regs->sprg3 = vcpu->arch.sprg3;
441         regs->sprg5 = vcpu->arch.sprg4;
442         regs->sprg6 = vcpu->arch.sprg5;
443         regs->sprg7 = vcpu->arch.sprg6;
444
445         for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
446                 regs->gpr[i] = vcpu->arch.gpr[i];
447
448         return 0;
449 }
450
451 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
452 {
453         int i;
454
455         vcpu->arch.pc = regs->pc;
456         vcpu->arch.cr = regs->cr;
457         vcpu->arch.ctr = regs->ctr;
458         vcpu->arch.lr = regs->lr;
459         vcpu->arch.xer = regs->xer;
460         kvmppc_set_msr(vcpu, regs->msr);
461         vcpu->arch.srr0 = regs->srr0;
462         vcpu->arch.srr1 = regs->srr1;
463         vcpu->arch.sprg0 = regs->sprg0;
464         vcpu->arch.sprg1 = regs->sprg1;
465         vcpu->arch.sprg2 = regs->sprg2;
466         vcpu->arch.sprg3 = regs->sprg3;
467         vcpu->arch.sprg5 = regs->sprg4;
468         vcpu->arch.sprg6 = regs->sprg5;
469         vcpu->arch.sprg7 = regs->sprg6;
470
471         for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
472                 vcpu->arch.gpr[i] = regs->gpr[i];
473
474         return 0;
475 }
476
477 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
478                                   struct kvm_sregs *sregs)
479 {
480         return -ENOTSUPP;
481 }
482
483 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
484                                   struct kvm_sregs *sregs)
485 {
486         return -ENOTSUPP;
487 }
488
489 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
490 {
491         return -ENOTSUPP;
492 }
493
494 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
495 {
496         return -ENOTSUPP;
497 }
498
/* Address translation request: handed straight to
 * kvmppc_core_vcpu_translate(), whose return value is passed back. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                  struct kvm_translation *tr)
{
        return kvmppc_core_vcpu_translate(vcpu, tr);
}
504
505 int kvmppc_booke_init(void)
506 {
507         unsigned long ivor[16];
508         unsigned long max_ivor = 0;
509         int i;
510
511         /* We install our own exception handlers by hijacking IVPR. IVPR must
512          * be 16-bit aligned, so we need a 64KB allocation. */
513         kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
514                                                  VCPU_SIZE_ORDER);
515         if (!kvmppc_booke_handlers)
516                 return -ENOMEM;
517
518         /* XXX make sure our handlers are smaller than Linux's */
519
520         /* Copy our interrupt handlers to match host IVORs. That way we don't
521          * have to swap the IVORs on every guest/host transition. */
522         ivor[0] = mfspr(SPRN_IVOR0);
523         ivor[1] = mfspr(SPRN_IVOR1);
524         ivor[2] = mfspr(SPRN_IVOR2);
525         ivor[3] = mfspr(SPRN_IVOR3);
526         ivor[4] = mfspr(SPRN_IVOR4);
527         ivor[5] = mfspr(SPRN_IVOR5);
528         ivor[6] = mfspr(SPRN_IVOR6);
529         ivor[7] = mfspr(SPRN_IVOR7);
530         ivor[8] = mfspr(SPRN_IVOR8);
531         ivor[9] = mfspr(SPRN_IVOR9);
532         ivor[10] = mfspr(SPRN_IVOR10);
533         ivor[11] = mfspr(SPRN_IVOR11);
534         ivor[12] = mfspr(SPRN_IVOR12);
535         ivor[13] = mfspr(SPRN_IVOR13);
536         ivor[14] = mfspr(SPRN_IVOR14);
537         ivor[15] = mfspr(SPRN_IVOR15);
538
539         for (i = 0; i < 16; i++) {
540                 if (ivor[i] > max_ivor)
541                         max_ivor = ivor[i];
542
543                 memcpy((void *)kvmppc_booke_handlers + ivor[i],
544                        kvmppc_handlers_start + i * kvmppc_handler_len,
545                        kvmppc_handler_len);
546         }
547         flush_icache_range(kvmppc_booke_handlers,
548                            kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
549
550         return 0;
551 }
552
553 void __exit kvmppc_booke_exit(void)
554 {
555         free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
556         kvm_exit();
557 }