KVM: skip_emulated_instruction() decode instruction if size is not known
[safe/jmp/linux-2.6] arch/x86/kvm/svm.c
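This listing shows svm.c after the change named above: when the intercept handler has not filled in svm->next_rip (i.e. the instruction size is not known), skip_emulated_instruction() now falls back to emulate_instruction() with EMULTYPE_SKIP, so the emulator decodes the instruction only to determine its length and advance the guest RIP.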
1 /*
2  * Kernel-based Virtual Machine driver for Linux
3  *
4  * AMD SVM support
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  *
8  * Authors:
9  *   Yaniv Kamay  <yaniv@qumranet.com>
10  *   Avi Kivity   <avi@qumranet.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2.  See
13  * the COPYING file in the top-level directory.
14  *
15  */
16 #include <linux/kvm_host.h>
17
18 #include "kvm_svm.h"
19 #include "irq.h"
20 #include "mmu.h"
21 #include "kvm_cache_regs.h"
22 #include "x86.h"
23
24 #include <linux/module.h>
25 #include <linux/kernel.h>
26 #include <linux/vmalloc.h>
27 #include <linux/highmem.h>
28 #include <linux/sched.h>
29
30 #include <asm/desc.h>
31
32 #include <asm/virtext.h>
33
34 #define __ex(x) __kvm_handle_fault_on_reboot(x)
35
36 MODULE_AUTHOR("Qumranet");
37 MODULE_LICENSE("GPL");
38
39 #define IOPM_ALLOC_ORDER 2
40 #define MSRPM_ALLOC_ORDER 1
41
42 #define SEG_TYPE_LDT 2
43 #define SEG_TYPE_BUSY_TSS16 3
44
45 #define SVM_FEATURE_NPT  (1 << 0)
46 #define SVM_FEATURE_LBRV (1 << 1)
47 #define SVM_FEATURE_SVML (1 << 2)
48
49 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
50
51 /* Turn on to get debugging output */
52 /* #define NESTED_DEBUG */
53
54 #ifdef NESTED_DEBUG
55 #define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
56 #else
57 #define nsvm_printk(fmt, args...) do {} while (0)
58 #endif
59
60 /* enable NPT for AMD64 and X86 with PAE */
61 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
62 static bool npt_enabled = true;
63 #else
64 static bool npt_enabled = false;
65 #endif
66 static int npt = 1;
67
68 module_param(npt, int, S_IRUGO);
69
70 static int nested = 0;
71 module_param(nested, int, S_IRUGO);
72
73 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
74
75 static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
76 static int nested_svm_vmexit(struct vcpu_svm *svm);
77 static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
78                              void *arg2, void *opaque);
79 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
80                                       bool has_error_code, u32 error_code);
81
82 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
83 {
84         return container_of(vcpu, struct vcpu_svm, vcpu);
85 }
86
87 static inline bool is_nested(struct vcpu_svm *svm)
88 {
89         return svm->nested_vmcb;
90 }
91
92 static unsigned long iopm_base;
93
94 struct kvm_ldttss_desc {
95         u16 limit0;
96         u16 base0;
97         unsigned base1 : 8, type : 5, dpl : 2, p : 1;
98         unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
99         u32 base3;
100         u32 zero1;
101 } __attribute__((packed));
102
103 struct svm_cpu_data {
104         int cpu;
105
106         u64 asid_generation;
107         u32 max_asid;
108         u32 next_asid;
109         struct kvm_ldttss_desc *tss_desc;
110
111         struct page *save_area;
112 };
113
114 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
115 static uint32_t svm_features;
116
117 struct svm_init_data {
118         int cpu;
119         int r;
120 };
121
122 static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
123
124 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
125 #define MSRS_RANGE_SIZE 2048
126 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
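/*
 * The MSR permission map uses two intercept bits per MSR (read, write),
 * so each 2048-byte range covers 2048 * 8 / 2 = 8192 MSRs -- one range
 * for each of the three architectural MSR groups in msrpm_ranges above.
 */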
127
128 #define MAX_INST_SIZE 15
129
130 static inline u32 svm_has(u32 feat)
131 {
132         return svm_features & feat;
133 }
134
135 static inline void clgi(void)
136 {
137         asm volatile (__ex(SVM_CLGI));
138 }
139
140 static inline void stgi(void)
141 {
142         asm volatile (__ex(SVM_STGI));
143 }
144
145 static inline void invlpga(unsigned long addr, u32 asid)
146 {
147         asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid));
148 }
149
150 static inline unsigned long kvm_read_cr2(void)
151 {
152         unsigned long cr2;
153
154         asm volatile ("mov %%cr2, %0" : "=r" (cr2));
155         return cr2;
156 }
157
158 static inline void kvm_write_cr2(unsigned long val)
159 {
160         asm volatile ("mov %0, %%cr2" :: "r" (val));
161 }
162
163 static inline void force_new_asid(struct kvm_vcpu *vcpu)
164 {
165         to_svm(vcpu)->asid_generation--;
166 }
167
168 static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
169 {
170         force_new_asid(vcpu);
171 }
172
173 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
174 {
175         if (!npt_enabled && !(efer & EFER_LMA))
176                 efer &= ~EFER_LME;
177
178         to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
179         vcpu->arch.shadow_efer = efer;
180 }
181
182 static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
183                                 bool has_error_code, u32 error_code)
184 {
185         struct vcpu_svm *svm = to_svm(vcpu);
186
187         /* If we are within a nested VM we'd better #VMEXIT and let the
188            guest handle the exception */
189         if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
190                 return;
191
192         svm->vmcb->control.event_inj = nr
193                 | SVM_EVTINJ_VALID
194                 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
195                 | SVM_EVTINJ_TYPE_EXEPT;
196         svm->vmcb->control.event_inj_err = error_code;
197 }
198
199 static int is_external_interrupt(u32 info)
200 {
201         info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
202         return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
203 }
204
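/*
 * The VMCB keeps a single interrupt-shadow bit in int_state, so when it is
 * set we report both the STI and MOV SS shadow flags that the generic code
 * distinguishes.
 */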
205 static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
206 {
207         struct vcpu_svm *svm = to_svm(vcpu);
208         u32 ret = 0;
209
210         if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
211                 ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS;
212         return ret & mask;
213 }
214
215 static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
216 {
217         struct vcpu_svm *svm = to_svm(vcpu);
218
219         if (mask == 0)
220                 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
221         else
222                 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
223
224 }
225
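/*
 * Advance the guest RIP past the intercepted instruction.  Handlers that
 * know the instruction length set svm->next_rip themselves (e.g. rip + 1
 * for HLT, rip + 3 for VMMCALL, exit_info_2 for IN/OUT); if it was left at
 * zero, fall back to the emulator with EMULTYPE_SKIP so the instruction is
 * decoded just to find its length.
 */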
226 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
227 {
228         struct vcpu_svm *svm = to_svm(vcpu);
229
230         if (!svm->next_rip) {
231                 if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
232                                 EMULATE_DONE)
233                         printk(KERN_DEBUG "%s: NOP\n", __func__);
234                 return;
235         }
236         if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
237                 printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
238                        __func__, kvm_rip_read(vcpu), svm->next_rip);
239
240         kvm_rip_write(vcpu, svm->next_rip);
241         svm_set_interrupt_shadow(vcpu, 0);
242 }
243
244 static int has_svm(void)
245 {
246         const char *msg;
247
248         if (!cpu_has_svm(&msg)) {
249                 printk(KERN_INFO "has_svm: %s\n", msg);
250                 return 0;
251         }
252
253         return 1;
254 }
255
256 static void svm_hardware_disable(void *garbage)
257 {
258         cpu_svm_disable();
259 }
260
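/*
 * Per-CPU enable: set EFER.SVME and point MSR_VM_HSAVE_PA at this CPU's
 * host save area so that VMRUN can be used on this processor.
 */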
261 static void svm_hardware_enable(void *garbage)
262 {
263
264         struct svm_cpu_data *svm_data;
265         uint64_t efer;
266         struct desc_ptr gdt_descr;
267         struct desc_struct *gdt;
268         int me = raw_smp_processor_id();
269
270         if (!has_svm()) {
271                 printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
272                 return;
273         }
274         svm_data = per_cpu(svm_data, me);
275
276         if (!svm_data) {
277                 printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
278                        me);
279                 return;
280         }
281
282         svm_data->asid_generation = 1;
283         svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
284         svm_data->next_asid = svm_data->max_asid + 1;
285
286         asm volatile ("sgdt %0" : "=m"(gdt_descr));
287         gdt = (struct desc_struct *)gdt_descr.address;
288         svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
289
290         rdmsrl(MSR_EFER, efer);
291         wrmsrl(MSR_EFER, efer | EFER_SVME);
292
293         wrmsrl(MSR_VM_HSAVE_PA,
294                page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
295 }
296
297 static void svm_cpu_uninit(int cpu)
298 {
299         struct svm_cpu_data *svm_data
300                 = per_cpu(svm_data, raw_smp_processor_id());
301
302         if (!svm_data)
303                 return;
304
305         per_cpu(svm_data, raw_smp_processor_id()) = NULL;
306         __free_page(svm_data->save_area);
307         kfree(svm_data);
308 }
309
310 static int svm_cpu_init(int cpu)
311 {
312         struct svm_cpu_data *svm_data;
313         int r;
314
315         svm_data = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
316         if (!svm_data)
317                 return -ENOMEM;
318         svm_data->cpu = cpu;
319         svm_data->save_area = alloc_page(GFP_KERNEL);
320         r = -ENOMEM;
321         if (!svm_data->save_area)
322                 goto err_1;
323
324         per_cpu(svm_data, cpu) = svm_data;
325
326         return 0;
327
328 err_1:
329         kfree(svm_data);
330         return r;
331
332 }
333
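/*
 * Worked example (illustrative): for MSR_LSTAR (0xc0000082) the loop below
 * picks range i = 1, giving
 *   msr_offset = (8192 + 0x82) * 2 = 16644 bits,
 * so base = msrpm + 16644 / 32 = msrpm + 520 and msr_shift = 16644 % 32 = 4:
 * bit 4 is the read intercept and bit 5 the write intercept for that MSR.
 */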
334 static void set_msr_interception(u32 *msrpm, unsigned msr,
335                                  int read, int write)
336 {
337         int i;
338
339         for (i = 0; i < NUM_MSR_MAPS; i++) {
340                 if (msr >= msrpm_ranges[i] &&
341                     msr < msrpm_ranges[i] + MSRS_IN_RANGE) {
342                         u32 msr_offset = (i * MSRS_IN_RANGE + msr -
343                                           msrpm_ranges[i]) * 2;
344
345                         u32 *base = msrpm + (msr_offset / 32);
346                         u32 msr_shift = msr_offset % 32;
347                         u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1);
348                         *base = (*base & ~(0x3 << msr_shift)) |
349                                 (mask << msr_shift);
350                         return;
351                 }
352         }
353         BUG();
354 }
355
356 static void svm_vcpu_init_msrpm(u32 *msrpm)
357 {
358         memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
359
360 #ifdef CONFIG_X86_64
361         set_msr_interception(msrpm, MSR_GS_BASE, 1, 1);
362         set_msr_interception(msrpm, MSR_FS_BASE, 1, 1);
363         set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1);
364         set_msr_interception(msrpm, MSR_LSTAR, 1, 1);
365         set_msr_interception(msrpm, MSR_CSTAR, 1, 1);
366         set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1);
367 #endif
368         set_msr_interception(msrpm, MSR_K6_STAR, 1, 1);
369         set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1);
370         set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
371         set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
372 }
373
374 static void svm_enable_lbrv(struct vcpu_svm *svm)
375 {
376         u32 *msrpm = svm->msrpm;
377
378         svm->vmcb->control.lbr_ctl = 1;
379         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
380         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
381         set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
382         set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
383 }
384
385 static void svm_disable_lbrv(struct vcpu_svm *svm)
386 {
387         u32 *msrpm = svm->msrpm;
388
389         svm->vmcb->control.lbr_ctl = 0;
390         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
391         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
392         set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
393         set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
394 }
395
396 static __init int svm_hardware_setup(void)
397 {
398         int cpu;
399         struct page *iopm_pages;
400         void *iopm_va;
401         int r;
402
403         iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
404
405         if (!iopm_pages)
406                 return -ENOMEM;
407
408         iopm_va = page_address(iopm_pages);
409         memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
410         iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
411
412         if (boot_cpu_has(X86_FEATURE_NX))
413                 kvm_enable_efer_bits(EFER_NX);
414
415         if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
416                 kvm_enable_efer_bits(EFER_FFXSR);
417
418         if (nested) {
419                 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
420                 kvm_enable_efer_bits(EFER_SVME);
421         }
422
423         for_each_online_cpu(cpu) {
424                 r = svm_cpu_init(cpu);
425                 if (r)
426                         goto err;
427         }
428
429         svm_features = cpuid_edx(SVM_CPUID_FUNC);
430
431         if (!svm_has(SVM_FEATURE_NPT))
432                 npt_enabled = false;
433
434         if (npt_enabled && !npt) {
435                 printk(KERN_INFO "kvm: Nested Paging disabled\n");
436                 npt_enabled = false;
437         }
438
439         if (npt_enabled) {
440                 printk(KERN_INFO "kvm: Nested Paging enabled\n");
441                 kvm_enable_tdp();
442         } else
443                 kvm_disable_tdp();
444
445         return 0;
446
447 err:
448         __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
449         iopm_base = 0;
450         return r;
451 }
452
453 static __exit void svm_hardware_unsetup(void)
454 {
455         int cpu;
456
457         for_each_online_cpu(cpu)
458                 svm_cpu_uninit(cpu);
459
460         __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
461         iopm_base = 0;
462 }
463
464 static void init_seg(struct vmcb_seg *seg)
465 {
466         seg->selector = 0;
467         seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
468                 SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
469         seg->limit = 0xffff;
470         seg->base = 0;
471 }
472
473 static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
474 {
475         seg->selector = 0;
476         seg->attrib = SVM_SELECTOR_P_MASK | type;
477         seg->limit = 0xffff;
478         seg->base = 0;
479 }
480
481 static void init_vmcb(struct vcpu_svm *svm)
482 {
483         struct vmcb_control_area *control = &svm->vmcb->control;
484         struct vmcb_save_area *save = &svm->vmcb->save;
485
486         control->intercept_cr_read =    INTERCEPT_CR0_MASK |
487                                         INTERCEPT_CR3_MASK |
488                                         INTERCEPT_CR4_MASK;
489
490         control->intercept_cr_write =   INTERCEPT_CR0_MASK |
491                                         INTERCEPT_CR3_MASK |
492                                         INTERCEPT_CR4_MASK |
493                                         INTERCEPT_CR8_MASK;
494
495         control->intercept_dr_read =    INTERCEPT_DR0_MASK |
496                                         INTERCEPT_DR1_MASK |
497                                         INTERCEPT_DR2_MASK |
498                                         INTERCEPT_DR3_MASK;
499
500         control->intercept_dr_write =   INTERCEPT_DR0_MASK |
501                                         INTERCEPT_DR1_MASK |
502                                         INTERCEPT_DR2_MASK |
503                                         INTERCEPT_DR3_MASK |
504                                         INTERCEPT_DR5_MASK |
505                                         INTERCEPT_DR7_MASK;
506
507         control->intercept_exceptions = (1 << PF_VECTOR) |
508                                         (1 << UD_VECTOR) |
509                                         (1 << MC_VECTOR);
510
511
512         control->intercept =    (1ULL << INTERCEPT_INTR) |
513                                 (1ULL << INTERCEPT_NMI) |
514                                 (1ULL << INTERCEPT_SMI) |
515                                 (1ULL << INTERCEPT_CPUID) |
516                                 (1ULL << INTERCEPT_INVD) |
517                                 (1ULL << INTERCEPT_HLT) |
518                                 (1ULL << INTERCEPT_INVLPG) |
519                                 (1ULL << INTERCEPT_INVLPGA) |
520                                 (1ULL << INTERCEPT_IOIO_PROT) |
521                                 (1ULL << INTERCEPT_MSR_PROT) |
522                                 (1ULL << INTERCEPT_TASK_SWITCH) |
523                                 (1ULL << INTERCEPT_SHUTDOWN) |
524                                 (1ULL << INTERCEPT_VMRUN) |
525                                 (1ULL << INTERCEPT_VMMCALL) |
526                                 (1ULL << INTERCEPT_VMLOAD) |
527                                 (1ULL << INTERCEPT_VMSAVE) |
528                                 (1ULL << INTERCEPT_STGI) |
529                                 (1ULL << INTERCEPT_CLGI) |
530                                 (1ULL << INTERCEPT_SKINIT) |
531                                 (1ULL << INTERCEPT_WBINVD) |
532                                 (1ULL << INTERCEPT_MONITOR) |
533                                 (1ULL << INTERCEPT_MWAIT);
534
535         control->iopm_base_pa = iopm_base;
536         control->msrpm_base_pa = __pa(svm->msrpm);
537         control->tsc_offset = 0;
538         control->int_ctl = V_INTR_MASKING_MASK;
539
540         init_seg(&save->es);
541         init_seg(&save->ss);
542         init_seg(&save->ds);
543         init_seg(&save->fs);
544         init_seg(&save->gs);
545
546         save->cs.selector = 0xf000;
547         /* Executable/Readable Code Segment */
548         save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
549                 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
550         save->cs.limit = 0xffff;
551         /*
552          * cs.base should really be 0xffff0000, but vmx can't handle that, so
553          * be consistent with it.
554          *
555          * Replace when we have real mode working for vmx.
556          */
557         save->cs.base = 0xf0000;
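        /*
         * Together with save->rip = 0x0000fff0 below, this makes the first
         * fetch happen at 0xf0000 + 0xfff0 = 0xffff0, the traditional
         * real-mode BIOS entry point.
         */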
558
559         save->gdtr.limit = 0xffff;
560         save->idtr.limit = 0xffff;
561
562         init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
563         init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
564
565         save->efer = EFER_SVME;
566         save->dr6 = 0xffff0ff0;
567         save->dr7 = 0x400;
568         save->rflags = 2;
569         save->rip = 0x0000fff0;
570         svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
571
572         /*
573          * cr0 val on cpu init should be 0x60000010; we enable the cpu
574          * cache by default. The proper way is to enable the cache in the BIOS.
575          */
576         save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
577         save->cr4 = X86_CR4_PAE;
578         /* rdx = ?? */
579
580         if (npt_enabled) {
581                 /* Setup VMCB for Nested Paging */
582                 control->nested_ctl = 1;
583                 control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
584                                         (1ULL << INTERCEPT_INVLPG));
585                 control->intercept_exceptions &= ~(1 << PF_VECTOR);
586                 control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK|
587                                                 INTERCEPT_CR3_MASK);
588                 control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
589                                                  INTERCEPT_CR3_MASK);
590                 save->g_pat = 0x0007040600070406ULL;
591                 /* enable caching because the QEMU BIOS doesn't enable it */
592                 save->cr0 = X86_CR0_ET;
593                 save->cr3 = 0;
594                 save->cr4 = 0;
595         }
596         force_new_asid(&svm->vcpu);
597
598         svm->nested_vmcb = 0;
599         svm->vcpu.arch.hflags = HF_GIF_MASK;
600 }
601
602 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
603 {
604         struct vcpu_svm *svm = to_svm(vcpu);
605
606         init_vmcb(svm);
607
608         if (vcpu->vcpu_id != 0) {
609                 kvm_rip_write(vcpu, 0);
610                 svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
611                 svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
612         }
613         vcpu->arch.regs_avail = ~0;
614         vcpu->arch.regs_dirty = ~0;
615
616         return 0;
617 }
618
619 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
620 {
621         struct vcpu_svm *svm;
622         struct page *page;
623         struct page *msrpm_pages;
624         struct page *hsave_page;
625         struct page *nested_msrpm_pages;
626         int err;
627
628         svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
629         if (!svm) {
630                 err = -ENOMEM;
631                 goto out;
632         }
633
634         err = kvm_vcpu_init(&svm->vcpu, kvm, id);
635         if (err)
636                 goto free_svm;
637
638         page = alloc_page(GFP_KERNEL);
639         if (!page) {
640                 err = -ENOMEM;
641                 goto uninit;
642         }
643
644         err = -ENOMEM;
645         msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
646         if (!msrpm_pages)
647                 goto uninit;
648
649         nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
650         if (!nested_msrpm_pages)
651                 goto uninit;
652
653         svm->msrpm = page_address(msrpm_pages);
654         svm_vcpu_init_msrpm(svm->msrpm);
655
656         hsave_page = alloc_page(GFP_KERNEL);
657         if (!hsave_page)
658                 goto uninit;
659         svm->hsave = page_address(hsave_page);
660
661         svm->nested_msrpm = page_address(nested_msrpm_pages);
662
663         svm->vmcb = page_address(page);
664         clear_page(svm->vmcb);
665         svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
666         svm->asid_generation = 0;
667         init_vmcb(svm);
668
669         fx_init(&svm->vcpu);
670         svm->vcpu.fpu_active = 1;
671         svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
672         if (svm->vcpu.vcpu_id == 0)
673                 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
674
675         return &svm->vcpu;
676
677 uninit:
678         kvm_vcpu_uninit(&svm->vcpu);
679 free_svm:
680         kmem_cache_free(kvm_vcpu_cache, svm);
681 out:
682         return ERR_PTR(err);
683 }
684
685 static void svm_free_vcpu(struct kvm_vcpu *vcpu)
686 {
687         struct vcpu_svm *svm = to_svm(vcpu);
688
689         __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
690         __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
691         __free_page(virt_to_page(svm->hsave));
692         __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
693         kvm_vcpu_uninit(vcpu);
694         kmem_cache_free(kvm_vcpu_cache, svm);
695 }
696
697 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
698 {
699         struct vcpu_svm *svm = to_svm(vcpu);
700         int i;
701
702         if (unlikely(cpu != vcpu->cpu)) {
703                 u64 tsc_this, delta;
704
705                 /*
706                  * Make sure that the guest sees a monotonically
707                  * increasing TSC.
708                  */
709                 rdtscll(tsc_this);
710                 delta = vcpu->arch.host_tsc - tsc_this;
711                 svm->vmcb->control.tsc_offset += delta;
712                 vcpu->cpu = cpu;
713                 kvm_migrate_timers(vcpu);
714         }
715
716         for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
717                 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
718 }
719
720 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
721 {
722         struct vcpu_svm *svm = to_svm(vcpu);
723         int i;
724
725         ++vcpu->stat.host_state_reload;
726         for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
727                 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
728
729         rdtscll(vcpu->arch.host_tsc);
730 }
731
732 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
733 {
734         return to_svm(vcpu)->vmcb->save.rflags;
735 }
736
737 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
738 {
739         to_svm(vcpu)->vmcb->save.rflags = rflags;
740 }
741
742 static void svm_set_vintr(struct vcpu_svm *svm)
743 {
744         svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
745 }
746
747 static void svm_clear_vintr(struct vcpu_svm *svm)
748 {
749         svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
750 }
751
752 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
753 {
754         struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
755
756         switch (seg) {
757         case VCPU_SREG_CS: return &save->cs;
758         case VCPU_SREG_DS: return &save->ds;
759         case VCPU_SREG_ES: return &save->es;
760         case VCPU_SREG_FS: return &save->fs;
761         case VCPU_SREG_GS: return &save->gs;
762         case VCPU_SREG_SS: return &save->ss;
763         case VCPU_SREG_TR: return &save->tr;
764         case VCPU_SREG_LDTR: return &save->ldtr;
765         }
766         BUG();
767         return NULL;
768 }
769
770 static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
771 {
772         struct vmcb_seg *s = svm_seg(vcpu, seg);
773
774         return s->base;
775 }
776
777 static void svm_get_segment(struct kvm_vcpu *vcpu,
778                             struct kvm_segment *var, int seg)
779 {
780         struct vmcb_seg *s = svm_seg(vcpu, seg);
781
782         var->base = s->base;
783         var->limit = s->limit;
784         var->selector = s->selector;
785         var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
786         var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
787         var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
788         var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
789         var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
790         var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
791         var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
792         var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
793
794         /* AMD's VMCB does not have an explicit unusable field, so for
795          * cross-vendor migration purposes treat a segment that is not
796          * present (or has a zero type) as unusable. */
797         var->unusable = !var->present || (var->type == 0);
798
799         switch (seg) {
800         case VCPU_SREG_CS:
801                 /*
802                  * SVM always stores 0 for the 'G' bit in the CS selector in
803                  * the VMCB on a VMEXIT. This hurts cross-vendor migration:
804                  * Intel's VMENTRY has a check on the 'G' bit.
805                  */
806                 var->g = s->limit > 0xfffff;
807                 break;
808         case VCPU_SREG_TR:
809                 /*
810                  * Work around a bug where the busy flag in the tr selector
811                  * isn't exposed
812                  */
813                 var->type |= 0x2;
814                 break;
815         case VCPU_SREG_DS:
816         case VCPU_SREG_ES:
817         case VCPU_SREG_FS:
818         case VCPU_SREG_GS:
819                 /*
820                  * The accessed bit must always be set in the segment
821                  * descriptor cache: although it can be cleared in the
822                  * descriptor itself, the cached bit always remains 1.
823                  * Since Intel checks this, set it here to support
824                  * cross-vendor migration.
825                  */
826                 if (!var->unusable)
827                         var->type |= 0x1;
828                 break;
829         case VCPU_SREG_SS:
830                 /* On AMD CPUs sometimes the DB bit in the segment
831                  * descriptor is left as 1, although the whole segment has
832                  * been made unusable. Clear it here to pass an Intel VMX
833                  * entry check when cross vendor migrating.
834                  */
835                 if (var->unusable)
836                         var->db = 0;
837                 break;
838         }
839 }
840
841 static int svm_get_cpl(struct kvm_vcpu *vcpu)
842 {
843         struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
844
845         return save->cpl;
846 }
847
848 static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
849 {
850         struct vcpu_svm *svm = to_svm(vcpu);
851
852         dt->limit = svm->vmcb->save.idtr.limit;
853         dt->base = svm->vmcb->save.idtr.base;
854 }
855
856 static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
857 {
858         struct vcpu_svm *svm = to_svm(vcpu);
859
860         svm->vmcb->save.idtr.limit = dt->limit;
861         svm->vmcb->save.idtr.base = dt->base;
862 }
863
864 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
865 {
866         struct vcpu_svm *svm = to_svm(vcpu);
867
868         dt->limit = svm->vmcb->save.gdtr.limit;
869         dt->base = svm->vmcb->save.gdtr.base;
870 }
871
872 static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
873 {
874         struct vcpu_svm *svm = to_svm(vcpu);
875
876         svm->vmcb->save.gdtr.limit = dt->limit;
877         svm->vmcb->save.gdtr.base = dt->base;
878 }
879
880 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
881 {
882 }
883
884 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
885 {
886         struct vcpu_svm *svm = to_svm(vcpu);
887
888 #ifdef CONFIG_X86_64
889         if (vcpu->arch.shadow_efer & EFER_LME) {
890                 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
891                         vcpu->arch.shadow_efer |= EFER_LMA;
892                         svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
893                 }
894
895                 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
896                         vcpu->arch.shadow_efer &= ~EFER_LMA;
897                         svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
898                 }
899         }
900 #endif
901         if (npt_enabled)
902                 goto set;
903
904         if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
905                 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
906                 vcpu->fpu_active = 1;
907         }
908
909         vcpu->arch.cr0 = cr0;
910         cr0 |= X86_CR0_PG | X86_CR0_WP;
911         if (!vcpu->fpu_active) {
912                 svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
913                 cr0 |= X86_CR0_TS;
914         }
915 set:
916         /*
917          * re-enable caching here because the QEMU BIOS
918          * does not do it; leaving the cache disabled
919          * results in some delay at reboot.
920          */
921         cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
922         svm->vmcb->save.cr0 = cr0;
923 }
924
925 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
926 {
927         unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
928         unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
929
930         if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
931                 force_new_asid(vcpu);
932
933         vcpu->arch.cr4 = cr4;
934         if (!npt_enabled)
935                 cr4 |= X86_CR4_PAE;
936         cr4 |= host_cr4_mce;
937         to_svm(vcpu)->vmcb->save.cr4 = cr4;
938 }
939
940 static void svm_set_segment(struct kvm_vcpu *vcpu,
941                             struct kvm_segment *var, int seg)
942 {
943         struct vcpu_svm *svm = to_svm(vcpu);
944         struct vmcb_seg *s = svm_seg(vcpu, seg);
945
946         s->base = var->base;
947         s->limit = var->limit;
948         s->selector = var->selector;
949         if (var->unusable)
950                 s->attrib = 0;
951         else {
952                 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
953                 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
954                 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
955                 s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
956                 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
957                 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
958                 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
959                 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
960         }
961         if (seg == VCPU_SREG_CS)
962                 svm->vmcb->save.cpl
963                         = (svm->vmcb->save.cs.attrib
964                            >> SVM_SELECTOR_DPL_SHIFT) & 3;
965
966 }
967
968 static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
969 {
970         int old_debug = vcpu->guest_debug;
971         struct vcpu_svm *svm = to_svm(vcpu);
972
973         vcpu->guest_debug = dbg->control;
974
975         svm->vmcb->control.intercept_exceptions &=
976                 ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
977         if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
978                 if (vcpu->guest_debug &
979                     (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
980                         svm->vmcb->control.intercept_exceptions |=
981                                 1 << DB_VECTOR;
982                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
983                         svm->vmcb->control.intercept_exceptions |=
984                                 1 << BP_VECTOR;
985         } else
986                 vcpu->guest_debug = 0;
987
988         if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
989                 svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
990         else
991                 svm->vmcb->save.dr7 = vcpu->arch.dr7;
992
993         if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
994                 svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
995         else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
996                 svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
997
998         return 0;
999 }
1000
1001 static void load_host_msrs(struct kvm_vcpu *vcpu)
1002 {
1003 #ifdef CONFIG_X86_64
1004         wrmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base);
1005 #endif
1006 }
1007
1008 static void save_host_msrs(struct kvm_vcpu *vcpu)
1009 {
1010 #ifdef CONFIG_X86_64
1011         rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base);
1012 #endif
1013 }
1014
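/*
 * Hand out a fresh ASID from this CPU's pool.  When the pool is exhausted,
 * start a new generation at ASID 1 and ask the next VMRUN to flush the TLB
 * for all ASIDs.
 */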
1015 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
1016 {
1017         if (svm_data->next_asid > svm_data->max_asid) {
1018                 ++svm_data->asid_generation;
1019                 svm_data->next_asid = 1;
1020                 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1021         }
1022
1023         svm->vcpu.cpu = svm_data->cpu;
1024         svm->asid_generation = svm_data->asid_generation;
1025         svm->vmcb->control.asid = svm_data->next_asid++;
1026 }
1027
1028 static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
1029 {
1030         struct vcpu_svm *svm = to_svm(vcpu);
1031         unsigned long val;
1032
1033         switch (dr) {
1034         case 0 ... 3:
1035                 val = vcpu->arch.db[dr];
1036                 break;
1037         case 6:
1038                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1039                         val = vcpu->arch.dr6;
1040                 else
1041                         val = svm->vmcb->save.dr6;
1042                 break;
1043         case 7:
1044                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1045                         val = vcpu->arch.dr7;
1046                 else
1047                         val = svm->vmcb->save.dr7;
1048                 break;
1049         default:
1050                 val = 0;
1051         }
1052
1053         KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
1054         return val;
1055 }
1056
1057 static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
1058                        int *exception)
1059 {
1060         struct vcpu_svm *svm = to_svm(vcpu);
1061
1062         KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
1063
1064         *exception = 0;
1065
1066         switch (dr) {
1067         case 0 ... 3:
1068                 vcpu->arch.db[dr] = value;
1069                 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1070                         vcpu->arch.eff_db[dr] = value;
1071                 return;
1072         case 4 ... 5:
1073                 if (vcpu->arch.cr4 & X86_CR4_DE)
1074                         *exception = UD_VECTOR;
1075                 return;
1076         case 6:
1077                 if (value & 0xffffffff00000000ULL) {
1078                         *exception = GP_VECTOR;
1079                         return;
1080                 }
1081                 vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
1082                 return;
1083         case 7:
1084                 if (value & 0xffffffff00000000ULL) {
1085                         *exception = GP_VECTOR;
1086                         return;
1087                 }
1088                 vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
1089                 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1090                         svm->vmcb->save.dr7 = vcpu->arch.dr7;
1091                         vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
1092                 }
1093                 return;
1094         default:
1095                 /* FIXME: Possible case? */
1096                 printk(KERN_DEBUG "%s: unexpected dr %u\n",
1097                        __func__, dr);
1098                 *exception = UD_VECTOR;
1099                 return;
1100         }
1101 }
1102
1103 static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1104 {
1105         u64 fault_address;
1106         u32 error_code;
1107
1108         fault_address  = svm->vmcb->control.exit_info_2;
1109         error_code = svm->vmcb->control.exit_info_1;
1110
1111         if (!npt_enabled)
1112                 KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code,
1113                             (u32)fault_address, (u32)(fault_address >> 32),
1114                             handler);
1115         else
1116                 KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
1117                             (u32)fault_address, (u32)(fault_address >> 32),
1118                             handler);
1119         /*
1120          * FIXME: This shouldn't be necessary here, but there is a flush
1121          * missing in the MMU code. Until we find this bug, flush the
1122          * complete TLB here on an NPF
1123          */
1124         if (npt_enabled)
1125                 svm_flush_tlb(&svm->vcpu);
1126         else {
1127                 if (kvm_event_needs_reinjection(&svm->vcpu))
1128                         kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1129         }
1130         return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
1131 }
1132
1133 static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1134 {
1135         if (!(svm->vcpu.guest_debug &
1136               (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
1137                 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1138                 return 1;
1139         }
1140         kvm_run->exit_reason = KVM_EXIT_DEBUG;
1141         kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1142         kvm_run->debug.arch.exception = DB_VECTOR;
1143         return 0;
1144 }
1145
1146 static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1147 {
1148         kvm_run->exit_reason = KVM_EXIT_DEBUG;
1149         kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1150         kvm_run->debug.arch.exception = BP_VECTOR;
1151         return 0;
1152 }
1153
1154 static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1155 {
1156         int er;
1157
1158         er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
1159         if (er != EMULATE_DONE)
1160                 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1161         return 1;
1162 }
1163
1164 static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1165 {
1166         svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
1167         if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
1168                 svm->vmcb->save.cr0 &= ~X86_CR0_TS;
1169         svm->vcpu.fpu_active = 1;
1170
1171         return 1;
1172 }
1173
1174 static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1175 {
1176         /*
1177          * On an #MC intercept the MCE handler is not called automatically in
1178          * the host. So do it by hand here.
1179          */
1180         asm volatile (
1181                 "int $0x12\n");
1182         /* not sure if we ever come back to this point */
1183
1184         return 1;
1185 }
1186
1187 static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1188 {
1189         /*
1190          * VMCB is undefined after a SHUTDOWN intercept
1191          * so reinitialize it.
1192          */
1193         clear_page(svm->vmcb);
1194         init_vmcb(svm);
1195
1196         kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
1197         return 0;
1198 }
1199
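/*
 * exit_info_1 packs the decoded IOIO information: the string and direction
 * flags and the operand size tested below, with the port number in the top
 * 16 bits; exit_info_2 holds the rip of the instruction following IN/OUT.
 */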
1200 static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1201 {
1202         u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1203         int size, in, string;
1204         unsigned port;
1205
1206         ++svm->vcpu.stat.io_exits;
1207
1208         svm->next_rip = svm->vmcb->control.exit_info_2;
1209
1210         string = (io_info & SVM_IOIO_STR_MASK) != 0;
1211
1212         if (string) {
1213                 if (emulate_instruction(&svm->vcpu,
1214                                         kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
1215                         return 0;
1216                 return 1;
1217         }
1218
1219         in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1220         port = io_info >> 16;
1221         size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1222
1223         skip_emulated_instruction(&svm->vcpu);
1224         return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
1225 }
1226
1227 static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1228 {
1229         KVMTRACE_0D(NMI, &svm->vcpu, handler);
1230         return 1;
1231 }
1232
1233 static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1234 {
1235         ++svm->vcpu.stat.irq_exits;
1236         KVMTRACE_0D(INTR, &svm->vcpu, handler);
1237         return 1;
1238 }
1239
1240 static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1241 {
1242         return 1;
1243 }
1244
1245 static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1246 {
1247         svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
1248         skip_emulated_instruction(&svm->vcpu);
1249         return kvm_emulate_halt(&svm->vcpu);
1250 }
1251
1252 static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1253 {
1254         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1255         skip_emulated_instruction(&svm->vcpu);
1256         kvm_emulate_hypercall(&svm->vcpu);
1257         return 1;
1258 }
1259
1260 static int nested_svm_check_permissions(struct vcpu_svm *svm)
1261 {
1262         if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
1263             || !is_paging(&svm->vcpu)) {
1264                 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1265                 return 1;
1266         }
1267
1268         if (svm->vmcb->save.cpl) {
1269                 kvm_inject_gp(&svm->vcpu, 0);
1270                 return 1;
1271         }
1272
1273         return 0;
1274 }
1275
1276 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1277                                       bool has_error_code, u32 error_code)
1278 {
1279         if (is_nested(svm)) {
1280                 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
1281                 svm->vmcb->control.exit_code_hi = 0;
1282                 svm->vmcb->control.exit_info_1 = error_code;
1283                 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1284                 if (nested_svm_exit_handled(svm, false)) {
1285                         nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
1286
1287                         nested_svm_vmexit(svm);
1288                         return 1;
1289                 }
1290         }
1291
1292         return 0;
1293 }
1294
1295 static inline int nested_svm_intr(struct vcpu_svm *svm)
1296 {
1297         if (is_nested(svm)) {
1298                 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1299                         return 0;
1300
1301                 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1302                         return 0;
1303
1304                 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
1305
1306                 if (nested_svm_exit_handled(svm, false)) {
1307                         nsvm_printk("VMexit -> INTR\n");
1308                         nested_svm_vmexit(svm);
1309                         return 1;
1310                 }
1311         }
1312
1313         return 0;
1314 }
1315
1316 static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
1317 {
1318         struct page *page;
1319
1320         down_read(&current->mm->mmap_sem);
1321         page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
1322         up_read(&current->mm->mmap_sem);
1323
1324         if (is_error_page(page)) {
1325                 printk(KERN_INFO "%s: could not find page at 0x%llx\n",
1326                        __func__, gpa);
1327                 kvm_release_page_clean(page);
1328                 kvm_inject_gp(&svm->vcpu, 0);
1329                 return NULL;
1330         }
1331         return page;
1332 }
1333
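/*
 * Map up to two guest-physical pages (arg1 is required, arg2 is optional)
 * and run @handler on the mapped pointers, then unmap and release the pages.
 */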
1334 static int nested_svm_do(struct vcpu_svm *svm,
1335                          u64 arg1_gpa, u64 arg2_gpa, void *opaque,
1336                          int (*handler)(struct vcpu_svm *svm,
1337                                         void *arg1,
1338                                         void *arg2,
1339                                         void *opaque))
1340 {
1341         struct page *arg1_page;
1342         struct page *arg2_page = NULL;
1343         void *arg1;
1344         void *arg2 = NULL;
1345         int retval;
1346
1347         arg1_page = nested_svm_get_page(svm, arg1_gpa);
1348         if (arg1_page == NULL)
1349                 return 1;
1350
1351         if (arg2_gpa) {
1352                 arg2_page = nested_svm_get_page(svm, arg2_gpa);
1353                 if (arg2_page == NULL) {
1354                         kvm_release_page_clean(arg1_page);
1355                         return 1;
1356                 }
1357         }
1358
1359         arg1 = kmap_atomic(arg1_page, KM_USER0);
1360         if (arg2_gpa)
1361                 arg2 = kmap_atomic(arg2_page, KM_USER1);
1362
1363         retval = handler(svm, arg1, arg2, opaque);
1364
1365         kunmap_atomic(arg1, KM_USER0);
1366         if (arg2_gpa)
1367                 kunmap_atomic(arg2, KM_USER1);
1368
1369         kvm_release_page_dirty(arg1_page);
1370         if (arg2_gpa)
1371                 kvm_release_page_dirty(arg2_page);
1372
1373         return retval;
1374 }
1375
1376 static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
1377                                         void *arg1,
1378                                         void *arg2,
1379                                         void *opaque)
1380 {
1381         struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1382         bool kvm_overrides = *(bool *)opaque;
1383         u32 exit_code = svm->vmcb->control.exit_code;
1384
1385         if (kvm_overrides) {
1386                 switch (exit_code) {
1387                 case SVM_EXIT_INTR:
1388                 case SVM_EXIT_NMI:
1389                         return 0;
1390                 /* For now we are always handling NPFs when using them */
1391                 case SVM_EXIT_NPF:
1392                         if (npt_enabled)
1393                                 return 0;
1394                         break;
1395                 /* When we're shadowing, trap PFs */
1396                 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
1397                         if (!npt_enabled)
1398                                 return 0;
1399                         break;
1400                 default:
1401                         break;
1402                 }
1403         }
1404
1405         switch (exit_code) {
1406         case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
1407                 u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
1408                 if (nested_vmcb->control.intercept_cr_read & cr_bits)
1409                         return 1;
1410                 break;
1411         }
1412         case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
1413                 u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
1414                 if (nested_vmcb->control.intercept_cr_write & cr_bits)
1415                         return 1;
1416                 break;
1417         }
1418         case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
1419                 u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
1420                 if (nested_vmcb->control.intercept_dr_read & dr_bits)
1421                         return 1;
1422                 break;
1423         }
1424         case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
1425                 u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
1426                 if (nested_vmcb->control.intercept_dr_write & dr_bits)
1427                         return 1;
1428                 break;
1429         }
1430         case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1431                 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
1432                 if (nested_vmcb->control.intercept_exceptions & excp_bits)
1433                         return 1;
1434                 break;
1435         }
1436         default: {
1437                 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
1438                 nsvm_printk("exit code: 0x%x\n", exit_code);
1439                 if (nested_vmcb->control.intercept & exit_bits)
1440                         return 1;
1441         }
1442         }
1443
1444         return 0;
1445 }
1446
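/*
 * Check the nested guest's MSR permission map for the MSR in RCX.  For
 * example (illustrative), MSR 0xc0000082 gives t0 = (8192 + 0x82) * 2 =
 * 16644, t1 = 16644 / 8 = 2080 and t0 % 8 = 4, so its read intercept is
 * bit 4 and its write intercept bit 5 of byte 2080 -- the same per-MSR bit
 * pair that set_msr_interception() manipulates.
 */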
1447 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
1448                                        void *arg1, void *arg2,
1449                                        void *opaque)
1450 {
1451         struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1452         u8 *msrpm = (u8 *)arg2;
1453         u32 t0, t1;
1454         u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1455         u32 param = svm->vmcb->control.exit_info_1 & 1;
1456
1457         if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1458                 return 0;
1459
1460         switch (msr) {
1461         case 0 ... 0x1fff:
1462                 t0 = (msr * 2) % 8;
1463                 t1 = msr / 8;
1464                 break;
1465         case 0xc0000000 ... 0xc0001fff:
1466                 t0 = (8192 + msr - 0xc0000000) * 2;
1467                 t1 = (t0 / 8);
1468                 t0 %= 8;
1469                 break;
1470         case 0xc0010000 ... 0xc0011fff:
1471                 t0 = (16384 + msr - 0xc0010000) * 2;
1472                 t1 = (t0 / 8);
1473                 t0 %= 8;
1474                 break;
1475         default:
1476                 return 1;
1477                 break;
1478         }
1479         if (msrpm[t1] & ((1 << param) << t0))
1480                 return 1;
1481
1482         return 0;
1483 }
1484
1485 static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
1486 {
1487         bool k = kvm_override;
1488
1489         switch (svm->vmcb->control.exit_code) {
1490         case SVM_EXIT_MSR:
1491                 return nested_svm_do(svm, svm->nested_vmcb,
1492                                      svm->nested_vmcb_msrpm, NULL,
1493                                      nested_svm_exit_handled_msr);
1494         default: break;
1495         }
1496
1497         return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
1498                              nested_svm_exit_handled_real);
1499 }
1500
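/*
 * Emulate #VMEXIT for the nested guest: copy the current VMCB back into the
 * guest's nested VMCB (restoring from nested_save[] the fields the nested
 * hypervisor owns), then reload the state that nested_svm_vmrun() saved in
 * svm->hsave.
 */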
1501 static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
1502                                   void *arg2, void *opaque)
1503 {
1504         struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1505         struct vmcb *hsave = svm->hsave;
1506         u64 nested_save[] = { nested_vmcb->save.cr0,
1507                               nested_vmcb->save.cr3,
1508                               nested_vmcb->save.cr4,
1509                               nested_vmcb->save.efer,
1510                               nested_vmcb->control.intercept_cr_read,
1511                               nested_vmcb->control.intercept_cr_write,
1512                               nested_vmcb->control.intercept_dr_read,
1513                               nested_vmcb->control.intercept_dr_write,
1514                               nested_vmcb->control.intercept_exceptions,
1515                               nested_vmcb->control.intercept,
1516                               nested_vmcb->control.msrpm_base_pa,
1517                               nested_vmcb->control.iopm_base_pa,
1518                               nested_vmcb->control.tsc_offset };
1519
1520         /* Give the current vmcb to the guest */
1521         memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
1522         nested_vmcb->save.cr0 = nested_save[0];
1523         if (!npt_enabled)
1524                 nested_vmcb->save.cr3 = nested_save[1];
1525         nested_vmcb->save.cr4 = nested_save[2];
1526         nested_vmcb->save.efer = nested_save[3];
1527         nested_vmcb->control.intercept_cr_read = nested_save[4];
1528         nested_vmcb->control.intercept_cr_write = nested_save[5];
1529         nested_vmcb->control.intercept_dr_read = nested_save[6];
1530         nested_vmcb->control.intercept_dr_write = nested_save[7];
1531         nested_vmcb->control.intercept_exceptions = nested_save[8];
1532         nested_vmcb->control.intercept = nested_save[9];
1533         nested_vmcb->control.msrpm_base_pa = nested_save[10];
1534         nested_vmcb->control.iopm_base_pa = nested_save[11];
1535         nested_vmcb->control.tsc_offset = nested_save[12];
1536
1537         /* We always set V_INTR_MASKING and remember the old value in hflags */
1538         if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1539                 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
1540
1541         if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
1542             (nested_vmcb->control.int_vector)) {
1543                 nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
1544                                 nested_vmcb->control.int_vector);
1545         }
1546
1547         /* Restore the original control entries */
1548         svm->vmcb->control = hsave->control;
1549
1550         /* Kill any pending exceptions */
1551         if (svm->vcpu.arch.exception.pending)
1552                 nsvm_printk("WARNING: Pending Exception\n");
1553         svm->vcpu.arch.exception.pending = false;
1554
1555         /* Restore selected save entries */
1556         svm->vmcb->save.es = hsave->save.es;
1557         svm->vmcb->save.cs = hsave->save.cs;
1558         svm->vmcb->save.ss = hsave->save.ss;
1559         svm->vmcb->save.ds = hsave->save.ds;
1560         svm->vmcb->save.gdtr = hsave->save.gdtr;
1561         svm->vmcb->save.idtr = hsave->save.idtr;
1562         svm->vmcb->save.rflags = hsave->save.rflags;
1563         svm_set_efer(&svm->vcpu, hsave->save.efer);
1564         svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
1565         svm_set_cr4(&svm->vcpu, hsave->save.cr4);
1566         if (npt_enabled) {
1567                 svm->vmcb->save.cr3 = hsave->save.cr3;
1568                 svm->vcpu.arch.cr3 = hsave->save.cr3;
1569         } else {
1570                 kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
1571         }
1572         kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
1573         kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
1574         kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
1575         svm->vmcb->save.dr7 = 0;
1576         svm->vmcb->save.cpl = 0;
1577         svm->vmcb->control.exit_int_info = 0;
1578
1579         svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
1580         /* Exit nested SVM mode */
1581         svm->nested_vmcb = 0;
1582
1583         return 0;
1584 }
1585
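/*
 * Emulate #VMEXIT: nested_svm_vmexit_real() hands the current VMCB back to
 * the guest and restores the state remembered in hsave; the MMU context is
 * rebuilt afterwards because CR3 and the paging mode may have changed.
 */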
1586 static int nested_svm_vmexit(struct vcpu_svm *svm)
1587 {
1588         nsvm_printk("VMexit\n");
1589         if (nested_svm_do(svm, svm->nested_vmcb, 0,
1590                           NULL, nested_svm_vmexit_real))
1591                 return 1;
1592
1593         kvm_mmu_reset_context(&svm->vcpu);
1594         kvm_mmu_load(&svm->vcpu);
1595
1596         return 0;
1597 }
1598
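/*
 * Merge the MSR permission bitmap provided by the guest with our own.  A set
 * bit means the access is intercepted, so OR-ing the two bitmaps keeps every
 * intercept requested by either side.
 */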
1599 static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
1600                                   void *arg2, void *opaque)
1601 {
1602         int i;
1603         u32 *nested_msrpm = (u32 *)arg1;
1604         for (i = 0; i < PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
1605                 svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
1606         svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
1607
1608         return 0;
1609 }
1610
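/*
 * Emulate VMRUN: remember the current VMCB in hsave so it can be restored on
 * #VMEXIT, load the nested guest's state from the VMCB the guest passed in
 * RAX, and OR its intercepts into ours so KVM never loses its own exits.
 */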
1611 static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
1612                             void *arg2, void *opaque)
1613 {
1614         struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1615         struct vmcb *hsave = svm->hsave;
1616
1617         /* nested_vmcb is our indicator if nested SVM is activated */
1618         svm->nested_vmcb = svm->vmcb->save.rax;
1619
1620         /* Clear internal status */
1621         svm->vcpu.arch.exception.pending = false;
1622
1623         /* Save the old vmcb, so we don't need to pick which fields to
1624            save, but can restore everything when a #VMEXIT occurs */
1625         memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
1626         /* We need to remember the original CR3 in the SPT case */
1627         if (!npt_enabled)
1628                 hsave->save.cr3 = svm->vcpu.arch.cr3;
1629         hsave->save.cr4 = svm->vcpu.arch.cr4;
1630         hsave->save.rip = svm->next_rip;
1631
1632         if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
1633                 svm->vcpu.arch.hflags |= HF_HIF_MASK;
1634         else
1635                 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
1636
1637         /* Load the nested guest state */
1638         svm->vmcb->save.es = nested_vmcb->save.es;
1639         svm->vmcb->save.cs = nested_vmcb->save.cs;
1640         svm->vmcb->save.ss = nested_vmcb->save.ss;
1641         svm->vmcb->save.ds = nested_vmcb->save.ds;
1642         svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
1643         svm->vmcb->save.idtr = nested_vmcb->save.idtr;
1644         svm->vmcb->save.rflags = nested_vmcb->save.rflags;
1645         svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
1646         svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
1647         svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
1648         if (npt_enabled) {
1649                 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
1650                 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
1651         } else {
1652                 kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
1653                 kvm_mmu_reset_context(&svm->vcpu);
1654         }
1655         svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
1656         kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
1657         kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
1658         kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
1659         /* In case we don't even reach vcpu_run, the fields are not updated */
1660         svm->vmcb->save.rax = nested_vmcb->save.rax;
1661         svm->vmcb->save.rsp = nested_vmcb->save.rsp;
1662         svm->vmcb->save.rip = nested_vmcb->save.rip;
1663         svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
1664         svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
1665         svm->vmcb->save.cpl = nested_vmcb->save.cpl;
1666
1667         /* We don't want a nested guest to be more powerful than the guest,
1668            so all intercepts are ORed */
1669         svm->vmcb->control.intercept_cr_read |=
1670                 nested_vmcb->control.intercept_cr_read;
1671         svm->vmcb->control.intercept_cr_write |=
1672                 nested_vmcb->control.intercept_cr_write;
1673         svm->vmcb->control.intercept_dr_read |=
1674                 nested_vmcb->control.intercept_dr_read;
1675         svm->vmcb->control.intercept_dr_write |=
1676                 nested_vmcb->control.intercept_dr_write;
1677         svm->vmcb->control.intercept_exceptions |=
1678                 nested_vmcb->control.intercept_exceptions;
1679
1680         svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
1681
1682         svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
1683
1684         force_new_asid(&svm->vcpu);
1685         svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
1686         svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
1687         svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
1688         if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
1689                 nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
1690                                 nested_vmcb->control.int_ctl);
1691         }
1692         if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
1693                 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
1694         else
1695                 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
1696
1697         nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
1698                         nested_vmcb->control.exit_int_info,
1699                         nested_vmcb->control.int_state);
1700
1701         svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
1702         svm->vmcb->control.int_state = nested_vmcb->control.int_state;
1703         svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
1704         if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
1705                 nsvm_printk("Injecting Event: 0x%x\n",
1706                                 nested_vmcb->control.event_inj);
1707         svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
1708         svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
1709
1710         svm->vcpu.arch.hflags |= HF_GIF_MASK;
1711
1712         return 0;
1713 }
1714
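/*
 * Copy the state that VMLOAD/VMSAVE transfer between a VMCB and the CPU:
 * FS, GS, TR, LDTR, KernelGsBase, the SYSCALL/SYSENTER MSRs and SFMASK.
 */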
1715 static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
1716 {
1717         to_vmcb->save.fs = from_vmcb->save.fs;
1718         to_vmcb->save.gs = from_vmcb->save.gs;
1719         to_vmcb->save.tr = from_vmcb->save.tr;
1720         to_vmcb->save.ldtr = from_vmcb->save.ldtr;
1721         to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
1722         to_vmcb->save.star = from_vmcb->save.star;
1723         to_vmcb->save.lstar = from_vmcb->save.lstar;
1724         to_vmcb->save.cstar = from_vmcb->save.cstar;
1725         to_vmcb->save.sfmask = from_vmcb->save.sfmask;
1726         to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
1727         to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
1728         to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
1729
1730         return 1;
1731 }
1732
1733 static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
1734                              void *arg2, void *opaque)
1735 {
1736         return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb);
1737 }
1738
1739 static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
1740                              void *arg2, void *opaque)
1741 {
1742         return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb);
1743 }
1744
1745 static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1746 {
1747         if (nested_svm_check_permissions(svm))
1748                 return 1;
1749
1750         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1751         skip_emulated_instruction(&svm->vcpu);
1752
1753         nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload);
1754
1755         return 1;
1756 }
1757
1758 static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1759 {
1760         if (nested_svm_check_permissions(svm))
1761                 return 1;
1762
1763         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1764         skip_emulated_instruction(&svm->vcpu);
1765
1766         nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave);
1767
1768         return 1;
1769 }
1770
1771 static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1772 {
1773         nsvm_printk("VMrun\n");
1774         if (nested_svm_check_permissions(svm))
1775                 return 1;
1776
1777         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1778         skip_emulated_instruction(&svm->vcpu);
1779
1780         if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
1781                           NULL, nested_svm_vmrun))
1782                 return 1;
1783
1784         if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0,
1785                       NULL, nested_svm_vmrun_msrpm))
1786                 return 1;
1787
1788         return 1;
1789 }
1790
1791 static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1792 {
1793         if (nested_svm_check_permissions(svm))
1794                 return 1;
1795
1796         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1797         skip_emulated_instruction(&svm->vcpu);
1798
1799         svm->vcpu.arch.hflags |= HF_GIF_MASK;
1800
1801         return 1;
1802 }
1803
1804 static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1805 {
1806         if (nested_svm_check_permissions(svm))
1807                 return 1;
1808
1809         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1810         skip_emulated_instruction(&svm->vcpu);
1811
1812         svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
1813
1814         /* After a CLGI no interrupts should come */
1815         svm_clear_vintr(svm);
1816         svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
1817
1818         return 1;
1819 }
1820
1821 static int invalid_op_interception(struct vcpu_svm *svm,
1822                                    struct kvm_run *kvm_run)
1823 {
1824         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1825         return 1;
1826 }
1827
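/*
 * Derive the task switch reason (IRET, far JMP, through an IDT gate, or a
 * plain CALL) from exit_info_2 and the IDT vectoring information, drop any
 * event that was being delivered through a gate, and let kvm_task_switch()
 * do the actual emulation.
 */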
1828 static int task_switch_interception(struct vcpu_svm *svm,
1829                                     struct kvm_run *kvm_run)
1830 {
1831         u16 tss_selector;
1832         int reason;
1833         int int_type = svm->vmcb->control.exit_int_info &
1834                 SVM_EXITINTINFO_TYPE_MASK;
1835         int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
1836         u32 type =
1837                 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
1838         u32 idt_v =
1839                 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
1840
1841         tss_selector = (u16)svm->vmcb->control.exit_info_1;
1842
1843         if (svm->vmcb->control.exit_info_2 &
1844             (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
1845                 reason = TASK_SWITCH_IRET;
1846         else if (svm->vmcb->control.exit_info_2 &
1847                  (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
1848                 reason = TASK_SWITCH_JMP;
1849         else if (idt_v)
1850                 reason = TASK_SWITCH_GATE;
1851         else
1852                 reason = TASK_SWITCH_CALL;
1853
1854         if (reason == TASK_SWITCH_GATE) {
1855                 switch (type) {
1856                 case SVM_EXITINTINFO_TYPE_NMI:
1857                         svm->vcpu.arch.nmi_injected = false;
1858                         break;
1859                 case SVM_EXITINTINFO_TYPE_EXEPT:
1860                         kvm_clear_exception_queue(&svm->vcpu);
1861                         break;
1862                 case SVM_EXITINTINFO_TYPE_INTR:
1863                         kvm_clear_interrupt_queue(&svm->vcpu);
1864                         break;
1865                 default:
1866                         break;
1867                 }
1868         }
1869
1870         if (reason != TASK_SWITCH_GATE ||
1871             int_type == SVM_EXITINTINFO_TYPE_SOFT ||
1872             (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
1873              (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
1874                 skip_emulated_instruction(&svm->vcpu);
1875
1876         return kvm_task_switch(&svm->vcpu, tss_selector, reason);
1877 }
1878
1879 static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1880 {
1881         svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
1882         kvm_emulate_cpuid(&svm->vcpu);
1883         return 1;
1884 }
1885
1886 static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1887 {
1888         ++svm->vcpu.stat.nmi_window_exits;
1889         svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
1890         svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
1891         return 1;
1892 }
1893
1894 static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1895 {
1896         if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
1897                 pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
1898         return 1;
1899 }
1900
1901 static int emulate_on_interception(struct vcpu_svm *svm,
1902                                    struct kvm_run *kvm_run)
1903 {
1904         if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
1905                 pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
1906         return 1;
1907 }
1908
1909 static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1910 {
1911         u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
1912         /* instruction emulation calls kvm_set_cr8() */
1913         emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
1914         if (irqchip_in_kernel(svm->vcpu.kvm)) {
1915                 svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
1916                 return 1;
1917         }
1918         if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
1919                 return 1;
1920         kvm_run->exit_reason = KVM_EXIT_SET_TPR;
1921         return 0;
1922 }
1923
1924 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
1925 {
1926         struct vcpu_svm *svm = to_svm(vcpu);
1927
1928         switch (ecx) {
1929         case MSR_IA32_TIME_STAMP_COUNTER: {
1930                 u64 tsc;
1931
1932                 rdtscll(tsc);
1933                 *data = svm->vmcb->control.tsc_offset + tsc;
1934                 break;
1935         }
1936         case MSR_K6_STAR:
1937                 *data = svm->vmcb->save.star;
1938                 break;
1939 #ifdef CONFIG_X86_64
1940         case MSR_LSTAR:
1941                 *data = svm->vmcb->save.lstar;
1942                 break;
1943         case MSR_CSTAR:
1944                 *data = svm->vmcb->save.cstar;
1945                 break;
1946         case MSR_KERNEL_GS_BASE:
1947                 *data = svm->vmcb->save.kernel_gs_base;
1948                 break;
1949         case MSR_SYSCALL_MASK:
1950                 *data = svm->vmcb->save.sfmask;
1951                 break;
1952 #endif
1953         case MSR_IA32_SYSENTER_CS:
1954                 *data = svm->vmcb->save.sysenter_cs;
1955                 break;
1956         case MSR_IA32_SYSENTER_EIP:
1957                 *data = svm->vmcb->save.sysenter_eip;
1958                 break;
1959         case MSR_IA32_SYSENTER_ESP:
1960                 *data = svm->vmcb->save.sysenter_esp;
1961                 break;
1962         /* Nobody will change the following 5 values in the VMCB so
1963            we can safely return them on rdmsr. They will always be 0
1964            until LBRV is implemented. */
1965         case MSR_IA32_DEBUGCTLMSR:
1966                 *data = svm->vmcb->save.dbgctl;
1967                 break;
1968         case MSR_IA32_LASTBRANCHFROMIP:
1969                 *data = svm->vmcb->save.br_from;
1970                 break;
1971         case MSR_IA32_LASTBRANCHTOIP:
1972                 *data = svm->vmcb->save.br_to;
1973                 break;
1974         case MSR_IA32_LASTINTFROMIP:
1975                 *data = svm->vmcb->save.last_excp_from;
1976                 break;
1977         case MSR_IA32_LASTINTTOIP:
1978                 *data = svm->vmcb->save.last_excp_to;
1979                 break;
1980         case MSR_VM_HSAVE_PA:
1981                 *data = svm->hsave_msr;
1982                 break;
1983         case MSR_VM_CR:
1984                 *data = 0;
1985                 break;
1986         case MSR_IA32_UCODE_REV:
1987                 *data = 0x01000065;
1988                 break;
1989         default:
1990                 return kvm_get_msr_common(vcpu, ecx, data);
1991         }
1992         return 0;
1993 }
1994
1995 static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1996 {
1997         u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1998         u64 data;
1999
2000         if (svm_get_msr(&svm->vcpu, ecx, &data))
2001                 kvm_inject_gp(&svm->vcpu, 0);
2002         else {
2003                 KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data,
2004                             (u32)(data >> 32), handler);
2005
2006                 svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
2007                 svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
2008                 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2009                 skip_emulated_instruction(&svm->vcpu);
2010         }
2011         return 1;
2012 }
2013
2014 static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
2015 {
2016         struct vcpu_svm *svm = to_svm(vcpu);
2017
2018         switch (ecx) {
2019         case MSR_IA32_TIME_STAMP_COUNTER: {
2020                 u64 tsc;
2021
2022                 rdtscll(tsc);
2023                 svm->vmcb->control.tsc_offset = data - tsc;
2024                 break;
2025         }
2026         case MSR_K6_STAR:
2027                 svm->vmcb->save.star = data;
2028                 break;
2029 #ifdef CONFIG_X86_64
2030         case MSR_LSTAR:
2031                 svm->vmcb->save.lstar = data;
2032                 break;
2033         case MSR_CSTAR:
2034                 svm->vmcb->save.cstar = data;
2035                 break;
2036         case MSR_KERNEL_GS_BASE:
2037                 svm->vmcb->save.kernel_gs_base = data;
2038                 break;
2039         case MSR_SYSCALL_MASK:
2040                 svm->vmcb->save.sfmask = data;
2041                 break;
2042 #endif
2043         case MSR_IA32_SYSENTER_CS:
2044                 svm->vmcb->save.sysenter_cs = data;
2045                 break;
2046         case MSR_IA32_SYSENTER_EIP:
2047                 svm->vmcb->save.sysenter_eip = data;
2048                 break;
2049         case MSR_IA32_SYSENTER_ESP:
2050                 svm->vmcb->save.sysenter_esp = data;
2051                 break;
2052         case MSR_IA32_DEBUGCTLMSR:
2053                 if (!svm_has(SVM_FEATURE_LBRV)) {
2054                         pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
2055                                         __func__, data);
2056                         break;
2057                 }
2058                 if (data & DEBUGCTL_RESERVED_BITS)
2059                         return 1;
2060
2061                 svm->vmcb->save.dbgctl = data;
2062                 if (data & (1ULL << 0))
2063                         svm_enable_lbrv(svm);
2064                 else
2065                         svm_disable_lbrv(svm);
2066                 break;
2067         case MSR_K7_EVNTSEL0:
2068         case MSR_K7_EVNTSEL1:
2069         case MSR_K7_EVNTSEL2:
2070         case MSR_K7_EVNTSEL3:
2071         case MSR_K7_PERFCTR0:
2072         case MSR_K7_PERFCTR1:
2073         case MSR_K7_PERFCTR2:
2074         case MSR_K7_PERFCTR3:
2075                 /*
2076                  * Just discard all writes to the performance counters; this
2077                  * should keep both older Linux and Windows 64-bit guests
2078                  * happy
2079                  */
2080                 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
2081
2082                 break;
2083         case MSR_VM_HSAVE_PA:
2084                 svm->hsave_msr = data;
2085                 break;
2086         default:
2087                 return kvm_set_msr_common(vcpu, ecx, data);
2088         }
2089         return 0;
2090 }
2091
2092 static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
2093 {
2094         u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2095         u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
2096                 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
2097
2098         KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32),
2099                     handler);
2100
2101         svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2102         if (svm_set_msr(&svm->vcpu, ecx, data))
2103                 kvm_inject_gp(&svm->vcpu, 0);
2104         else
2105                 skip_emulated_instruction(&svm->vcpu);
2106         return 1;
2107 }
2108
2109 static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
2110 {
2111         if (svm->vmcb->control.exit_info_1)
2112                 return wrmsr_interception(svm, kvm_run);
2113         else
2114                 return rdmsr_interception(svm, kvm_run);
2115 }
2116
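/*
 * The guest is ready to take interrupts again: drop the fake virtual
 * interrupt set up by enable_irq_window() and, if user space uses its own
 * irqchip and asked to be notified, exit so it can inject the real one.
 */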
2117 static int interrupt_window_interception(struct vcpu_svm *svm,
2118                                    struct kvm_run *kvm_run)
2119 {
2120         KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
2121
2122         svm_clear_vintr(svm);
2123         svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
2124         /*
2125          * If user space is waiting to inject interrupts, exit as soon as
2126          * possible
2127          */
2128         if (!irqchip_in_kernel(svm->vcpu.kvm) &&
2129             kvm_run->request_interrupt_window &&
2130             !kvm_cpu_has_interrupt(&svm->vcpu)) {
2131                 ++svm->vcpu.stat.irq_window_exits;
2132                 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
2133                 return 0;
2134         }
2135
2136         return 1;
2137 }
2138
2139 static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
2140                                       struct kvm_run *kvm_run) = {
2141         [SVM_EXIT_READ_CR0]                     = emulate_on_interception,
2142         [SVM_EXIT_READ_CR3]                     = emulate_on_interception,
2143         [SVM_EXIT_READ_CR4]                     = emulate_on_interception,
2144         [SVM_EXIT_READ_CR8]                     = emulate_on_interception,
2145         /* for now: */
2146         [SVM_EXIT_WRITE_CR0]                    = emulate_on_interception,
2147         [SVM_EXIT_WRITE_CR3]                    = emulate_on_interception,
2148         [SVM_EXIT_WRITE_CR4]                    = emulate_on_interception,
2149         [SVM_EXIT_WRITE_CR8]                    = cr8_write_interception,
2150         [SVM_EXIT_READ_DR0]                     = emulate_on_interception,
2151         [SVM_EXIT_READ_DR1]                     = emulate_on_interception,
2152         [SVM_EXIT_READ_DR2]                     = emulate_on_interception,
2153         [SVM_EXIT_READ_DR3]                     = emulate_on_interception,
2154         [SVM_EXIT_WRITE_DR0]                    = emulate_on_interception,
2155         [SVM_EXIT_WRITE_DR1]                    = emulate_on_interception,
2156         [SVM_EXIT_WRITE_DR2]                    = emulate_on_interception,
2157         [SVM_EXIT_WRITE_DR3]                    = emulate_on_interception,
2158         [SVM_EXIT_WRITE_DR5]                    = emulate_on_interception,
2159         [SVM_EXIT_WRITE_DR7]                    = emulate_on_interception,
2160         [SVM_EXIT_EXCP_BASE + DB_VECTOR]        = db_interception,
2161         [SVM_EXIT_EXCP_BASE + BP_VECTOR]        = bp_interception,
2162         [SVM_EXIT_EXCP_BASE + UD_VECTOR]        = ud_interception,
2163         [SVM_EXIT_EXCP_BASE + PF_VECTOR]        = pf_interception,
2164         [SVM_EXIT_EXCP_BASE + NM_VECTOR]        = nm_interception,
2165         [SVM_EXIT_EXCP_BASE + MC_VECTOR]        = mc_interception,
2166         [SVM_EXIT_INTR]                         = intr_interception,
2167         [SVM_EXIT_NMI]                          = nmi_interception,
2168         [SVM_EXIT_SMI]                          = nop_on_interception,
2169         [SVM_EXIT_INIT]                         = nop_on_interception,
2170         [SVM_EXIT_VINTR]                        = interrupt_window_interception,
2171         /* [SVM_EXIT_CR0_SEL_WRITE]             = emulate_on_interception, */
2172         [SVM_EXIT_CPUID]                        = cpuid_interception,
2173         [SVM_EXIT_IRET]                         = iret_interception,
2174         [SVM_EXIT_INVD]                         = emulate_on_interception,
2175         [SVM_EXIT_HLT]                          = halt_interception,
2176         [SVM_EXIT_INVLPG]                       = invlpg_interception,
2177         [SVM_EXIT_INVLPGA]                      = invalid_op_interception,
2178         [SVM_EXIT_IOIO]                         = io_interception,
2179         [SVM_EXIT_MSR]                          = msr_interception,
2180         [SVM_EXIT_TASK_SWITCH]                  = task_switch_interception,
2181         [SVM_EXIT_SHUTDOWN]                     = shutdown_interception,
2182         [SVM_EXIT_VMRUN]                        = vmrun_interception,
2183         [SVM_EXIT_VMMCALL]                      = vmmcall_interception,
2184         [SVM_EXIT_VMLOAD]                       = vmload_interception,
2185         [SVM_EXIT_VMSAVE]                       = vmsave_interception,
2186         [SVM_EXIT_STGI]                         = stgi_interception,
2187         [SVM_EXIT_CLGI]                         = clgi_interception,
2188         [SVM_EXIT_SKINIT]                       = invalid_op_interception,
2189         [SVM_EXIT_WBINVD]                       = emulate_on_interception,
2190         [SVM_EXIT_MONITOR]                      = invalid_op_interception,
2191         [SVM_EXIT_MWAIT]                        = invalid_op_interception,
2192         [SVM_EXIT_NPF]                          = pf_interception,
2193 };
2194
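/*
 * Top-level exit handler: give a nested guest the chance to handle the exit
 * first, resynchronize CR0/CR3 when NPT is active, and then dispatch to the
 * handler table above.
 */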
2195 static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2196 {
2197         struct vcpu_svm *svm = to_svm(vcpu);
2198         u32 exit_code = svm->vmcb->control.exit_code;
2199
2200         KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
2201                     (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
2202
2203         if (is_nested(svm)) {
2204                 nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
2205                             exit_code, svm->vmcb->control.exit_info_1,
2206                             svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
2207                 if (nested_svm_exit_handled(svm, true)) {
2208                         nested_svm_vmexit(svm);
2209                         nsvm_printk("-> #VMEXIT\n");
2210                         return 1;
2211                 }
2212         }
2213
2214         if (npt_enabled) {
2215                 int mmu_reload = 0;
2216                 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
2217                         svm_set_cr0(vcpu, svm->vmcb->save.cr0);
2218                         mmu_reload = 1;
2219                 }
2220                 vcpu->arch.cr0 = svm->vmcb->save.cr0;
2221                 vcpu->arch.cr3 = svm->vmcb->save.cr3;
2222                 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
2223                         if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
2224                                 kvm_inject_gp(vcpu, 0);
2225                                 return 1;
2226                         }
2227                 }
2228                 if (mmu_reload) {
2229                         kvm_mmu_reset_context(vcpu);
2230                         kvm_mmu_load(vcpu);
2231                 }
2232         }
2233
2234
2235         if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
2236                 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
2237                 kvm_run->fail_entry.hardware_entry_failure_reason
2238                         = svm->vmcb->control.exit_code;
2239                 return 0;
2240         }
2241
2242         if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
2243             exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
2244             exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH)
2245                 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
2246                        "exit_code 0x%x\n",
2247                        __func__, svm->vmcb->control.exit_int_info,
2248                        exit_code);
2249
2250         if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
2251             || !svm_exit_handlers[exit_code]) {
2252                 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
2253                 kvm_run->hw.hardware_exit_reason = exit_code;
2254                 return 0;
2255         }
2256
2257         return svm_exit_handlers[exit_code](svm, kvm_run);
2258 }
2259
2260 static void reload_tss(struct kvm_vcpu *vcpu)
2261 {
2262         int cpu = raw_smp_processor_id();
2263
2264         struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
2265         svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */
2266         load_TR_desc();
2267 }
2268
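/*
 * Make sure the vcpu runs with a valid ASID: allocate a new one if the vcpu
 * was migrated to another physical CPU or the per-CPU ASID generation has
 * changed since the last run.
 */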
2269 static void pre_svm_run(struct vcpu_svm *svm)
2270 {
2271         int cpu = raw_smp_processor_id();
2272
2273         struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
2274
2275         svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
2276         if (svm->vcpu.cpu != cpu ||
2277             svm->asid_generation != svm_data->asid_generation)
2278                 new_asid(svm, svm_data);
2279 }
2280
2281 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
2282 {
2283         struct vcpu_svm *svm = to_svm(vcpu);
2284
2285         svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
2286         vcpu->arch.hflags |= HF_NMI_MASK;
2287         svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
2288         ++vcpu->stat.nmi_injections;
2289 }
2290
2291 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
2292 {
2293         struct vmcb_control_area *control;
2294
2295         KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler);
2296
2297         ++svm->vcpu.stat.irq_injections;
2298         control = &svm->vmcb->control;
2299         control->int_vector = irq;
2300         control->int_ctl &= ~V_INTR_PRIO_MASK;
2301         control->int_ctl |= V_IRQ_MASK |
2302                 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
2303 }
2304
2305 static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
2306 {
2307         struct vcpu_svm *svm = to_svm(vcpu);
2308
2309         svm->vmcb->control.event_inj = nr |
2310                 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
2311 }
2312
2313 static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
2314 {
2315         struct vcpu_svm *svm = to_svm(vcpu);
2316
2317         nested_svm_intr(svm);
2318
2319         svm_queue_irq(vcpu, irq);
2320 }
2321
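/*
 * If the highest pending interrupt is currently blocked by the TPR, start
 * intercepting CR8 writes so we notice when the guest lowers its TPR and the
 * interrupt becomes deliverable.
 */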
2322 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
2323 {
2324         struct vcpu_svm *svm = to_svm(vcpu);
2325
2326         if (irr == -1)
2327                 return;
2328
2329         if (tpr >= irr)
2330                 svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
2331 }
2332
2333 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
2334 {
2335         struct vcpu_svm *svm = to_svm(vcpu);
2336         struct vmcb *vmcb = svm->vmcb;
2337         return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
2338                 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
2339 }
2340
2341 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
2342 {
2343         struct vcpu_svm *svm = to_svm(vcpu);
2344         struct vmcb *vmcb = svm->vmcb;
2345         return (vmcb->save.rflags & X86_EFLAGS_IF) &&
2346                 !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
2347                 (svm->vcpu.arch.hflags & HF_GIF_MASK);
2348 }
2349
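/*
 * Open an interrupt window: inject a dummy virtual interrupt so the CPU
 * exits with a VINTR intercept as soon as the guest is able to accept
 * interrupts (handled by interrupt_window_interception()).
 */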
2350 static void enable_irq_window(struct kvm_vcpu *vcpu)
2351 {
2352         svm_set_vintr(to_svm(vcpu));
2353         svm_inject_irq(to_svm(vcpu), 0x0);
2354 }
2355
2356 static void enable_nmi_window(struct kvm_vcpu *vcpu)
2357 {
2358         struct vcpu_svm *svm = to_svm(vcpu);
2359
2360         if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
2361                 enable_irq_window(vcpu);
2362 }
2363
2364 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
2365 {
2366         return 0;
2367 }
2368
2369 static void svm_flush_tlb(struct kvm_vcpu *vcpu)
2370 {
2371         force_new_asid(vcpu);
2372 }
2373
2374 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
2375 {
2376 }
2377
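/*
 * After running the guest: if CR8 writes were not intercepted the guest may
 * have changed its TPR, so propagate V_TPR from the VMCB back with
 * kvm_set_cr8().
 */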
2378 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
2379 {
2380         struct vcpu_svm *svm = to_svm(vcpu);
2381
2382         if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
2383                 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
2384                 kvm_set_cr8(vcpu, cr8);
2385         }
2386 }
2387
2388 static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
2389 {
2390         struct vcpu_svm *svm = to_svm(vcpu);
2391         u64 cr8;
2392
2393         cr8 = kvm_get_cr8(vcpu);
2394         svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
2395         svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
2396 }
2397
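/*
 * Re-queue whatever event was in flight when the #VMEXIT happened (NMI,
 * exception with optional error code, or external interrupt), based on
 * exit_int_info, so it can be reinjected on the next entry.  Software
 * exceptions (#BP/#OF) are dropped and the instruction is re-executed.
 */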
2398 static void svm_complete_interrupts(struct vcpu_svm *svm)
2399 {
2400         u8 vector;
2401         int type;
2402         u32 exitintinfo = svm->vmcb->control.exit_int_info;
2403
2404         svm->vcpu.arch.nmi_injected = false;
2405         kvm_clear_exception_queue(&svm->vcpu);
2406         kvm_clear_interrupt_queue(&svm->vcpu);
2407
2408         if (!(exitintinfo & SVM_EXITINTINFO_VALID))
2409                 return;
2410
2411         vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
2412         type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
2413
2414         switch (type) {
2415         case SVM_EXITINTINFO_TYPE_NMI:
2416                 svm->vcpu.arch.nmi_injected = true;
2417                 break;
2418         case SVM_EXITINTINFO_TYPE_EXEPT:
2419                 /* In case of a software exception do not reinject the
2420                    exception vector, but re-execute the instruction instead */
2421                 if (vector == BP_VECTOR || vector == OF_VECTOR)
2422                         break;
2423                 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
2424                         u32 err = svm->vmcb->control.exit_int_info_err;
2425                         kvm_queue_exception_e(&svm->vcpu, vector, err);
2426
2427                 } else
2428                         kvm_queue_exception(&svm->vcpu, vector);
2429                 break;
2430         case SVM_EXITINTINFO_TYPE_INTR:
2431                 kvm_queue_interrupt(&svm->vcpu, vector);
2432                 break;
2433         default:
2434                 break;
2435         }
2436 }
2437
2438 #ifdef CONFIG_X86_64
2439 #define R "r"
2440 #else
2441 #define R "e"
2442 #endif
2443
2444 static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2445 {
2446         struct vcpu_svm *svm = to_svm(vcpu);
2447         u16 fs_selector;
2448         u16 gs_selector;
2449         u16 ldt_selector;
2450
2451         svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
2452         svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
2453         svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
2454
2455         pre_svm_run(svm);
2456
2457         sync_lapic_to_cr8(vcpu);
2458
2459         save_host_msrs(vcpu);
2460         fs_selector = kvm_read_fs();
2461         gs_selector = kvm_read_gs();
2462         ldt_selector = kvm_read_ldt();
2463         svm->host_cr2 = kvm_read_cr2();
2464         if (!is_nested(svm))
2465                 svm->vmcb->save.cr2 = vcpu->arch.cr2;
2466         /* required for live migration with NPT */
2467         if (npt_enabled)
2468                 svm->vmcb->save.cr3 = vcpu->arch.cr3;
2469
2470         clgi();
2471
2472         local_irq_enable();
2473
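        /*
         * The remaining general purpose registers are loaded from
         * vcpu.arch.regs, VMLOAD/VMRUN/VMSAVE are executed with RAX holding
         * the physical address of the VMCB, and the guest registers are
         * written back to vcpu.arch.regs afterwards (RAX/RSP/RIP live in the
         * VMCB and are synced separately).
         */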
2474         asm volatile (
2475                 "push %%"R"bp; \n\t"
2476                 "mov %c[rbx](%[svm]), %%"R"bx \n\t"
2477                 "mov %c[rcx](%[svm]), %%"R"cx \n\t"
2478                 "mov %c[rdx](%[svm]), %%"R"dx \n\t"
2479                 "mov %c[rsi](%[svm]), %%"R"si \n\t"
2480                 "mov %c[rdi](%[svm]), %%"R"di \n\t"
2481                 "mov %c[rbp](%[svm]), %%"R"bp \n\t"
2482 #ifdef CONFIG_X86_64
2483                 "mov %c[r8](%[svm]),  %%r8  \n\t"
2484                 "mov %c[r9](%[svm]),  %%r9  \n\t"
2485                 "mov %c[r10](%[svm]), %%r10 \n\t"
2486                 "mov %c[r11](%[svm]), %%r11 \n\t"
2487                 "mov %c[r12](%[svm]), %%r12 \n\t"
2488                 "mov %c[r13](%[svm]), %%r13 \n\t"
2489                 "mov %c[r14](%[svm]), %%r14 \n\t"
2490                 "mov %c[r15](%[svm]), %%r15 \n\t"
2491 #endif
2492
2493                 /* Enter guest mode */
2494                 "push %%"R"ax \n\t"
2495                 "mov %c[vmcb](%[svm]), %%"R"ax \n\t"
2496                 __ex(SVM_VMLOAD) "\n\t"
2497                 __ex(SVM_VMRUN) "\n\t"
2498                 __ex(SVM_VMSAVE) "\n\t"
2499                 "pop %%"R"ax \n\t"
2500
2501                 /* Save guest registers, load host registers */
2502                 "mov %%"R"bx, %c[rbx](%[svm]) \n\t"
2503                 "mov %%"R"cx, %c[rcx](%[svm]) \n\t"
2504                 "mov %%"R"dx, %c[rdx](%[svm]) \n\t"
2505                 "mov %%"R"si, %c[rsi](%[svm]) \n\t"
2506                 "mov %%"R"di, %c[rdi](%[svm]) \n\t"
2507                 "mov %%"R"bp, %c[rbp](%[svm]) \n\t"
2508 #ifdef CONFIG_X86_64
2509                 "mov %%r8,  %c[r8](%[svm]) \n\t"
2510                 "mov %%r9,  %c[r9](%[svm]) \n\t"
2511                 "mov %%r10, %c[r10](%[svm]) \n\t"
2512                 "mov %%r11, %c[r11](%[svm]) \n\t"
2513                 "mov %%r12, %c[r12](%[svm]) \n\t"
2514                 "mov %%r13, %c[r13](%[svm]) \n\t"
2515                 "mov %%r14, %c[r14](%[svm]) \n\t"
2516                 "mov %%r15, %c[r15](%[svm]) \n\t"
2517 #endif
2518                 "pop %%"R"bp"
2519                 :
2520                 : [svm]"a"(svm),
2521                   [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
2522                   [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
2523                   [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
2524                   [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
2525                   [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
2526                   [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
2527                   [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
2528 #ifdef CONFIG_X86_64
2529                   , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
2530                   [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
2531                   [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
2532                   [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
2533                   [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
2534                   [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
2535                   [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
2536                   [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
2537 #endif
2538                 : "cc", "memory"
2539                 , R"bx", R"cx", R"dx", R"si", R"di"
2540 #ifdef CONFIG_X86_64
2541                 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2542 #endif
2543                 );
2544
2545         vcpu->arch.cr2 = svm->vmcb->save.cr2;
2546         vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
2547         vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
2548         vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
2549
2550         kvm_write_cr2(svm->host_cr2);
2551
2552         kvm_load_fs(fs_selector);
2553         kvm_load_gs(gs_selector);
2554         kvm_load_ldt(ldt_selector);
2555         load_host_msrs(vcpu);
2556
2557         reload_tss(vcpu);
2558
2559         local_irq_disable();
2560
2561         stgi();
2562
2563         sync_cr8_to_lapic(vcpu);
2564
2565         svm->next_rip = 0;
2566
2567         svm_complete_interrupts(svm);
2568 }
2569
2570 #undef R
2571
2572 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
2573 {
2574         struct vcpu_svm *svm = to_svm(vcpu);
2575
2576         if (npt_enabled) {
2577                 svm->vmcb->control.nested_cr3 = root;
2578                 force_new_asid(vcpu);
2579                 return;
2580         }
2581
2582         svm->vmcb->save.cr3 = root;
2583         force_new_asid(vcpu);
2584
2585         if (vcpu->fpu_active) {
2586                 svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
2587                 svm->vmcb->save.cr0 |= X86_CR0_TS;
2588                 vcpu->fpu_active = 0;
2589         }
2590 }
2591
2592 static int is_disabled(void)
2593 {
2594         u64 vm_cr;
2595
2596         rdmsrl(MSR_VM_CR, vm_cr);
2597         if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
2598                 return 1;
2599
2600         return 0;
2601 }
2602
2603 static void
2604 svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
2605 {
2606         /*
2607          * Patch in the VMMCALL instruction:
2608          */
2609         hypercall[0] = 0x0f;
2610         hypercall[1] = 0x01;
2611         hypercall[2] = 0xd9;
2612 }
2613
2614 static void svm_check_processor_compat(void *rtn)
2615 {
2616         *(int *)rtn = 0;
2617 }
2618
2619 static bool svm_cpu_has_accelerated_tpr(void)
2620 {
2621         return false;
2622 }
2623
2624 static int get_npt_level(void)
2625 {
2626 #ifdef CONFIG_X86_64
2627         return PT64_ROOT_LEVEL;
2628 #else
2629         return PT32E_ROOT_LEVEL;
2630 #endif
2631 }
2632
2633 static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
2634 {
2635         return 0;
2636 }
2637
2638 static struct kvm_x86_ops svm_x86_ops = {
2639         .cpu_has_kvm_support = has_svm,
2640         .disabled_by_bios = is_disabled,
2641         .hardware_setup = svm_hardware_setup,
2642         .hardware_unsetup = svm_hardware_unsetup,
2643         .check_processor_compatibility = svm_check_processor_compat,
2644         .hardware_enable = svm_hardware_enable,
2645         .hardware_disable = svm_hardware_disable,
2646         .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
2647
2648         .vcpu_create = svm_create_vcpu,
2649         .vcpu_free = svm_free_vcpu,
2650         .vcpu_reset = svm_vcpu_reset,
2651
2652         .prepare_guest_switch = svm_prepare_guest_switch,
2653         .vcpu_load = svm_vcpu_load,
2654         .vcpu_put = svm_vcpu_put,
2655
2656         .set_guest_debug = svm_guest_debug,
2657         .get_msr = svm_get_msr,
2658         .set_msr = svm_set_msr,
2659         .get_segment_base = svm_get_segment_base,
2660         .get_segment = svm_get_segment,
2661         .set_segment = svm_set_segment,
2662         .get_cpl = svm_get_cpl,
2663         .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
2664         .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
2665         .set_cr0 = svm_set_cr0,
2666         .set_cr3 = svm_set_cr3,
2667         .set_cr4 = svm_set_cr4,
2668         .set_efer = svm_set_efer,
2669         .get_idt = svm_get_idt,
2670         .set_idt = svm_set_idt,
2671         .get_gdt = svm_get_gdt,
2672         .set_gdt = svm_set_gdt,
2673         .get_dr = svm_get_dr,
2674         .set_dr = svm_set_dr,
2675         .get_rflags = svm_get_rflags,
2676         .set_rflags = svm_set_rflags,
2677
2678         .tlb_flush = svm_flush_tlb,
2679
2680         .run = svm_vcpu_run,
2681         .handle_exit = handle_exit,
2682         .skip_emulated_instruction = skip_emulated_instruction,
2683         .set_interrupt_shadow = svm_set_interrupt_shadow,
2684         .get_interrupt_shadow = svm_get_interrupt_shadow,
2685         .patch_hypercall = svm_patch_hypercall,
2686         .set_irq = svm_set_irq,
2687         .set_nmi = svm_inject_nmi,
2688         .queue_exception = svm_queue_exception,
2689         .interrupt_allowed = svm_interrupt_allowed,
2690         .nmi_allowed = svm_nmi_allowed,
2691         .enable_nmi_window = enable_nmi_window,
2692         .enable_irq_window = enable_irq_window,
2693         .update_cr8_intercept = update_cr8_intercept,
2694
2695         .set_tss_addr = svm_set_tss_addr,
2696         .get_tdp_level = get_npt_level,
2697         .get_mt_mask = svm_get_mt_mask,
2698 };
2699
2700 static int __init svm_init(void)
2701 {
2702         return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
2703                               THIS_MODULE);
2704 }
2705
2706 static void __exit svm_exit(void)
2707 {
2708         kvm_exit();
2709 }
2710
2711 module_init(svm_init)
2712 module_exit(svm_exit)