KVM: x86: Drop unneeded CONFIG_HAS_IOMEM check
[safe/jmp/linux-2.6] / arch / x86 / kvm / x86.c
index 95fa45c..5d450cc 100644 (file)
@@ -77,6 +77,7 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
+static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
                                    struct kvm_cpuid_entry2 __user *entries);
 
@@ -132,18 +133,16 @@ unsigned long segment_base(u16 selector)
        if (selector == 0)
                return 0;
 
-       asm("sgdt %0" : "=m"(gdt));
+       kvm_get_gdt(&gdt);
        table_base = gdt.base;
 
        if (selector & 4) {           /* from ldt */
-               u16 ldt_selector;
+               u16 ldt_selector = kvm_read_ldt();
 
-               asm("sldt %0" : "=g"(ldt_selector));
                table_base = segment_base(ldt_selector);
        }
        d = (struct desc_struct *)(table_base + (selector & ~7));
-       v = d->base0 | ((unsigned long)d->base1 << 16) |
-               ((unsigned long)d->base2 << 24);
+       v = get_desc_base(d);
 #ifdef CONFIG_X86_64
        if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
                v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
@@ -223,12 +222,37 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
 }
 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
 
-static void __queue_exception(struct kvm_vcpu *vcpu)
+/*
+ * Checks if cpl <= required_cpl; if true, return true.  Otherwise queue
+ * a #GP and return false.
+ */
+bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
+{
+       if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
+               return true;
+       kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+       return false;
+}
+EXPORT_SYMBOL_GPL(kvm_require_cpl);
+
+unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
+{
+       unsigned long rflags;
+
+       rflags = kvm_x86_ops->get_rflags(vcpu);
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+               rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF);
+       return rflags;
+}
+EXPORT_SYMBOL_GPL(kvm_get_rflags);
+
+void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
-       kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
-                                    vcpu->arch.exception.has_error_code,
-                                    vcpu->arch.exception.error_code);
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+               rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+       kvm_x86_ops->set_rflags(vcpu, rflags);
 }
+EXPORT_SYMBOL_GPL(kvm_set_rflags);
 
 /*
  * Load the pae pdptrs.  Return true is they are all valid.
@@ -479,16 +503,19 @@ static inline u32 bit(int bitno)
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
  *
  * This list is modified at module load time to reflect the
- * capabilities of the host cpu.
+ * capabilities of the host cpu. This capabilities test skips MSRs that are
+ * kvm-specific. Those are put in the beginning of the list.
  */
+
+#define KVM_SAVE_MSRS_BEGIN    2
 static u32 msrs_to_save[] = {
+       MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
        MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
        MSR_K6_STAR,
 #ifdef CONFIG_X86_64
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
-       MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
-       MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
+       MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
 };
 
 static unsigned num_msrs_to_save;
@@ -1057,9 +1084,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_K8_SYSCFG:
        case MSR_K7_HWCR:
        case MSR_VM_HSAVE_PA:
+       case MSR_P6_PERFCTR0:
+       case MSR_P6_PERFCTR1:
        case MSR_P6_EVNTSEL0:
        case MSR_P6_EVNTSEL1:
        case MSR_K7_EVNTSEL0:
+       case MSR_K7_PERFCTR0:
        case MSR_K8_INT_PENDING_MSG:
        case MSR_AMD64_NB_CFG:
        case MSR_FAM10H_MMIO_CONF_BASE:
@@ -1212,7 +1242,10 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_IRQ_INJECT_STATUS:
        case KVM_CAP_ASSIGN_DEV_IRQ:
        case KVM_CAP_IRQFD:
+       case KVM_CAP_IOEVENTFD:
        case KVM_CAP_PIT2:
+       case KVM_CAP_PIT_STATE2:
+       case KVM_CAP_SET_IDENTITY_MAP_ADDR:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
@@ -1227,8 +1260,8 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_MEMORY_SLOTS;
                break;
-       case KVM_CAP_PV_MMU:
-               r = !tdp_enabled;
+       case KVM_CAP_PV_MMU:    /* obsolete */
+               r = 0;
                break;
        case KVM_CAP_IOMMU:
                r = iommu_found();
@@ -1315,6 +1348,12 @@ out:
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        kvm_x86_ops->vcpu_load(vcpu, cpu);
+       if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
+               unsigned long khz = cpufreq_quick_get(cpu);
+               if (!khz)
+                       khz = tsc_khz;
+               per_cpu(cpu_tsc_khz, cpu) = khz;
+       }
        kvm_request_guest_time_update(vcpu);
 }
 
@@ -1450,6 +1489,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                         u32 index, int *nent, int maxnent)
 {
        unsigned f_nx = is_efer_nx() ? F(NX) : 0;
+       unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0;
 #ifdef CONFIG_X86_64
        unsigned f_lm = F(LM);
 #else
@@ -1474,7 +1514,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
                F(PAT) | F(PSE36) | 0 /* Reserved */ |
                f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
-               F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ |
+               F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ |
                0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
        /* cpuid 1.ecx */
        const u32 kvm_supported_word4_x86_features =
@@ -1504,6 +1544,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
        case 1:
                entry->edx &= kvm_supported_word0_x86_features;
                entry->ecx &= kvm_supported_word4_x86_features;
+               /* we support x2apic emulation even if host does not support
+                * it since we emulate x2apic in software */
+               entry->ecx |= F(X2APIC);
                break;
        /* function 2 entries are STATEFUL. That is, repeated cpuid commands
         * may return different values. This forces us to get_cpu() before
@@ -1576,6 +1619,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 
        if (cpuid->nent < 1)
                goto out;
+       if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+               cpuid->nent = KVM_MAX_CPUID_ENTRIES;
        r = -ENOMEM;
        cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
        if (!cpuid_entries)
@@ -1628,6 +1673,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
        vcpu_load(vcpu);
        memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
        kvm_apic_post_state_restore(vcpu);
+       update_cr8_intercept(vcpu);
        vcpu_put(vcpu);
 
        return 0;
@@ -1674,7 +1720,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
        unsigned bank_num = mcg_cap & 0xff, bank;
 
        r = -EINVAL;
-       if (!bank_num)
+       if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
                goto out;
        if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
                goto out;
@@ -1910,6 +1956,13 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
        return ret;
 }
 
+static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
+                                             u64 ident_addr)
+{
+       kvm->arch.ept_identity_map_addr = ident_addr;
+       return 0;
+}
+
 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
                                          u32 kvm_nr_mmu_pages)
 {
@@ -2013,9 +2066,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
                        sizeof(struct kvm_pic_state));
                break;
        case KVM_IRQCHIP_IOAPIC:
-               memcpy(&chip->chip.ioapic,
-                       ioapic_irqchip(kvm),
-                       sizeof(struct kvm_ioapic_state));
+               r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
                break;
        default:
                r = -EINVAL;
@@ -2045,11 +2096,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
                spin_unlock(&pic_irqchip(kvm)->lock);
                break;
        case KVM_IRQCHIP_IOAPIC:
-               mutex_lock(&kvm->irq_lock);
-               memcpy(ioapic_irqchip(kvm),
-                       &chip->chip.ioapic,
-                       sizeof(struct kvm_ioapic_state));
-               mutex_unlock(&kvm->irq_lock);
+               r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
                break;
        default:
                r = -EINVAL;
@@ -2075,7 +2122,36 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 
        mutex_lock(&kvm->arch.vpit->pit_state.lock);
        memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
-       kvm_pit_load_count(kvm, 0, ps->channels[0].count);
+       kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+       return r;
+}
+
+static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
+{
+       int r = 0;
+
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
+       memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
+               sizeof(ps->channels));
+       ps->flags = kvm->arch.vpit->pit_state.flags;
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+       return r;
+}
+
+static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
+{
+       int r = 0, start = 0;
+       u32 prev_legacy, cur_legacy;
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
+       prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
+       cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
+       if (!prev_legacy && cur_legacy)
+               start = 1;
+       memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
+              sizeof(kvm->arch.vpit->pit_state.channels));
+       kvm->arch.vpit->pit_state.flags = ps->flags;
+       kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
        mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        return r;
 }
@@ -2113,7 +2189,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                spin_lock(&kvm->mmu_lock);
                kvm_mmu_slot_remove_write_access(kvm, log->slot);
                spin_unlock(&kvm->mmu_lock);
-               kvm_flush_remote_tlbs(kvm);
                memslot = &kvm->memslots[log->slot];
                n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
                memset(memslot->dirty_bitmap, 0, n);
@@ -2129,7 +2204,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 {
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
-       int r = -EINVAL;
+       int r = -ENOTTY;
        /*
         * This union makes it completely explicit to gcc-3.x
         * that these two variables' stack usage should be
@@ -2137,6 +2212,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
         */
        union {
                struct kvm_pit_state ps;
+               struct kvm_pit_state2 ps2;
                struct kvm_memory_alias alias;
                struct kvm_pit_config pit_config;
        } u;
@@ -2147,6 +2223,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
                if (r < 0)
                        goto out;
                break;
+       case KVM_SET_IDENTITY_MAP_ADDR: {
+               u64 ident_addr;
+
+               r = -EFAULT;
+               if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
+                       goto out;
+               r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
+               if (r < 0)
+                       goto out;
+               break;
+       }
        case KVM_SET_MEMORY_REGION: {
                struct kvm_memory_region kvm_mem;
                struct kvm_userspace_memory_region kvm_userspace_mem;
@@ -2227,10 +2314,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
                if (irqchip_in_kernel(kvm)) {
                        __s32 status;
-                       mutex_lock(&kvm->irq_lock);
                        status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
                                        irq_event.irq, irq_event.level);
-                       mutex_unlock(&kvm->irq_lock);
                        if (ioctl == KVM_IRQ_LINE_STATUS) {
                                irq_event.status = status;
                                if (copy_to_user(argp, &irq_event,
@@ -2319,6 +2404,32 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = 0;
                break;
        }
+       case KVM_GET_PIT2: {
+               r = -ENXIO;
+               if (!kvm->arch.vpit)
+                       goto out;
+               r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
+               if (r)
+                       goto out;
+               r = -EFAULT;
+               if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
+                       goto out;
+               r = 0;
+               break;
+       }
+       case KVM_SET_PIT2: {
+               r = -EFAULT;
+               if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
+                       goto out;
+               r = -ENXIO;
+               if (!kvm->arch.vpit)
+                       goto out;
+               r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
+               if (r)
+                       goto out;
+               r = 0;
+               break;
+       }
        case KVM_REINJECT_CONTROL: {
                struct kvm_reinject_control control;
                r =  -EFAULT;
@@ -2342,7 +2453,8 @@ static void kvm_init_msr_list(void)
        u32 dummy[2];
        unsigned i, j;
 
-       for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
+       /* skip the first msrs in the list. KVM-specific */
+       for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
                        continue;
                if (j < i)
@@ -2555,12 +2667,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
                                     unsigned int bytes,
                                     struct kvm_vcpu *vcpu)
 {
-       static int reported;
-
-       if (!reported) {
-               reported = 1;
-               printk(KERN_WARNING "kvm: emulating exchange as write\n");
-       }
+       printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
 #ifndef CONFIG_X86_64
        /* guests cmpxchg8b have to be emulated atomically */
        if (bytes == 8) {
@@ -2671,18 +2778,18 @@ static void cache_all_regs(struct kvm_vcpu *vcpu)
 }
 
 int emulate_instruction(struct kvm_vcpu *vcpu,
-                       struct kvm_run *run,
                        unsigned long cr2,
                        u16 error_code,
                        int emulation_type)
 {
        int r, shadow_mask;
        struct decode_cache *c;
+       struct kvm_run *run = vcpu->run;
 
        kvm_clear_exception_queue(vcpu);
        vcpu->arch.mmio_fault_cr2 = cr2;
        /*
-        * TODO: fix x86_emulate.c to use guest_read/write_register
+        * TODO: fix emulate.c to use guest_read/write_register
         * instead of direct ->regs accesses, can save hundred cycles
         * on Intel for instructions that don't read/change RSP, for
         * for example.
@@ -2697,7 +2804,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
                kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
                vcpu->arch.emulate_ctxt.vcpu = vcpu;
-               vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
+               vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
                vcpu->arch.emulate_ctxt.mode =
                        (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
                        ? X86EMUL_MODE_REAL : cs_l
@@ -2775,7 +2882,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
                return EMULATE_DO_MMIO;
        }
 
-       kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+       kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 
        if (vcpu->mmio_is_write) {
                vcpu->mmio_needed = 0;
@@ -2883,8 +2990,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
        return r;
 }
 
-int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
-                 int size, unsigned port)
+int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
 {
        unsigned long val;
 
@@ -2913,7 +3019,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_pio);
 
-int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
                  int size, unsigned long count, int down,
                  gva_t address, int rep, unsigned port)
 {
@@ -2986,9 +3092,6 @@ static void bounce_off(void *info)
        /* nothing */
 }
 
-static unsigned int  ref_freq;
-static unsigned long tsc_khz_ref;
-
 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                                     void *data)
 {
@@ -2997,14 +3100,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
        struct kvm_vcpu *vcpu;
        int i, send_ipi = 0;
 
-       if (!ref_freq)
-               ref_freq = freq->old;
-
        if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
                return 0;
        if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
                return 0;
-       per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+       per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
 
        spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -3041,9 +3141,28 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
         .notifier_call  = kvmclock_cpufreq_notifier
 };
 
+static void kvm_timer_init(void)
+{
+       int cpu;
+
+       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+               cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+                                         CPUFREQ_TRANSITION_NOTIFIER);
+               for_each_online_cpu(cpu) {
+                       unsigned long khz = cpufreq_get(cpu);
+                       if (!khz)
+                               khz = tsc_khz;
+                       per_cpu(cpu_tsc_khz, cpu) = khz;
+               }
+       } else {
+               for_each_possible_cpu(cpu)
+                       per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+       }
+}
+
 int kvm_arch_init(void *opaque)
 {
-       int r, cpu;
+       int r;
        struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
 
        if (kvm_x86_ops) {
@@ -3075,13 +3194,7 @@ int kvm_arch_init(void *opaque)
        kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
                        PT_DIRTY_MASK, PT64_NX_MASK, 0);
 
-       for_each_possible_cpu(cpu)
-               per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
-       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
-               tsc_khz_ref = tsc_khz;
-               cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
-                                         CPUFREQ_TRANSITION_NOTIFIER);
-       }
+       kvm_timer_init();
 
        return 0;
 
@@ -3141,6 +3254,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
                a3 &= 0xFFFFFFFF;
        }
 
+       if (kvm_x86_ops->get_cpl(vcpu) != 0) {
+               ret = -KVM_EPERM;
+               goto out;
+       }
+
        switch (nr) {
        case KVM_HC_VAPIC_POLL_IRQ:
                ret = 0;
@@ -3152,6 +3270,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
                ret = -KVM_ENOSYS;
                break;
        }
+out:
        kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
        ++vcpu->stat.hypercalls;
        return r;
@@ -3203,7 +3322,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
                   unsigned long *rflags)
 {
        kvm_lmsw(vcpu, msw);
-       *rflags = kvm_x86_ops->get_rflags(vcpu);
+       *rflags = kvm_get_rflags(vcpu);
 }
 
 unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
@@ -3241,7 +3360,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
        switch (cr) {
        case 0:
                kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
-               *rflags = kvm_x86_ops->get_rflags(vcpu);
+               *rflags = kvm_get_rflags(vcpu);
                break;
        case 2:
                vcpu->arch.cr2 = val;
@@ -3361,18 +3480,18 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
  *
  * No need to exit to userspace if we already have an interrupt queued.
  */
-static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
-                                         struct kvm_run *kvm_run)
+static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
 {
        return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
-               kvm_run->request_interrupt_window &&
+               vcpu->run->request_interrupt_window &&
                kvm_arch_interrupt_allowed(vcpu));
 }
 
-static void post_kvm_run_save(struct kvm_vcpu *vcpu,
-                             struct kvm_run *kvm_run)
+static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 {
-       kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+       struct kvm_run *kvm_run = vcpu->run;
+
+       kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
        kvm_run->cr8 = kvm_get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
        if (irqchip_in_kernel(vcpu->kvm))
@@ -3417,6 +3536,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
        if (!kvm_x86_ops->update_cr8_intercept)
                return;
 
+       if (!vcpu->arch.apic)
+               return;
+
        if (!vcpu->arch.apic->vapic_addr)
                max_irr = kvm_lapic_find_highest_irr(vcpu);
        else
@@ -3430,9 +3552,16 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
        kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
 }
 
-static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void inject_pending_event(struct kvm_vcpu *vcpu)
 {
        /* try to reinject previous events if any */
+       if (vcpu->arch.exception.pending) {
+               kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
+                                         vcpu->arch.exception.has_error_code,
+                                         vcpu->arch.exception.error_code);
+               return;
+       }
+
        if (vcpu->arch.nmi_injected) {
                kvm_x86_ops->set_nmi(vcpu);
                return;
@@ -3459,11 +3588,11 @@ static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        }
 }
 
-static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
        int r;
        bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-               kvm_run->request_interrupt_window;
+               vcpu->run->request_interrupt_window;
 
        if (vcpu->requests)
                if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -3484,12 +3613,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                        kvm_x86_ops->tlb_flush(vcpu);
                if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
                                       &vcpu->requests)) {
-                       kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
+                       vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
                        r = 0;
                        goto out;
                }
                if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
-                       kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+                       vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
                        r = 0;
                        goto out;
                }
@@ -3506,16 +3635,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        smp_mb__after_clear_bit();
 
        if (vcpu->requests || need_resched() || signal_pending(current)) {
+               set_bit(KVM_REQ_KICK, &vcpu->requests);
                local_irq_enable();
                preempt_enable();
                r = 1;
                goto out;
        }
 
-       if (vcpu->arch.exception.pending)
-               __queue_exception(vcpu);
-       else
-               inject_pending_irq(vcpu, kvm_run);
+       inject_pending_event(vcpu);
 
        /* enable NMI/IRQ window open exits if needed */
        if (vcpu->arch.nmi_pending)
@@ -3532,14 +3659,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        kvm_guest_enter();
 
-       get_debugreg(vcpu->arch.host_dr6, 6);
-       get_debugreg(vcpu->arch.host_dr7, 7);
        if (unlikely(vcpu->arch.switch_db_regs)) {
-               get_debugreg(vcpu->arch.host_db[0], 0);
-               get_debugreg(vcpu->arch.host_db[1], 1);
-               get_debugreg(vcpu->arch.host_db[2], 2);
-               get_debugreg(vcpu->arch.host_db[3], 3);
-
                set_debugreg(0, 7);
                set_debugreg(vcpu->arch.eff_db[0], 0);
                set_debugreg(vcpu->arch.eff_db[1], 1);
@@ -3548,17 +3668,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        }
 
        trace_kvm_entry(vcpu->vcpu_id);
-       kvm_x86_ops->run(vcpu, kvm_run);
+       kvm_x86_ops->run(vcpu);
 
-       if (unlikely(vcpu->arch.switch_db_regs)) {
-               set_debugreg(0, 7);
-               set_debugreg(vcpu->arch.host_db[0], 0);
-               set_debugreg(vcpu->arch.host_db[1], 1);
-               set_debugreg(vcpu->arch.host_db[2], 2);
-               set_debugreg(vcpu->arch.host_db[3], 3);
+       if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
+               set_debugreg(current->thread.debugreg0, 0);
+               set_debugreg(current->thread.debugreg1, 1);
+               set_debugreg(current->thread.debugreg2, 2);
+               set_debugreg(current->thread.debugreg3, 3);
+               set_debugreg(current->thread.debugreg6, 6);
+               set_debugreg(current->thread.debugreg7, 7);
        }
-       set_debugreg(vcpu->arch.host_dr6, 6);
-       set_debugreg(vcpu->arch.host_dr7, 7);
 
        set_bit(KVM_REQ_KICK, &vcpu->requests);
        local_irq_enable();
@@ -3590,13 +3709,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        kvm_lapic_sync_from_vapic(vcpu);
 
-       r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
+       r = kvm_x86_ops->handle_exit(vcpu);
 out:
        return r;
 }
 
 
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
        int r;
 
@@ -3616,7 +3735,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        r = 1;
        while (r > 0) {
                if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
-                       r = vcpu_enter_guest(vcpu, kvm_run);
+                       r = vcpu_enter_guest(vcpu);
                else {
                        up_read(&vcpu->kvm->slots_lock);
                        kvm_vcpu_block(vcpu);
@@ -3644,14 +3763,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                if (kvm_cpu_has_pending_timer(vcpu))
                        kvm_inject_pending_timer_irqs(vcpu);
 
-               if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+               if (dm_request_for_irq_injection(vcpu)) {
                        r = -EINTR;
-                       kvm_run->exit_reason = KVM_EXIT_INTR;
+                       vcpu->run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.request_irq_exits;
                }
                if (signal_pending(current)) {
                        r = -EINTR;
-                       kvm_run->exit_reason = KVM_EXIT_INTR;
+                       vcpu->run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.signal_exits;
                }
                if (need_resched()) {
@@ -3662,7 +3781,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        }
 
        up_read(&vcpu->kvm->slots_lock);
-       post_kvm_run_save(vcpu, kvm_run);
+       post_kvm_run_save(vcpu);
 
        vapic_exit(vcpu);
 
@@ -3695,15 +3814,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                if (r)
                        goto out;
        }
-#if CONFIG_HAS_IOMEM
        if (vcpu->mmio_needed) {
                memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
                vcpu->mmio_read_completed = 1;
                vcpu->mmio_needed = 0;
 
                down_read(&vcpu->kvm->slots_lock);
-               r = emulate_instruction(vcpu, kvm_run,
-                                       vcpu->arch.mmio_fault_cr2, 0,
+               r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
                                        EMULTYPE_NO_DECODE);
                up_read(&vcpu->kvm->slots_lock);
                if (r == EMULATE_DO_MMIO) {
@@ -3714,12 +3831,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                        goto out;
                }
        }
-#endif
        if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
                kvm_register_write(vcpu, VCPU_REGS_RAX,
                                     kvm_run->hypercall.ret);
 
-       r = __vcpu_run(vcpu, kvm_run);
+       r = __vcpu_run(vcpu);
 
 out:
        if (vcpu->sigset_active)
@@ -3753,13 +3869,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 #endif
 
        regs->rip = kvm_rip_read(vcpu);
-       regs->rflags = kvm_x86_ops->get_rflags(vcpu);
-
-       /*
-        * Don't leak debug flags in case they were set for guest debugging
-        */
-       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-               regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+       regs->rflags = kvm_get_rflags(vcpu);
 
        vcpu_put(vcpu);
 
@@ -3787,12 +3897,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
        kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
        kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
-
 #endif
 
        kvm_rip_write(vcpu, regs->rip);
-       kvm_x86_ops->set_rflags(vcpu, regs->rflags);
-
+       kvm_set_rflags(vcpu, regs->rflags);
 
        vcpu->arch.exception.pending = false;
 
@@ -3888,11 +3996,8 @@ static void kvm_set_segment(struct kvm_vcpu *vcpu,
 static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
                                   struct kvm_segment *kvm_desct)
 {
-       kvm_desct->base = seg_desc->base0;
-       kvm_desct->base |= seg_desc->base1 << 16;
-       kvm_desct->base |= seg_desc->base2 << 24;
-       kvm_desct->limit = seg_desc->limit0;
-       kvm_desct->limit |= seg_desc->limit << 16;
+       kvm_desct->base = get_desc_base(seg_desc);
+       kvm_desct->limit = get_desc_limit(seg_desc);
        if (seg_desc->g) {
                kvm_desct->limit <<= 12;
                kvm_desct->limit |= 0xfff;
@@ -3936,7 +4041,6 @@ static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
 static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
                                         struct desc_struct *seg_desc)
 {
-       gpa_t gpa;
        struct descriptor_table dtable;
        u16 index = selector >> 3;
 
@@ -3946,16 +4050,13 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
                kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
                return 1;
        }
-       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
-       gpa += index * 8;
-       return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8);
+       return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
 }
 
 /* allowed just for 8 bytes segments */
 static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
                                         struct desc_struct *seg_desc)
 {
-       gpa_t gpa;
        struct descriptor_table dtable;
        u16 index = selector >> 3;
 
@@ -3963,19 +4064,13 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 
        if (dtable.limit < index * 8 + 7)
                return 1;
-       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
-       gpa += index * 8;
-       return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8);
+       return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
 }
 
-static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
+static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu,
                             struct desc_struct *seg_desc)
 {
-       u32 base_addr;
-
-       base_addr = seg_desc->base0;
-       base_addr |= (seg_desc->base1 << 16);
-       base_addr |= (seg_desc->base2 << 24);
+       u32 base_addr = get_desc_base(seg_desc);
 
        return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
 }
@@ -4020,12 +4115,19 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se
        return 0;
 }
 
+static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
+{
+       return (seg != VCPU_SREG_LDTR) &&
+               (seg != VCPU_SREG_TR) &&
+               (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
+}
+
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
                                int type_bits, int seg)
 {
        struct kvm_segment kvm_seg;
 
-       if (!(vcpu->arch.cr0 & X86_CR0_PE))
+       if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE))
                return kvm_load_realmode_segment(vcpu, selector, seg);
        if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
                return 1;
@@ -4045,7 +4147,7 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
 {
        tss->cr3 = vcpu->arch.cr3;
        tss->eip = kvm_rip_read(vcpu);
-       tss->eflags = kvm_x86_ops->get_rflags(vcpu);
+       tss->eflags = kvm_get_rflags(vcpu);
        tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
        tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
        tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
@@ -4069,7 +4171,7 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
        kvm_set_cr3(vcpu, tss->cr3);
 
        kvm_rip_write(vcpu, tss->eip);
-       kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
+       kvm_set_rflags(vcpu, tss->eflags | 2);
 
        kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
        kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
@@ -4107,7 +4209,7 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
                                struct tss_segment_16 *tss)
 {
        tss->ip = kvm_rip_read(vcpu);
-       tss->flag = kvm_x86_ops->get_rflags(vcpu);
+       tss->flag = kvm_get_rflags(vcpu);
        tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
        tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
        tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
@@ -4122,14 +4224,13 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
        tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
        tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
        tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
-       tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
 }
 
 static int load_state_from_tss16(struct kvm_vcpu *vcpu,
                                 struct tss_segment_16 *tss)
 {
        kvm_rip_write(vcpu, tss->ip);
-       kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
+       kvm_set_rflags(vcpu, tss->flag | 2);
        kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
        kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
        kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
@@ -4264,7 +4365,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
                }
        }
 
-       if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
+       if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
                kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
                return 1;
        }
@@ -4275,8 +4376,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
        }
 
        if (reason == TASK_SWITCH_IRET) {
-               u32 eflags = kvm_x86_ops->get_rflags(vcpu);
-               kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
+               u32 eflags = kvm_get_rflags(vcpu);
+               kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
        }
 
        /* set back link to prev task only if NT bit is set in eflags
@@ -4297,8 +4398,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
                                         old_tss_base, &nseg_desc);
 
        if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
-               u32 eflags = kvm_x86_ops->get_rflags(vcpu);
-               kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
+               u32 eflags = kvm_get_rflags(vcpu);
+               kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
        }
 
        if (reason != TASK_SWITCH_IRET) {
@@ -4376,6 +4477,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
        kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
 
+       update_cr8_intercept(vcpu);
+
        /* Older userspace won't unhalt the vcpu on reset. */
        if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
            sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
@@ -4390,12 +4493,22 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                                        struct kvm_guest_debug *dbg)
 {
-       int i, r;
+       unsigned long rflags;
+       int i;
 
        vcpu_load(vcpu);
 
-       if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
-           (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
+       /*
+        * Read rflags as long as potentially injected trace flags are still
+        * filtered out.
+        */
+       rflags = kvm_get_rflags(vcpu);
+
+       vcpu->guest_debug = dbg->control;
+       if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
+               vcpu->guest_debug = 0;
+
+       if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
                for (i = 0; i < KVM_NR_DB_REGS; ++i)
                        vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
                vcpu->arch.switch_db_regs =
@@ -4406,16 +4519,22 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
        }
 
-       r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
+       /*
+        * Trigger an rflags update that will inject or remove the trace
+        * flags.
+        */
+       kvm_set_rflags(vcpu, rflags);
 
-       if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+       kvm_x86_ops->set_guest_debug(vcpu, dbg);
+
+       if (vcpu->guest_debug & KVM_GUESTDBG_INJECT_DB)
                kvm_queue_exception(vcpu, DB_VECTOR);
-       else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
+       else if (vcpu->guest_debug & KVM_GUESTDBG_INJECT_BP)
                kvm_queue_exception(vcpu, BP_VECTOR);
 
        vcpu_put(vcpu);
 
-       return r;
+       return 0;
 }
 
 /*
@@ -4613,9 +4732,17 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
        return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
-       kvm_x86_ops->hardware_enable(garbage);
+       /*
+        * Since this may be called from a hotplug notifcation,
+        * we can't get the CPU frequency directly.
+        */
+       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+               int cpu = raw_smp_processor_id();
+               per_cpu(cpu_tsc_khz, cpu) = 0;
+       }
+       return kvm_x86_ops->hardware_enable(garbage);
 }
 
 void kvm_arch_hardware_disable(void *garbage)
@@ -4817,7 +4944,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 
        kvm_mmu_slot_remove_write_access(kvm, mem->slot);
        spin_unlock(&kvm->mmu_lock);
-       kvm_flush_remote_tlbs(kvm);
 
        return 0;
 }
@@ -4831,8 +4957,10 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
-              || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
-              || vcpu->arch.nmi_pending;
+               || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
+               || vcpu->arch.nmi_pending ||
+               (kvm_arch_interrupt_allowed(vcpu) &&
+                kvm_cpu_has_interrupt(vcpu));
 }
 
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
@@ -4862,3 +4990,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);