#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
+#include <linux/cpufreq.h>
#include <asm/uaccess.h>
#include <asm/msr.h>
}
kvm_x86_ops->set_cr4(vcpu, cr4);
vcpu->arch.cr4 = cr4;
- vcpu->arch.mmu.base_role.cr4_pge = !!(cr4 & X86_CR4_PGE);
+ vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
}
return;
}
+ if (efer & EFER_FFXSR) {
+ struct kvm_cpuid_entry2 *feat;
+
+ feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
+ printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
+ kvm_inject_gp(vcpu, 0);
+ return;
+ }
+ }
+
if (efer & EFER_SVME) {
struct kvm_cpuid_entry2 *feat;
hv_clock->tsc_to_system_mul);
}
+static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+
static void kvm_write_guest_time(struct kvm_vcpu *v)
{
struct timespec ts;
if ((!vcpu->time_page))
return;
- if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
- kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
- vcpu->hv_clock_tsc_khz = tsc_khz;
+ if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
+ kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
+ vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
}
/* Keep irq disabled to prevent changes to the clock */
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
}
+static int kvm_request_guest_time_update(struct kvm_vcpu *v)
+{
+ struct kvm_vcpu_arch *vcpu = &v->arch;
+
+ if (!vcpu->time_page)
+ return 0;
+ set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
+ return 1;
+}
+
static bool msr_mtrr_valid(unsigned msr)
{
switch (msr) {
vcpu->arch.time_page = NULL;
}
- kvm_write_guest_time(vcpu);
+ kvm_request_guest_time_update(vcpu);
break;
}
default:
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
+ case KVM_CAP_CLOCKSOURCE:
case KVM_CAP_PIT:
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_REINJECT_CONTROL:
+ case KVM_CAP_IRQ_INJECT_STATUS:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
case KVM_CAP_IOMMU:
r = iommu_found();
break;
- case KVM_CAP_CLOCKSOURCE:
- r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
- break;
default:
r = 0;
break;
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
- cpuid_arg->entries);
+ cpuid_arg->entries);
if (r)
goto out;
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_x86_ops->vcpu_load(vcpu, cpu);
- kvm_write_guest_time(vcpu);
+ kvm_request_guest_time_update(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
static int is_efer_nx(void)
{
- u64 efer;
+ unsigned long long efer = 0;
- rdmsrl(MSR_EFER, efer);
+ rdmsrl_safe(MSR_EFER, &efer);
return efer & EFER_NX;
}
}
static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
- struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries)
{
int r;
}
static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
- struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries)
{
int r;
goto out;
r = -EFAULT;
if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
+ vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
goto out;
return 0;
}
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
- u32 index)
+ u32 index)
{
entry->function = function;
entry->index = index;
cpuid_count(entry->function, entry->index,
- &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
+ &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
entry->flags = 0;
}
bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) |
bit(X86_FEATURE_SYSCALL) |
- (bit(X86_FEATURE_NX) && is_efer_nx()) |
+ (is_efer_nx() ? bit(X86_FEATURE_NX) : 0) |
#ifdef CONFIG_X86_64
bit(X86_FEATURE_LM) |
#endif
+ bit(X86_FEATURE_FXSR_OPT) |
bit(X86_FEATURE_MMXEXT) |
bit(X86_FEATURE_3DNOWEXT) |
bit(X86_FEATURE_3DNOW);
bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
bit(X86_FEATURE_SVM);
- /* all func 2 cpuid_count() should be called on the same cpu */
+ /* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
do_cpuid_1_ent(entry, function, index);
++*nent;
}
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid_entry2 __user *entries)
{
struct kvm_cpuid_entry2 *cpuid_entries;
int limit, nent = 0, r = -E2BIG;
limit = cpuid_entries[0].eax;
for (func = 1; func <= limit && nent < cpuid->nent; ++func)
do_cpuid_ent(&cpuid_entries[nent], func, 0,
- &nent, cpuid->nent);
+ &nent, cpuid->nent);
r = -E2BIG;
if (nent >= cpuid->nent)
goto out_free;
limit = cpuid_entries[nent - 1].eax;
for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
do_cpuid_ent(&cpuid_entries[nent], func, 0,
- &nent, cpuid->nent);
+ &nent, cpuid->nent);
r = -EFAULT;
if (copy_to_user(entries, cpuid_entries,
- nent * sizeof(struct kvm_cpuid_entry2)))
+ nent * sizeof(struct kvm_cpuid_entry2)))
goto out_free;
cpuid->nent = nent;
r = 0;
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
- cpuid_arg->entries);
+ cpuid_arg->entries);
if (r)
goto out;
break;
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
- cpuid_arg->entries);
+ cpuid_arg->entries);
if (r)
goto out;
r = -EFAULT;
}
} else
goto out;
+ r = kvm_setup_default_irq_routing(kvm);
+ if (r) {
+ kfree(kvm->arch.vpic);
+ kfree(kvm->arch.vioapic);
+ goto out;
+ }
break;
case KVM_CREATE_PIT:
+ mutex_lock(&kvm->lock);
+ r = -EEXIST;
+ if (kvm->arch.vpit)
+ goto create_pit_unlock;
r = -ENOMEM;
kvm->arch.vpit = kvm_create_pit(kvm);
if (kvm->arch.vpit)
r = 0;
+ create_pit_unlock:
+ mutex_unlock(&kvm->lock);
break;
+ case KVM_IRQ_LINE_STATUS:
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
if (copy_from_user(&irq_event, argp, sizeof irq_event))
goto out;
if (irqchip_in_kernel(kvm)) {
+ __s32 status;
mutex_lock(&kvm->lock);
- kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
- irq_event.irq, irq_event.level);
+ status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event.irq, irq_event.level);
mutex_unlock(&kvm->lock);
+ if (ioctl == KVM_IRQ_LINE_STATUS) {
+ irq_event.status = status;
+ if (copy_to_user(argp, &irq_event,
+ sizeof irq_event))
+ goto out;
+ }
r = 0;
}
break;
return dev;
}
-int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
- struct kvm_vcpu *vcpu)
+static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu)
{
void *data = val;
int r = X86EMUL_CONTINUE;
return r;
}
-int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
- struct kvm_vcpu *vcpu)
+static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu)
{
void *data = val;
int r = X86EMUL_CONTINUE;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
+static void bounce_off(void *info)
+{
+ /* nothing */
+}
+
+static unsigned int ref_freq;
+static unsigned long tsc_khz_ref;
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ struct kvm *kvm;
+ struct kvm_vcpu *vcpu;
+ int i, send_ipi = 0;
+
+ if (!ref_freq)
+ ref_freq = freq->old;
+
+ if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+ return 0;
+ if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+ return 0;
+ per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = kvm->vcpus[i];
+ if (!vcpu)
+ continue;
+ if (vcpu->cpu != freq->cpu)
+ continue;
+ if (!kvm_request_guest_time_update(vcpu))
+ continue;
+ if (vcpu->cpu != smp_processor_id())
+ send_ipi++;
+ }
+ }
+ spin_unlock(&kvm_lock);
+
+ if (freq->old < freq->new && send_ipi) {
+ /*
+ * We upscale the frequency. Must make the guest
+ * doesn't see old kvmclock values while running with
+ * the new frequency, otherwise we risk the guest sees
+ * time go backwards.
+ *
+ * In case we update the frequency for another cpu
+ * (which might be in guest context) send an interrupt
+ * to kick the cpu out of guest context. Next time
+ * guest context is entered kvmclock will be updated,
+ * so the guest will not see stale values.
+ */
+ smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+ }
+ return 0;
+}
+
+static struct notifier_block kvmclock_cpufreq_notifier_block = {
+ .notifier_call = kvmclock_cpufreq_notifier
+};
+
int kvm_arch_init(void *opaque)
{
- int r;
+ int r, cpu;
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+
+ for_each_possible_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ tsc_khz_ref = tsc_khz;
+ cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+
return 0;
out:
void kvm_arch_exit(void)
{
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
}
if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
return 0;
if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
- !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
+ !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
return 0;
return 1;
}
if (!best || e->function > best->function)
best = e;
}
-
return best;
}
if (vcpu->requests) {
if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
__kvm_migrate_timers(vcpu);
+ if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
+ kvm_write_guest_time(vcpu);
if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
kvm_mmu_sync_roots(vcpu);
if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
+ if (vcpu->arch.time_page) {
+ kvm_release_page_dirty(vcpu->arch.time_page);
+ vcpu->arch.time_page = NULL;
+ }
+
kvm_x86_ops->vcpu_free(vcpu);
}