if (cpuid != boot_cpuid) {
flags |= 0x00040000UL; /* "remain halted" */
*pflags = flags;
- cpu_clear(cpuid, cpu_present_map);
- cpu_clear(cpuid, cpu_possible_map);
+ set_cpu_present(cpuid, false);
+ set_cpu_possible(cpuid, false);
halt();
}
#endif
#ifdef CONFIG_SMP
/* Wait for the secondaries to halt. */
- cpu_clear(boot_cpuid, cpu_present_map);
- cpu_clear(boot_cpuid, cpu_possible_map);
+ set_cpu_present(boot_cpuid, false);
+ set_cpu_possible(boot_cpuid, false);
while (cpus_weight(cpu_present_map))
barrier();
#endif
smp_callin(void)
{
int cpuid = hard_smp_processor_id();
- cpumask_t mask = cpu_online_map;
- if (cpu_test_and_set(cpuid, mask)) {
+ if (cpu_online(cpuid)) {
printk("??, cpu 0x%x already present??\n", cpuid);
BUG();
}
+ set_cpu_online(cpuid, true);
/* Turn on machine checks. */
wrmces(7);
((char *)cpubase + i*hwrpb->processor_size);
if ((cpu->flags & 0x1cc) == 0x1cc) {
smp_num_probed++;
- cpu_set(i, cpu_possible_map);
- cpu_set(i, cpu_present_map);
+ set_cpu_possible(i, true);
+ set_cpu_present(i, true);
cpu->pal_revision = boot_cpu_palrev;
}
/* Nothing to do on a UP box, or when told not to. */
if (smp_num_probed == 1 || max_cpus == 0) {
- cpu_possible_map = cpumask_of_cpu(boot_cpuid);
- cpu_present_map = cpumask_of_cpu(boot_cpuid);
+ init_cpu_possible(cpumask_of(boot_cpuid));
+ init_cpu_present(cpumask_of(boot_cpuid));
printk(KERN_INFO "SMP mode deactivated.\n");
return;
}
#include <linux/ioctl.h>
+/* Select x86 specific features in <linux/kvm.h> */
+#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_DEVICE_ASSIGNMENT
+
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
}
}
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
if (!vmm_fpswa_interface)
return (fpswa_ret_t) {-1, 0, 0, 0};
- /*
- * Just let fpswa driver to use hardware fp registers.
- * No fp register is valid in memory.
- */
memset(&fp_state, 0, sizeof(fp_state_t));
/*
+ * compute fp_state. only FP registers f6 - f11 are used by the
+ * vmm, so set those bits in the mask and set the low volatile
+ * pointer to point to these registers.
+ */
+ fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */
+
+ fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) ®s->f6;
+
+ /*
* unsigned long (*EFI_FPSWA) (
* unsigned long trap_type,
* void *Bundle,
status = vmm_handle_fpu_swa(0, regs, isr);
if (!status)
return ;
- else if (-EAGAIN == status) {
- vcpu_decrement_iip(vcpu);
- return ;
- }
break;
}
}
}
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvmppc_free_vcpus(kvm);
}
}
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
#include <linux/types.h>
#include <linux/ioctl.h>
+/* Select x86 specific features in <linux/kvm.h> */
+#define __KVM_HAVE_PIT
+#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_DEVICE_ASSIGNMENT
+#define __KVM_HAVE_MSI
+#define __KVM_HAVE_USER_NMI
+
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
data->cpu = pol->cpu;
data->currpstate = HW_PSTATE_INVALID;
- rc = powernow_k8_cpu_init_acpi(data);
- if (rc) {
+ if (powernow_k8_cpu_init_acpi(data)) {
/*
* Use the PSB BIOS structure. This is only availabe on
* an UP version, and is deprecated by AMD.
"ACPI maintainers and complain to your BIOS "
"vendor.\n");
#endif
- goto err_out;
+ kfree(data);
+ return -ENODEV;
}
if (pol->cpu != 0) {
printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
"CPU other than CPU0. Complain to your BIOS "
"vendor.\n");
- goto err_out;
+ kfree(data);
+ return -ENODEV;
}
rc = find_psb_table(data);
if (rc) {
- goto err_out;
+ kfree(data);
+ return -ENODEV;
}
/* Take a crude guess here.
* That guess was in microseconds, so multiply with 1000 */
hrtimer_add_expires_ns(&pt->timer, pt->period);
pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
if (pt->period)
- ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer);
+ ps->channels[0].count_load_time = ktime_get();
return (pt->period == 0 ? 0 : 1);
}
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
- kvm_apic_timer_intr_post(vcpu, vec);
- /* TODO: PIT, RTC etc. */
-}
-EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
-
void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
__kvm_migrate_apic_timer(vcpu);
void kvm_pic_reset(struct kvm_kpic_state *s);
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
#include "kvm_cache_regs.h"
#include "irq.h"
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
- u64 counter_passed;
- ktime_t passed, now;
+ ktime_t remaining;
+ s64 ns;
u32 tmcct;
ASSERT(apic != NULL);
- now = apic->timer.dev.base->get_time();
- tmcct = apic_get_reg(apic, APIC_TMICT);
-
/* if initial count is 0, current count should also be 0 */
- if (tmcct == 0)
+ if (apic_get_reg(apic, APIC_TMICT) == 0)
return 0;
- if (unlikely(ktime_to_ns(now) <=
- ktime_to_ns(apic->timer.last_update))) {
- /* Wrap around */
- passed = ktime_add(( {
- (ktime_t) {
- .tv64 = KTIME_MAX -
- (apic->timer.last_update).tv64}; }
- ), now);
- apic_debug("time elapsed\n");
- } else
- passed = ktime_sub(now, apic->timer.last_update);
-
- counter_passed = div64_u64(ktime_to_ns(passed),
- (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
-
- if (counter_passed > tmcct) {
- if (unlikely(!apic_lvtt_period(apic))) {
- /* one-shot timers stick at 0 until reset */
- tmcct = 0;
- } else {
- /*
- * periodic timers reset to APIC_TMICT when they
- * hit 0. The while loop simulates this happening N
- * times. (counter_passed %= tmcct) would also work,
- * but might be slower or not work on 32-bit??
- */
- while (counter_passed > tmcct)
- counter_passed -= tmcct;
- tmcct -= counter_passed;
- }
- } else {
- tmcct -= counter_passed;
- }
+ remaining = hrtimer_expires_remaining(&apic->timer.dev);
+ if (ktime_to_ns(remaining) < 0)
+ remaining = ktime_set(0, 0);
+
+ ns = mod_64(ktime_to_ns(remaining), apic->timer.period);
+ tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
return tmcct;
}
{
ktime_t now = apic->timer.dev.base->get_time();
- apic->timer.last_update = now;
-
apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
APIC_BUS_CYCLE_NS * apic->timer.divide_count;
atomic_set(&apic->timer.pending, 0);
}
}
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
- apic->timer.last_update = ktime_add_ns(
- apic->timer.last_update,
- apic->timer.period);
-}
-
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{
int vector = kvm_apic_has_interrupt(vcpu);
atomic_t pending;
s64 period; /* unit: ns */
u32 divide_count;
- ktime_t last_update;
struct hrtimer dev;
} timer;
struct kvm_vcpu *vcpu;
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
if (largepage)
spte |= PT_PAGE_SIZE_MASK;
if (mt_mask) {
- mt_mask = get_memory_type(vcpu, gfn) <<
- kvm_x86_ops->get_mt_mask_shift();
+ if (!kvm_is_mmio_pfn(pfn)) {
+ mt_mask = get_memory_type(vcpu, gfn) <<
+ kvm_x86_ops->get_mt_mask_shift();
+ mt_mask |= VMX_EPT_IGMT_BIT;
+ } else
+ mt_mask = MTRR_TYPE_UNCACHABLE <<
+ kvm_x86_ops->get_mt_mask_shift();
spte |= mt_mask;
}
/* Okay, we can deliver the interrupt: grab it and update PIC state. */
intr_vector = kvm_cpu_get_interrupt(vcpu);
svm_inject_irq(svm, intr_vector);
- kvm_timer_intr_post(vcpu, intr_vector);
out:
update_cr8_intercept(vcpu);
}
data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
default:
+ vmx_load_host_state(to_vmx(vcpu));
msr = find_msr_entry(to_vmx(vcpu), msr_index);
if (msr) {
data = msr->data;
}
if (vcpu->arch.interrupt.pending) {
vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
- kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
if (kvm_cpu_has_interrupt(vcpu))
enable_irq_window(vcpu);
}
if (vm_need_ept()) {
bypass_guest_pf = 0;
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
- VMX_EPT_WRITABLE_MASK |
- VMX_EPT_IGMT_BIT);
+ VMX_EPT_WRITABLE_MASK);
kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
VMX_EPT_EXECUTABLE_MASK,
VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
- case KVM_CAP_CLOCKSOURCE:
case KVM_CAP_PIT:
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
case KVM_CAP_IOMMU:
r = iommu_found();
break;
+ case KVM_CAP_CLOCKSOURCE:
+ r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
+ break;
default:
r = 0;
break;
}
-void kvm_arch_destroy_vm(struct kvm *kvm)
+void kvm_arch_sync_events(struct kvm *kvm)
{
kvm_free_all_assigned_devices(kvm);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
kvm_iommu_unmap_guest(kvm);
kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;
+static int rwbf_quirk = 0;
+
/*
* 0: Present
* 1-11: Reserved
u32 val;
unsigned long flag;
- if (!cap_rwbf(iommu->cap))
+ if (!rwbf_quirk && !cap_rwbf(iommu->cap))
return;
val = iommu->gcmd | DMA_GCMD_WBF;
.unmap = intel_iommu_unmap_range,
.iova_to_phys = intel_iommu_iova_to_phys,
};
+
+static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
+{
+ /* Mobile 4 Series Chipset neglects to set RWBF capability,
+ but needs it */
+ printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
+ rwbf_quirk = 1;
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
*/
for_each_possible_cpu(cpu) {
struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
- if (cpu_writer->mnt != mnt)
- continue;
spin_lock(&cpu_writer->lock);
+ if (cpu_writer->mnt != mnt) {
+ spin_unlock(&cpu_writer->lock);
+ continue;
+ }
atomic_add(cpu_writer->count, &mnt->__mnt_writers);
cpu_writer->count = 0;
/*
__u32 pad;
union {
char dummy[512]; /* reserving space */
-#ifdef CONFIG_X86
+#ifdef __KVM_HAVE_PIT
struct kvm_pic_state pic;
#endif
-#if defined(CONFIG_X86) || defined(CONFIG_IA64)
+#ifdef __KVM_HAVE_IOAPIC
struct kvm_ioapic_state ioapic;
#endif
} chip;
#define KVM_CAP_MP_STATE 14
#define KVM_CAP_COALESCED_MMIO 15
#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */
-#if defined(CONFIG_X86)||defined(CONFIG_IA64)
+#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
#define KVM_CAP_DEVICE_ASSIGNMENT 17
#endif
#define KVM_CAP_IOMMU 18
-#if defined(CONFIG_X86)
+#ifdef __KVM_HAVE_MSI
#define KVM_CAP_DEVICE_MSI 20
#endif
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
-#if defined(CONFIG_X86)
+#ifdef __KVM_HAVE_USER_NMI
#define KVM_CAP_USER_NMI 22
#endif
struct kvm *kvm_arch_create_vm(void);
void kvm_arch_destroy_vm(struct kvm *kvm);
void kvm_free_all_assigned_devices(struct kvm *kvm);
+void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
{
int i, r = 0;
- down_read(&kvm->slots_lock);
for (i = 0; i < kvm->nmemslots; i++) {
r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
kvm->memslots[i].npages);
if (r)
break;
}
- up_read(&kvm->slots_lock);
+
return r;
}
static int kvm_iommu_unmap_memslots(struct kvm *kvm)
{
int i;
- down_read(&kvm->slots_lock);
+
for (i = 0; i < kvm->nmemslots; i++) {
kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
kvm->memslots[i].npages);
}
- up_read(&kvm->slots_lock);
return 0;
}
assigned_dev->host_irq_disabled = false;
}
mutex_unlock(&assigned_dev->kvm->lock);
- kvm_put_kvm(assigned_dev->kvm);
}
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
struct kvm_assigned_dev_kernel *assigned_dev =
(struct kvm_assigned_dev_kernel *) dev_id;
- kvm_get_kvm(assigned_dev->kvm);
-
schedule_work(&assigned_dev->interrupt_work);
disable_irq_nosync(irq);
}
}
+/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
static void kvm_free_assigned_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
if (!assigned_dev->irq_requested_type)
return;
- if (cancel_work_sync(&assigned_dev->interrupt_work))
- /* We had pending work. That means we will have to take
- * care of kvm_put_kvm.
- */
- kvm_put_kvm(kvm);
+ /*
+ * In kvm_free_device_irq, cancel_work_sync return true if:
+ * 1. work is scheduled, and then cancelled.
+ * 2. work callback is executed.
+ *
+ * The first one ensured that the irq is disabled and no more events
+ * would happen. But for the second one, the irq may be enabled (e.g.
+ * for MSI). So we disable irq here to prevent further events.
+ *
+ * Notice this maybe result in nested disable if the interrupt type is
+ * INTx, but it's OK for we are going to free it.
+ *
+ * If this function is a part of VM destroy, please ensure that till
+ * now, the kvm state is still legal for probably we also have to wait
+ * interrupt_work done.
+ */
+ disable_irq_nosync(assigned_dev->host_irq);
+ cancel_work_sync(&assigned_dev->interrupt_work);
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
if (irqchip_in_kernel(kvm)) {
if (!msi2intx &&
- adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
- free_irq(adev->host_irq, (void *)kvm);
+ (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) {
+ free_irq(adev->host_irq, (void *)adev);
pci_disable_msi(adev->dev);
}
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
+ down_read(&kvm->slots_lock);
mutex_lock(&kvm->lock);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
out:
mutex_unlock(&kvm->lock);
+ up_read(&kvm->slots_lock);
return r;
out_list_del:
list_del(&match->list);
out_free:
kfree(match);
mutex_unlock(&kvm->lock);
+ up_read(&kvm->slots_lock);
return r;
}
#endif
return young;
}
+static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
+ struct mm_struct *mm)
+{
+ struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ kvm_arch_flush_shadow(kvm);
+}
+
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.invalidate_page = kvm_mmu_notifier_invalidate_page,
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
+ .release = kvm_mmu_notifier_release,
};
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
{
struct mm_struct *mm = kvm->mm;
+ kvm_arch_sync_events(kvm);
spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);