KVM: x86: Enable NMI Watchdog via in-kernel PIT source
[safe/jmp/linux-2.6] / arch / x86 / kvm / i8254.c
index 735ec9a..580cc1d 100644 (file)
@@ -198,18 +198,16 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
        struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
        struct kvm_kpit_timer *pt = &ps->pit_timer;
 
-       atomic_inc(&pt->pending);
-       smp_mb__after_atomic_inc();
-       if (vcpu0) {
+       if (!atomic_inc_and_test(&pt->pending))
                set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
-               if (waitqueue_active(&vcpu0->wq)) {
-                       vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-                       wake_up_interruptible(&vcpu0->wq);
-               }
-       }
 
-       pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
-       pt->scheduled = ktime_to_ns(pt->timer.expires);
+       if (vcpu0 && waitqueue_active(&vcpu0->wq))
+               wake_up_interruptible(&vcpu0->wq);
+
+       hrtimer_add_expires_ns(&pt->timer, pt->period);
+       pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
+       if (pt->period)
+               ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer);
 
        return (pt->period == 0 ? 0 : 1);
 }
@@ -218,12 +216,22 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
 {
        struct kvm_pit *pit = vcpu->kvm->arch.vpit;
 
-       if (pit && vcpu->vcpu_id == 0 && pit->pit_state.inject_pending)
+       if (pit && vcpu->vcpu_id == 0 && pit->pit_state.irq_ack)
                return atomic_read(&pit->pit_state.pit_timer.pending);
-
        return 0;
 }
 
+static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
+{
+       struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
+                                                irq_ack_notifier);
+       spin_lock(&ps->inject_lock);
+       if (atomic_dec_return(&ps->pit_timer.pending) < 0)
+               atomic_inc(&ps->pit_timer.pending);
+       ps->irq_ack = 1;
+       spin_unlock(&ps->inject_lock);
+}
+
 static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 {
        struct kvm_kpit_state *ps;
@@ -249,7 +257,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 
        timer = &pit->pit_state.pit_timer.timer;
        if (hrtimer_cancel(timer))
-               hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
+               hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
 static void destroy_pit_timer(struct kvm_kpit_timer *pt)
@@ -258,8 +266,9 @@ static void destroy_pit_timer(struct kvm_kpit_timer *pt)
        hrtimer_cancel(&pt->timer);
 }
 
-static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
+static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
 {
+       struct kvm_kpit_timer *pt = &ps->pit_timer;
        s64 interval;
 
        interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
@@ -271,6 +280,7 @@ static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
        pt->period = (is_period == 0) ? 0 : interval;
        pt->timer.function = pit_timer_fn;
        atomic_set(&pt->pending, 0);
+       ps->irq_ack = 1;
 
        hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
                      HRTIMER_MODE_ABS);
@@ -305,10 +315,11 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
        case 1:
         /* FIXME: enhance mode 4 precision */
        case 4:
-               create_pit_timer(&ps->pit_timer, val, 0);
+               create_pit_timer(ps, val, 0);
                break;
        case 2:
-               create_pit_timer(&ps->pit_timer, val, 1);
+       case 3:
+               create_pit_timer(ps, val, 1);
                break;
        default:
                destroy_pit_timer(&ps->pit_timer);
@@ -459,7 +470,8 @@ static void pit_ioport_read(struct kvm_io_device *this,
        mutex_unlock(&pit_state->lock);
 }
 
-static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr,
+                       int len, int is_write)
 {
        return ((addr >= KVM_PIT_BASE_ADDRESS) &&
                (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
@@ -500,7 +512,8 @@ static void speaker_ioport_read(struct kvm_io_device *this,
        mutex_unlock(&pit_state->lock);
 }
 
-static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr,
+                           int len, int is_write)
 {
        return (addr == KVM_SPEAKER_BASE_ADDRESS);
 }
@@ -520,7 +533,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
        mutex_unlock(&pit->pit_state.lock);
 
        atomic_set(&pit->pit_state.pit_timer.pending, 0);
-       pit->pit_state.inject_pending = 1;
+       pit->pit_state.irq_ack = 1;
 }
 
 struct kvm_pit *kvm_create_pit(struct kvm *kvm)
@@ -532,8 +545,17 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
        if (!pit)
                return NULL;
 
+       mutex_lock(&kvm->lock);
+       pit->irq_source_id = kvm_request_irq_source_id(kvm);
+       mutex_unlock(&kvm->lock);
+       if (pit->irq_source_id < 0) {
+               kfree(pit);
+               return NULL;
+       }
+
        mutex_init(&pit->pit_state.lock);
        mutex_lock(&pit->pit_state.lock);
+       spin_lock_init(&pit->pit_state.inject_lock);
 
        /* Initialize PIO device */
        pit->dev.read = pit_ioport_read;
@@ -555,6 +577,9 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
        pit_state->pit = pit;
        hrtimer_init(&pit_state->pit_timer.timer,
                     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+       pit_state->irq_ack_notifier.gsi = 0;
+       pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
+       kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
        mutex_unlock(&pit->pit_state.lock);
 
        kvm_pit_reset(pit);
@@ -570,6 +595,7 @@ void kvm_free_pit(struct kvm *kvm)
                mutex_lock(&kvm->arch.vpit->pit_state.lock);
                timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
                hrtimer_cancel(timer);
+               kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
                mutex_unlock(&kvm->arch.vpit->pit_state.lock);
                kfree(kvm->arch.vpit);
        }
@@ -577,12 +603,25 @@ void kvm_free_pit(struct kvm *kvm)
 
 static void __inject_pit_timer_intr(struct kvm *kvm)
 {
+       struct kvm_vcpu *vcpu;
+       int i;
+
        mutex_lock(&kvm->lock);
-       kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
-       kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
-       kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
-       kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+       kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
+       kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
        mutex_unlock(&kvm->lock);
+
+       /*
+        * Provides NMI watchdog support in IOAPIC mode.
+        * The route is: PIT -> PIC -> LVT0 in NMI mode,
+        * timer IRQs will continue to flow through the IOAPIC.
+        */
+       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+               vcpu = kvm->vcpus[i];
+               if (!vcpu)
+                       continue;
+               kvm_apic_local_deliver(vcpu, APIC_LVT0);
+       }
 }
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
@@ -592,37 +631,19 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
        struct kvm_kpit_state *ps;
 
        if (vcpu && pit) {
+               int inject = 0;
                ps = &pit->pit_state;
 
-               /* Try to inject pending interrupts when:
-                * 1. Pending exists
-                * 2. Last interrupt was accepted or waited for too long time*/
-               if (atomic_read(&ps->pit_timer.pending) &&
-                   (ps->inject_pending ||
-                   (jiffies - ps->last_injected_time
-                               >= KVM_MAX_PIT_INTR_INTERVAL))) {
-                       ps->inject_pending = 0;
-                       __inject_pit_timer_intr(kvm);
-                       ps->last_injected_time = jiffies;
-               }
-       }
-}
-
-void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
-       struct kvm_arch *arch = &vcpu->kvm->arch;
-       struct kvm_kpit_state *ps;
-
-       if (vcpu && arch->vpit) {
-               ps = &arch->vpit->pit_state;
-               if (atomic_read(&ps->pit_timer.pending) &&
-               (((arch->vpic->pics[0].imr & 1) == 0 &&
-                 arch->vpic->pics[0].irq_base == vec) ||
-                 (arch->vioapic->redirtbl[0].fields.vector == vec &&
-                 arch->vioapic->redirtbl[0].fields.mask != 1))) {
-                       ps->inject_pending = 1;
-                       atomic_dec(&ps->pit_timer.pending);
-                       ps->channels[0].count_load_time = ktime_get();
+               /* Try to inject pending interrupts when
+                * last one has been acked.
+                */
+               spin_lock(&ps->inject_lock);
+               if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
+                       ps->irq_ack = 0;
+                       inject = 1;
                }
+               spin_unlock(&ps->inject_lock);
+               if (inject)
+                       __inject_pit_timer_intr(kvm);
        }
 }