KVM: Add instruction emulation statistics

[safe/jmp/linux-2.6] / drivers / kvm / lapic.c
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c

index 4b5c77d..64f74bd 100644 (file)
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -18,6 +18,8 @@
   */
  
  #include "kvm.h"
+#include "x86.h"
+
  #include <linux/kvm.h>
  #include <linux/mm.h>
  #include <linux/highmem.h>
@@ -170,6 +172,19 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
         return result;
  }
  
+int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
+{
+       struct kvm_lapic *apic = vcpu->apic;
+       int highest_irr;
+
+       if (!apic)
+               return 0;
+       highest_irr = apic_find_highest_irr(apic);
+
+       return highest_irr;
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
+
  int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig)
  {
         if (!apic_test_and_set_irr(vec, apic)) {
@@ -299,7 +314,8 @@ static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                              int vector, int level, int trig_mode)
  {
-       int result = 0;
+       int orig_irr, result = 0;
+       struct kvm_vcpu *vcpu = apic->vcpu;
  
         switch (delivery_mode) {
         case APIC_DM_FIXED:
@@ -308,7 +324,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                 if (unlikely(!apic_enabled(apic)))
                         break;
  
-               if (apic_test_and_set_irr(vector, apic) && trig_mode) {
+               orig_irr = apic_test_and_set_irr(vector, apic);
+               if (orig_irr && trig_mode) {
                         apic_debug("level trig mode repeatedly for vector %d",
                                    vector);
                         break;
@@ -320,9 +337,15 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                 } else
                         apic_clear_vector(vector, apic->regs + APIC_TMR);
  
-               kvm_vcpu_kick(apic->vcpu);
+               if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE)
+                       kvm_vcpu_kick(vcpu);
+               else if (vcpu->mp_state == VCPU_MP_STATE_HALTED) {
+                       vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+                       if (waitqueue_active(&vcpu->wq))
+                               wake_up_interruptible(&vcpu->wq);
+               }
  
-               result = 1;
+               result = (orig_irr == 0);
                 break;
  
         case APIC_DM_REMRD:
@@ -337,11 +360,30 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                 break;
  
         case APIC_DM_INIT:
-               printk(KERN_DEBUG "Ignoring guest INIT\n");
+               if (level) {
+                       if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE)
+                               printk(KERN_DEBUG
+                                      "INIT on a runnable vcpu %d\n",
+                                      vcpu->vcpu_id);
+                       vcpu->mp_state = VCPU_MP_STATE_INIT_RECEIVED;
+                       kvm_vcpu_kick(vcpu);
+               } else {
+                       printk(KERN_DEBUG
+                              "Ignoring de-assert INIT to vcpu %d\n",
+                              vcpu->vcpu_id);
+               }
+
                 break;
  
         case APIC_DM_STARTUP:
-               printk(KERN_DEBUG "Ignoring guest STARTUP\n");
+               printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
+                      vcpu->vcpu_id, vector);
+               if (vcpu->mp_state == VCPU_MP_STATE_INIT_RECEIVED) {
+                       vcpu->sipi_vector = vector;
+                       vcpu->mp_state = VCPU_MP_STATE_SIPI_RECEIVED;
+                       if (waitqueue_active(&vcpu->wq))
+                               wake_up_interruptible(&vcpu->wq);
+               }
                 break;
  
         default:
@@ -355,13 +397,29 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
  struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
                                        unsigned long bitmap)
  {
-       int vcpu_id;
+       int last;
+       int next;
+       struct kvm_lapic *apic = NULL;
+
+       last = kvm->round_robin_prev_vcpu;
+       next = last;
+
+       do {
+               if (++next == KVM_MAX_VCPUS)
+                       next = 0;
+               if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap))
+                       continue;
+               apic = kvm->vcpus[next]->apic;
+               if (apic && apic_enabled(apic))
+                       break;
+               apic = NULL;
+       } while (next != last);
+       kvm->round_robin_prev_vcpu = next;
  
-       /* TODO for real round robin */
-       vcpu_id = fls(bitmap) - 1;
-       if (vcpu_id < 0)
+       if (!apic)
                 printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
-       return kvm->vcpus[vcpu_id]->apic;
+
+       return apic;
  }
  
  static void apic_set_eoi(struct kvm_lapic *apic)
@@ -431,12 +489,19 @@ static void apic_send_ipi(struct kvm_lapic *apic)
  
  static u32 apic_get_tmcct(struct kvm_lapic *apic)
  {
-       u32 counter_passed;
-       ktime_t passed, now = apic->timer.dev.base->get_time();
-       u32 tmcct = apic_get_reg(apic, APIC_TMICT);
+       u64 counter_passed;
+       ktime_t passed, now;
+       u32 tmcct;
  
         ASSERT(apic != NULL);
  
+       now = apic->timer.dev.base->get_time();
+       tmcct = apic_get_reg(apic, APIC_TMICT);
+
+       /* if initial count is 0, current count should also be 0 */
+       if (tmcct == 0)
+               return 0;
+
         if (unlikely(ktime_to_ns(now) <=
                 ktime_to_ns(apic->timer.last_update))) {
                 /* Wrap around */
@@ -451,15 +516,24 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
  
         counter_passed = div64_64(ktime_to_ns(passed),
                                   (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
-       tmcct -= counter_passed;
  
-       if (tmcct <= 0) {
-               if (unlikely(!apic_lvtt_period(apic)))
+       if (counter_passed > tmcct) {
+               if (unlikely(!apic_lvtt_period(apic))) {
+                       /* one-shot timers stick at 0 until reset */
                         tmcct = 0;
-               else
-                       do {
-                               tmcct += apic_get_reg(apic, APIC_TMICT);
-                       } while (tmcct <= 0);
+               } else {
+                       /*
+                        * Periodic timers reload from APIC_TMICT when they
+                        * hit 0. The while loop replays each reload that has
+                        * already elapsed. (counter_passed %= tmcct) would
+                        * also work, but a 64-bit modulo may be slow or
+                        * unavailable on 32-bit hosts.
+                        */
+                       while (counter_passed > tmcct)
+                               counter_passed -= tmcct;
+                       tmcct -= counter_passed;
+               }
+       } else {
+               tmcct -= counter_passed;
         }
  
         return tmcct;
@@ -483,6 +557,7 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
                 break;
  
         default:
+               apic_update_ppr(apic);
                 val = apic_get_reg(apic, offset);
                 break;
         }
@@ -682,19 +757,17 @@ static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
         return ret;
  }
  
-void kvm_free_apic(struct kvm_lapic *apic)
+void kvm_free_lapic(struct kvm_vcpu *vcpu)
  {
-       if (!apic)
+       if (!vcpu->apic)
                 return;
  
-       hrtimer_cancel(&apic->timer.dev);
+       hrtimer_cancel(&vcpu->apic->timer.dev);
  
-       if (apic->regs_page) {
-               __free_page(apic->regs_page);
-               apic->regs_page = 0;
-       }
+       if (vcpu->apic->regs_page)
+               __free_page(vcpu->apic->regs_page);
  
-       kfree(apic);
+       kfree(vcpu->apic);
  }
  
  /*
@@ -705,7 +778,7 @@ void kvm_free_apic(struct kvm_lapic *apic)
  
  void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
  {
-       struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+       struct kvm_lapic *apic = vcpu->apic;
  
         if (!apic)
                 return;
@@ -714,7 +787,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
  
  u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
  {
-       struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+       struct kvm_lapic *apic = vcpu->apic;
         u64 tpr;
  
         if (!apic)
@@ -723,10 +796,11 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
  
         return (tpr & 0xf0) >> 4;
  }
+EXPORT_SYMBOL_GPL(kvm_lapic_get_cr8);
  
  void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
  {
-       struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+       struct kvm_lapic *apic = vcpu->apic;
  
         if (!apic) {
                 value |= MSR_IA32_APICBASE_BSP;
@@ -752,7 +826,7 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
  }
  EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
  
-static void lapic_reset(struct kvm_vcpu *vcpu)
+void kvm_lapic_reset(struct kvm_vcpu *vcpu)
  {
         struct kvm_lapic *apic;
         int i;
@@ -771,6 +845,8 @@ static void lapic_reset(struct kvm_vcpu *vcpu)
  
         for (i = 0; i < APIC_LVT_NUM; i++)
                 apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+       apic_set_reg(apic, APIC_LVT0,
+                    SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
  
         apic_set_reg(apic, APIC_DFR, 0xffffffffU);
         apic_set_reg(apic, APIC_SPIV, 0xff);
@@ -786,7 +862,7 @@ static void lapic_reset(struct kvm_vcpu *vcpu)
                 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
                 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
         }
-       apic->timer.divide_count = 0;
+       update_divide_count(apic);
         atomic_set(&apic->timer.pending, 0);
         if (vcpu->vcpu_id == 0)
                 vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
@@ -797,10 +873,11 @@ static void lapic_reset(struct kvm_vcpu *vcpu)
                    vcpu, kvm_apic_id(apic),
                    vcpu->apic_base, apic->base_address);
  }
+EXPORT_SYMBOL_GPL(kvm_lapic_reset);
  
  int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
  {
-       struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+       struct kvm_lapic *apic = vcpu->apic;
         int ret = 0;
  
         if (!apic)
@@ -809,44 +886,42 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
  
         return ret;
  }
+EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
  
  /*
   *----------------------------------------------------------------------
   * timer interface
   *----------------------------------------------------------------------
   */
+
+/* TODO: make sure __apic_timer_fn runs on the current pCPU */
  static int __apic_timer_fn(struct kvm_lapic *apic)
  {
-       u32 vector;
         int result = 0;
+       wait_queue_head_t *q = &apic->vcpu->wq;
  
-       if (unlikely(!apic_enabled(apic) ||
-                    !apic_lvt_enabled(apic, APIC_LVTT))) {
-               apic_debug("%s: time interrupt although apic is down\n",
-                          __FUNCTION__);
-               return 0;
-       }
-
-       vector = apic_lvt_vector(apic, APIC_LVTT);
-       apic->timer.last_update = apic->timer.dev.expires;
         atomic_inc(&apic->timer.pending);
-       __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
-
+       if (waitqueue_active(q)) {
+               apic->vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+               wake_up_interruptible(q);
+       }
         if (apic_lvtt_period(apic)) {
-               u32 offset;
-               u32 tmict = apic_get_reg(apic, APIC_TMICT);
-
-               offset = APIC_BUS_CYCLE_NS * apic->timer.divide_count * tmict;
-
                 result = 1;
                 apic->timer.dev.expires = ktime_add_ns(
                                         apic->timer.dev.expires,
                                         apic->timer.period);
         }
-
         return result;
  }
  
+static int __inject_apic_timer_irq(struct kvm_lapic *apic)
+{
+       int vector;
+
+       vector = apic_lvt_vector(apic, APIC_LVTT);
+       return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
+}
+
  static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
  {
         struct kvm_lapic *apic;
@@ -879,7 +954,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
         if (apic->regs_page == NULL) {
                 printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
                        vcpu->vcpu_id);
-               goto nomem;
+               goto nomem_free_apic;
         }
         apic->regs = page_address(apic->regs_page);
         memset(apic->regs, 0, PAGE_SIZE);
@@ -890,15 +965,16 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
         apic->base_address = APIC_DEFAULT_PHYS_BASE;
         vcpu->apic_base = APIC_DEFAULT_PHYS_BASE;
  
-       lapic_reset(vcpu);
+       kvm_lapic_reset(vcpu);
         apic->dev.read = apic_mmio_read;
         apic->dev.write = apic_mmio_write;
         apic->dev.in_range = apic_mmio_range;
         apic->dev.private = apic;
  
         return 0;
+nomem_free_apic:
+       kfree(apic);
  nomem:
-       kvm_free_apic(apic);
         return -ENOMEM;
  }
  EXPORT_SYMBOL_GPL(kvm_create_lapic);
@@ -911,6 +987,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
         if (!apic || !apic_enabled(apic))
                 return -1;
  
+       apic_update_ppr(apic);
         highest_irr = apic_find_highest_irr(apic);
         if ((highest_irr == -1) ||
             ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
@@ -918,6 +995,42 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
         return highest_irr;
  }
  
+int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
+{
+       u32 lvt0 = apic_get_reg(vcpu->apic, APIC_LVT0);
+       int r = 0;
+
+       if (vcpu->vcpu_id == 0) {
+               if (!apic_hw_enabled(vcpu->apic))
+                       r = 1;
+               if ((lvt0 & APIC_LVT_MASKED) == 0 &&
+                   GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
+                       r = 1;
+       }
+       return r;
+}
+
+void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
+{
+       struct kvm_lapic *apic = vcpu->apic;
+
+       if (apic && apic_lvt_enabled(apic, APIC_LVTT) &&
+               atomic_read(&apic->timer.pending) > 0) {
+               if (__inject_apic_timer_irq(apic))
+                       atomic_dec(&apic->timer.pending);
+       }
+}
+
+void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+       struct kvm_lapic *apic = vcpu->apic;
+
+       if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
+               apic->timer.last_update = ktime_add_ns(
+                               apic->timer.last_update,
+                               apic->timer.period);
+}
+
  int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
  {
         int vector = kvm_apic_has_interrupt(vcpu);
@@ -931,3 +1044,30 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
         apic_clear_irr(vector, apic);
         return vector;
  }
+
+void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
+{
+       struct kvm_lapic *apic = vcpu->apic;
+
+       apic->base_address = vcpu->apic_base &
+                            MSR_IA32_APICBASE_BASE;
+       apic_set_reg(apic, APIC_LVR, APIC_VERSION);
+       apic_update_ppr(apic);
+       hrtimer_cancel(&apic->timer.dev);
+       update_divide_count(apic);
+       start_apic_timer(apic);
+}
+
+void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
+{
+       struct kvm_lapic *apic = vcpu->apic;
+       struct hrtimer *timer;
+
+       if (!apic)
+               return;
+
+       timer = &apic->timer.dev;
+       if (hrtimer_cancel(timer))
+               hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
+}
+EXPORT_SYMBOL_GPL(kvm_migrate_apic_timer);