KVM: Timer event should not unconditionally unhalt vcpu.
[safe/jmp/linux-2.6] / arch / ia64 / kvm / kvm-ia64.c
index 9408b30..d2a90fd 100644 (file)
@@ -31,6 +31,8 @@
 #include <linux/bitops.h>
 #include <linux/hrtimer.h>
 #include <linux/uaccess.h>
+#include <linux/iommu.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/pgtable.h>
 #include <asm/gcc_intrin.h>
 #include <asm/cacheflush.h>
 #include <asm/div64.h>
 #include <asm/tlb.h>
+#include <asm/elf.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>
 
 #include "misc.h"
 #include "vti.h"
 #include "iodev.h"
 #include "ioapic.h"
 #include "lapic.h"
+#include "irq.h"
 
 static unsigned long kvm_vmm_base;
 static unsigned long kvm_vsa_base;
@@ -61,18 +68,22 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { NULL }
 };
 
-
-struct fdesc{
-    unsigned long ip;
-    unsigned long gp;
-};
+static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+       if (vcpu->kvm->arch.is_sn2)
+               return rtc_time();
+       else
+#endif
+               return ia64_getreg(_IA64_REG_AR_ITC);
+}
 
 static void kvm_flush_icache(unsigned long start, unsigned long len)
 {
        int l;
 
        for (l = 0; l < (len + 32); l += 32)
-               ia64_fc(start + l);
+               ia64_fc((void *)(start + l));
 
        ia64_sync_i();
        ia64_srlz_i();
@@ -121,13 +132,12 @@ void kvm_arch_hardware_enable(void *garbage)
        unsigned long saved_psr;
        int slot;
 
-       pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
-                               PAGE_KERNEL));
+       pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
        local_irq_save(saved_psr);
        slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+       local_irq_restore(saved_psr);
        if (slot < 0)
                return;
-       local_irq_restore(saved_psr);
 
        spin_lock(&vp_lock);
        status = ia64_pal_vp_init_env(kvm_vsa_base ?
@@ -160,9 +170,9 @@ void kvm_arch_hardware_disable(void *garbage)
 
        local_irq_save(saved_psr);
        slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+       local_irq_restore(saved_psr);
        if (slot < 0)
                return;
-       local_irq_restore(saved_psr);
 
        status = ia64_pal_vp_exit_env(host_iva);
        if (status)
@@ -183,13 +193,16 @@ int kvm_dev_ioctl_check_extension(long ext)
 
        switch (ext) {
        case KVM_CAP_IRQCHIP:
-       case KVM_CAP_USER_MEMORY:
-
+       case KVM_CAP_MP_STATE:
+       case KVM_CAP_IRQ_INJECT_STATUS:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
                r = KVM_COALESCED_MMIO_PAGE_OFFSET;
                break;
+       case KVM_CAP_IOMMU:
+               r = iommu_found();
+               break;
        default:
                r = 0;
        }
@@ -282,6 +295,18 @@ static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 }
 
+static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
+{
+       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+       if (!test_and_set_bit(vector, &vpd->irr[0])) {
+               vcpu->arch.irq_new_pending = 1;
+               kvm_vcpu_kick(vcpu);
+               return 1;
+       }
+       return 0;
+}
+
 /*
  *  offset: address offset to IPI space.
  *  value:  deliver value.
@@ -291,20 +316,20 @@ static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
 {
        switch (dm) {
        case SAPIC_FIXED:
-               kvm_apic_set_irq(vcpu, vector, 0);
                break;
        case SAPIC_NMI:
-               kvm_apic_set_irq(vcpu, 2, 0);
+               vector = 2;
                break;
        case SAPIC_EXTINT:
-               kvm_apic_set_irq(vcpu, 0, 0);
+               vector = 0;
                break;
        case SAPIC_INIT:
        case SAPIC_PMI:
        default:
                printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
-               break;
+               return;
        }
+       __apic_accept_irq(vcpu, vector);
 }
 
 static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
@@ -313,7 +338,7 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
        union ia64_lid lid;
        int i;
 
-       for (i = 0; i < KVM_MAX_VCPUS; i++) {
+       for (i = 0; i < kvm->arch.online_vcpus; i++) {
                if (kvm->vcpus[i]) {
                        lid.val = VCPU_LID(kvm->vcpus[i]);
                        if (lid.id == id && lid.eid == eid)
@@ -384,9 +409,10 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        struct kvm *kvm = vcpu->kvm;
        struct call_data call_data;
        int i;
+
        call_data.ptc_g_data = p->u.ptc_g_data;
 
-       for (i = 0; i < KVM_MAX_VCPUS; i++) {
+       for (i = 0; i < kvm->arch.online_vcpus; i++) {
                if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
                                                KVM_MP_STATE_UNINITIALIZED ||
                                        vcpu == kvm->vcpus[i])
@@ -411,39 +437,62 @@ static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        return 1;
 }
 
+static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
+{
+       unsigned long pte, rtc_phys_addr, map_addr;
+       int slot;
+
+       map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
+       rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
+       pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
+       slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
+       vcpu->arch.sn_rtc_tr_slot = slot;
+       if (slot < 0) {
+               printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
+               slot = 0;
+       }
+       return slot;
+}
+
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
 
        ktime_t kt;
        long itc_diff;
        unsigned long vcpu_now_itc;
-
        unsigned long expires;
        struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
        unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
        struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
 
-       vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset;
+       if (irqchip_in_kernel(vcpu->kvm)) {
 
-       if (time_after(vcpu_now_itc, vpd->itm)) {
-               vcpu->arch.timer_check = 1;
-               return 1;
-       }
-       itc_diff = vpd->itm - vcpu_now_itc;
-       if (itc_diff < 0)
-               itc_diff = -itc_diff;
+               vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
 
-       expires = div64_u64(itc_diff, cyc_per_usec);
-       kt = ktime_set(0, 1000 * expires);
-       vcpu->arch.ht_active = 1;
-       hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
+               if (time_after(vcpu_now_itc, vpd->itm)) {
+                       vcpu->arch.timer_check = 1;
+                       return 1;
+               }
+               itc_diff = vpd->itm - vcpu_now_itc;
+               if (itc_diff < 0)
+                       itc_diff = -itc_diff;
+
+               expires = div64_u64(itc_diff, cyc_per_usec);
+               kt = ktime_set(0, 1000 * expires);
+
+               vcpu->arch.ht_active = 1;
+               hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
 
-       if (irqchip_in_kernel(vcpu->kvm)) {
                vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
                kvm_vcpu_block(vcpu);
                hrtimer_cancel(p_ht);
                vcpu->arch.ht_active = 0;
 
+               if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
+                               kvm_cpu_has_pending_timer(vcpu))
+                       if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+                               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
                if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
                        return -EINTR;
                return 1;
@@ -466,6 +515,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
        return 1;
 }
 
+static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
+                               struct kvm_run *kvm_run)
+{
+       printk("VMM: %s", vcpu->arch.log_buf);
+       return 1;
+}
+
 static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
                struct kvm_run *kvm_run) = {
        [EXIT_REASON_VM_PANIC]              = handle_vm_error,
@@ -477,16 +533,13 @@ static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
        [EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
        [EXIT_REASON_IPI]                   = handle_ipi,
        [EXIT_REASON_PTC_G]                 = handle_global_purge,
+       [EXIT_REASON_DEBUG]                 = handle_vcpu_debug,
 
 };
 
 static const int kvm_vti_max_exit_handlers =
                sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
 
-static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu)
-{
-}
-
 static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
 {
        struct exit_ctl_data *p_exit_data;
@@ -539,18 +592,29 @@ static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
        if (r < 0)
                goto out;
        vcpu->arch.vm_tr_slot = r;
+
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+       if (kvm->arch.is_sn2) {
+               r = kvm_sn2_setup_mappings(vcpu);
+               if (r < 0)
+                       goto out;
+       }
+#endif
+
        r = 0;
 out:
        return r;
-
 }
 
 static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
 {
-
+       struct kvm *kvm = vcpu->kvm;
        ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
        ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
-
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+       if (kvm->arch.is_sn2)
+               ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
+#endif
 }
 
 static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
@@ -598,22 +662,22 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        int r;
 
 again:
-       preempt_disable();
-
-       kvm_prepare_guest_switch(vcpu);
-       local_irq_disable();
-
        if (signal_pending(current)) {
-               local_irq_enable();
-               preempt_enable();
                r = -EINTR;
                kvm_run->exit_reason = KVM_EXIT_INTR;
                goto out;
        }
 
+       /*
+        * down_read() may sleep and return with interrupts enabled
+        */
+       down_read(&vcpu->kvm->slots_lock);
+
+       preempt_disable();
+       local_irq_disable();
+
        vcpu->guest_mode = 1;
        kvm_guest_enter();
-
        r = vti_vcpu_run(vcpu, kvm_run);
        if (r < 0) {
                local_irq_enable();
@@ -633,9 +697,8 @@ again:
         * But we need to prevent reordering, hence this barrier():
         */
        barrier();
-
        kvm_guest_exit();
-
+       up_read(&vcpu->kvm->slots_lock);
        preempt_enable();
 
        r = kvm_handle_exit(kvm_run, vcpu);
@@ -670,15 +733,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        vcpu_load(vcpu);
 
+       if (vcpu->sigset_active)
+               sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                kvm_vcpu_block(vcpu);
-               vcpu_put(vcpu);
-               return -EAGAIN;
+               clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+               r = -EAGAIN;
+               goto out;
        }
 
-       if (vcpu->sigset_active)
-               sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
-
        if (vcpu->mmio_needed) {
                memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
                kvm_set_mmio_data(vcpu);
@@ -686,7 +750,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                vcpu->mmio_needed = 0;
        }
        r = __vcpu_run(vcpu, kvm_run);
-
+out:
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
@@ -694,27 +758,24 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        return r;
 }
 
-/*
- * Allocate 16M memory for every vm to hold its specific data.
- * Its memory map is defined in kvm_host.h.
- */
 static struct kvm *kvm_alloc_kvm(void)
 {
 
        struct kvm *kvm;
        uint64_t  vm_base;
 
+       BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
+
        vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
 
        if (!vm_base)
                return ERR_PTR(-ENOMEM);
-       printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
 
-       /* Zero all pages before use! */
        memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-
-       kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
+       kvm = (struct kvm *)(vm_base +
+                       offsetof(struct kvm_vm_data, kvm_vm_struct));
        kvm->arch.vm_base = vm_base;
+       printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
 
        return kvm;
 }
@@ -756,26 +817,21 @@ static void kvm_build_io_pmt(struct kvm *kvm)
 
 static void kvm_init_vm(struct kvm *kvm)
 {
-       long vm_base;
-
        BUG_ON(!kvm);
 
        kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
        kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
        kvm->arch.vmm_init_rr = VMM_INIT_RR;
 
-       vm_base = kvm->arch.vm_base;
-       if (vm_base) {
-               kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
-               kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
-               kvm->arch.vpd_base  = vm_base + KVM_VPD_OFS;
-       }
-
        /*
         *Fill P2M entries for MMIO/IO ranges
         */
        kvm_build_io_pmt(kvm);
 
+       INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+
+       /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
+       set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 }
 
 struct  kvm *kvm_arch_create_vm(void)
@@ -784,8 +840,13 @@ struct  kvm *kvm_arch_create_vm(void)
 
        if (IS_ERR(kvm))
                return ERR_PTR(-ENOMEM);
+
+       kvm->arch.is_sn2 = ia64_platform_is("sn2");
+
        kvm_init_vm(kvm);
 
+       kvm->arch.online_vcpus = 0;
+
        return kvm;
 
 }
@@ -830,9 +891,8 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
-       int i;
        struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-       int r;
+       int i;
 
        vcpu_load(vcpu);
 
@@ -849,18 +909,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 
        vpd->vpr = regs->vpd.vpr;
 
-       r = -EFAULT;
-       r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
-                                               sizeof(union context));
-       if (r)
-               goto out;
-       r = copy_from_user(vcpu + 1, regs->saved_stack +
-                       sizeof(struct kvm_vcpu),
-                       IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
-       if (r)
-               goto out;
-       vcpu->arch.exit_data =
-               ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
+       memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
 
        RESTORE_REGS(mp_state);
        RESTORE_REGS(vmm_rr);
@@ -890,13 +939,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        RESTORE_REGS(saved_gp);
 
        vcpu->arch.irq_new_pending = 1;
-       vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC);
+       vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
        set_bit(KVM_REQ_RESUME, &vcpu->requests);
 
        vcpu_put(vcpu);
-       r = 0;
-out:
-       return r;
+
+       return 0;
 }
 
 long kvm_arch_vm_ioctl(struct file *filp,
@@ -930,7 +978,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = kvm_ioapic_init(kvm);
                if (r)
                        goto out;
+               r = kvm_setup_default_irq_routing(kvm);
+               if (r) {
+                       kfree(kvm->arch.vioapic);
+                       goto out;
+               }
                break;
+       case KVM_IRQ_LINE_STATUS:
        case KVM_IRQ_LINE: {
                struct kvm_irq_level irq_event;
 
@@ -938,11 +992,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
                if (copy_from_user(&irq_event, argp, sizeof irq_event))
                        goto out;
                if (irqchip_in_kernel(kvm)) {
+                       __s32 status;
                        mutex_lock(&kvm->lock);
-                       kvm_ioapic_set_irq(kvm->arch.vioapic,
-                                               irq_event.irq,
-                                               irq_event.level);
+                       status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+                                   irq_event.irq, irq_event.level);
                        mutex_unlock(&kvm->lock);
+                       if (ioctl == KVM_IRQ_LINE_STATUS) {
+                               irq_event.status = status;
+                               if (copy_to_user(argp, &irq_event,
+                                                       sizeof irq_event))
+                                       goto out;
+                       }
                        r = 0;
                }
                break;
@@ -1112,7 +1172,7 @@ static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
        struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
 
        if (hrtimer_cancel(p_ht))
-               hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS);
+               hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
 }
 
 static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
@@ -1121,15 +1181,16 @@ static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
        wait_queue_head_t *q;
 
        vcpu  = container_of(data, struct kvm_vcpu, arch.hlt_timer);
+       q = &vcpu->wq;
+
        if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
                goto out;
 
-       q = &vcpu->wq;
-       if (waitqueue_active(q)) {
-               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+       if (waitqueue_active(q))
                wake_up_interruptible(q);
-       }
+
 out:
+       vcpu->arch.timer_fired = 1;
        vcpu->arch.timer_check = 1;
        return HRTIMER_NORESTART;
 }
@@ -1158,10 +1219,11 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                /*Set entry address for first run.*/
                regs->cr_iip = PALE_RESET_ENTRY;
 
-               /*Initilize itc offset for vcpus*/
-               itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
-               for (i = 0; i < MAX_VCPU_NUM; i++) {
-                       v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+               /*Initialize itc offset for vcpus*/
+               itc_offset = 0UL - kvm_get_itc(vcpu);
+               for (i = 0; i < kvm->arch.online_vcpus; i++) {
+                       v = (struct kvm_vcpu *)((char *)vcpu +
+                                       sizeof(struct kvm_vcpu_data) * i);
                        v->arch.itc_offset = itc_offset;
                        v->arch.last_itc = 0;
                }
@@ -1175,7 +1237,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        vcpu->arch.apic->vcpu = vcpu;
 
        p_ctx->gr[1] = 0;
-       p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
+       p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
        p_ctx->gr[13] = (unsigned long)vmm_vcpu;
        p_ctx->psr = 0x1008522000UL;
        p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
@@ -1210,12 +1272,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        vcpu->arch.hlt_timer.function = hlt_timer_fn;
 
        vcpu->arch.last_run_cpu = -1;
-       vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
+       vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
        vcpu->arch.vsa_base = kvm_vsa_base;
        vcpu->arch.__gp = kvm_vmm_gp;
        vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
-       vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
-       vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
+       vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
+       vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
        init_ptce_info(vcpu);
 
        r = 0;
@@ -1253,6 +1315,7 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
 uninit:
        kvm_vcpu_uninit(vcpu);
 fail:
+       local_irq_restore(psr);
        return r;
 }
 
@@ -1264,12 +1327,22 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
        int r;
        int cpu;
 
+       BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
+
+       r = -EINVAL;
+       if (id >= KVM_MAX_VCPUS) {
+               printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
+                               KVM_MAX_VCPUS);
+               goto fail;
+       }
+
        r = -ENOMEM;
        if (!vm_base) {
                printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
                goto fail;
        }
-       vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
+       vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
+                                       vcpu_data[id].vcpu_struct));
        vcpu->kvm = kvm;
 
        cpu = get_cpu();
@@ -1282,6 +1355,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                goto fail;
        }
 
+       kvm->arch.online_vcpus++;
+
        return vcpu;
 fail:
        return ERR_PTR(r);
@@ -1302,8 +1377,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
        return -EINVAL;
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-               struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+                                       struct kvm_guest_debug *dbg)
 {
        return -EINVAL;
 }
@@ -1336,8 +1411,16 @@ static void kvm_release_vm_pages(struct kvm *kvm)
        }
 }
 
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+       kvm_iommu_unmap_guest(kvm);
+#ifdef  KVM_CAP_DEVICE_ASSIGNMENT
+       kvm_free_all_assigned_devices(kvm);
+#endif
        kfree(kvm->arch.vioapic);
        kvm_release_vm_pages(kvm);
        kvm_free_physmem(kvm);
@@ -1361,9 +1444,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
-       int i;
-       int r;
        struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+       int i;
+
        vcpu_load(vcpu);
 
        for (i = 0; i < 16; i++) {
@@ -1378,14 +1461,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        regs->vpd.vpsr = vpd->vpsr;
        regs->vpd.vpr = vpd->vpr;
 
-       r = -EFAULT;
-       r = copy_to_user(regs->saved_guest, &vcpu->arch.guest,
-                                       sizeof(union context));
-       if (r)
-               goto out;
-       r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
-       if (r)
-               goto out;
+       memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
+
        SAVE_REGS(mp_state);
        SAVE_REGS(vmm_rr);
        memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
@@ -1405,7 +1482,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        }
        for (i = 0; i < 4; i++)
                regs->insvc[i] = vcpu->arch.insvc[i];
-       regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC);
+       regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
        SAVE_REGS(xtp);
        SAVE_REGS(metaphysical_rr0);
        SAVE_REGS(metaphysical_rr4);
@@ -1413,10 +1490,26 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        SAVE_REGS(metaphysical_saved_rr4);
        SAVE_REGS(fp_psr);
        SAVE_REGS(saved_gp);
+
        vcpu_put(vcpu);
-       r = 0;
-out:
-       return r;
+       return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
+                                 struct kvm_ia64_vcpu_stack *stack)
+{
+       memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
+       return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
+                                 struct kvm_ia64_vcpu_stack *stack)
+{
+       memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
+              sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
+
+       vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
+       return 0;
 }
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
@@ -1428,9 +1521,78 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 
 long kvm_arch_vcpu_ioctl(struct file *filp,
-               unsigned int ioctl, unsigned long arg)
+                        unsigned int ioctl, unsigned long arg)
 {
-       return -EINVAL;
+       struct kvm_vcpu *vcpu = filp->private_data;
+       void __user *argp = (void __user *)arg;
+       struct kvm_ia64_vcpu_stack *stack = NULL;
+       long r;
+
+       switch (ioctl) {
+       case KVM_IA64_VCPU_GET_STACK: {
+               struct kvm_ia64_vcpu_stack __user *user_stack;
+               void __user *first_p = argp;
+
+               r = -EFAULT;
+               if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+                       goto out;
+
+               if (!access_ok(VERIFY_WRITE, user_stack,
+                              sizeof(struct kvm_ia64_vcpu_stack))) {
+                       printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
+                              "Illegal user destination address for stack\n");
+                       goto out;
+               }
+               stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+               if (!stack) {
+                       r = -ENOMEM;
+                       goto out;
+               }
+
+               r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
+               if (r)
+                       goto out;
+
+               if (copy_to_user(user_stack, stack,
+                                sizeof(struct kvm_ia64_vcpu_stack)))
+                       goto out;
+
+               break;
+       }
+       case KVM_IA64_VCPU_SET_STACK: {
+               struct kvm_ia64_vcpu_stack __user *user_stack;
+               void __user *first_p = argp;
+
+               r = -EFAULT;
+               if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+                       goto out;
+
+               if (!access_ok(VERIFY_READ, user_stack,
+                           sizeof(struct kvm_ia64_vcpu_stack))) {
+                       printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
+                              "Illegal user address for stack\n");
+                       goto out;
+               }
+               stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+               if (!stack) {
+                       r = -ENOMEM;
+                       goto out;
+               }
+               if (copy_from_user(stack, user_stack,
+                                  sizeof(struct kvm_ia64_vcpu_stack)))
+                       goto out;
+
+               r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
+               break;
+       }
+
+       default:
+               r = -EINVAL;
+       }
+
+out:
+       kfree(stack);
+       return r;
 }
 
 int kvm_arch_set_memory_region(struct kvm *kvm,
@@ -1439,25 +1601,38 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
                int user_alloc)
 {
        unsigned long i;
-       struct page *page;
+       unsigned long pfn;
        int npages = mem->memory_size >> PAGE_SHIFT;
        struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
        unsigned long base_gfn = memslot->base_gfn;
 
+       if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
+               return -ENOMEM;
+
        for (i = 0; i < npages; i++) {
-               page = gfn_to_page(kvm, base_gfn + i);
-               kvm_set_pmt_entry(kvm, base_gfn + i,
-                               page_to_pfn(page) << PAGE_SHIFT,
-                               _PAGE_AR_RWX|_PAGE_MA_WB);
-               memslot->rmap[i] = (unsigned long)page;
+               pfn = gfn_to_pfn(kvm, base_gfn + i);
+               if (!kvm_is_mmio_pfn(pfn)) {
+                       kvm_set_pmt_entry(kvm, base_gfn + i,
+                                       pfn << PAGE_SHIFT,
+                               _PAGE_AR_RWX | _PAGE_MA_WB);
+                       memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
+               } else {
+                       kvm_set_pmt_entry(kvm, base_gfn + i,
+                                       GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
+                                       _PAGE_MA_UC);
+                       memslot->rmap[i] = 0;
+                       }
        }
 
        return 0;
 }
 
+void kvm_arch_flush_shadow(struct kvm *kvm)
+{
+}
 
 long kvm_arch_dev_ioctl(struct file *filp,
-               unsigned int ioctl, unsigned long arg)
+                       unsigned int ioctl, unsigned long arg)
 {
        return -EINVAL;
 }
@@ -1496,8 +1671,37 @@ out:
        return 0;
 }
 
+
+/*
+ * On SN2, the ITC isn't stable, so copy in fast path code to use the
+ * SN2 RTC, replacing the ITC based default verion.
+ */
+static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
+                         struct module *module)
+{
+       unsigned long new_ar, new_ar_sn2;
+       unsigned long module_base;
+
+       if (!ia64_platform_is("sn2"))
+               return;
+
+       module_base = (unsigned long)module->module_core;
+
+       new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
+       new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
+
+       printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
+              "as source\n");
+
+       /*
+        * Copy the SN2 version of mov_ar into place. They are both
+        * the same size, so 6 bundles is sufficient (6 * 0x10).
+        */
+       memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
+}
+
 static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
-                                               struct module *module)
+                           struct module *module)
 {
        unsigned long module_base;
        unsigned long vmm_size;
@@ -1519,6 +1723,7 @@ static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
                return -EFAULT;
 
        memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
+       kvm_patch_vmm(vmm_info, module);
        kvm_flush_icache(kvm_vmm_base, vmm_size);
 
        /*Recalculate kvm_vmm_info based on new VMM*/
@@ -1608,8 +1813,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
        struct kvm_memory_slot *memslot;
        int r, i;
        long n, base;
-       unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
-                                       + KVM_MEM_DIRTY_LOG_OFS);
+       unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
+                       offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
 
        r = -EINVAL;
        if (log->slot >= KVM_MEMORY_SLOTS)
@@ -1683,31 +1888,19 @@ static void vcpu_kick_intr(void *info)
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
        int ipi_pcpu = vcpu->cpu;
+       int cpu = get_cpu();
 
        if (waitqueue_active(&vcpu->wq))
                wake_up_interruptible(&vcpu->wq);
 
-       if (vcpu->guest_mode)
+       if (vcpu->guest_mode && cpu != ipi_pcpu)
                smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+       put_cpu();
 }
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
-
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-       if (!test_and_set_bit(vec, &vpd->irr[0])) {
-               vcpu->arch.irq_new_pending = 1;
-                if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
-                       kvm_vcpu_kick(vcpu);
-               else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
-                       vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-                       if (waitqueue_active(&vcpu->wq))
-                               wake_up_interruptible(&vcpu->wq);
-               }
-               return 1;
-       }
-       return 0;
+       return __apic_accept_irq(vcpu, irq->vector);
 }
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
@@ -1720,20 +1913,18 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
        return 0;
 }
 
-struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-                                      unsigned long bitmap)
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 {
-       struct kvm_vcpu *lvcpu = kvm->vcpus[0];
-       int i;
-
-       for (i = 1; i < KVM_MAX_VCPUS; i++) {
-               if (!kvm->vcpus[i])
-                       continue;
-               if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
-                       lvcpu = kvm->vcpus[i];
-       }
+       return vcpu1->arch.xtp - vcpu2->arch.xtp;
+}
 
-       return lvcpu;
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+               int short_hand, int dest, int dest_mode)
+{
+       struct kvm_lapic *target = vcpu->arch.apic;
+       return (dest_mode == 0) ?
+               kvm_apic_match_physical_addr(target, dest) :
+               kvm_apic_match_logical_addr(target, dest);
 }
 
 static int find_highest_bits(int *dat)
@@ -1774,7 +1965,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-       return 0;
+       return vcpu->arch.timer_fired;
 }
 
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
@@ -1790,11 +1981,43 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
 {
-       return -EINVAL;
+       vcpu_load(vcpu);
+       mp_state->mp_state = vcpu->arch.mp_state;
+       vcpu_put(vcpu);
+       return 0;
+}
+
+static int vcpu_reset(struct kvm_vcpu *vcpu)
+{
+       int r;
+       long psr;
+       local_irq_save(psr);
+       r = kvm_insert_vmm_mapping(vcpu);
+       if (r)
+               goto fail;
+
+       vcpu->arch.launched = 0;
+       kvm_arch_vcpu_uninit(vcpu);
+       r = kvm_arch_vcpu_init(vcpu);
+       if (r)
+               goto fail;
+
+       kvm_purge_vmm_mapping(vcpu);
+       r = 0;
+fail:
+       local_irq_restore(psr);
+       return r;
 }
 
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
 {
-       return -EINVAL;
+       int r = 0;
+
+       vcpu_load(vcpu);
+       vcpu->arch.mp_state = mp_state->mp_state;
+       if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
+               r = vcpu_reset(vcpu);
+       vcpu_put(vcpu);
+       return r;
 }