KVM: fix cleanup_srcu_struct on vm destruction
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c18b21d..4929286 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1,7 +1,7 @@
 /*
  * s390host.c --  hosting zSeries kernel virtual machines
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -10,6 +10,7 @@
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Heiko Carstens <heiko.carstens@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  */
 
 #include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
+#include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
 #include <asm/pgtable.h>
 #include <asm/nmi.h>
+#include <asm/system.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 
@@ -69,11 +72,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { NULL }
 };
 
+static unsigned long long *facilities;
 
 /* Section: not file related */
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
        /* every s390 is virtualization enabled ;-) */
+       return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
@@ -113,10 +118,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
 
 int kvm_dev_ioctl_check_extension(long ext)
 {
+       int r;
+
        switch (ext) {
+       case KVM_CAP_S390_PSW:
+               r = 1;
+               break;
        default:
-               return 0;
+               r = 0;
        }
+       return r;
 }
 
 /* Section: vm related */
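
The new KVM_CAP_S390_PSW capability lets userspace detect that the guest PSW is now exchanged through the psw_mask/psw_addr fields of struct kvm_run (see the run-loop hunks below). A minimal userspace probe, shown as an illustrative sketch and not part of this commit:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);	/* main KVM device node */

		if (kvm < 0)
			return 1;
		/* KVM_CHECK_EXTENSION returns > 0 if the capability is present */
		if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_S390_PSW) > 0)
			printf("PSW via struct kvm_run supported\n");
		return 0;
	}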
@@ -147,7 +158,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
                break;
        }
        default:
-               r = -EINVAL;
+               r = -ENOTTY;
        }
 
        return r;
@@ -208,13 +219,17 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 static void kvm_free_vcpus(struct kvm *kvm)
 {
        unsigned int i;
+       struct kvm_vcpu *vcpu;
 
-       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-               if (kvm->vcpus[i]) {
-                       kvm_arch_vcpu_destroy(kvm->vcpus[i]);
-                       kvm->vcpus[i] = NULL;
-               }
-       }
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               kvm_arch_vcpu_destroy(vcpu);
+
+       mutex_lock(&kvm->lock);
+       for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+               kvm->vcpus[i] = NULL;
+
+       atomic_set(&kvm->online_vcpus, 0);
+       mutex_unlock(&kvm->lock);
 }
 
 void kvm_arch_sync_events(struct kvm *kvm)
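
kvm_free_vcpus() now walks only the vcpus that were actually created, using the generic kvm_for_each_vcpu() iterator instead of scanning all KVM_MAX_VCPUS slots. For reference, the iterator in include/linux/kvm_host.h of this era is roughly the following (quoted from memory, verify against your tree):

	#define kvm_for_each_vcpu(idx, vcpup, kvm) \
		for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \
		     idx < atomic_read(&kvm->online_vcpus) && vcpup; \
		     vcpup = kvm_get_vcpu(kvm, ++idx))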
@@ -227,6 +242,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        kvm_free_physmem(kvm);
        free_page((unsigned long)(kvm->arch.sca));
        debug_unregister(kvm->arch.dbf);
+       cleanup_srcu_struct(&kvm->srcu);
        kfree(kvm);
 }
 
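This cleanup_srcu_struct() call is the fix named in the commit title: the generic kvm_create_vm() initializes kvm->srcu with init_srcu_struct(), which allocates per-CPU reader state, and destroying the VM without the matching cleanup leaks that allocation. The pairing in isolation, as an illustrative sketch using only the SRCU API:

	#include <linux/srcu.h>

	static struct srcu_struct demo_srcu;	/* stand-in for kvm->srcu */

	static int demo_create(void)
	{
		/* allocates per-CPU reader counters */
		return init_srcu_struct(&demo_srcu);
	}

	static void demo_destroy(void)
	{
		/* must be called exactly once, or the per-CPU state leaks */
		cleanup_srcu_struct(&demo_srcu);
	}
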
@@ -276,18 +292,13 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
        vcpu->arch.sie_block->gbea = 1;
 }
 
-/* The current code can have up to 256 pages for virtio */
-#define VIRTIODESCSPACE (256ul * 4096ul)
-
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
-       vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
-                                     vcpu->kvm->arch.guest_origin +
-                                     VIRTIODESCSPACE - 1ul;
-       vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
+       set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests);
        vcpu->arch.sie_block->ecb   = 2;
        vcpu->arch.sie_block->eca   = 0xC1002001U;
+       vcpu->arch.sie_block->fac   = (int) (long) facilities;
        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
        tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
                     (unsigned long) vcpu);
@@ -316,8 +327,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
        BUG_ON(!kvm->arch.sca);
        if (!kvm->arch.sca->cpu[id].sda)
                kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
-       else
-               BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
        vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
        vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 
@@ -419,8 +428,10 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
        vcpu_load(vcpu);
        if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
                rc = -EBUSY;
-       else
-               vcpu->arch.sie_block->gpsw = psw;
+       else {
+               vcpu->run->psw_mask = psw.mask;
+               vcpu->run->psw_addr = psw.addr;
+       }
        vcpu_put(vcpu);
        return rc;
 }
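
With KVM_CAP_S390_PSW the initial PSW lands in the shared kvm_run page rather than directly in the SIE block; the run loop below copies it into gpsw on entry and back out on exit. A hypothetical userspace counterpart, where vcpu_fd and mmap_size are assumed to come from the usual KVM_CREATE_VCPU and KVM_GET_VCPU_MMAP_SIZE calls:

	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
				   MAP_SHARED, vcpu_fd, 0);

	run->psw_mask = 0x0000000180000000ULL;	/* example: EA+BA, z/Arch mode */
	run->psw_addr = 0x10000;		/* example guest entry point */
	ioctl(vcpu_fd, KVM_RUN, 0);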
@@ -487,9 +498,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        vcpu_load(vcpu);
 
+rerun_vcpu:
+       if (vcpu->requests)
+               if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+                       kvm_s390_vcpu_set_mem(vcpu);
+
        /* verify, that memory has been registered */
-       if (!vcpu->kvm->arch.guest_memsize) {
+       if (!vcpu->arch.sie_block->gmslm) {
                vcpu_put(vcpu);
+               VCPU_EVENT(vcpu, 3, "%s", "no memory registered to run vcpu");
                return -EINVAL;
        }
 
@@ -502,16 +519,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        switch (kvm_run->exit_reason) {
        case KVM_EXIT_S390_SIEIC:
-               vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
-               vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
-               break;
        case KVM_EXIT_UNKNOWN:
+       case KVM_EXIT_INTR:
        case KVM_EXIT_S390_RESET:
                break;
        default:
                BUG();
        }
 
+       vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
+       vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
+
        might_fault();
 
        do {
@@ -519,15 +537,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                rc = kvm_handle_sie_intercept(vcpu);
        } while (!signal_pending(current) && !rc);
 
-       if (signal_pending(current) && !rc)
+       if (rc == SIE_INTERCEPT_RERUNVCPU)
+               goto rerun_vcpu;
+
+       if (signal_pending(current) && !rc) {
+               kvm_run->exit_reason = KVM_EXIT_INTR;
                rc = -EINTR;
+       }
 
-       if (rc == -ENOTSUPP) {
+       if (rc == -EOPNOTSUPP) {
                /* intercept cannot be handled in-kernel, prepare kvm-run */
                kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
                kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
-               kvm_run->s390_sieic.mask     = vcpu->arch.sie_block->gpsw.mask;
-               kvm_run->s390_sieic.addr     = vcpu->arch.sie_block->gpsw.addr;
                kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
                kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
                rc = 0;
@@ -539,6 +560,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                rc = 0;
        }
 
+       kvm_run->psw_mask     = vcpu->arch.sie_block->gpsw.mask;
+       kvm_run->psw_addr     = vcpu->arch.sie_block->gpsw.addr;
+
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
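Seen from userspace, the new KVM_EXIT_INTR reason simply means KVM_RUN was interrupted by a signal and should be re-entered, while KVM_EXIT_S390_SIEIC still carries intercepts the kernel could not handle. A sketch of the matching run loop, where handle_sie_intercept() is a hypothetical application function and run/vcpu_fd come from the setup shown above:

	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno != EINTR)
			break;			/* real error */
		switch (run->exit_reason) {
		case KVM_EXIT_INTR:
			continue;		/* signal, just re-enter */
		case KVM_EXIT_S390_SIEIC:
			handle_sie_intercept(run);
			break;
		default:
			break;
		}
	}
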
@@ -581,45 +605,45 @@ int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
        } else
                prefix = 0;
 
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
+       if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
                        vcpu->arch.guest_fpregs.fprs, 128, prefix))
                return -EFAULT;
 
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
+       if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
                        vcpu->arch.guest_gprs, 128, prefix))
                return -EFAULT;
 
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
+       if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
                        &vcpu->arch.sie_block->gpsw, 16, prefix))
                return -EFAULT;
 
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
+       if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
                        &vcpu->arch.sie_block->prefix, 4, prefix))
                return -EFAULT;
 
        if (__guestcopy(vcpu,
-                       addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
+                       addr + offsetof(struct save_area, fp_ctrl_reg),
                        &vcpu->arch.guest_fpregs.fpc, 4, prefix))
                return -EFAULT;
 
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
+       if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
                        &vcpu->arch.sie_block->todpr, 4, prefix))
                return -EFAULT;
 
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
+       if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
                        &vcpu->arch.sie_block->cputm, 8, prefix))
                return -EFAULT;
 
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
+       if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
                        &vcpu->arch.sie_block->ckc, 8, prefix))
                return -EFAULT;
 
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
+       if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
                        &vcpu->arch.guest_acrs, 64, prefix))
                return -EFAULT;
 
        if (__guestcopy(vcpu,
-                       addr + offsetof(struct save_area_s390x, ctrl_regs),
+                       addr + offsetof(struct save_area, ctrl_regs),
                        &vcpu->arch.sie_block->gcr, 128, prefix))
                return -EFAULT;
        return 0;
@@ -667,13 +691,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 }
 
 /* Section: memory related */
-int kvm_arch_set_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
-                               struct kvm_memory_slot old,
-                               int user_alloc)
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+                                  struct kvm_memory_slot *memslot,
+                                  struct kvm_memory_slot old,
+                                  struct kvm_userspace_memory_region *mem,
+                                  int user_alloc)
 {
-       int i;
-
        /* A few sanity checks. We can have exactly one memory slot which has
           to start at guest virtual zero and which has to be located at a
           page boundary in userland and which has to end at a page boundary.
@@ -681,7 +704,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
           vmas. It is okay to mmap() and munmap() stuff in this slot after
           doing this call at any time */
 
-       if (mem->slot || kvm->arch.guest_memsize)
+       if (mem->slot)
                return -EINVAL;
 
        if (mem->guest_phys_addr)
@@ -696,36 +719,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
        if (!user_alloc)
                return -EINVAL;
 
-       /* lock all vcpus */
-       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-               if (!kvm->vcpus[i])
-                       continue;
-               if (!mutex_trylock(&kvm->vcpus[i]->mutex))
-                       goto fail_out;
-       }
-
-       kvm->arch.guest_origin = mem->userspace_addr;
-       kvm->arch.guest_memsize = mem->memory_size;
-
-       /* update sie control blocks, and unlock all vcpus */
-       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-               if (kvm->vcpus[i]) {
-                       kvm->vcpus[i]->arch.sie_block->gmsor =
-                               kvm->arch.guest_origin;
-                       kvm->vcpus[i]->arch.sie_block->gmslm =
-                               kvm->arch.guest_memsize +
-                               kvm->arch.guest_origin +
-                               VIRTIODESCSPACE - 1ul;
-                       mutex_unlock(&kvm->vcpus[i]->mutex);
-               }
-       }
-
        return 0;
+}
 
-fail_out:
-       for (; i >= 0; i--)
-               mutex_unlock(&kvm->vcpus[i]->mutex);
-       return -EINVAL;
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+                               struct kvm_userspace_memory_region *mem,
+                               struct kvm_memory_slot old,
+                               int user_alloc)
+{
+       int i;
+       struct kvm_vcpu *vcpu;
+
+       /* request update of sie control block for all available vcpus */
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+                       continue;
+               kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
+       }
 }
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
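
The old all-in-one kvm_arch_set_memory_region() is replaced by the prepare/commit pair of the generic memslot rework: the common code validates the change, publishes the new slot array, and only then calls commit, which may no longer fail. A simplified sketch of that caller (not a verbatim copy of virt/kvm/kvm_main.c; install_new_memslot() is hypothetical shorthand for publishing the slot and synchronizing kvm->srcu):

	static int set_memory_region_sketch(struct kvm *kvm,
					    struct kvm_userspace_memory_region *mem,
					    struct kvm_memory_slot *new,
					    struct kvm_memory_slot old,
					    int user_alloc)
	{
		int r;

		r = kvm_arch_prepare_memory_region(kvm, new, old, mem, user_alloc);
		if (r)
			return r;		/* nothing published yet */

		install_new_memslot(kvm, new);	/* publish + srcu sync */

		kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
		return 0;
	}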
@@ -739,11 +749,29 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 
 static int __init kvm_s390_init(void)
 {
-       return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+       int ret;
+       ret = kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+       if (ret)
+               return ret;
+
+       /*
+        * guests can ask for up to 255+1 double words, so we need a full
+        * page to hold the maximum number of facilities. On the other
+        * hand, we only set facilities that are known to work in KVM.
+        */
+       facilities = (unsigned long long *) get_zeroed_page(GFP_DMA);
+       if (!facilities) {
+               kvm_exit();
+               return -ENOMEM;
+       }
+       stfle(facilities, 1);
+       facilities[0] &= 0xff00fff3f0700000ULL;
+       return 0;
 }
 
 static void __exit kvm_s390_exit(void)
 {
+       free_page((unsigned long) facilities);
        kvm_exit();
 }