on_each_cpu(): kill unused 'retry' parameter
[safe/jmp/linux-2.6] / virt / kvm / kvm_main.c
index 8d0b7c1..d4eae6a 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/kvm_para.h>
 #include <linux/pagemap.h>
 #include <linux/mman.h>
+#include <linux/swap.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -59,7 +60,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
 
 static __read_mostly struct preempt_ops kvm_preempt_ops;
 
-static struct dentry *debugfs_dir;
+struct dentry *kvm_debugfs_dir;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
                           unsigned long arg);
@@ -119,6 +120,29 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
        smp_call_function_mask(cpus, ack_flush, NULL, 1);
 }
 
+void kvm_reload_remote_mmus(struct kvm *kvm)
+{
+       int i, cpu;
+       cpumask_t cpus;
+       struct kvm_vcpu *vcpu;
+
+       cpus_clear(cpus);
+       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+               vcpu = kvm->vcpus[i];
+               if (!vcpu)
+                       continue;
+               if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+                       continue;
+               cpu = vcpu->cpu;
+               if (cpu != -1 && cpu != raw_smp_processor_id())
+                       cpu_set(cpu, cpus);
+       }
+       if (cpus_empty(cpus))
+               return;
+       smp_call_function_mask(cpus, ack_flush, NULL, 1);
+}
+
+
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
        struct page *page;
@@ -165,9 +189,12 @@ static struct kvm *kvm_create_vm(void)
 
        kvm->mm = current->mm;
        atomic_inc(&kvm->mm->mm_count);
+       spin_lock_init(&kvm->mmu_lock);
        kvm_io_bus_init(&kvm->pio_bus);
        mutex_init(&kvm->lock);
        kvm_io_bus_init(&kvm->mmio_bus);
+       init_rwsem(&kvm->slots_lock);
+       atomic_set(&kvm->users_count, 1);
        spin_lock(&kvm_lock);
        list_add(&kvm->vm_list, &vm_list);
        spin_unlock(&kvm_lock);
@@ -187,9 +214,13 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
        if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
                vfree(free->dirty_bitmap);
 
+       if (!dont || free->lpage_info != dont->lpage_info)
+               vfree(free->lpage_info);
+
        free->npages = 0;
        free->dirty_bitmap = NULL;
        free->rmap = NULL;
+       free->lpage_info = NULL;
 }
 
 void kvm_free_physmem(struct kvm *kvm)
@@ -213,11 +244,25 @@ static void kvm_destroy_vm(struct kvm *kvm)
        mmdrop(mm);
 }
 
+void kvm_get_kvm(struct kvm *kvm)
+{
+       atomic_inc(&kvm->users_count);
+}
+EXPORT_SYMBOL_GPL(kvm_get_kvm);
+
+void kvm_put_kvm(struct kvm *kvm)
+{
+       if (atomic_dec_and_test(&kvm->users_count))
+               kvm_destroy_vm(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_put_kvm);
+
+
 static int kvm_vm_release(struct inode *inode, struct file *filp)
 {
        struct kvm *kvm = filp->private_data;
 
-       kvm_destroy_vm(kvm);
+       kvm_put_kvm(kvm);
        return 0;
 }
 
@@ -299,6 +344,25 @@ int __kvm_set_memory_region(struct kvm *kvm,
                new.user_alloc = user_alloc;
                new.userspace_addr = mem->userspace_addr;
        }
+       if (npages && !new.lpage_info) {
+               int largepages = npages / KVM_PAGES_PER_HPAGE;
+               if (npages % KVM_PAGES_PER_HPAGE)
+                       largepages++;
+               if (base_gfn % KVM_PAGES_PER_HPAGE)
+                       largepages++;
+
+               new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info));
+
+               if (!new.lpage_info)
+                       goto out_free;
+
+               memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info));
+
+               if (base_gfn % KVM_PAGES_PER_HPAGE)
+                       new.lpage_info[0].write_count = 1;
+               if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
+                       new.lpage_info[largepages-1].write_count = 1;
+       }
 
        /* Allocate page dirty bitmap if needed */
        if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
@@ -338,9 +402,9 @@ int kvm_set_memory_region(struct kvm *kvm,
 {
        int r;
 
-       down_write(&current->mm->mmap_sem);
+       down_write(&kvm->slots_lock);
        r = __kvm_set_memory_region(kvm, mem, user_alloc);
-       up_write(&current->mm->mmap_sem);
+       up_write(&kvm->slots_lock);
        return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@@ -395,6 +459,12 @@ int is_error_page(struct page *page)
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
+int is_error_pfn(pfn_t pfn)
+{
+       return pfn == bad_pfn;
+}
+EXPORT_SYMBOL_GPL(is_error_pfn);
+
 static inline unsigned long bad_hva(void)
 {
        return PAGE_OFFSET;
@@ -442,7 +512,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
-static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
+unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
        struct kvm_memory_slot *slot;
 
@@ -452,11 +522,12 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
                return bad_hva();
        return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
+EXPORT_SYMBOL_GPL(gfn_to_hva);
 
 /*
  * Requires current->mm->mmap_sem to be held
  */
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
        struct page *page[1];
        unsigned long addr;
@@ -467,7 +538,7 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
        addr = gfn_to_hva(kvm, gfn);
        if (kvm_is_error_hva(addr)) {
                get_page(bad_page);
-               return bad_page;
+               return page_to_pfn(bad_page);
        }
 
        npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
@@ -475,27 +546,71 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
        if (npages != 1) {
                get_page(bad_page);
-               return bad_page;
+               return page_to_pfn(bad_page);
        }
 
-       return page[0];
+       return page_to_pfn(page[0]);
+}
+
+EXPORT_SYMBOL_GPL(gfn_to_pfn);
+
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+{
+       return pfn_to_page(gfn_to_pfn(kvm, gfn));
 }
 
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
 void kvm_release_page_clean(struct page *page)
 {
-       put_page(page);
+       kvm_release_pfn_clean(page_to_pfn(page));
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
+void kvm_release_pfn_clean(pfn_t pfn)
+{
+       put_page(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
+
 void kvm_release_page_dirty(struct page *page)
 {
+       kvm_release_pfn_dirty(page_to_pfn(page));
+}
+EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
+
+void kvm_release_pfn_dirty(pfn_t pfn)
+{
+       kvm_set_pfn_dirty(pfn);
+       kvm_release_pfn_clean(pfn);
+}
+EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
+
+void kvm_set_page_dirty(struct page *page)
+{
+       kvm_set_pfn_dirty(page_to_pfn(page));
+}
+EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
+
+void kvm_set_pfn_dirty(pfn_t pfn)
+{
+       struct page *page = pfn_to_page(pfn);
        if (!PageReserved(page))
                SetPageDirty(page);
-       put_page(page);
 }
-EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
+EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
+
+void kvm_set_pfn_accessed(pfn_t pfn)
+{
+       mark_page_accessed(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
+
+void kvm_get_pfn(pfn_t pfn)
+{
+       get_page(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_get_pfn);
 
 static int next_segment(unsigned long len, int offset)
 {
@@ -643,24 +758,26 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
  */
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
-       DECLARE_WAITQUEUE(wait, current);
+       DEFINE_WAIT(wait);
+
+       for (;;) {
+               prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-       add_wait_queue(&vcpu->wq, &wait);
+               if (kvm_cpu_has_interrupt(vcpu))
+                       break;
+               if (kvm_cpu_has_pending_timer(vcpu))
+                       break;
+               if (kvm_arch_vcpu_runnable(vcpu))
+                       break;
+               if (signal_pending(current))
+                       break;
 
-       /*
-        * We will block until either an interrupt or a signal wakes us up
-        */
-       while (!kvm_cpu_has_interrupt(vcpu)
-              && !signal_pending(current)
-              && !kvm_arch_vcpu_runnable(vcpu)) {
-               set_current_state(TASK_INTERRUPTIBLE);
                vcpu_put(vcpu);
                schedule();
                vcpu_load(vcpu);
        }
 
-       __set_current_state(TASK_RUNNING);
-       remove_wait_queue(&vcpu->wq, &wait);
+       finish_wait(&vcpu->wq, &wait);
 }
 
 void kvm_resched(struct kvm_vcpu *vcpu)
@@ -678,8 +795,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        if (vmf->pgoff == 0)
                page = virt_to_page(vcpu->run);
+#ifdef CONFIG_X86
        else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
                page = virt_to_page(vcpu->arch.pio_data);
+#endif
        else
                return VM_FAULT_SIGBUS;
        get_page(page);
@@ -701,11 +820,11 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 {
        struct kvm_vcpu *vcpu = filp->private_data;
 
-       fput(vcpu->kvm->filp);
+       kvm_put_kvm(vcpu->kvm);
        return 0;
 }
 
-static struct file_operations kvm_vcpu_fops = {
+static const struct file_operations kvm_vcpu_fops = {
        .release        = kvm_vcpu_release,
        .unlocked_ioctl = kvm_vcpu_ioctl,
        .compat_ioctl   = kvm_vcpu_ioctl,
@@ -717,15 +836,9 @@ static struct file_operations kvm_vcpu_fops = {
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-       int fd, r;
-       struct inode *inode;
-       struct file *file;
-
-       r = anon_inode_getfd(&fd, &inode, &file,
-                            "kvm-vcpu", &kvm_vcpu_fops, vcpu);
-       if (r)
-               return r;
-       atomic_inc(&vcpu->kvm->filp->f_count);
+       int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu);
+       if (fd < 0)
+               kvm_put_kvm(vcpu->kvm);
        return fd;
 }
 
@@ -760,6 +873,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
        mutex_unlock(&kvm->lock);
 
        /* Now it's all set up, let userspace reach it */
+       kvm_get_kvm(kvm);
        r = create_vcpu_fd(vcpu);
        if (r < 0)
                goto unlink;
@@ -802,28 +916,39 @@ static long kvm_vcpu_ioctl(struct file *filp,
                r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
                break;
        case KVM_GET_REGS: {
-               struct kvm_regs kvm_regs;
+               struct kvm_regs *kvm_regs;
 
-               memset(&kvm_regs, 0, sizeof kvm_regs);
-               r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
-               if (r)
+               r = -ENOMEM;
+               kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
+               if (!kvm_regs)
                        goto out;
+               r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
+               if (r)
+                       goto out_free1;
                r = -EFAULT;
-               if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
-                       goto out;
+               if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
+                       goto out_free1;
                r = 0;
+out_free1:
+               kfree(kvm_regs);
                break;
        }
        case KVM_SET_REGS: {
-               struct kvm_regs kvm_regs;
+               struct kvm_regs *kvm_regs;
 
-               r = -EFAULT;
-               if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
+               r = -ENOMEM;
+               kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
+               if (!kvm_regs)
                        goto out;
-               r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
+               r = -EFAULT;
+               if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
+                       goto out_free2;
+               r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
                if (r)
-                       goto out;
+                       goto out_free2;
                r = 0;
+out_free2:
+               kfree(kvm_regs);
                break;
        }
        case KVM_GET_SREGS: {
@@ -851,6 +976,30 @@ static long kvm_vcpu_ioctl(struct file *filp,
                r = 0;
                break;
        }
+       case KVM_GET_MP_STATE: {
+               struct kvm_mp_state mp_state;
+
+               r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
+               if (r)
+                       goto out;
+               r = -EFAULT;
+               if (copy_to_user(argp, &mp_state, sizeof mp_state))
+                       goto out;
+               r = 0;
+               break;
+       }
+       case KVM_SET_MP_STATE: {
+               struct kvm_mp_state mp_state;
+
+               r = -EFAULT;
+               if (copy_from_user(&mp_state, argp, sizeof mp_state))
+                       goto out;
+               r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
+               if (r)
+                       goto out;
+               r = 0;
+               break;
+       }
        case KVM_TRANSLATE: {
                struct kvm_translation tr;
 
@@ -1005,7 +1154,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
        return 0;
 }
 
-static struct file_operations kvm_vm_fops = {
+static const struct file_operations kvm_vm_fops = {
        .release        = kvm_vm_release,
        .unlocked_ioctl = kvm_vm_ioctl,
        .compat_ioctl   = kvm_vm_ioctl,
@@ -1014,21 +1163,15 @@ static struct file_operations kvm_vm_fops = {
 
 static int kvm_dev_ioctl_create_vm(void)
 {
-       int fd, r;
-       struct inode *inode;
-       struct file *file;
+       int fd;
        struct kvm *kvm;
 
        kvm = kvm_create_vm();
        if (IS_ERR(kvm))
                return PTR_ERR(kvm);
-       r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
-       if (r) {
-               kvm_destroy_vm(kvm);
-               return r;
-       }
-
-       kvm->filp = file;
+       fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm);
+       if (fd < 0)
+               kvm_put_kvm(kvm);
 
        return fd;
 }
@@ -1059,7 +1202,15 @@ static long kvm_dev_ioctl(struct file *filp,
                r = -EINVAL;
                if (arg)
                        goto out;
-               r = 2 * PAGE_SIZE;
+               r = PAGE_SIZE;     /* struct kvm_run */
+#ifdef CONFIG_X86
+               r += PAGE_SIZE;    /* pio data page */
+#endif
+               break;
+       case KVM_TRACE_ENABLE:
+       case KVM_TRACE_PAUSE:
+       case KVM_TRACE_DISABLE:
+               r = kvm_trace_ioctl(ioctl, arg);
                break;
        default:
                return kvm_arch_dev_ioctl(filp, ioctl, arg);
@@ -1115,12 +1266,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
        case CPU_UP_CANCELED:
                printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
                       cpu);
-               smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
+               smp_call_function_single(cpu, hardware_disable, NULL, 1);
                break;
        case CPU_ONLINE:
                printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
                       cpu);
-               smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
+               smp_call_function_single(cpu, hardware_enable, NULL, 1);
                break;
        }
        return NOTIFY_OK;
@@ -1135,7 +1286,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
                 * in vmx root mode.
                 */
                printk(KERN_INFO "kvm: exiting hardware virtualization\n");
-               on_each_cpu(hardware_disable, NULL, 0, 1);
+               on_each_cpu(hardware_disable, NULL, 1);
        }
        return NOTIFY_OK;
 }
@@ -1187,38 +1338,38 @@ static struct notifier_block kvm_cpu_notifier = {
        .priority = 20, /* must be > scheduler priority */
 };
 
-static u64 vm_stat_get(void *_offset)
+static int vm_stat_get(void *_offset, u64 *val)
 {
        unsigned offset = (long)_offset;
-       u64 total = 0;
        struct kvm *kvm;
 
+       *val = 0;
        spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
-               total += *(u32 *)((void *)kvm + offset);
+               *val += *(u32 *)((void *)kvm + offset);
        spin_unlock(&kvm_lock);
-       return total;
+       return 0;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");
 
-static u64 vcpu_stat_get(void *_offset)
+static int vcpu_stat_get(void *_offset, u64 *val)
 {
        unsigned offset = (long)_offset;
-       u64 total = 0;
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
 
+       *val = 0;
        spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                        vcpu = kvm->vcpus[i];
                        if (vcpu)
-                               total += *(u32 *)((void *)vcpu + offset);
+                               *val += *(u32 *)((void *)vcpu + offset);
                }
        spin_unlock(&kvm_lock);
-       return total;
+       return 0;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
@@ -1232,9 +1383,9 @@ static void kvm_init_debug(void)
 {
        struct kvm_stats_debugfs_item *p;
 
-       debugfs_dir = debugfs_create_dir("kvm", NULL);
+       kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
        for (p = debugfs_entries; p->name; ++p)
-               p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
+               p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
                                                (void *)(long)p->offset,
                                                stat_fops[p->kind]);
 }
@@ -1245,7 +1396,7 @@ static void kvm_exit_debug(void)
 
        for (p = debugfs_entries; p->name; ++p)
                debugfs_remove(p->dentry);
-       debugfs_remove(debugfs_dir);
+       debugfs_remove(kvm_debugfs_dir);
 }
 
 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
@@ -1272,6 +1423,7 @@ static struct sys_device kvm_sysdev = {
 };
 
 struct page *bad_page;
+pfn_t bad_pfn;
 
 static inline
 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
@@ -1313,6 +1465,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
                goto out;
        }
 
+       bad_pfn = page_to_pfn(bad_page);
+
        r = kvm_arch_hardware_setup();
        if (r < 0)
                goto out_free_0;
@@ -1320,12 +1474,12 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
        for_each_online_cpu(cpu) {
                smp_call_function_single(cpu,
                                kvm_arch_check_processor_compat,
-                               &r, 0, 1);
+                               &r, 1);
                if (r < 0)
                        goto out_free_1;
        }
 
-       on_each_cpu(hardware_enable, NULL, 0, 1);
+       on_each_cpu(hardware_enable, NULL, 1);
        r = register_cpu_notifier(&kvm_cpu_notifier);
        if (r)
                goto out_free_2;
@@ -1371,7 +1525,7 @@ out_free_3:
        unregister_reboot_notifier(&kvm_reboot_notifier);
        unregister_cpu_notifier(&kvm_cpu_notifier);
 out_free_2:
-       on_each_cpu(hardware_disable, NULL, 0, 1);
+       on_each_cpu(hardware_disable, NULL, 1);
 out_free_1:
        kvm_arch_hardware_unsetup();
 out_free_0:
@@ -1386,13 +1540,14 @@ EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
+       kvm_trace_cleanup();
        misc_deregister(&kvm_dev);
        kmem_cache_destroy(kvm_vcpu_cache);
        sysdev_unregister(&kvm_sysdev);
        sysdev_class_unregister(&kvm_sysdev_class);
        unregister_reboot_notifier(&kvm_reboot_notifier);
        unregister_cpu_notifier(&kvm_cpu_notifier);
-       on_each_cpu(hardware_disable, NULL, 0, 1);
+       on_each_cpu(hardware_disable, NULL, 1);
        kvm_arch_hardware_unsetup();
        kvm_arch_exit();
        kvm_exit_debug();