KVM: Export necessary function for EPT
[safe/jmp/linux-2.6] / virt / kvm / kvm_main.c
index 32fbf80..f7ba099 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/kvm_para.h>
 #include <linux/pagemap.h>
 #include <linux/mman.h>
+#include <linux/swap.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -59,7 +60,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
 
 static __read_mostly struct preempt_ops kvm_preempt_ops;
 
-static struct dentry *debugfs_dir;
+struct dentry *kvm_debugfs_dir;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
                           unsigned long arg);
@@ -119,6 +120,29 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
        smp_call_function_mask(cpus, ack_flush, NULL, 1);
 }
 
+void kvm_reload_remote_mmus(struct kvm *kvm)
+{
+       int i, cpu;
+       cpumask_t cpus;
+       struct kvm_vcpu *vcpu;
+
+       cpus_clear(cpus);
+       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+               vcpu = kvm->vcpus[i];
+               if (!vcpu)
+                       continue;
+               if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+                       continue;
+               cpu = vcpu->cpu;
+               if (cpu != -1 && cpu != raw_smp_processor_id())
+                       cpu_set(cpu, cpus);
+       }
+       if (cpus_empty(cpus))
+               return;
+       smp_call_function_mask(cpus, ack_flush, NULL, 1);
+}
+
+
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
        struct page *page;
@@ -169,6 +193,8 @@ static struct kvm *kvm_create_vm(void)
        kvm_io_bus_init(&kvm->pio_bus);
        mutex_init(&kvm->lock);
        kvm_io_bus_init(&kvm->mmio_bus);
+       init_rwsem(&kvm->slots_lock);
+       atomic_set(&kvm->users_count, 1);
        spin_lock(&kvm_lock);
        list_add(&kvm->vm_list, &vm_list);
        spin_unlock(&kvm_lock);
@@ -188,9 +214,13 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
        if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
                vfree(free->dirty_bitmap);
 
+       if (!dont || free->lpage_info != dont->lpage_info)
+               vfree(free->lpage_info);
+
        free->npages = 0;
        free->dirty_bitmap = NULL;
        free->rmap = NULL;
+       free->lpage_info = NULL;
 }
 
 void kvm_free_physmem(struct kvm *kvm)
@@ -214,11 +244,25 @@ static void kvm_destroy_vm(struct kvm *kvm)
        mmdrop(mm);
 }
 
+void kvm_get_kvm(struct kvm *kvm)
+{
+       atomic_inc(&kvm->users_count);
+}
+EXPORT_SYMBOL_GPL(kvm_get_kvm);
+
+void kvm_put_kvm(struct kvm *kvm)
+{
+       if (atomic_dec_and_test(&kvm->users_count))
+               kvm_destroy_vm(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_put_kvm);
+
+
 static int kvm_vm_release(struct inode *inode, struct file *filp)
 {
        struct kvm *kvm = filp->private_data;
 
-       kvm_destroy_vm(kvm);
+       kvm_put_kvm(kvm);
        return 0;
 }
 
@@ -300,6 +344,25 @@ int __kvm_set_memory_region(struct kvm *kvm,
                new.user_alloc = user_alloc;
                new.userspace_addr = mem->userspace_addr;
        }
+       if (npages && !new.lpage_info) {
+               int largepages = npages / KVM_PAGES_PER_HPAGE;
+               if (npages % KVM_PAGES_PER_HPAGE)
+                       largepages++;
+               if (base_gfn % KVM_PAGES_PER_HPAGE)
+                       largepages++;
+
+               new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info));
+
+               if (!new.lpage_info)
+                       goto out_free;
+
+               memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info));
+
+               if (base_gfn % KVM_PAGES_PER_HPAGE)
+                       new.lpage_info[0].write_count = 1;
+               if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
+                       new.lpage_info[largepages-1].write_count = 1;
+       }
 
        /* Allocate page dirty bitmap if needed */
        if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
@@ -339,9 +402,9 @@ int kvm_set_memory_region(struct kvm *kvm,
 {
        int r;
 
-       down_write(&current->mm->mmap_sem);
+       down_write(&kvm->slots_lock);
        r = __kvm_set_memory_region(kvm, mem, user_alloc);
-       up_write(&current->mm->mmap_sem);
+       up_write(&kvm->slots_lock);
        return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@@ -396,6 +459,12 @@ int is_error_page(struct page *page)
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
+int is_error_pfn(pfn_t pfn)
+{
+       return pfn == bad_pfn;
+}
+EXPORT_SYMBOL_GPL(is_error_pfn);
+
 static inline unsigned long bad_hva(void)
 {
        return PAGE_OFFSET;
@@ -443,7 +512,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
-static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
+unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
        struct kvm_memory_slot *slot;
 
@@ -453,11 +522,12 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
                return bad_hva();
        return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
+EXPORT_SYMBOL_GPL(gfn_to_hva);
 
 /*
  * Requires current->mm->mmap_sem to be held
  */
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
        struct page *page[1];
        unsigned long addr;
@@ -468,7 +538,7 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
        addr = gfn_to_hva(kvm, gfn);
        if (kvm_is_error_hva(addr)) {
                get_page(bad_page);
-               return bad_page;
+               return page_to_pfn(bad_page);
        }
 
        npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
@@ -476,27 +546,71 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
        if (npages != 1) {
                get_page(bad_page);
-               return bad_page;
+               return page_to_pfn(bad_page);
        }
 
-       return page[0];
+       return page_to_pfn(page[0]);
+}
+
+EXPORT_SYMBOL_GPL(gfn_to_pfn);
+
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+{
+       return pfn_to_page(gfn_to_pfn(kvm, gfn));
 }
 
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
 void kvm_release_page_clean(struct page *page)
 {
-       put_page(page);
+       kvm_release_pfn_clean(page_to_pfn(page));
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
+void kvm_release_pfn_clean(pfn_t pfn)
+{
+       put_page(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
+
 void kvm_release_page_dirty(struct page *page)
 {
+       kvm_release_pfn_dirty(page_to_pfn(page));
+}
+EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
+
+void kvm_release_pfn_dirty(pfn_t pfn)
+{
+       kvm_set_pfn_dirty(pfn);
+       kvm_release_pfn_clean(pfn);
+}
+EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
+
+void kvm_set_page_dirty(struct page *page)
+{
+       kvm_set_pfn_dirty(page_to_pfn(page));
+}
+EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
+
+void kvm_set_pfn_dirty(pfn_t pfn)
+{
+       struct page *page = pfn_to_page(pfn);
        if (!PageReserved(page))
                SetPageDirty(page);
-       put_page(page);
 }
-EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
+EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
+
+void kvm_set_pfn_accessed(pfn_t pfn)
+{
+       mark_page_accessed(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
+
+void kvm_get_pfn(pfn_t pfn)
+{
+       get_page(pfn_to_page(pfn));
+}
+EXPORT_SYMBOL_GPL(kvm_get_pfn);
 
 static int next_segment(unsigned long len, int offset)
 {
@@ -553,7 +667,9 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
        addr = gfn_to_hva(kvm, gfn);
        if (kvm_is_error_hva(addr))
                return -EFAULT;
+       pagefault_disable();
        r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
+       pagefault_enable();
        if (r)
                return -EFAULT;
        return 0;
@@ -650,6 +766,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
         * We will block until either an interrupt or a signal wakes us up
         */
        while (!kvm_cpu_has_interrupt(vcpu)
+              && !kvm_cpu_has_pending_timer(vcpu)
               && !signal_pending(current)
               && !kvm_arch_vcpu_runnable(vcpu)) {
                set_current_state(TASK_INTERRUPTIBLE);
@@ -677,8 +794,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        if (vmf->pgoff == 0)
                page = virt_to_page(vcpu->run);
+#ifdef CONFIG_X86
        else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
                page = virt_to_page(vcpu->arch.pio_data);
+#endif
        else
                return VM_FAULT_SIGBUS;
        get_page(page);
@@ -700,11 +819,11 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 {
        struct kvm_vcpu *vcpu = filp->private_data;
 
-       fput(vcpu->kvm->filp);
+       kvm_put_kvm(vcpu->kvm);
        return 0;
 }
 
-static struct file_operations kvm_vcpu_fops = {
+static const struct file_operations kvm_vcpu_fops = {
        .release        = kvm_vcpu_release,
        .unlocked_ioctl = kvm_vcpu_ioctl,
        .compat_ioctl   = kvm_vcpu_ioctl,
@@ -716,15 +835,9 @@ static struct file_operations kvm_vcpu_fops = {
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-       int fd, r;
-       struct inode *inode;
-       struct file *file;
-
-       r = anon_inode_getfd(&fd, &inode, &file,
-                            "kvm-vcpu", &kvm_vcpu_fops, vcpu);
-       if (r)
-               return r;
-       atomic_inc(&vcpu->kvm->filp->f_count);
+       int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu);
+       if (fd < 0)
+               kvm_put_kvm(vcpu->kvm);
        return fd;
 }
 
@@ -759,6 +872,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
        mutex_unlock(&kvm->lock);
 
        /* Now it's all set up, let userspace reach it */
+       kvm_get_kvm(kvm);
        r = create_vcpu_fd(vcpu);
        if (r < 0)
                goto unlink;
@@ -801,28 +915,39 @@ static long kvm_vcpu_ioctl(struct file *filp,
                r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
                break;
        case KVM_GET_REGS: {
-               struct kvm_regs kvm_regs;
+               struct kvm_regs *kvm_regs;
 
-               memset(&kvm_regs, 0, sizeof kvm_regs);
-               r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
-               if (r)
+               r = -ENOMEM;
+               kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
+               if (!kvm_regs)
                        goto out;
+               r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
+               if (r)
+                       goto out_free1;
                r = -EFAULT;
-               if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
-                       goto out;
+               if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
+                       goto out_free1;
                r = 0;
+out_free1:
+               kfree(kvm_regs);
                break;
        }
        case KVM_SET_REGS: {
-               struct kvm_regs kvm_regs;
+               struct kvm_regs *kvm_regs;
 
-               r = -EFAULT;
-               if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
+               r = -ENOMEM;
+               kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
+               if (!kvm_regs)
                        goto out;
-               r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
+               r = -EFAULT;
+               if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
+                       goto out_free2;
+               r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
                if (r)
-                       goto out;
+                       goto out_free2;
                r = 0;
+out_free2:
+               kfree(kvm_regs);
                break;
        }
        case KVM_GET_SREGS: {
@@ -850,6 +975,30 @@ static long kvm_vcpu_ioctl(struct file *filp,
                r = 0;
                break;
        }
+       case KVM_GET_MP_STATE: {
+               struct kvm_mp_state mp_state;
+
+               r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
+               if (r)
+                       goto out;
+               r = -EFAULT;
+               if (copy_to_user(argp, &mp_state, sizeof mp_state))
+                       goto out;
+               r = 0;
+               break;
+       }
+       case KVM_SET_MP_STATE: {
+               struct kvm_mp_state mp_state;
+
+               r = -EFAULT;
+               if (copy_from_user(&mp_state, argp, sizeof mp_state))
+                       goto out;
+               r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
+               if (r)
+                       goto out;
+               r = 0;
+               break;
+       }
        case KVM_TRANSLATE: {
                struct kvm_translation tr;
 
@@ -1004,7 +1153,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
        return 0;
 }
 
-static struct file_operations kvm_vm_fops = {
+static const struct file_operations kvm_vm_fops = {
        .release        = kvm_vm_release,
        .unlocked_ioctl = kvm_vm_ioctl,
        .compat_ioctl   = kvm_vm_ioctl,
@@ -1013,21 +1162,15 @@ static struct file_operations kvm_vm_fops = {
 
 static int kvm_dev_ioctl_create_vm(void)
 {
-       int fd, r;
-       struct inode *inode;
-       struct file *file;
+       int fd;
        struct kvm *kvm;
 
        kvm = kvm_create_vm();
        if (IS_ERR(kvm))
                return PTR_ERR(kvm);
-       r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
-       if (r) {
-               kvm_destroy_vm(kvm);
-               return r;
-       }
-
-       kvm->filp = file;
+       fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm);
+       if (fd < 0)
+               kvm_put_kvm(kvm);
 
        return fd;
 }
@@ -1058,7 +1201,15 @@ static long kvm_dev_ioctl(struct file *filp,
                r = -EINVAL;
                if (arg)
                        goto out;
-               r = 2 * PAGE_SIZE;
+               r = PAGE_SIZE;     /* struct kvm_run */
+#ifdef CONFIG_X86
+               r += PAGE_SIZE;    /* pio data page */
+#endif
+               break;
+       case KVM_TRACE_ENABLE:
+       case KVM_TRACE_PAUSE:
+       case KVM_TRACE_DISABLE:
+               r = kvm_trace_ioctl(ioctl, arg);
                break;
        default:
                return kvm_arch_dev_ioctl(filp, ioctl, arg);
@@ -1231,9 +1382,9 @@ static void kvm_init_debug(void)
 {
        struct kvm_stats_debugfs_item *p;
 
-       debugfs_dir = debugfs_create_dir("kvm", NULL);
+       kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
        for (p = debugfs_entries; p->name; ++p)
-               p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
+               p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
                                                (void *)(long)p->offset,
                                                stat_fops[p->kind]);
 }
@@ -1244,7 +1395,7 @@ static void kvm_exit_debug(void)
 
        for (p = debugfs_entries; p->name; ++p)
                debugfs_remove(p->dentry);
-       debugfs_remove(debugfs_dir);
+       debugfs_remove(kvm_debugfs_dir);
 }
 
 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
@@ -1271,6 +1422,7 @@ static struct sys_device kvm_sysdev = {
 };
 
 struct page *bad_page;
+pfn_t bad_pfn;
 
 static inline
 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
@@ -1312,6 +1464,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
                goto out;
        }
 
+       bad_pfn = page_to_pfn(bad_page);
+
        r = kvm_arch_hardware_setup();
        if (r < 0)
                goto out_free_0;
@@ -1385,6 +1539,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
+       kvm_trace_cleanup();
        misc_deregister(&kvm_dev);
        kmem_cache_destroy(kvm_vcpu_cache);
        sysdev_unregister(&kvm_sysdev);