KVM: x86 emulator: address size and operand size overrides are sticky
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 3946025..721e660 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -46,6 +46,7 @@
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/desc.h>
+#include <asm/pgtable.h>
 
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
@@ -114,6 +115,9 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
                if (cpu != -1 && cpu != raw_smp_processor_id())
                        cpu_set(cpu, cpus);
        }
+       if (cpus_empty(cpus))
+               return;
+       ++kvm->stat.remote_tlb_flush;
        smp_call_function_mask(cpus, ack_flush, NULL, 1);
 }
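Two behavioural notes on this hunk: the early return skips the cross-CPU call entirely when no other CPU has a vcpu of this VM loaded, and the new remote_tlb_flush counter therefore only counts flushes that actually sent IPIs. For context, the IPI handler in this file is effectively empty; the IPI itself is what kicks the remote CPU out of guest mode (a sketch from memory of this file, not part of the hunk):

    static void ack_flush(void *_completed)
    {
    }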
 
@@ -156,18 +160,20 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
 
 static struct kvm *kvm_create_vm(void)
 {
-       struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+       struct kvm *kvm = kvm_arch_create_vm();
 
-       if (!kvm)
-               return ERR_PTR(-ENOMEM);
+       if (IS_ERR(kvm))
+               goto out;
 
+       kvm->mm = current->mm;
+       atomic_inc(&kvm->mm->mm_count);
        kvm_io_bus_init(&kvm->pio_bus);
        mutex_init(&kvm->lock);
-       INIT_LIST_HEAD(&kvm->active_mmu_pages);
        kvm_io_bus_init(&kvm->mmio_bus);
        spin_lock(&kvm_lock);
        list_add(&kvm->vm_list, &vm_list);
        spin_unlock(&kvm_lock);
+out:
        return kvm;
 }
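VM allocation now goes through an arch hook. A minimal kvm_arch_create_vm() can be reconstructed almost verbatim from the lines removed above (the kzalloc and the x86-specific active_mmu_pages initialization); treat this as a sketch, not necessarily the exact arch code:

    struct kvm *kvm_arch_create_vm(void)
    {
            struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);

            if (!kvm)
                    return ERR_PTR(-ENOMEM);
            INIT_LIST_HEAD(&kvm->active_mmu_pages);
            return kvm;
    }

Note the new mm bookkeeping as well: kvm_create_vm() pins current->mm via atomic_inc(&kvm->mm->mm_count), paired with the mmdrop() added to kvm_destroy_vm() below.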
 
@@ -188,7 +194,7 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
        free->rmap = NULL;
 }
 
-static void kvm_free_physmem(struct kvm *kvm)
+void kvm_free_physmem(struct kvm *kvm)
 {
        int i;
 
@@ -196,44 +202,17 @@ static void kvm_free_physmem(struct kvm *kvm)
                kvm_free_physmem_slot(&kvm->memslots[i], NULL);
 }
 
-static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
-{
-       vcpu_load(vcpu);
-       kvm_mmu_unload(vcpu);
-       vcpu_put(vcpu);
-}
-
-static void kvm_free_vcpus(struct kvm *kvm)
-{
-       unsigned int i;
-
-       /*
-        * Unpin any mmu pages first.
-        */
-       for (i = 0; i < KVM_MAX_VCPUS; ++i)
-               if (kvm->vcpus[i])
-                       kvm_unload_vcpu_mmu(kvm->vcpus[i]);
-       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-               if (kvm->vcpus[i]) {
-                       kvm_arch_vcpu_free(kvm->vcpus[i]);
-                       kvm->vcpus[i] = NULL;
-               }
-       }
-
-}
-
 static void kvm_destroy_vm(struct kvm *kvm)
 {
+       struct mm_struct *mm = kvm->mm;
+
        spin_lock(&kvm_lock);
        list_del(&kvm->vm_list);
        spin_unlock(&kvm_lock);
        kvm_io_bus_destroy(&kvm->pio_bus);
        kvm_io_bus_destroy(&kvm->mmio_bus);
-       kfree(kvm->vpic);
-       kfree(kvm->vioapic);
-       kvm_free_vcpus(kvm);
-       kvm_free_physmem(kvm);
-       kfree(kvm);
+       kvm_arch_destroy_vm(kvm);
+       mmdrop(mm);
 }
 
 static int kvm_vm_release(struct inode *inode, struct file *filp)
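The per-vcpu teardown moves behind kvm_arch_destroy_vm(); pieced together from the deleted lines, the arch side plausibly looks like this (a sketch; kvm_free_physmem() loses its static above precisely so arch code can call it):

    void kvm_arch_destroy_vm(struct kvm *kvm)
    {
            kfree(kvm->vpic);
            kfree(kvm->vioapic);
            kvm_free_vcpus(kvm);    /* unloads each vcpu's MMU, then frees it */
            kvm_free_physmem(kvm);
            kfree(kvm);
    }

mmdrop(mm) runs only after the arch teardown, so arch code may still dereference kvm->mm here.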
@@ -320,33 +299,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
                memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
                new.user_alloc = user_alloc;
-               if (user_alloc)
-                       new.userspace_addr = mem->userspace_addr;
-               else {
-                       down_write(&current->mm->mmap_sem);
-                       new.userspace_addr = do_mmap(NULL, 0,
-                                                    npages * PAGE_SIZE,
-                                                    PROT_READ | PROT_WRITE,
-                                                    MAP_SHARED | MAP_ANONYMOUS,
-                                                    0);
-                       up_write(&current->mm->mmap_sem);
-
-                       if (IS_ERR((void *)new.userspace_addr))
-                               goto out_free;
-               }
-       } else {
-               if (!old.user_alloc && old.rmap) {
-                       int ret;
-
-                       down_write(&current->mm->mmap_sem);
-                       ret = do_munmap(current->mm, old.userspace_addr,
-                                       old.npages * PAGE_SIZE);
-                       up_write(&current->mm->mmap_sem);
-                       if (ret < 0)
-                               printk(KERN_WARNING
-                                      "kvm_vm_ioctl_set_memory_region: "
-                                      "failed to munmap memory\n");
-               }
+               new.userspace_addr = mem->userspace_addr;
        }
 
        /* Allocate page dirty bitmap if needed */
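With the kernel-allocated do_mmap() path gone, userspace always supplies the backing memory and its address. A minimal caller from the userspace side (illustrative; vm_fd and mem_size are assumptions, error checking omitted):

    struct kvm_userspace_memory_region region = {
            .slot            = 0,
            .guest_phys_addr = 0,
            .memory_size     = mem_size,
            .userspace_addr  = (__u64)(unsigned long)mmap(NULL, mem_size,
                                    PROT_READ | PROT_WRITE,
                                    MAP_SHARED | MAP_ANONYMOUS, -1, 0),
    };
    ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);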
@@ -362,28 +315,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
        if (mem->slot >= kvm->nmemslots)
                kvm->nmemslots = mem->slot + 1;
 
-       if (!kvm->n_requested_mmu_pages) {
-               unsigned int n_pages;
-
-               if (npages) {
-                       n_pages = npages * KVM_PERMILLE_MMU_PAGES / 1000;
-                       kvm_mmu_change_mmu_pages(kvm, kvm->n_alloc_mmu_pages +
-                                                n_pages);
-               } else {
-                       unsigned int nr_mmu_pages;
-
-                       n_pages = old.npages * KVM_PERMILLE_MMU_PAGES / 1000;
-                       nr_mmu_pages = kvm->n_alloc_mmu_pages - n_pages;
-                       nr_mmu_pages = max(nr_mmu_pages,
-                                       (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
-                       kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
-               }
-       }
-
        *memslot = new;
 
-       kvm_mmu_slot_remove_write_access(kvm, mem->slot);
-       kvm_flush_remote_tlbs(kvm);
+       r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
+       if (r) {
+               *memslot = old;
+               goto out_free;
+       }
 
        kvm_free_physmem_slot(&old, &new);
        return 0;
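The mmu-page budget recalculation and the write-protect/TLB-flush sequence deleted above now live behind kvm_arch_set_memory_region(), which can also fail and roll the slot back. Reassembled from the removed lines (a sketch of the arch hook, not a verbatim copy):

    int kvm_arch_set_memory_region(struct kvm *kvm,
                                   struct kvm_userspace_memory_region *mem,
                                   struct kvm_memory_slot old,
                                   int user_alloc)
    {
            int npages = mem->memory_size >> PAGE_SHIFT;

            if (!kvm->n_requested_mmu_pages) {
                    unsigned int n_pages;

                    if (npages) {
                            n_pages = npages * KVM_PERMILLE_MMU_PAGES / 1000;
                            kvm_mmu_change_mmu_pages(kvm,
                                    kvm->n_alloc_mmu_pages + n_pages);
                    } else {
                            unsigned int nr_mmu_pages;

                            n_pages = old.npages * KVM_PERMILLE_MMU_PAGES / 1000;
                            nr_mmu_pages = kvm->n_alloc_mmu_pages - n_pages;
                            nr_mmu_pages = max(nr_mmu_pages,
                                    (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
                            kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
                    }
            }

            kvm_mmu_slot_remove_write_access(kvm, mem->slot);
            kvm_flush_remote_tlbs(kvm);
            return 0;
    }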
@@ -419,19 +357,14 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
        return kvm_set_memory_region(kvm, mem, user_alloc);
 }
 
-/*
- * Get (and clear) the dirty memory log for a memory slot.
- */
-static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-                                     struct kvm_dirty_log *log)
+int kvm_get_dirty_log(struct kvm *kvm,
+                       struct kvm_dirty_log *log, int *is_dirty)
 {
        struct kvm_memory_slot *memslot;
        int r, i;
        int n;
        unsigned long any = 0;
 
-       mutex_lock(&kvm->lock);
-
        r = -EINVAL;
        if (log->slot >= KVM_MEMORY_SLOTS)
                goto out;
@@ -450,17 +383,11 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
        if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
                goto out;
 
-       /* If nothing is dirty, don't bother messing with page tables. */
-       if (any) {
-               kvm_mmu_slot_remove_write_access(kvm, log->slot);
-               kvm_flush_remote_tlbs(kvm);
-               memset(memslot->dirty_bitmap, 0, n);
-       }
+       if (any)
+               *is_dirty = 1;
 
        r = 0;
-
 out:
-       mutex_unlock(&kvm->lock);
        return r;
 }
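The lock and the page-table side effects removed here migrate to the arch wrapper, which acts on the is_dirty result. A sketch of what that caller would look like (the bitmap-size arithmetic is assumed from the surrounding code, not shown in this diff):

    int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
    {
            struct kvm_memory_slot *memslot;
            int is_dirty = 0;
            int r, n;

            mutex_lock(&kvm->lock);
            r = kvm_get_dirty_log(kvm, log, &is_dirty);
            if (r)
                    goto out;
            /* If nothing is dirty, don't bother messing with page tables. */
            if (is_dirty) {
                    kvm_mmu_slot_remove_write_access(kvm, log->slot);
                    kvm_flush_remote_tlbs(kvm);
                    memslot = &kvm->memslots[log->slot];
                    n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
                    memset(memslot->dirty_bitmap, 0, n);
            }
            r = 0;
    out:
            mutex_unlock(&kvm->lock);
            return r;
    }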
 
@@ -481,20 +408,6 @@ int kvm_is_error_hva(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
-       int i;
-       struct kvm_mem_alias *alias;
-
-       for (i = 0; i < kvm->naliases; ++i) {
-               alias = &kvm->aliases[i];
-               if (gfn >= alias->base_gfn
-                   && gfn < alias->base_gfn + alias->npages)
-                       return alias->target_gfn + gfn - alias->base_gfn;
-       }
-       return gfn;
-}
-
 static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
        int i;
@@ -583,13 +496,19 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
-void kvm_release_page(struct page *page)
+void kvm_release_page_clean(struct page *page)
+{
+       put_page(page);
+}
+EXPORT_SYMBOL_GPL(kvm_release_page_clean);
+
+void kvm_release_page_dirty(struct page *page)
 {
        if (!PageReserved(page))
                SetPageDirty(page);
        put_page(page);
 }
-EXPORT_SYMBOL_GPL(kvm_release_page);
+EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
 
 static int next_segment(unsigned long len, int offset)
 {
@@ -674,22 +593,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
 
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
 {
-       void *page_virt;
-       struct page *page;
-
-       page = gfn_to_page(kvm, gfn);
-       if (is_error_page(page)) {
-               kvm_release_page(page);
-               return -EFAULT;
-       }
-       page_virt = kmap_atomic(page, KM_USER0);
-
-       memset(page_virt + offset, 0, len);
-
-       kunmap_atomic(page_virt, KM_USER0);
-       kvm_release_page(page);
-       mark_page_dirty(kvm, gfn);
-       return 0;
+       return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
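This rewrite is what the new #include <asm/pgtable.h> at the top of the file is for: empty_zero_page, the kernel's canonical page of zeroes, is declared there on x86, so clearing guest memory collapses into an ordinary kvm_write_guest_page() whose source data is the shared zero page, with dirty tracking handled by the common write path.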
 
@@ -859,6 +763,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 
        preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
 
+       r = kvm_arch_vcpu_setup(vcpu);
+       if (r)
+               goto vcpu_destroy;
+
        mutex_lock(&kvm->lock);
        if (kvm->vcpus[n]) {
                r = -EEXIST;
@@ -879,7 +787,7 @@ unlink:
        kvm->vcpus[n] = NULL;
        mutex_unlock(&kvm->lock);
 vcpu_destroy:
-       kvm_arch_vcpu_destory(vcpu);
+       kvm_arch_vcpu_destroy(vcpu);
        return r;
 }
 
@@ -901,6 +809,8 @@ static long kvm_vcpu_ioctl(struct file *filp,
        void __user *argp = (void __user *)arg;
        int r;
 
+       if (vcpu->kvm->mm != current->mm)
+               return -EIO;
        switch (ioctl) {
        case KVM_RUN:
                r = -EINVAL;
@@ -1059,6 +969,8 @@ static long kvm_vm_ioctl(struct file *filp,
        void __user *argp = (void __user *)arg;
        int r;
 
+       if (kvm->mm != current->mm)
+               return -EIO;
        switch (ioctl) {
        case KVM_CREATE_VCPU:
                r = kvm_vm_ioctl_create_vcpu(kvm, arg);
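Both the vcpu and VM ioctl handlers now refuse callers whose mm differs from the one the VM was created under: guest memory is tracked by userspace_addr values that only mean anything in the creator's address space. A file descriptor can still leak across fork() or unix-socket passing, which is exactly the case this catches (illustrative userspace snippet, behaviour per this patch):

    pid_t pid = fork();
    if (pid == 0) {
            /* the child has a copied mm_struct, so kvm->mm != current->mm */
            int r = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
            /* r == -1 and errno == EIO after this change */
    }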
@@ -1110,7 +1022,7 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
        /* current->mm->mmap_sem is already held so call lockless version */
        page = __gfn_to_page(kvm, pgoff);
        if (is_error_page(page)) {
-               kvm_release_page(page);
+               kvm_release_page_clean(page);
                return NOPAGE_SIGBUS;
        }
        if (type != NULL)
@@ -1311,7 +1223,22 @@ static struct notifier_block kvm_cpu_notifier = {
        .priority = 20, /* must be > scheduler priority */
 };
 
-static u64 stat_get(void *_offset)
+static u64 vm_stat_get(void *_offset)
+{
+       unsigned offset = (long)_offset;
+       u64 total = 0;
+       struct kvm *kvm;
+
+       spin_lock(&kvm_lock);
+       list_for_each_entry(kvm, &vm_list, vm_list)
+               total += *(u32 *)((void *)kvm + offset);
+       spin_unlock(&kvm_lock);
+       return total;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");
+
+static u64 vcpu_stat_get(void *_offset)
 {
        unsigned offset = (long)_offset;
        u64 total = 0;
@@ -1330,9 +1257,14 @@ static u64 stat_get(void *_offset)
        return total;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, NULL, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
+
+static struct file_operations *stat_fops[] = {
+       [KVM_STAT_VCPU] = &vcpu_stat_fops,
+       [KVM_STAT_VM]   = &vm_stat_fops,
+};
 
-static __init void kvm_init_debug(void)
+static void kvm_init_debug(void)
 {
        struct kvm_stats_debugfs_item *p;
 
@@ -1340,7 +1272,7 @@ static __init void kvm_init_debug(void)
        for (p = debugfs_entries; p->name; ++p)
                p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
                                                (void *)(long)p->offset,
-                                               &stat_fops);
+                                               stat_fops[p->kind]);
 }
 
 static void kvm_exit_debug(void)
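Since debugfs_create_file() now selects its fops via p->kind, each debugfs_entries item must carry a kind tag alongside its name and offset. A plausible shape (the VCPU entry's name and the exact field layout are assumptions; remote_tlb_flush is the VM counter introduced earlier in this patch):

    static struct kvm_stats_debugfs_item debugfs_entries[] = {
            { "remote_tlb_flush", offsetof(struct kvm, stat.remote_tlb_flush),
              KVM_STAT_VM },
            { "halt_exits", offsetof(struct kvm_vcpu, stat.halt_exits),
              KVM_STAT_VCPU },
            { NULL }
    };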
@@ -1404,10 +1336,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
        int r;
        int cpu;
 
-       r = kvm_mmu_module_init();
-       if (r)
-               goto out4;
-
        kvm_init_debug();
 
        r = kvm_arch_init(opaque);
@@ -1449,7 +1377,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
        /* A kmem cache lets us meet the alignment requirements of fx_save. */
        kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
-                                          __alignof__(struct kvm_vcpu), 0, 0);
+                                          __alignof__(struct kvm_vcpu),
+                                          0, NULL);
        if (!kvm_vcpu_cache) {
                r = -ENOMEM;
                goto out_free_4;
@@ -1466,8 +1395,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
        kvm_preempt_ops.sched_in = kvm_sched_in;
        kvm_preempt_ops.sched_out = kvm_sched_out;
 
-       kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
-
        return 0;
 
 out_free:
@@ -1486,7 +1413,6 @@ out_free_0:
 out:
        kvm_arch_exit();
        kvm_exit_debug();
-       kvm_mmu_module_exit();
 out4:
        return r;
 }
@@ -1505,6 +1431,5 @@ void kvm_exit(void)
        kvm_arch_exit();
        kvm_exit_debug();
        __free_page(bad_page);
-       kvm_mmu_module_exit();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);