X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=virt%2Fkvm%2Fkvm_main.c;h=d4eae6af07382dc404d278ff1bf67814f8449fb2;hb=15c8b6c1aaaf1c4edd67e2f02e4d8e1bd1a51c0d;hp=8d0b7c16c2f781b6c3b120115bf1442c16df4d5d;hpb=7ec54588210df29ea637e6054489bc942c0ef371;p=safe%2Fjmp%2Flinux-2.6 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8d0b7c1..d4eae6a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -59,7 +60,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache); static __read_mostly struct preempt_ops kvm_preempt_ops; -static struct dentry *debugfs_dir; +struct dentry *kvm_debugfs_dir; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); @@ -119,6 +120,29 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) smp_call_function_mask(cpus, ack_flush, NULL, 1); } +void kvm_reload_remote_mmus(struct kvm *kvm) +{ + int i, cpu; + cpumask_t cpus; + struct kvm_vcpu *vcpu; + + cpus_clear(cpus); + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; + if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) + continue; + cpu = vcpu->cpu; + if (cpu != -1 && cpu != raw_smp_processor_id()) + cpu_set(cpu, cpus); + } + if (cpus_empty(cpus)) + return; + smp_call_function_mask(cpus, ack_flush, NULL, 1); +} + + int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) { struct page *page; @@ -165,9 +189,12 @@ static struct kvm *kvm_create_vm(void) kvm->mm = current->mm; atomic_inc(&kvm->mm->mm_count); + spin_lock_init(&kvm->mmu_lock); kvm_io_bus_init(&kvm->pio_bus); mutex_init(&kvm->lock); kvm_io_bus_init(&kvm->mmio_bus); + init_rwsem(&kvm->slots_lock); + atomic_set(&kvm->users_count, 1); spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); @@ -187,9 +214,13 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, if (!dont || free->dirty_bitmap != dont->dirty_bitmap) vfree(free->dirty_bitmap); + if (!dont || free->lpage_info != dont->lpage_info) + vfree(free->lpage_info); + free->npages = 0; free->dirty_bitmap = NULL; free->rmap = NULL; + free->lpage_info = NULL; } void kvm_free_physmem(struct kvm *kvm) @@ -213,11 +244,25 @@ static void kvm_destroy_vm(struct kvm *kvm) mmdrop(mm); } +void kvm_get_kvm(struct kvm *kvm) +{ + atomic_inc(&kvm->users_count); +} +EXPORT_SYMBOL_GPL(kvm_get_kvm); + +void kvm_put_kvm(struct kvm *kvm) +{ + if (atomic_dec_and_test(&kvm->users_count)) + kvm_destroy_vm(kvm); +} +EXPORT_SYMBOL_GPL(kvm_put_kvm); + + static int kvm_vm_release(struct inode *inode, struct file *filp) { struct kvm *kvm = filp->private_data; - kvm_destroy_vm(kvm); + kvm_put_kvm(kvm); return 0; } @@ -299,6 +344,25 @@ int __kvm_set_memory_region(struct kvm *kvm, new.user_alloc = user_alloc; new.userspace_addr = mem->userspace_addr; } + if (npages && !new.lpage_info) { + int largepages = npages / KVM_PAGES_PER_HPAGE; + if (npages % KVM_PAGES_PER_HPAGE) + largepages++; + if (base_gfn % KVM_PAGES_PER_HPAGE) + largepages++; + + new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info)); + + if (!new.lpage_info) + goto out_free; + + memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info)); + + if (base_gfn % KVM_PAGES_PER_HPAGE) + new.lpage_info[0].write_count = 1; + if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE) + new.lpage_info[largepages-1].write_count = 1; + } /* Allocate page dirty bitmap if needed */ if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { @@ -338,9 +402,9 @@ int kvm_set_memory_region(struct kvm *kvm, { int r; - down_write(¤t->mm->mmap_sem); + down_write(&kvm->slots_lock); r = __kvm_set_memory_region(kvm, mem, user_alloc); - up_write(¤t->mm->mmap_sem); + up_write(&kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(kvm_set_memory_region); @@ -395,6 +459,12 @@ int is_error_page(struct page *page) } EXPORT_SYMBOL_GPL(is_error_page); +int is_error_pfn(pfn_t pfn) +{ + return pfn == bad_pfn; +} +EXPORT_SYMBOL_GPL(is_error_pfn); + static inline unsigned long bad_hva(void) { return PAGE_OFFSET; @@ -442,7 +512,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); -static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) +unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *slot; @@ -452,11 +522,12 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) return bad_hva(); return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); } +EXPORT_SYMBOL_GPL(gfn_to_hva); /* * Requires current->mm->mmap_sem to be held */ -struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) { struct page *page[1]; unsigned long addr; @@ -467,7 +538,7 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) { get_page(bad_page); - return bad_page; + return page_to_pfn(bad_page); } npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, @@ -475,27 +546,71 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) if (npages != 1) { get_page(bad_page); - return bad_page; + return page_to_pfn(bad_page); } - return page[0]; + return page_to_pfn(page[0]); +} + +EXPORT_SYMBOL_GPL(gfn_to_pfn); + +struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) +{ + return pfn_to_page(gfn_to_pfn(kvm, gfn)); } EXPORT_SYMBOL_GPL(gfn_to_page); void kvm_release_page_clean(struct page *page) { - put_page(page); + kvm_release_pfn_clean(page_to_pfn(page)); } EXPORT_SYMBOL_GPL(kvm_release_page_clean); +void kvm_release_pfn_clean(pfn_t pfn) +{ + put_page(pfn_to_page(pfn)); +} +EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); + void kvm_release_page_dirty(struct page *page) { + kvm_release_pfn_dirty(page_to_pfn(page)); +} +EXPORT_SYMBOL_GPL(kvm_release_page_dirty); + +void kvm_release_pfn_dirty(pfn_t pfn) +{ + kvm_set_pfn_dirty(pfn); + kvm_release_pfn_clean(pfn); +} +EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); + +void kvm_set_page_dirty(struct page *page) +{ + kvm_set_pfn_dirty(page_to_pfn(page)); +} +EXPORT_SYMBOL_GPL(kvm_set_page_dirty); + +void kvm_set_pfn_dirty(pfn_t pfn) +{ + struct page *page = pfn_to_page(pfn); if (!PageReserved(page)) SetPageDirty(page); - put_page(page); } -EXPORT_SYMBOL_GPL(kvm_release_page_dirty); +EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); + +void kvm_set_pfn_accessed(pfn_t pfn) +{ + mark_page_accessed(pfn_to_page(pfn)); +} +EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); + +void kvm_get_pfn(pfn_t pfn) +{ + get_page(pfn_to_page(pfn)); +} +EXPORT_SYMBOL_GPL(kvm_get_pfn); static int next_segment(unsigned long len, int offset) { @@ -643,24 +758,26 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) */ void kvm_vcpu_block(struct kvm_vcpu *vcpu) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); - add_wait_queue(&vcpu->wq, &wait); + if (kvm_cpu_has_interrupt(vcpu)) + break; + if (kvm_cpu_has_pending_timer(vcpu)) + break; + if (kvm_arch_vcpu_runnable(vcpu)) + break; + if (signal_pending(current)) + break; - /* - * We will block until either an interrupt or a signal wakes us up - */ - while (!kvm_cpu_has_interrupt(vcpu) - && !signal_pending(current) - && !kvm_arch_vcpu_runnable(vcpu)) { - set_current_state(TASK_INTERRUPTIBLE); vcpu_put(vcpu); schedule(); vcpu_load(vcpu); } - __set_current_state(TASK_RUNNING); - remove_wait_queue(&vcpu->wq, &wait); + finish_wait(&vcpu->wq, &wait); } void kvm_resched(struct kvm_vcpu *vcpu) @@ -678,8 +795,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (vmf->pgoff == 0) page = virt_to_page(vcpu->run); +#ifdef CONFIG_X86 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) page = virt_to_page(vcpu->arch.pio_data); +#endif else return VM_FAULT_SIGBUS; get_page(page); @@ -701,11 +820,11 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) { struct kvm_vcpu *vcpu = filp->private_data; - fput(vcpu->kvm->filp); + kvm_put_kvm(vcpu->kvm); return 0; } -static struct file_operations kvm_vcpu_fops = { +static const struct file_operations kvm_vcpu_fops = { .release = kvm_vcpu_release, .unlocked_ioctl = kvm_vcpu_ioctl, .compat_ioctl = kvm_vcpu_ioctl, @@ -717,15 +836,9 @@ static struct file_operations kvm_vcpu_fops = { */ static int create_vcpu_fd(struct kvm_vcpu *vcpu) { - int fd, r; - struct inode *inode; - struct file *file; - - r = anon_inode_getfd(&fd, &inode, &file, - "kvm-vcpu", &kvm_vcpu_fops, vcpu); - if (r) - return r; - atomic_inc(&vcpu->kvm->filp->f_count); + int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu); + if (fd < 0) + kvm_put_kvm(vcpu->kvm); return fd; } @@ -760,6 +873,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) mutex_unlock(&kvm->lock); /* Now it's all set up, let userspace reach it */ + kvm_get_kvm(kvm); r = create_vcpu_fd(vcpu); if (r < 0) goto unlink; @@ -802,28 +916,39 @@ static long kvm_vcpu_ioctl(struct file *filp, r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); break; case KVM_GET_REGS: { - struct kvm_regs kvm_regs; + struct kvm_regs *kvm_regs; - memset(&kvm_regs, 0, sizeof kvm_regs); - r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs); - if (r) + r = -ENOMEM; + kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); + if (!kvm_regs) goto out; + r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); + if (r) + goto out_free1; r = -EFAULT; - if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs)) - goto out; + if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) + goto out_free1; r = 0; +out_free1: + kfree(kvm_regs); break; } case KVM_SET_REGS: { - struct kvm_regs kvm_regs; + struct kvm_regs *kvm_regs; - r = -EFAULT; - if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs)) + r = -ENOMEM; + kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); + if (!kvm_regs) goto out; - r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs); + r = -EFAULT; + if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) + goto out_free2; + r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); if (r) - goto out; + goto out_free2; r = 0; +out_free2: + kfree(kvm_regs); break; } case KVM_GET_SREGS: { @@ -851,6 +976,30 @@ static long kvm_vcpu_ioctl(struct file *filp, r = 0; break; } + case KVM_GET_MP_STATE: { + struct kvm_mp_state mp_state; + + r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(argp, &mp_state, sizeof mp_state)) + goto out; + r = 0; + break; + } + case KVM_SET_MP_STATE: { + struct kvm_mp_state mp_state; + + r = -EFAULT; + if (copy_from_user(&mp_state, argp, sizeof mp_state)) + goto out; + r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); + if (r) + goto out; + r = 0; + break; + } case KVM_TRANSLATE: { struct kvm_translation tr; @@ -1005,7 +1154,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static struct file_operations kvm_vm_fops = { +static const struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, .compat_ioctl = kvm_vm_ioctl, @@ -1014,21 +1163,15 @@ static struct file_operations kvm_vm_fops = { static int kvm_dev_ioctl_create_vm(void) { - int fd, r; - struct inode *inode; - struct file *file; + int fd; struct kvm *kvm; kvm = kvm_create_vm(); if (IS_ERR(kvm)) return PTR_ERR(kvm); - r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm); - if (r) { - kvm_destroy_vm(kvm); - return r; - } - - kvm->filp = file; + fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm); + if (fd < 0) + kvm_put_kvm(kvm); return fd; } @@ -1059,7 +1202,15 @@ static long kvm_dev_ioctl(struct file *filp, r = -EINVAL; if (arg) goto out; - r = 2 * PAGE_SIZE; + r = PAGE_SIZE; /* struct kvm_run */ +#ifdef CONFIG_X86 + r += PAGE_SIZE; /* pio data page */ +#endif + break; + case KVM_TRACE_ENABLE: + case KVM_TRACE_PAUSE: + case KVM_TRACE_DISABLE: + r = kvm_trace_ioctl(ioctl, arg); break; default: return kvm_arch_dev_ioctl(filp, ioctl, arg); @@ -1115,12 +1266,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, case CPU_UP_CANCELED: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", cpu); - smp_call_function_single(cpu, hardware_disable, NULL, 0, 1); + smp_call_function_single(cpu, hardware_disable, NULL, 1); break; case CPU_ONLINE: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", cpu); - smp_call_function_single(cpu, hardware_enable, NULL, 0, 1); + smp_call_function_single(cpu, hardware_enable, NULL, 1); break; } return NOTIFY_OK; @@ -1135,7 +1286,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val, * in vmx root mode. */ printk(KERN_INFO "kvm: exiting hardware virtualization\n"); - on_each_cpu(hardware_disable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 1); } return NOTIFY_OK; } @@ -1187,38 +1338,38 @@ static struct notifier_block kvm_cpu_notifier = { .priority = 20, /* must be > scheduler priority */ }; -static u64 vm_stat_get(void *_offset) +static int vm_stat_get(void *_offset, u64 *val) { unsigned offset = (long)_offset; - u64 total = 0; struct kvm *kvm; + *val = 0; spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) - total += *(u32 *)((void *)kvm + offset); + *val += *(u32 *)((void *)kvm + offset); spin_unlock(&kvm_lock); - return total; + return 0; } DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n"); -static u64 vcpu_stat_get(void *_offset) +static int vcpu_stat_get(void *_offset, u64 *val) { unsigned offset = (long)_offset; - u64 total = 0; struct kvm *kvm; struct kvm_vcpu *vcpu; int i; + *val = 0; spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) for (i = 0; i < KVM_MAX_VCPUS; ++i) { vcpu = kvm->vcpus[i]; if (vcpu) - total += *(u32 *)((void *)vcpu + offset); + *val += *(u32 *)((void *)vcpu + offset); } spin_unlock(&kvm_lock); - return total; + return 0; } DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); @@ -1232,9 +1383,9 @@ static void kvm_init_debug(void) { struct kvm_stats_debugfs_item *p; - debugfs_dir = debugfs_create_dir("kvm", NULL); + kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); for (p = debugfs_entries; p->name; ++p) - p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir, + p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, (void *)(long)p->offset, stat_fops[p->kind]); } @@ -1245,7 +1396,7 @@ static void kvm_exit_debug(void) for (p = debugfs_entries; p->name; ++p) debugfs_remove(p->dentry); - debugfs_remove(debugfs_dir); + debugfs_remove(kvm_debugfs_dir); } static int kvm_suspend(struct sys_device *dev, pm_message_t state) @@ -1272,6 +1423,7 @@ static struct sys_device kvm_sysdev = { }; struct page *bad_page; +pfn_t bad_pfn; static inline struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) @@ -1313,6 +1465,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size, goto out; } + bad_pfn = page_to_pfn(bad_page); + r = kvm_arch_hardware_setup(); if (r < 0) goto out_free_0; @@ -1320,12 +1474,12 @@ int kvm_init(void *opaque, unsigned int vcpu_size, for_each_online_cpu(cpu) { smp_call_function_single(cpu, kvm_arch_check_processor_compat, - &r, 0, 1); + &r, 1); if (r < 0) goto out_free_1; } - on_each_cpu(hardware_enable, NULL, 0, 1); + on_each_cpu(hardware_enable, NULL, 1); r = register_cpu_notifier(&kvm_cpu_notifier); if (r) goto out_free_2; @@ -1371,7 +1525,7 @@ out_free_3: unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); out_free_2: - on_each_cpu(hardware_disable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 1); out_free_1: kvm_arch_hardware_unsetup(); out_free_0: @@ -1386,13 +1540,14 @@ EXPORT_SYMBOL_GPL(kvm_init); void kvm_exit(void) { + kvm_trace_cleanup(); misc_deregister(&kvm_dev); kmem_cache_destroy(kvm_vcpu_cache); sysdev_unregister(&kvm_sysdev); sysdev_class_unregister(&kvm_sysdev_class); unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); - on_each_cpu(hardware_disable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 1); kvm_arch_hardware_unsetup(); kvm_arch_exit(); kvm_exit_debug();