X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=blobdiff_plain;f=arch%2Fx86%2Fkvm%2Fmmu.c;h=6fbcb48d5a9bb8e144adddb81fa71d99c1b039d7;hp=6ed7d633aefece84228ed12dc25e71e7eb047840;hb=3be2264be3c00865116f997dc53ebcc90fe7fc4b;hpb=805d32dea4dfb8319aaf7e73a681ad410a5e331a diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6ed7d63..6fbcb48 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -173,12 +173,7 @@ struct kvm_shadow_walk_iterator { shadow_walk_okay(&(_walker)); \ shadow_walk_next(&(_walker))) - -struct kvm_unsync_walk { - int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); -}; - -typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); +typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); static struct kmem_cache *pte_chain_cache; static struct kmem_cache *rmap_desc_cache; @@ -222,7 +217,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); -static int is_write_protection(struct kvm_vcpu *vcpu) +static bool is_write_protection(struct kvm_vcpu *vcpu) { return kvm_read_cr0_bits(vcpu, X86_CR0_WP); } @@ -436,9 +431,9 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) int i; gfn = unalias_gfn(kvm, gfn); + slot = gfn_to_memslot_unaliased(kvm, gfn); for (i = PT_DIRECTORY_LEVEL; i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { - slot = gfn_to_memslot_unaliased(kvm, gfn); write_count = slot_largepage_idx(gfn, slot, i); *write_count -= 1; WARN_ON(*write_count < 0); @@ -652,7 +647,6 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) { struct kvm_rmap_desc *desc; - struct kvm_rmap_desc *prev_desc; u64 *prev_spte; int i; @@ -664,7 +658,6 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) return NULL; } desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); - prev_desc = NULL; prev_spte = NULL; while (desc) { for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { @@ -792,7 +785,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, int retval = 0; struct kvm_memslots *slots; - slots = rcu_dereference(kvm->memslots); + slots = kvm_memslots(kvm); for (i = 0; i < slots->nmemslots; i++) { struct kvm_memory_slot *memslot = &slots->memslots[i]; @@ -923,7 +916,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - INIT_LIST_HEAD(&sp->oos_link); bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); sp->multimapped = 0; sp->parent_pte = parent_pte; @@ -1007,8 +999,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, } -static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - mmu_parent_walk_fn fn) +static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) { struct kvm_pte_chain *pte_chain; struct hlist_node *node; @@ -1017,8 +1008,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (!sp->multimapped && sp->parent_pte) { parent_sp = page_header(__pa(sp->parent_pte)); - fn(vcpu, parent_sp); - mmu_parent_walk(vcpu, parent_sp, fn); + fn(parent_sp); + mmu_parent_walk(parent_sp, fn); return; } hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) @@ -1026,8 +1017,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (!pte_chain->parent_ptes[i]) break; parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); - fn(vcpu, parent_sp); - mmu_parent_walk(vcpu, parent_sp, fn); + fn(parent_sp); + mmu_parent_walk(parent_sp, fn); } } @@ -1064,16 +1055,15 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) } } -static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +static int unsync_walk_fn(struct kvm_mmu_page *sp) { kvm_mmu_update_parents_unsync(sp); return 1; } -static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp) +static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) { - mmu_parent_walk(vcpu, sp, unsync_walk_fn); + mmu_parent_walk(sp, unsync_walk_fn); kvm_mmu_update_parents_unsync(sp); } @@ -1199,6 +1189,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { WARN_ON(!sp->unsync); + trace_kvm_mmu_sync_page(sp); sp->unsync = 0; --kvm->stat.mmu_unsync; } @@ -1207,12 +1198,11 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { - if (sp->role.glevels != vcpu->arch.mmu.root_level) { + if (sp->role.cr4_pae != !!is_pae(vcpu)) { kvm_mmu_zap_page(vcpu->kvm, sp); return 1; } - trace_kvm_mmu_sync_page(sp); if (rmap_write_protect(vcpu->kvm, sp->gfn)) kvm_flush_remote_tlbs(vcpu->kvm); kvm_unlink_unsync_page(vcpu->kvm, sp); @@ -1330,7 +1320,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, role.level = level; role.direct = direct; if (role.direct) - role.glevels = 0; + role.cr4_pae = 0; role.access = access; if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); @@ -1351,7 +1341,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, mmu_page_add_parent_pte(vcpu, sp, parent_pte); if (sp->unsync_children) { set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); - kvm_mmu_mark_parents_unsync(vcpu, sp); + kvm_mmu_mark_parents_unsync(sp); } trace_kvm_mmu_get_page(sp, false); return sp; @@ -1573,13 +1563,14 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) r = 0; index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; +restart: hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) if (sp->gfn == gfn && !sp->role.direct) { pgprintk("%s: gfn %lx role %x\n", __func__, gfn, sp->role.word); r = 1; if (kvm_mmu_zap_page(kvm, sp)) - n = bucket->first; + goto restart; } return r; } @@ -1593,13 +1584,14 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; +restart: hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { if (sp->gfn == gfn && !sp->role.direct && !sp->role.invalid) { pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); if (kvm_mmu_zap_page(kvm, sp)) - nn = bucket->first; + goto restart; } } } @@ -1626,20 +1618,6 @@ static void mmu_convert_notrap(struct kvm_mmu_page *sp) } } -struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) -{ - struct page *page; - - gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); - - if (gpa == UNMAPPED_GVA) - return NULL; - - page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); - - return page; -} - /* * The function is based on mtrr_type_lookup() in * arch/x86/kernel/cpu/mtrr/generic.c @@ -1752,7 +1730,6 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) struct kvm_mmu_page *s; struct hlist_node *node, *n; - trace_kvm_mmu_unsync_page(sp); index = kvm_page_table_hashfn(sp->gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; /* don't unsync if pagetable is shadowed with multiple roles */ @@ -1762,10 +1739,11 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) if (s->role.word != sp->role.word) return 1; } + trace_kvm_mmu_unsync_page(sp); ++vcpu->kvm->stat.mmu_unsync; sp->unsync = 1; - kvm_mmu_mark_parents_unsync(vcpu, sp); + kvm_mmu_mark_parents_unsync(sp); mmu_convert_notrap(sp); return 0; @@ -1892,6 +1870,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, child = page_header(pte & PT64_BASE_ADDR_MASK); mmu_page_remove_parent_pte(child, sptep); + __set_spte(sptep, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(vcpu->kvm); } else if (pfn != spte_to_pfn(*sptep)) { pgprintk("hfn old %lx new %lx\n", spte_to_pfn(*sptep), pfn); @@ -2081,21 +2061,23 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) hpa_t root = vcpu->arch.mmu.root_hpa; ASSERT(!VALID_PAGE(root)); - if (tdp_enabled) - direct = 1; if (mmu_check_root(vcpu, root_gfn)) return 1; + if (tdp_enabled) { + direct = 1; + root_gfn = 0; + } + spin_lock(&vcpu->kvm->mmu_lock); sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, direct, ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; + spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.root_hpa = root; return 0; } direct = !is_paging(vcpu); - if (tdp_enabled) - direct = 1; for (i = 0; i < 4; ++i) { hpa_t root = vcpu->arch.mmu.pae_root[i]; @@ -2111,11 +2093,18 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) root_gfn = 0; if (mmu_check_root(vcpu, root_gfn)) return 1; + if (tdp_enabled) { + direct = 1; + root_gfn = i << 30; + } + spin_lock(&vcpu->kvm->mmu_lock); sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, direct, ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; + spin_unlock(&vcpu->kvm->mmu_lock); + vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; } vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); @@ -2444,7 +2433,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) else r = paging32_init_context(vcpu); - vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; + vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); + vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); return r; } @@ -2484,7 +2474,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) goto out; spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); + spin_unlock(&vcpu->kvm->mmu_lock); r = mmu_alloc_roots(vcpu); + spin_lock(&vcpu->kvm->mmu_lock); mmu_sync_roots(vcpu); spin_unlock(&vcpu->kvm->mmu_lock); if (r) @@ -2533,7 +2525,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, } ++vcpu->kvm->stat.mmu_pte_updated; - if (sp->role.glevels == PT32_ROOT_LEVEL) + if (!sp->role.cr4_pae) paging32_update_pte(vcpu, sp, spte, new); else paging64_update_pte(vcpu, sp, spte, new); @@ -2679,10 +2671,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, } index = kvm_page_table_hashfn(gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; + +restart: hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) continue; - pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; + pte_size = sp->role.cr4_pae ? 8 : 4; misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); misaligned |= bytes < 4; if (misaligned || flooded) { @@ -2699,14 +2693,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, pgprintk("misaligned: gpa %llx bytes %d role %x\n", gpa, bytes, sp->role.word); if (kvm_mmu_zap_page(vcpu->kvm, sp)) - n = bucket->first; + goto restart; ++vcpu->kvm->stat.mmu_flooded; continue; } page_offset = offset; level = sp->role.level; npte = 1; - if (sp->role.glevels == PT32_ROOT_LEVEL) { + if (!sp->role.cr4_pae) { page_offset <<= 1; /* 32->64 */ /* * A 32-bit pde maps 4MB while the shadow pdes map @@ -2908,22 +2902,23 @@ void kvm_mmu_zap_all(struct kvm *kvm) struct kvm_mmu_page *sp, *node; spin_lock(&kvm->mmu_lock); +restart: list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) if (kvm_mmu_zap_page(kvm, sp)) - node = container_of(kvm->arch.active_mmu_pages.next, - struct kvm_mmu_page, link); + goto restart; + spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); } -static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) +static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) { struct kvm_mmu_page *page; page = container_of(kvm->arch.active_mmu_pages.prev, struct kvm_mmu_page, link); - kvm_mmu_zap_page(kvm, page); + return kvm_mmu_zap_page(kvm, page) + 1; } static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) @@ -2935,7 +2930,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - int npages, idx; + int npages, idx, freed_pages; idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); @@ -2943,8 +2938,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) kvm->arch.n_free_mmu_pages; cache_count += npages; if (!kvm_freed && nr_to_scan > 0 && npages > 0) { - kvm_mmu_remove_one_alloc_mmu_page(kvm); - cache_count--; + freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); + cache_count -= freed_pages; kvm_freed = kvm; } nr_to_scan--; @@ -3019,7 +3014,8 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) unsigned int nr_pages = 0; struct kvm_memslots *slots; - slots = rcu_dereference(kvm->memslots); + slots = kvm_memslots(kvm); + for (i = 0; i < slots->nmemslots; i++) nr_pages += slots->memslots[i].npages; @@ -3295,7 +3291,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) int i, j, k, idx; idx = srcu_read_lock(&kvm->srcu); - slots = rcu_dereference(kvm->memslots); + slots = kvm_memslots(kvm); for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { struct kvm_memory_slot *m = &slots->memslots[i]; struct kvm_rmap_desc *d;