KVM: MMU: invalidate and flush on spte small->large page size change
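
The change named in the subject line is the pair of lines added in mmu_set_spte(): when an spte that used to point at a lower-level shadow page table (a small-page mapping) is about to be overwritten by a large-page mapping, the child table is unlinked, but other vcpus may still hold TLB entries that translate through it. The fix clears the old spte and flushes remote TLBs before the new spte is written. A minimal C sketch of the pattern follows; the surrounding guard (is_rmap_spte(), PT_PAGE_TABLE_LEVEL, is_large_pte()) is reconstructed from mmu.c of this era and may not match the file verbatim:

	/*
	 * Sketch only: small->large transition handled in mmu_set_spte().
	 * Order matters: unlink the child, invalidate the stale spte,
	 * flush remote TLBs, and only then install the large-page spte.
	 */
	if (is_rmap_spte(*sptep) &&
	    level > PT_PAGE_TABLE_LEVEL && !is_large_pte(*sptep)) {
		struct kvm_mmu_page *child;
		u64 pte = *sptep;

		child = page_header(pte & PT64_BASE_ADDR_MASK);
		mmu_page_remove_parent_pte(child, sptep);
		/* Drop the stale pointer so no walk reaches the detached table. */
		__set_spte(sptep, shadow_trap_nonpresent_pte);
		/* Discard translations other vcpus may have cached through it. */
		kvm_flush_remote_tlbs(vcpu->kvm);
	}

Flushing before the new spte is installed is what makes the transition safe: once the flush returns, no vcpu can still be translating through the old small-page hierarchy.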
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ddfa865..6fbcb48 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -217,7 +217,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
-static int is_write_protection(struct kvm_vcpu *vcpu)
+static bool is_write_protection(struct kvm_vcpu *vcpu)
 {
        return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
 }
@@ -1189,6 +1189,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
        WARN_ON(!sp->unsync);
+       trace_kvm_mmu_sync_page(sp);
        sp->unsync = 0;
        --kvm->stat.mmu_unsync;
 }
@@ -1202,7 +1203,6 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
                return 1;
        }
 
-       trace_kvm_mmu_sync_page(sp);
        if (rmap_write_protect(vcpu->kvm, sp->gfn))
                kvm_flush_remote_tlbs(vcpu->kvm);
        kvm_unlink_unsync_page(vcpu->kvm, sp);
@@ -1730,7 +1730,6 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
        struct kvm_mmu_page *s;
        struct hlist_node *node, *n;
 
-       trace_kvm_mmu_unsync_page(sp);
        index = kvm_page_table_hashfn(sp->gfn);
        bucket = &vcpu->kvm->arch.mmu_page_hash[index];
        /* don't unsync if pagetable is shadowed with multiple roles */
@@ -1740,6 +1739,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
                if (s->role.word != sp->role.word)
                        return 1;
        }
+       trace_kvm_mmu_unsync_page(sp);
        ++vcpu->kvm->stat.mmu_unsync;
        sp->unsync = 1;
 
@@ -1870,6 +1870,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
                        child = page_header(pte & PT64_BASE_ADDR_MASK);
                        mmu_page_remove_parent_pte(child, sptep);
+                       __set_spte(sptep, shadow_trap_nonpresent_pte);
+                       kvm_flush_remote_tlbs(vcpu->kvm);
                } else if (pfn != spte_to_pfn(*sptep)) {
                        pgprintk("hfn old %lx new %lx\n",
                                 spte_to_pfn(*sptep), pfn);
@@ -2059,21 +2061,23 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
                hpa_t root = vcpu->arch.mmu.root_hpa;
 
                ASSERT(!VALID_PAGE(root));
-               if (tdp_enabled)
-                       direct = 1;
                if (mmu_check_root(vcpu, root_gfn))
                        return 1;
+               if (tdp_enabled) {
+                       direct = 1;
+                       root_gfn = 0;
+               }
+               spin_lock(&vcpu->kvm->mmu_lock);
                sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
                                      PT64_ROOT_LEVEL, direct,
                                      ACC_ALL, NULL);
                root = __pa(sp->spt);
                ++sp->root_count;
+               spin_unlock(&vcpu->kvm->mmu_lock);
                vcpu->arch.mmu.root_hpa = root;
                return 0;
        }
        direct = !is_paging(vcpu);
-       if (tdp_enabled)
-               direct = 1;
        for (i = 0; i < 4; ++i) {
                hpa_t root = vcpu->arch.mmu.pae_root[i];
 
@@ -2089,11 +2093,18 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
                        root_gfn = 0;
                if (mmu_check_root(vcpu, root_gfn))
                        return 1;
+               if (tdp_enabled) {
+                       direct = 1;
+                       root_gfn = i << 30;
+               }
+               spin_lock(&vcpu->kvm->mmu_lock);
                sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
                                      PT32_ROOT_LEVEL, direct,
                                      ACC_ALL, NULL);
                root = __pa(sp->spt);
                ++sp->root_count;
+               spin_unlock(&vcpu->kvm->mmu_lock);
+
                vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
        }
        vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
@@ -2423,6 +2434,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
                r = paging32_init_context(vcpu);
 
        vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
+       vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
 
        return r;
 }
@@ -2462,7 +2474,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
                goto out;
        spin_lock(&vcpu->kvm->mmu_lock);
        kvm_mmu_free_some_pages(vcpu);
+       spin_unlock(&vcpu->kvm->mmu_lock);
        r = mmu_alloc_roots(vcpu);
+       spin_lock(&vcpu->kvm->mmu_lock);
        mmu_sync_roots(vcpu);
        spin_unlock(&vcpu->kvm->mmu_lock);
        if (r)
@@ -2898,13 +2912,13 @@ restart:
        kvm_flush_remote_tlbs(kvm);
 }
 
-static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm)
+static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm)
 {
        struct kvm_mmu_page *page;
 
        page = container_of(kvm->arch.active_mmu_pages.prev,
                            struct kvm_mmu_page, link);
-       kvm_mmu_zap_page(kvm, page);
+       return kvm_mmu_zap_page(kvm, page) + 1;
 }
 
 static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
@@ -2916,7 +2930,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
        spin_lock(&kvm_lock);
 
        list_for_each_entry(kvm, &vm_list, vm_list) {
-               int npages, idx;
+               int npages, idx, freed_pages;
 
                idx = srcu_read_lock(&kvm->srcu);
                spin_lock(&kvm->mmu_lock);
@@ -2924,8 +2938,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
                         kvm->arch.n_free_mmu_pages;
                cache_count += npages;
                if (!kvm_freed && nr_to_scan > 0 && npages > 0) {
-                       kvm_mmu_remove_one_alloc_mmu_page(kvm);
-                       cache_count--;
+                       freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm);
+                       cache_count -= freed_pages;
                        kvm_freed = kvm;
                }
                nr_to_scan--;