X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=arch%2Fx86%2Fkvm%2Fmmu.c;h=50fe8541ed3906f5f8f3991d26b2c084adbac9d6;hb=2aaf65e8c40468b198b709a765abe311f91c1a1d;hp=5f97dbd242916a7cadebde539d964e29d7bcabcb;hpb=988a2cae6a3c0dea6df59808a935a9a697bfc28c;p=safe%2Fjmp%2Flinux-2.6 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5f97dbd..50fe854 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -250,6 +250,15 @@ static int is_rmap_spte(u64 pte) return is_shadow_present_pte(pte); } +static int is_last_spte(u64 pte, int level) +{ + if (level == PT_PAGE_TABLE_LEVEL) + return 1; + if (level == PT_DIRECTORY_LEVEL && is_large_pte(pte)) + return 1; + return 0; +} + static pfn_t spte_to_pfn(u64 pte) { return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; @@ -1313,25 +1322,17 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, pt = sp->spt; - if (sp->role.level == PT_PAGE_TABLE_LEVEL) { - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { - if (is_shadow_present_pte(pt[i])) - rmap_remove(kvm, &pt[i]); - pt[i] = shadow_trap_nonpresent_pte; - } - return; - } - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { ent = pt[i]; if (is_shadow_present_pte(ent)) { - if (!is_large_pte(ent)) { + if (!is_last_spte(ent, sp->role.level)) { ent &= PT64_BASE_ADDR_MASK; mmu_page_remove_parent_pte(page_header(ent), &pt[i]); } else { - --kvm->stat.lpages; + if (is_large_pte(ent)) + --kvm->stat.lpages; rmap_remove(kvm, &pt[i]); } } @@ -2381,8 +2382,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, pte = *spte; if (is_shadow_present_pte(pte)) { - if (sp->role.level == PT_PAGE_TABLE_LEVEL || - is_large_pte(pte)) + if (is_last_spte(pte, sp->role.level)) rmap_remove(vcpu->kvm, spte); else { child = page_header(pte & PT64_BASE_ADDR_MASK); @@ -2673,8 +2673,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) ++vcpu->stat.mmio_exits; return 0; case EMULATE_FAIL: - kvm_report_emulation_failure(vcpu, "pagetable"); - return 1; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + return 0; default: BUG(); } @@ -3044,6 +3045,55 @@ static gva_t canonicalize(gva_t gva) return gva; } + +typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp, + u64 *sptep); + +static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, + inspect_spte_fn fn) +{ + int i; + + for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { + u64 ent = sp->spt[i]; + + if (is_shadow_present_pte(ent)) { + if (sp->role.level > 1 && !is_large_pte(ent)) { + struct kvm_mmu_page *child; + child = page_header(ent & PT64_BASE_ADDR_MASK); + __mmu_spte_walk(kvm, child, fn); + } + if (sp->role.level == 1) + fn(kvm, sp, &sp->spt[i]); + } + } +} + +static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn) +{ + int i; + struct kvm_mmu_page *sp; + + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return; + if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + hpa_t root = vcpu->arch.mmu.root_hpa; + sp = page_header(root); + __mmu_spte_walk(vcpu->kvm, sp, fn); + return; + } + for (i = 0; i < 4; ++i) { + hpa_t root = vcpu->arch.mmu.pae_root[i]; + + if (root && VALID_PAGE(root)) { + root &= PT64_BASE_ADDR_MASK; + sp = page_header(root); + __mmu_spte_walk(vcpu->kvm, sp, fn); + } + } + return; +} + static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, gva_t va, int level) { @@ -3059,12 +3109,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, va = canonicalize(va); if (level > 1) { - if (ent == shadow_notrap_nonpresent_pte) - printk(KERN_ERR "audit: (%s) nontrapping pte" - " in nonleaf level: levels %d gva %lx" - " level %d pte %llx\n", audit_msg, - vcpu->arch.mmu.root_level, va, level, ent); - else + if (is_shadow_present_pte(ent)) audit_mappings_page(vcpu, ent, va, level - 1); } else { gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); @@ -3072,6 +3117,11 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; + if (is_error_pfn(pfn)) { + kvm_release_pfn_clean(pfn); + continue; + } + if (is_shadow_present_pte(ent) && (ent & PT64_BASE_ADDR_MASK) != hpa) printk(KERN_ERR "xx audit error: (%s) levels %d" @@ -3136,9 +3186,47 @@ static int count_rmaps(struct kvm_vcpu *vcpu) return nmaps; } -static int count_writable_mappings(struct kvm_vcpu *vcpu) +void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) +{ + unsigned long *rmapp; + struct kvm_mmu_page *rev_sp; + gfn_t gfn; + + if (*sptep & PT_WRITABLE_MASK) { + rev_sp = page_header(__pa(sptep)); + gfn = rev_sp->gfns[sptep - rev_sp->spt]; + + if (!gfn_to_memslot(kvm, gfn)) { + if (!printk_ratelimit()) + return; + printk(KERN_ERR "%s: no memslot for gfn %ld\n", + audit_msg, gfn); + printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", + audit_msg, sptep - rev_sp->spt, + rev_sp->gfn); + dump_stack(); + return; + } + + rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], 0); + if (!*rmapp) { + if (!printk_ratelimit()) + return; + printk(KERN_ERR "%s: no rmap for writable spte %llx\n", + audit_msg, *sptep); + dump_stack(); + } + } + +} + +void audit_writable_sptes_have_rmaps(struct kvm_vcpu *vcpu) +{ + mmu_spte_walk(vcpu, inspect_spte_has_rmap); +} + +static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) { - int nmaps = 0; struct kvm_mmu_page *sp; int i; @@ -3155,20 +3243,16 @@ static int count_writable_mappings(struct kvm_vcpu *vcpu) continue; if (!(ent & PT_WRITABLE_MASK)) continue; - ++nmaps; + inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]); } } - return nmaps; + return; } static void audit_rmap(struct kvm_vcpu *vcpu) { - int n_rmap = count_rmaps(vcpu); - int n_actual = count_writable_mappings(vcpu); - - if (n_rmap != n_actual) - printk(KERN_ERR "%s: (%s) rmap %d actual %d\n", - __func__, audit_msg, n_rmap, n_actual); + check_writable_mappings_rmap(vcpu); + count_rmaps(vcpu); } static void audit_write_protection(struct kvm_vcpu *vcpu) @@ -3176,20 +3260,28 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) struct kvm_mmu_page *sp; struct kvm_memory_slot *slot; unsigned long *rmapp; + u64 *spte; gfn_t gfn; list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) { if (sp->role.direct) continue; + if (sp->unsync) + continue; gfn = unalias_gfn(vcpu->kvm, sp->gfn); slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); rmapp = &slot->rmap[gfn - slot->base_gfn]; - if (*rmapp) - printk(KERN_ERR "%s: (%s) shadow page has writable" - " mappings: gfn %lx role %x\n", + + spte = rmap_next(vcpu->kvm, rmapp, NULL); + while (spte) { + if (*spte & PT_WRITABLE_MASK) + printk(KERN_ERR "%s: (%s) shadow page has " + "writable mappings: gfn %lx role %x\n", __func__, audit_msg, sp->gfn, sp->role.word); + spte = rmap_next(vcpu->kvm, rmapp, spte); + } } } @@ -3201,7 +3293,9 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) audit_msg = msg; audit_rmap(vcpu); audit_write_protection(vcpu); - audit_mappings(vcpu); + if (strcmp("pre pte write", audit_msg) != 0) + audit_mappings(vcpu); + audit_writable_sptes_have_rmaps(vcpu); dbg = olddbg; }