From 08e850c6536db302050c0287649e68e3bbdfe2c7 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Mon, 15 Mar 2010 13:59:57 +0200
Subject: [PATCH] KVM: MMU: Reinstate pte prefetch on invlpg

Commit fb341f57 removed the pte prefetch on guest invlpg, citing guest
races.  However, the SDM is adamant that prefetch is allowed:

  "The processor may create entries in paging-structure caches for
   translations required for prefetches and for accesses that are a
   result of speculative execution that would never actually occur in
   the executed code path."

And, in fact, there was a race in the prefetch code: we picked up the
pte without the mmu lock held, so an older invlpg could install the pte
over a newer invlpg.

Reinstate the prefetch logic, but this time use a counter to note
whether another invlpg has executed in the meantime.  If a race
occurred, do not install the pte.

Signed-off-by: Avi Kivity
Signed-off-by: Marcelo Tosatti
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu.c              | 37 +++++++++++++++++++++++--------------
 arch/x86/kvm/paging_tmpl.h      | 15 +++++++++++++++
 3 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ea1b6c6..28826c8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -389,6 +389,7 @@ struct kvm_arch {
 	unsigned int n_free_mmu_pages;
 	unsigned int n_requested_mmu_pages;
 	unsigned int n_alloc_mmu_pages;
+	atomic_t invlpg_counter;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	/*
 	 * Hash table of struct kvm_mmu_page.
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 91f8b17..064c3ef 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2613,20 +2613,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	int flooded = 0;
 	int npte;
 	int r;
+	int invlpg_counter;
 
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 
-	switch (bytes) {
-	case 4:
-		gentry = *(const u32 *)new;
-		break;
-	case 8:
-		gentry = *(const u64 *)new;
-		break;
-	default:
-		gentry = 0;
-		break;
-	}
+	invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
 
 	/*
 	 * Assume that the pte write on a page table of the same type
@@ -2634,16 +2625,34 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	 * (might be false while changing modes).  Note it is verified later
 	 * by update_pte().
 	 */
-	if (is_pae(vcpu) && bytes == 4) {
+	if ((is_pae(vcpu) && bytes == 4) || !new) {
 		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-		gpa &= ~(gpa_t)7;
-		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, 8);
+		if (is_pae(vcpu)) {
+			gpa &= ~(gpa_t)7;
+			bytes = 8;
+		}
+		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
 		if (r)
 			gentry = 0;
+		new = (const u8 *)&gentry;
+	}
+
+	switch (bytes) {
+	case 4:
+		gentry = *(const u32 *)new;
+		break;
+	case 8:
+		gentry = *(const u64 *)new;
+		break;
+	default:
+		gentry = 0;
+		break;
 	}
 
 	mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
+		gentry = 0;
 	kvm_mmu_access_page(vcpu, gfn);
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4b37e1a..067797a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -463,6 +463,7 @@ out_unlock:
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_shadow_walk_iterator iterator;
+	gpa_t pte_gpa = -1;
 	int level;
 	u64 *sptep;
 	int need_flush = 0;
@@ -476,6 +477,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		if (level == PT_PAGE_TABLE_LEVEL  ||
 		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
 		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
+			struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+			pte_gpa = (sp->gfn << PAGE_SHIFT);
+			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
 
 			if (is_shadow_present_pte(*sptep)) {
 				rmap_remove(vcpu->kvm, sptep);
@@ -493,7 +498,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 
 	if (need_flush)
 		kvm_flush_remote_tlbs(vcpu->kvm);
+
+	atomic_inc(&vcpu->kvm->arch.invlpg_counter);
+
 	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	if (pte_gpa == -1)
+		return;
+
+	if (mmu_topup_memory_caches(vcpu))
+		return;
+	kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
-- 
1.8.2.3
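
Note on the locking pattern: this is a sequence-counter style optimistic
scheme.  kvm_mmu_pte_write() samples invlpg_counter before doing its slow
work outside mmu_lock, and FNAME(invlpg) bumps the counter while still
holding the lock, so a prefetch that raced with a newer invlpg is detected
under the lock and discarded (gentry forced to 0) instead of reinstalling
a stale pte.  The user-space sketch below illustrates the same pattern;
it is illustrative only, with hypothetical names (pte_write, invlpg,
shadow_pte), and pthread_mutex_t plus C11 atomics standing in for
mmu_lock and the kernel's atomic_t.

/*
 * Illustrative sketch of the invlpg_counter scheme (not kernel code).
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int invlpg_counter;	/* bumped by every invlpg */
static unsigned long shadow_pte;	/* the installed translation */

/* Modeled on kvm_mmu_pte_write(): sample the counter, do the slow
 * work outside the lock, then recheck the counter under the lock. */
static void pte_write(unsigned long gentry)
{
	int snapshot = atomic_load(&invlpg_counter);

	/* ...slow work outside the lock, e.g. reading the guest pte... */

	pthread_mutex_lock(&mmu_lock);
	if (atomic_load(&invlpg_counter) != snapshot)
		gentry = 0;	/* an invlpg raced with us: install nothing */
	shadow_pte = gentry;
	pthread_mutex_unlock(&mmu_lock);
}

/* Modeled on FNAME(invlpg)(): zap the translation and bump the
 * counter while still holding the lock, so any in-flight pte_write()
 * sees a changed counter and discards its stale value. */
static void invlpg(void)
{
	pthread_mutex_lock(&mmu_lock);
	shadow_pte = 0;
	atomic_fetch_add(&invlpg_counter, 1);
	pthread_mutex_unlock(&mmu_lock);
}

int main(void)
{
	pte_write(0x1234);
	invlpg();
	printf("spte after invlpg: %#lx\n", shadow_pte);	/* prints 0 */
	return 0;
}

This is what makes it safe for FNAME(invlpg) to drop mmu_lock before
calling kvm_mmu_pte_write() for the prefetch: if a second invlpg slips
in between, the counter check turns the prefetch into a no-op rather
than letting an older translation overwrite a newer invalidation.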