Merge branch 'topic/core-cleanup' into for-linus
[safe/jmp/linux-2.6] / arch / x86 / kvm / mmu.c
index ff2b2e8..19a8906 100644 (file)
@@ -18,6 +18,7 @@
  */
 
 #include "mmu.h"
+#include "x86.h"
 #include "kvm_cache_regs.h"
 
 #include <linux/kvm_host.h>
@@ -30,6 +31,7 @@
 #include <linux/hugetlb.h>
 #include <linux/compiler.h>
 #include <linux/srcu.h>
+#include <linux/slab.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -137,12 +139,6 @@ module_param(oos_shadow, bool, 0644);
 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
                        | PT64_NX_MASK)
 
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
-
 #define RMAP_EXT 4
 
 #define ACC_EXEC_MASK    1
@@ -150,6 +146,9 @@ module_param(oos_shadow, bool, 0644);
 #define ACC_USER_MASK    PT_USER_MASK
 #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
 
+#include <trace/events/kvm.h>
+
+#undef TRACE_INCLUDE_FILE
 #define CREATE_TRACE_POINTS
 #include "mmutrace.h"
 
@@ -236,7 +235,7 @@ static int is_cpuid_PSE36(void)
 
 static int is_nx(struct kvm_vcpu *vcpu)
 {
-       return vcpu->arch.shadow_efer & EFER_NX;
+       return vcpu->arch.efer & EFER_NX;
 }
 
 static int is_shadow_present_pte(u64 pte)
@@ -467,24 +466,10 @@ static int has_wrprotected_page(struct kvm *kvm,
 
 static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 {
-       unsigned long page_size = PAGE_SIZE;
-       struct vm_area_struct *vma;
-       unsigned long addr;
+       unsigned long page_size;
        int i, ret = 0;
 
-       addr = gfn_to_hva(kvm, gfn);
-       if (kvm_is_error_hva(addr))
-               return PT_PAGE_TABLE_LEVEL;
-
-       down_read(&current->mm->mmap_sem);
-       vma = find_vma(current->mm, addr);
-       if (!vma)
-               goto out;
-
-       page_size = vma_kernel_pagesize(vma);
-
-out:
-       up_read(&current->mm->mmap_sem);
+       page_size = kvm_host_page_size(kvm, gfn);
 
        for (i = PT_PAGE_TABLE_LEVEL;
             i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
@@ -805,6 +790,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
                                         unsigned long data))
 {
        int i, j;
+       int ret;
        int retval = 0;
        struct kvm_memslots *slots;
 
@@ -819,16 +805,17 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
                if (hva >= start && hva < end) {
                        gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 
-                       retval |= handler(kvm, &memslot->rmap[gfn_offset],
-                                         data);
+                       ret = handler(kvm, &memslot->rmap[gfn_offset], data);
 
                        for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
                                int idx = gfn_offset;
                                idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
-                               retval |= handler(kvm,
+                               ret |= handler(kvm,
                                        &memslot->lpage_info[j][idx].rmap_pde,
                                        data);
                        }
+                       trace_kvm_age_page(hva, memslot, ret);
+                       retval |= ret;
                }
        }
 
@@ -851,9 +838,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
        u64 *spte;
        int young = 0;
 
-       /* always return old for EPT */
+       /*
+        * Emulate the accessed bit for EPT, by checking if this page has
+        * an EPT mapping, and clearing it if it does. On the next access,
+        * a new EPT mapping will be established.
+        * This has some overhead, but not as much as the cost of swapping
+        * out actively used pages or breaking up actively used hugepages.
+        */
        if (!shadow_accessed_mask)
-               return 0;
+               return kvm_unmap_rmapp(kvm, rmapp, data);
 
        spte = rmap_next(kvm, rmapp, NULL);
        while (spte) {
@@ -1497,8 +1490,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
                for_each_sp(pages, sp, parents, i) {
                        kvm_mmu_zap_page(kvm, sp);
                        mmu_pages_clear_parents(&parents);
+                       zapped++;
                }
-               zapped += pages.nr;
                kvm_mmu_pages_init(parent, &parents, &pages);
        }
 
@@ -1549,14 +1542,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
         */
 
        if (used_pages > kvm_nr_mmu_pages) {
-               while (used_pages > kvm_nr_mmu_pages) {
+               while (used_pages > kvm_nr_mmu_pages &&
+                       !list_empty(&kvm->arch.active_mmu_pages)) {
                        struct kvm_mmu_page *page;
 
                        page = container_of(kvm->arch.active_mmu_pages.prev,
                                            struct kvm_mmu_page, link);
-                       kvm_mmu_zap_page(kvm, page);
+                       used_pages -= kvm_mmu_zap_page(kvm, page);
                        used_pages--;
                }
+               kvm_nr_mmu_pages = used_pages;
                kvm->arch.n_free_mmu_pages = 0;
        }
        else
@@ -1603,7 +1598,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
                    && !sp->role.invalid) {
                        pgprintk("%s: zap %lx %x\n",
                                 __func__, gfn, sp->role.word);
-                       kvm_mmu_zap_page(kvm, sp);
+                       if (kvm_mmu_zap_page(kvm, sp))
+                               nn = bucket->first;
                }
        }
 }
@@ -1634,7 +1630,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 {
        struct page *page;
 
-       gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+       gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
        if (gpa == UNMAPPED_GVA)
                return NULL;
@@ -2157,8 +2153,11 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
        spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
+static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
+                                 u32 access, u32 *error)
 {
+       if (error)
+               *error = 0;
        return vaddr;
 }
 
@@ -2742,7 +2741,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
        if (tdp_enabled)
                return 0;
 
-       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+       gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
        spin_lock(&vcpu->kvm->mmu_lock);
        r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -2842,16 +2841,13 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
         */
        page = alloc_page(GFP_KERNEL | __GFP_DMA32);
        if (!page)
-               goto error_1;
+               return -ENOMEM;
+
        vcpu->arch.mmu.pae_root = page_address(page);
        for (i = 0; i < 4; ++i)
                vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
 
        return 0;
-
-error_1:
-       free_mmu_pages(vcpu);
-       return -ENOMEM;
 }
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
@@ -3242,7 +3238,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
                if (is_shadow_present_pte(ent) && !is_last_spte(ent, level))
                        audit_mappings_page(vcpu, ent, va, level - 1);
                else {
-                       gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
+                       gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL);
                        gfn_t gfn = gpa >> PAGE_SHIFT;
                        pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
                        hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;