KVM: MMU: Add tracepoint for guest page aging

[safe/jmp/linux-2.6] / arch / x86 / kvm / mmu.c
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c

index 4f5508c..7397932 100644 (file)
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -18,6 +18,7 @@
   */
  
  #include "mmu.h"
+#include "x86.h"
  #include "kvm_cache_regs.h"
  
  #include <linux/kvm_host.h>
@@ -150,6 +151,9 @@ module_param(oos_shadow, bool, 0644);
  #define ACC_USER_MASK    PT_USER_MASK
  #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
  
+#include <trace/events/kvm.h>
+
+#undef TRACE_INCLUDE_FILE
  #define CREATE_TRACE_POINTS
  #include "mmutrace.h"
  
@@ -226,7 +230,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
  
  static int is_write_protection(struct kvm_vcpu *vcpu)
  {
-       return vcpu->arch.cr0 & X86_CR0_WP;
+       return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
  }
  
  static int is_cpuid_PSE36(void)
@@ -236,7 +240,7 @@ static int is_cpuid_PSE36(void)
  
  static int is_nx(struct kvm_vcpu *vcpu)
  {
-       return vcpu->arch.shadow_efer & EFER_NX;
+       return vcpu->arch.efer & EFER_NX;
  }
  
  static int is_shadow_present_pte(u64 pte)
@@ -250,7 +254,7 @@ static int is_large_pte(u64 pte)
         return pte & PT_PAGE_SIZE_MASK;
  }
  
-static int is_writeble_pte(unsigned long pte)
+static int is_writable_pte(unsigned long pte)
  {
         return pte & PT_WRITABLE_MASK;
  }
@@ -467,24 +471,10 @@ static int has_wrprotected_page(struct kvm *kvm,
  
  static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
  {
-       unsigned long page_size = PAGE_SIZE;
-       struct vm_area_struct *vma;
-       unsigned long addr;
+       unsigned long page_size;
         int i, ret = 0;
  
-       addr = gfn_to_hva(kvm, gfn);
-       if (kvm_is_error_hva(addr))
-               return PT_PAGE_TABLE_LEVEL;
-
-       down_read(&current->mm->mmap_sem);
-       vma = find_vma(current->mm, addr);
-       if (!vma)
-               goto out;
-
-       page_size = vma_kernel_pagesize(vma);
-
-out:
-       up_read(&current->mm->mmap_sem);
+       page_size = kvm_host_page_size(kvm, gfn);
  
         for (i = PT_PAGE_TABLE_LEVEL;
              i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
@@ -632,7 +622,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
         pfn = spte_to_pfn(*spte);
         if (*spte & shadow_accessed_mask)
                 kvm_set_pfn_accessed(pfn);
-       if (is_writeble_pte(*spte))
+       if (is_writable_pte(*spte))
                 kvm_set_pfn_dirty(pfn);
         rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
         if (!*rmapp) {
@@ -708,7 +698,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
                 BUG_ON(!spte);
                 BUG_ON(!(*spte & PT_PRESENT_MASK));
                 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-               if (is_writeble_pte(*spte)) {
+               if (is_writable_pte(*spte)) {
                         __set_spte(spte, *spte & ~PT_WRITABLE_MASK);
                         write_protected = 1;
                 }
@@ -732,7 +722,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
                         BUG_ON(!(*spte & PT_PRESENT_MASK));
                         BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
                         pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
-                       if (is_writeble_pte(*spte)) {
+                       if (is_writable_pte(*spte)) {
                                 rmap_remove(kvm, spte);
                                 --kvm->stat.lpages;
                                 __set_spte(spte, shadow_trap_nonpresent_pte);
@@ -787,7 +777,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
  
                         new_spte &= ~PT_WRITABLE_MASK;
                         new_spte &= ~SPTE_HOST_WRITEABLE;
-                       if (is_writeble_pte(*spte))
+                       if (is_writable_pte(*spte))
                                 kvm_set_pfn_dirty(spte_to_pfn(*spte));
                         __set_spte(spte, new_spte);
                         spte = rmap_next(kvm, rmapp, spte);
@@ -805,6 +795,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
                                          unsigned long data))
  {
         int i, j;
+       int ret;
         int retval = 0;
         struct kvm_memslots *slots;
  
@@ -819,16 +810,17 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
                 if (hva >= start && hva < end) {
                         gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
  
-                       retval |= handler(kvm, &memslot->rmap[gfn_offset],
-                                         data);
+                       ret = handler(kvm, &memslot->rmap[gfn_offset], data);
  
                         for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
                                 int idx = gfn_offset;
                                 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
-                               retval |= handler(kvm,
+                               ret |= handler(kvm,
                                         &memslot->lpage_info[j][idx].rmap_pde,
                                         data);
                         }
+                       trace_kvm_age_page(hva, memslot, ret);
+                       retval |= ret;
                 }
         }
  
@@ -851,9 +843,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
         u64 *spte;
         int young = 0;
  
-       /* always return old for EPT */
+       /*
+        * Emulate the accessed bit for EPT by checking whether this page has
+        * an EPT mapping and, if so, unmapping it. On the next access,
+        * a new EPT mapping will be established.
+        * This has some overhead, but not as much as the cost of swapping
+        * out actively used pages or breaking up actively used hugepages.
+        */
         if (!shadow_accessed_mask)
-               return 0;
+               return kvm_unmap_rmapp(kvm, rmapp, data);
  
         spte = rmap_next(kvm, rmapp, NULL);
         while (spte) {
@@ -1847,7 +1845,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                  * is responsibility of mmu_get_page / kvm_sync_page.
                  * Same reasoning can be applied to dirty page accounting.
                  */
-               if (!can_unsync && is_writeble_pte(*sptep))
+               if (!can_unsync && is_writable_pte(*sptep))
                         goto set_pte;
  
                 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
@@ -1855,7 +1853,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                                  __func__, gfn);
                         ret = 1;
                         pte_access &= ~ACC_WRITE_MASK;
-                       if (is_writeble_pte(spte))
+                       if (is_writable_pte(spte))
                                 spte &= ~PT_WRITABLE_MASK;
                 }
         }
@@ -1876,7 +1874,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                          bool reset_host_protection)
  {
         int was_rmapped = 0;
-       int was_writeble = is_writeble_pte(*sptep);
+       int was_writable = is_writable_pte(*sptep);
         int rmap_count;
  
         pgprintk("%s: spte %llx access %x write_fault %d"
@@ -1927,7 +1925,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
                         rmap_recycle(vcpu, sptep, gfn);
         } else {
-               if (was_writeble)
+               if (was_writable)
                         kvm_release_pfn_dirty(pfn);
                 else
                         kvm_release_pfn_clean(pfn);
@@ -2842,16 +2840,13 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
          */
         page = alloc_page(GFP_KERNEL | __GFP_DMA32);
         if (!page)
-               goto error_1;
+               return -ENOMEM;
+
         vcpu->arch.mmu.pae_root = page_address(page);
         for (i = 0; i < 4; ++i)
                 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
  
         return 0;
-
-error_1:
-       free_mmu_pages(vcpu);
-       return -ENOMEM;
  }
  
  int kvm_mmu_create(struct kvm_vcpu *vcpu)