string: factorize skip_spaces and export it to be generally available
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 110c224..4c3e5b2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -108,6 +108,9 @@ module_param(oos_shadow, bool, 0644);
 
 #define PT32_LEVEL_MASK(level) \
                (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))
+#define PT32_LVL_OFFSET_MASK(level) \
+       (PT32_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+                                               * PT32_LEVEL_BITS))) - 1))
 
 #define PT32_INDEX(address, level)\
        (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
@@ -116,10 +119,19 @@ module_param(oos_shadow, bool, 0644);
 #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
 #define PT64_DIR_BASE_ADDR_MASK \
        (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
+#define PT64_LVL_ADDR_MASK(level) \
+       (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+                                               * PT64_LEVEL_BITS))) - 1))
+#define PT64_LVL_OFFSET_MASK(level) \
+       (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+                                               * PT64_LEVEL_BITS))) - 1))
 
 #define PT32_BASE_ADDR_MASK PAGE_MASK
 #define PT32_DIR_BASE_ADDR_MASK \
        (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))
+#define PT32_LVL_ADDR_MASK(level) \
+       (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+                                           * PT32_LEVEL_BITS))) - 1))
 
 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
                        | PT64_NX_MASK)
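
The PT64_LVL_ADDR_MASK()/PT64_LVL_OFFSET_MASK() pair above (and the PT32 equivalents) generalize the old fixed-level DIR_BASE_ADDR_MASK macros to an arbitrary paging level, which is what later lets the walker treat 2MB and 1GB mappings uniformly. A standalone sketch of the arithmetic, with PAGE_SHIFT and PT64_LEVEL_BITS hardcoded to their x86-64 values (12 and 9) instead of coming from kernel headers:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12      /* 4K base pages */
    #define PT64_LEVEL_BITS 9  /* 512 entries per table level */
    #define PT64_BASE_ADDR_MASK \
            (((1ULL << 52) - 1) & ~(uint64_t)((1ULL << PAGE_SHIFT) - 1))
    #define PT64_LVL_ADDR_MASK(level) \
            (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
                                                    * PT64_LEVEL_BITS))) - 1))
    #define PT64_LVL_OFFSET_MASK(level) \
            (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
                                                    * PT64_LEVEL_BITS))) - 1))

    int main(void)
    {
            /* level 2 (2MB page): frame bits 51..21, offset bits 20..12 */
            printf("L2 addr   %#llx\n", PT64_LVL_ADDR_MASK(2));
            printf("L2 offset %#llx\n", PT64_LVL_OFFSET_MASK(2));
            /* level 3 (1GB page): frame bits 51..30, offset bits 29..12 */
            printf("L3 addr   %#llx\n", PT64_LVL_ADDR_MASK(3));
            printf("L3 offset %#llx\n", PT64_LVL_OFFSET_MASK(3));
            return 0;
    }
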
@@ -130,6 +142,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
+#define PT_PDPE_LEVEL 3
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
@@ -143,6 +156,8 @@ module_param(oos_shadow, bool, 0644);
 #define CREATE_TRACE_POINTS
 #include "mmutrace.h"
 
+#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+
 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 
 struct kvm_rmap_desc {
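
SPTE_HOST_WRITEABLE records, in a software-available spte bit, whether the host pte was writable when the mapping was created; kvm_set_pte_rmapp() below clears it when the host (KSM is the motivating user) downgrades a page to read-only. PT_FIRST_AVAIL_BITS_SHIFT is 9 in this file, i.e. the first of the ignored AVL bits 11:9 of an x86 PTE, so the flag cannot collide with anything the hardware interprets. A compile-time check of that non-collision, with the constants restated locally (values assumed from this mmu.c, not included from it):

    #include <stdint.h>

    #define PT_FIRST_AVAIL_BITS_SHIFT 9    /* first ignored AVL bit (11:9) */
    #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
    #define PT64_BASE_ADDR_MASK \
            (((1ULL << 52) - 1) & ~(uint64_t)0xfff)   /* pfn field, bits 51:12 */

    /* the software flag must lie outside the physical-address field */
    _Static_assert((SPTE_HOST_WRITEABLE & PT64_BASE_ADDR_MASK) == 0,
                   "SPTE_HOST_WRITEABLE overlaps the pfn bits");
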
@@ -621,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
        if (*spte & shadow_accessed_mask)
                kvm_set_pfn_accessed(pfn);
        if (is_writeble_pte(*spte))
-               kvm_release_pfn_dirty(pfn);
-       else
-               kvm_release_pfn_clean(pfn);
+               kvm_set_pfn_dirty(pfn);
        rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
        if (!*rmapp) {
                printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -735,7 +748,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
        return write_protected;
 }
 
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+                          unsigned long data)
 {
        u64 *spte;
        int need_tlb_flush = 0;
@@ -750,8 +764,47 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
        return need_tlb_flush;
 }
 
+static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
+                            unsigned long data)
+{
+       int need_flush = 0;
+       u64 *spte, new_spte;
+       pte_t *ptep = (pte_t *)data;
+       pfn_t new_pfn;
+
+       WARN_ON(pte_huge(*ptep));
+       new_pfn = pte_pfn(*ptep);
+       spte = rmap_next(kvm, rmapp, NULL);
+       while (spte) {
+               BUG_ON(!is_shadow_present_pte(*spte));
+               rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
+               need_flush = 1;
+               if (pte_write(*ptep)) {
+                       rmap_remove(kvm, spte);
+                       __set_spte(spte, shadow_trap_nonpresent_pte);
+                       spte = rmap_next(kvm, rmapp, NULL);
+               } else {
+                       new_spte = *spte & ~PT64_BASE_ADDR_MASK;
+                       new_spte |= (u64)new_pfn << PAGE_SHIFT;
+
+                       new_spte &= ~PT_WRITABLE_MASK;
+                       new_spte &= ~SPTE_HOST_WRITEABLE;
+                       if (is_writeble_pte(*spte))
+                               kvm_set_pfn_dirty(spte_to_pfn(*spte));
+                       __set_spte(spte, new_spte);
+                       spte = rmap_next(kvm, rmapp, spte);
+               }
+       }
+       if (need_flush)
+               kvm_flush_remote_tlbs(kvm);
+
+       return 0;
+}
+
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-                         int (*handler)(struct kvm *kvm, unsigned long *rmapp))
+                         unsigned long data,
+                         int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+                                        unsigned long data))
 {
        int i, j;
        int retval = 0;
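
Threading an opaque unsigned long through kvm_handle_hva() lets one rmap walker serve every MMU-notifier operation; only kvm_set_pte_rmapp() interprets the value (as a pte_t pointer), the other handlers ignore it. The three dispatch sites, as wired up elsewhere in this patch:

    kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);   /* page invalidation */
    kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);     /* page aging        */
    kvm_handle_hva(kvm, hva, (unsigned long)&pte,
                   kvm_set_pte_rmapp);              /* host pte changed  */
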
@@ -773,13 +826,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
                if (hva >= start && hva < end) {
                        gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 
-                       retval |= handler(kvm, &memslot->rmap[gfn_offset]);
+                       retval |= handler(kvm, &memslot->rmap[gfn_offset],
+                                         data);
 
                        for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
                                int idx = gfn_offset;
                                idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
                                retval |= handler(kvm,
-                                       &memslot->lpage_info[j][idx].rmap_pde);
+                                       &memslot->lpage_info[j][idx].rmap_pde,
+                                       data);
                        }
                }
        }
@@ -789,10 +844,16 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
-       return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+       return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+       kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
 }
 
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+                        unsigned long data)
 {
        u64 *spte;
        int young = 0;
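
kvm_set_spte_hva() is the arch hook for the new change_pte MMU-notifier operation: when the host replaces a page (KSM merging) or write-protects it, the affected sptes are rewritten in place by kvm_set_pte_rmapp() instead of being zapped and refaulted. The generic side lives in virt/kvm/kvm_main.c and is not part of this diff; a sketch of how the hook is plausibly reached (callback name and locking are assumptions modeled on the notifier code of this era):

    static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
                                            struct mm_struct *mm,
                                            unsigned long address,
                                            pte_t pte)
    {
            struct kvm *kvm = mmu_notifier_to_kvm(mn);

            spin_lock(&kvm->mmu_lock);
            kvm->mmu_notifier_seq++;   /* invalidate concurrent page faults */
            kvm_set_spte_hva(kvm, address, pte);
            spin_unlock(&kvm->mmu_lock);
    }
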
@@ -828,13 +889,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
        gfn = unalias_gfn(vcpu->kvm, gfn);
        rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
-       kvm_unmap_rmapp(vcpu->kvm, rmapp);
+       kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
        kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
-       return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+       return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
 }
 
 #ifdef MMU_DEBUG
@@ -1743,7 +1804,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                    unsigned pte_access, int user_fault,
                    int write_fault, int dirty, int level,
                    gfn_t gfn, pfn_t pfn, bool speculative,
-                   bool can_unsync)
+                   bool can_unsync, bool reset_host_protection)
 {
        u64 spte;
        int ret = 0;
@@ -1770,6 +1831,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
                        kvm_is_mmio_pfn(pfn));
 
+       if (reset_host_protection)
+               spte |= SPTE_HOST_WRITEABLE;
+
        spte |= (u64)pfn << PAGE_SHIFT;
 
        if ((pte_access & ACC_WRITE_MASK)
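
set_spte() now stamps SPTE_HOST_WRITEABLE only when the caller asserts that the host pte backing the mapping is writable (reset_host_protection). The flag's consumer is the shadow-page sync path in paging_tmpl.h, which is not in this diff; a sketch of the intended pattern there (helper names and the exact argument list are assumptions):

    /* on resync, host-side write protection is sticky: only pass
     * reset_host_protection = true if the old spte already had the flag */
    bool host_writeable = *sptep & SPTE_HOST_WRITEABLE;

    if (!host_writeable)
            pte_access &= ~ACC_WRITE_MASK;
    set_spte(vcpu, sptep, pte_access, 0, 0, is_dirty_gpte(gpte),
             PT_PAGE_TABLE_LEVEL, gfn, spte_to_pfn(*sptep), true, false,
             host_writeable);
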
@@ -1815,7 +1879,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                         unsigned pt_access, unsigned pte_access,
                         int user_fault, int write_fault, int dirty,
                         int *ptwrite, int level, gfn_t gfn,
-                        pfn_t pfn, bool speculative)
+                        pfn_t pfn, bool speculative,
+                        bool reset_host_protection)
 {
        int was_rmapped = 0;
        int was_writeble = is_writeble_pte(*sptep);
@@ -1847,7 +1912,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
        }
 
        if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
-                     dirty, level, gfn, pfn, speculative, true)) {
+                     dirty, level, gfn, pfn, speculative, true,
+                     reset_host_protection)) {
                if (write_fault)
                        *ptwrite = 1;
                kvm_x86_ops->tlb_flush(vcpu);
@@ -1864,8 +1930,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
        page_header_update_slot(vcpu->kvm, sptep, gfn);
        if (!was_rmapped) {
                rmap_count = rmap_add(vcpu, sptep, gfn);
-               if (!is_rmap_spte(*sptep))
-                       kvm_release_pfn_clean(pfn);
+               kvm_release_pfn_clean(pfn);
                if (rmap_count > RMAP_RECYCLE_THRESHOLD)
                        rmap_recycle(vcpu, sptep, gfn);
        } else {
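
Together with the rmap_remove() hunk earlier, this makes pfn reference handling uniform: now that MMU notifiers guarantee sptes are torn down before the host page can disappear, a mapped spte no longer needs to pin its page. The fault path's reference is dropped unconditionally once the spte is installed, and rmap_remove() merely propagates the dirty state back to the host. A condensed sketch of the resulting fault-path lifecycle (modeled on __direct_map() below):

    pfn = gfn_to_pfn(vcpu->kvm, gfn);   /* takes a page reference */
    spin_lock(&vcpu->kvm->mmu_lock);
    mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, 0, write, 1, &pt_write,
                 level, gfn, pfn, false, true);
    /* mmu_set_spte() has released the reference; the installed spte
     * holds none -- the mmu notifier will zap it before the host
     * frees the page */
    spin_unlock(&vcpu->kvm->mmu_lock);
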
@@ -1896,7 +1961,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
                if (iterator.level == level) {
                        mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
                                     0, write, 1, &pt_write,
-                                    level, gfn, pfn, false);
+                                    level, gfn, pfn, false, true);
                        ++vcpu->stat.pf_fixed;
                        break;
                }
@@ -2273,7 +2338,9 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
                context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 51);
                context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-               context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
+               context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
+                       rsvd_bits(maxphyaddr, 51) |
+                       rsvd_bits(13, 29);
                context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 51) |
                        rsvd_bits(13, 20);              /* large page */
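
This hunk fixes the reserved-bit check for 1GB pages: a PDPTE with PS=1 maps a 1GB-aligned frame, so physical-address bits 29:13 must be zero (bit 12 is PAT, which is why the range starts at 13). Previously the large-page row simply reused the non-leaf mask and missed those bits. For reference, the rsvd_bits() helper this relies on is defined earlier in mmu.c as roughly:

    /* mask with bits e..s set, e.g. rsvd_bits(13, 29) == 0x3fffe000 */
    static inline u64 rsvd_bits(int s, int e)
    {
            return ((1ULL << (e - s + 1)) - 1) << s;
    }
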
@@ -2463,11 +2530,8 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
                                  const void *new)
 {
        if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
-               if (vcpu->arch.update_pte.level == PT_PAGE_TABLE_LEVEL ||
-                   sp->role.glevels == PT32_ROOT_LEVEL) {
-                       ++vcpu->kvm->stat.mmu_pde_zapped;
-                       return;
-               }
+               ++vcpu->kvm->stat.mmu_pde_zapped;
+               return;
         }
 
        ++vcpu->kvm->stat.mmu_pte_updated;
@@ -2513,8 +2577,6 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
        u64 gpte = 0;
        pfn_t pfn;
 
-       vcpu->arch.update_pte.level = PT_PAGE_TABLE_LEVEL;
-
        if (bytes != 4 && bytes != 8)
                return;
 
@@ -2542,11 +2604,6 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                return;
        gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
-       if (is_large_pte(gpte) &&
-           (mapping_level(vcpu, gfn) == PT_DIRECTORY_LEVEL)) {
-               gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
-               vcpu->arch.update_pte.level = PT_DIRECTORY_LEVEL;
-       }
        vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
        pfn = gfn_to_pfn(vcpu->kvm, gfn);
@@ -2689,6 +2746,9 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
        gpa_t gpa;
        int r;
 
+       if (tdp_enabled)
+               return 0;
+
        gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
 
        spin_lock(&vcpu->kvm->mmu_lock);
@@ -2700,7 +2760,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
 
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
-       while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) {
+       while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES &&
+              !list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
                struct kvm_mmu_page *sp;
 
                sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
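
The added list_empty() check matters because on an empty list active_mmu_pages.prev points back at the list head itself, so the container_of() in the loop body would fabricate a pointer into the middle of struct kvm_arch rather than at a real shadow page (illustrative kernel-context snippet, not standalone):

    /* with no shadow pages allocated:
     *   vcpu->kvm->arch.active_mmu_pages.prev
     *           == &vcpu->kvm->arch.active_mmu_pages
     * so without the guard, sp below is garbage and
     * kvm_mmu_zap_page(sp) scribbles on unrelated memory */
    sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
                      struct kvm_mmu_page, link);
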
@@ -2728,7 +2789,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
        if (r)
                goto out;
 
-       er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
+       er = emulate_instruction(vcpu, cr2, error_code, 0);
 
        switch (er) {
        case EMULATE_DONE:
@@ -2739,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
        case EMULATE_FAIL:
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+               vcpu->run->internal.ndata = 0;
                return 0;
        default:
                BUG();
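
Zeroing internal.ndata keeps the kernel from handing userspace stale data: kvm_run is shared memory, and a VMM that walks run->internal.data up to ndata would otherwise read leftovers from an earlier exit. A hedged sketch of the userspace side (assuming a kvm_run mapped from the vcpu fd, as in any QEMU-like VMM):

    #include <stdio.h>
    #include <linux/kvm.h>

    static void report_internal_error(const struct kvm_run *run)
    {
            if (run->exit_reason != KVM_EXIT_INTERNAL_ERROR)
                    return;
            fprintf(stderr, "internal error, suberror %u\n",
                    run->internal.suberror);
            for (__u32 i = 0; i < run->internal.ndata; i++)
                    fprintf(stderr, "  data[%u] = 0x%llx\n", i,
                            (unsigned long long)run->internal.data[i]);
    }
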
@@ -2780,14 +2842,6 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 
        ASSERT(vcpu);
 
-       spin_lock(&vcpu->kvm->mmu_lock);
-       if (vcpu->kvm->arch.n_requested_mmu_pages)
-               vcpu->kvm->arch.n_free_mmu_pages =
-                                       vcpu->kvm->arch.n_requested_mmu_pages;
-       else
-               vcpu->kvm->arch.n_free_mmu_pages =
-                                       vcpu->kvm->arch.n_alloc_mmu_pages;
-       spin_unlock(&vcpu->kvm->mmu_lock);
        /*
         * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
         * Therefore we need to allocate shadow page tables in the first