#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
#define PFERR_FETCH_MASK (1U << 4)
#define PT_DIRECTORY_LEVEL 2
static u64 __read_mostly shadow_user_mask;
static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask;
-static u64 __read_mostly shadow_mt_mask;
+
+static inline u64 rsvd_bits(int s, int e)
+{
+ return ((1ULL << (e - s + 1)) - 1) << s;
+}
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
{
EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
+ u64 dirty_mask, u64 nx_mask, u64 x_mask)
{
shadow_user_mask = user_mask;
shadow_accessed_mask = accessed_mask;
shadow_dirty_mask = dirty_mask;
shadow_nx_mask = nx_mask;
shadow_x_mask = x_mask;
- shadow_mt_mask = mt_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
return vcpu->arch.shadow_efer & EFER_NX;
}
-static int is_present_pte(unsigned long pte)
-{
- return pte & PT_PRESENT_MASK;
-}
-
static int is_shadow_present_pte(u64 pte)
{
return pte != shadow_trap_nonpresent_pte
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&sp->oos_link);
- ASSERT(is_empty_shadow_page(sp->spt));
bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
sp->multimapped = 0;
sp->parent_pte = parent_pte;
idx < 512; \
idx = find_next_bit(bitmap, 512, idx+1))
-int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
- int idx)
+static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
+ int idx)
{
int i;
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry(sp, node, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.metaphysical
+ if (sp->gfn == gfn && !sp->role.direct
&& !sp->role.invalid) {
pgprintk("%s: found role %x\n",
__func__, sp->role.word);
return NULL;
}
-static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
- list_del(&sp->oos_link);
- --kvm->stat.mmu_unsync_global;
-}
-
static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
WARN_ON(!sp->unsync);
sp->unsync = 0;
- if (sp->global)
- kvm_unlink_unsync_global(kvm, sp);
--kvm->stat.mmu_unsync;
}
i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
i = mmu_pages_next(&pvec, &parents, i))
-int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
- int i)
+static int mmu_pages_next(struct kvm_mmu_pages *pvec,
+ struct mmu_page_path *parents,
+ int i)
{
int n;
return n;
}
-void mmu_pages_clear_parents(struct mmu_page_path *parents)
+static void mmu_pages_clear_parents(struct mmu_page_path *parents)
{
struct kvm_mmu_page *sp;
unsigned int level = 0;
gfn_t gfn,
gva_t gaddr,
unsigned level,
- int metaphysical,
+ int direct,
unsigned access,
u64 *parent_pte)
{
role = vcpu->arch.mmu.base_role;
role.level = level;
- role.metaphysical = metaphysical;
+ role.direct = direct;
role.access = access;
if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
sp->gfn = gfn;
sp->role = role;
- sp->global = role.cr4_pge;
hlist_add_head(&sp->hash_link, bucket);
- if (!metaphysical) {
+ if (!direct) {
if (rmap_write_protect(vcpu->kvm, gfn))
kvm_flush_remote_tlbs(vcpu->kvm);
account_shadowed(vcpu->kvm, gfn);
kvm_mmu_page_unlink_children(kvm, sp);
kvm_mmu_unlink_parents(kvm, sp);
kvm_flush_remote_tlbs(kvm);
- if (!sp->role.invalid && !sp->role.metaphysical)
+ if (!sp->role.invalid && !sp->role.direct)
unaccount_shadowed(kvm, sp->gfn);
if (sp->unsync)
kvm_unlink_unsync_page(kvm, sp);
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.metaphysical) {
+ if (sp->gfn == gfn && !sp->role.direct) {
pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
- if (sp->gfn == gfn && !sp->role.metaphysical
+ if (sp->gfn == gfn && !sp->role.direct
&& !sp->role.invalid) {
pgprintk("%s: zap %lx %x\n",
__func__, gfn, sp->role.word);
return mtrr_state->def_type;
}
-static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
{
u8 mtrr;
mtrr = MTRR_TYPE_WRBACK;
return mtrr;
}
+EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
/* don't unsync if pagetable is shadowed with multiple roles */
hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
- if (s->gfn != sp->gfn || s->role.metaphysical)
+ if (s->gfn != sp->gfn || s->role.direct)
continue;
if (s->role.word != sp->role.word)
return 1;
++vcpu->kvm->stat.mmu_unsync;
sp->unsync = 1;
- if (sp->global) {
- list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
- ++vcpu->kvm->stat.mmu_unsync_global;
- } else
- kvm_mmu_mark_parents_unsync(vcpu, sp);
+ kvm_mmu_mark_parents_unsync(vcpu, sp);
mmu_convert_notrap(sp);
return 0;
static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pte_access, int user_fault,
int write_fault, int dirty, int largepage,
- int global, gfn_t gfn, pfn_t pfn, bool speculative,
+ gfn_t gfn, pfn_t pfn, bool speculative,
bool can_unsync)
{
u64 spte;
int ret = 0;
- u64 mt_mask = shadow_mt_mask;
- struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
-
- if (!global && sp->global) {
- sp->global = 0;
- if (sp->unsync) {
- kvm_unlink_unsync_global(vcpu->kvm, sp);
- kvm_mmu_mark_parents_unsync(vcpu, sp);
- }
- }
/*
* We don't set the accessed bit, since we sometimes want to see
spte |= shadow_user_mask;
if (largepage)
spte |= PT_PAGE_SIZE_MASK;
- if (mt_mask) {
- if (!kvm_is_mmio_pfn(pfn)) {
- mt_mask = get_memory_type(vcpu, gfn) <<
- kvm_x86_ops->get_mt_mask_shift();
- mt_mask |= VMX_EPT_IGMT_BIT;
- } else
- mt_mask = MTRR_TYPE_UNCACHABLE <<
- kvm_x86_ops->get_mt_mask_shift();
- spte |= mt_mask;
- }
+ if (tdp_enabled)
+ spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
+ kvm_is_mmio_pfn(pfn));
spte |= (u64)pfn << PAGE_SHIFT;
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
- int *ptwrite, int largepage, int global,
- gfn_t gfn, pfn_t pfn, bool speculative)
+ int *ptwrite, int largepage, gfn_t gfn,
+ pfn_t pfn, bool speculative)
{
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*shadow_pte);
pgprintk("hfn old %lx new %lx\n",
spte_to_pfn(*shadow_pte), pfn);
rmap_remove(vcpu->kvm, shadow_pte);
- } else {
- if (largepage)
- was_rmapped = is_large_pte(*shadow_pte);
- else
- was_rmapped = 1;
- }
+ } else
+ was_rmapped = 1;
}
if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
- dirty, largepage, global, gfn, pfn, speculative, true)) {
+ dirty, largepage, gfn, pfn, speculative, true)) {
if (write_fault)
*ptwrite = 1;
kvm_x86_ops->tlb_flush(vcpu);
|| (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
0, write, 1, &pt_write,
- largepage, 0, gfn, pfn, false);
+ largepage, gfn, pfn, false);
++vcpu->stat.pf_fixed;
break;
}
int i;
gfn_t root_gfn;
struct kvm_mmu_page *sp;
- int metaphysical = 0;
+ int direct = 0;
root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
ASSERT(!VALID_PAGE(root));
if (tdp_enabled)
- metaphysical = 1;
+ direct = 1;
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, metaphysical,
+ PT64_ROOT_LEVEL, direct,
ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->arch.mmu.root_hpa = root;
return;
}
- metaphysical = !is_paging(vcpu);
+ direct = !is_paging(vcpu);
if (tdp_enabled)
- metaphysical = 1;
+ direct = 1;
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
} else if (vcpu->arch.mmu.root_level == 0)
root_gfn = 0;
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
- PT32_ROOT_LEVEL, metaphysical,
+ PT32_ROOT_LEVEL, direct,
ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
}
}
-static void mmu_sync_global(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = vcpu->kvm;
- struct kvm_mmu_page *sp, *n;
-
- list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
- kvm_sync_page(vcpu, sp);
-}
-
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->kvm->mmu_lock);
spin_unlock(&vcpu->kvm->mmu_lock);
}
-void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
-{
- spin_lock(&vcpu->kvm->mmu_lock);
- mmu_sync_global(vcpu);
- spin_unlock(&vcpu->kvm->mmu_lock);
-}
-
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
{
return vaddr;
nonpaging_free(vcpu);
}
+static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+{
+ int bit7;
+
+ bit7 = (gpte >> 7) & 1;
+ return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
+}
+
#define PTTYPE 64
#include "paging_tmpl.h"
#undef PTTYPE
#include "paging_tmpl.h"
#undef PTTYPE
+static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
+{
+ struct kvm_mmu *context = &vcpu->arch.mmu;
+ int maxphyaddr = cpuid_maxphyaddr(vcpu);
+ u64 exb_bit_rsvd = 0;
+
+ if (!is_nx(vcpu))
+ exb_bit_rsvd = rsvd_bits(63, 63);
+ switch (level) {
+ case PT32_ROOT_LEVEL:
+ /* no rsvd bits for 2 level 4K page table entries */
+ context->rsvd_bits_mask[0][1] = 0;
+ context->rsvd_bits_mask[0][0] = 0;
+ if (is_cpuid_PSE36())
+ /* 36bits PSE 4MB page */
+ context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+ else
+ /* 32 bits PSE 4MB page */
+ context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+ context->rsvd_bits_mask[1][0] = ~0ull;
+ break;
+ case PT32E_ROOT_LEVEL:
+ context->rsvd_bits_mask[0][2] =
+ rsvd_bits(maxphyaddr, 63) |
+ rsvd_bits(7, 8) | rsvd_bits(1, 2); /* PDPTE */
+ context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+ rsvd_bits(maxphyaddr, 62); /* PDE */
+ context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+ rsvd_bits(maxphyaddr, 62); /* PTE */
+ context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+ rsvd_bits(maxphyaddr, 62) |
+ rsvd_bits(13, 20); /* large page */
+ context->rsvd_bits_mask[1][0] = ~0ull;
+ break;
+ case PT64_ROOT_LEVEL:
+ context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+ rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+ context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+ rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+ context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+ rsvd_bits(maxphyaddr, 51);
+ context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+ rsvd_bits(maxphyaddr, 51);
+ context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
+ context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
+ context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+ rsvd_bits(maxphyaddr, 51) |
+ rsvd_bits(13, 20); /* large page */
+ context->rsvd_bits_mask[1][0] = ~0ull;
+ break;
+ }
+}
+
static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
{
struct kvm_mmu *context = &vcpu->arch.mmu;
static int paging64_init_context(struct kvm_vcpu *vcpu)
{
+ reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
}
{
struct kvm_mmu *context = &vcpu->arch.mmu;
+ reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
context->new_cr3 = paging_new_cr3;
context->page_fault = paging32_page_fault;
context->gva_to_gpa = paging32_gva_to_gpa;
static int paging32E_init_context(struct kvm_vcpu *vcpu)
{
+ reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
}
context->gva_to_gpa = nonpaging_gva_to_gpa;
context->root_level = 0;
} else if (is_long_mode(vcpu)) {
+ reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
context->gva_to_gpa = paging64_gva_to_gpa;
context->root_level = PT64_ROOT_LEVEL;
} else if (is_pae(vcpu)) {
+ reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
context->gva_to_gpa = paging64_gva_to_gpa;
context->root_level = PT32E_ROOT_LEVEL;
} else {
+ reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
context->gva_to_gpa = paging32_gva_to_gpa;
context->root_level = PT32_ROOT_LEVEL;
}
index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
- if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid)
+ if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
continue;
pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
- struct kvm_mmu_page *sp;
-
- while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
- sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
- struct kvm_mmu_page, link);
- kvm_mmu_zap_page(vcpu->kvm, sp);
- cond_resched();
- }
free_page((unsigned long)vcpu->arch.mmu.pae_root);
}
static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
- kvm_x86_ops->tlb_flush(vcpu);
- set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
+ kvm_set_cr3(vcpu, vcpu->arch.cr3);
return 1;
}
" in nonleaf level: levels %d gva %lx"
" level %d pte %llx\n", audit_msg,
vcpu->arch.mmu.root_level, va, level, ent);
-
- audit_mappings_page(vcpu, ent, va, level - 1);
+ else
+ audit_mappings_page(vcpu, ent, va, level - 1);
} else {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
- hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
+ hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
if (is_shadow_present_pte(ent)
&& (ent & PT64_BASE_ADDR_MASK) != hpa)
gfn_t gfn;
list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
- if (sp->role.metaphysical)
+ if (sp->role.direct)
continue;
gfn = unalias_gfn(vcpu->kvm, sp->gfn);