X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fkvm%2Fmmu.c;h=c738fb1cea305eddbcca4563d5905c2ffd173d8a;hb=d28c6cfbbc5e2d4fccfe6d733995ed5971ca87f6;hp=c6f972914f082e9af20ca6af54d155104ef5d2ba;hpb=68a99f6d37aa65e848e09ec6ea52848e93bd5de2;p=safe%2Fjmp%2Flinux-2.6

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index c6f9729..c738fb1 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -131,7 +131,7 @@ static int dbg = 1;
 	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
 
 
-#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & PAGE_MASK)
+#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
 #define PT64_DIR_BASE_ADDR_MASK \
 	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
 
@@ -143,6 +143,7 @@ static int dbg = 1;
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_FETCH_MASK (1U << 4)
 
 #define PT64_ROOT_LEVEL 4
 #define PT32_ROOT_LEVEL 2
@@ -168,6 +169,11 @@ static int is_cpuid_PSE36(void)
 	return 1;
 }
 
+static int is_nx(struct kvm_vcpu *vcpu)
+{
+	return vcpu->shadow_efer & EFER_NX;
+}
+
 static int is_present_pte(unsigned long pte)
 {
 	return pte & PT_PRESENT_MASK;
@@ -292,18 +298,18 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
 	if (!is_rmap_pte(*spte))
 		return;
 	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
-	if (!page->private) {
+	if (!page_private(page)) {
 		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
-		page->private = (unsigned long)spte;
-	} else if (!(page->private & 1)) {
+		set_page_private(page,(unsigned long)spte);
+	} else if (!(page_private(page) & 1)) {
 		rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
 		desc = mmu_alloc_rmap_desc(vcpu);
-		desc->shadow_ptes[0] = (u64 *)page->private;
+		desc->shadow_ptes[0] = (u64 *)page_private(page);
 		desc->shadow_ptes[1] = spte;
-		page->private = (unsigned long)desc | 1;
+		set_page_private(page,(unsigned long)desc | 1);
 	} else {
 		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
-		desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
 		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
 			desc = desc->more;
 		if (desc->shadow_ptes[RMAP_EXT-1]) {
@@ -327,16 +333,16 @@ static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu,
 	for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
 		;
 	desc->shadow_ptes[i] = desc->shadow_ptes[j];
-	desc->shadow_ptes[j] = 0;
+	desc->shadow_ptes[j] = NULL;
 	if (j != 0)
 		return;
 	if (!prev_desc && !desc->more)
-		page->private = (unsigned long)desc->shadow_ptes[0];
+		set_page_private(page,(unsigned long)desc->shadow_ptes[0]);
 	else
 		if (prev_desc)
 			prev_desc->more = desc->more;
 		else
-			page->private = (unsigned long)desc->more | 1;
+			set_page_private(page,(unsigned long)desc->more | 1);
 	mmu_free_rmap_desc(vcpu, desc);
 }
 
@@ -350,20 +356,20 @@ static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte)
 	if (!is_rmap_pte(*spte))
 		return;
 	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
-	if (!page->private) {
+	if (!page_private(page)) {
 		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
 		BUG();
-	} else if (!(page->private & 1)) {
+	} else if (!(page_private(page) & 1)) {
 		rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
-		if ((u64 *)page->private != spte) {
+		if ((u64 *)page_private(page) != spte) {
 			printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
 			       spte, *spte);
 			BUG();
 		}
-		page->private = 0;
+		set_page_private(page,0);
 	} else {
 		rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
-		desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
 		prev_desc = NULL;
 		while (desc) {
 			for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
@@ -392,16 +398,16 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 	BUG_ON(!slot);
 	page = gfn_to_page(slot, gfn);
 
-	while (page->private) {
-		if (!(page->private & 1))
-			spte = (u64 *)page->private;
+	while (page_private(page)) {
+		if (!(page_private(page) & 1))
+			spte = (u64 *)page_private(page);
 		else {
-			desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+			desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
 			spte = desc->shadow_ptes[0];
 		}
 		BUG_ON(!spte);
-		BUG_ON((*spte & PT64_BASE_ADDR_MASK) !=
-		       page_to_pfn(page) << PAGE_SHIFT);
+		BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT
+		       != page_to_pfn(page));
 		BUG_ON(!(*spte & PT_PRESENT_MASK));
 		BUG_ON(!(*spte & PT_WRITABLE_MASK));
 		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
@@ -455,7 +461,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	list_add(&page->link, &vcpu->kvm->active_mmu_pages);
 	ASSERT(is_empty_shadow_page(page->page_hpa));
 	page->slot_bitmap = 0;
-	page->global = 1;
 	page->multimapped = 0;
 	page->parent_pte = parent_pte;
 	--vcpu->kvm->n_free_mmu_pages;
@@ -563,6 +568,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gva_t gaddr,
 					     unsigned level,
 					     int metaphysical,
+					     unsigned hugepage_access,
 					     u64 *parent_pte)
 {
 	union kvm_mmu_page_role role;
@@ -576,6 +582,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	role.glevels = vcpu->mmu.root_level;
 	role.level = level;
 	role.metaphysical = metaphysical;
+	role.hugepage_access = hugepage_access;
 	if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
 		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
@@ -729,6 +736,15 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
 	return gpa_to_hpa(vcpu, gpa);
 }
 
+struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
+{
+	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
+
+	if (gpa == UNMAPPED_GVA)
+		return NULL;
+	return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT);
+}
+
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
@@ -766,7 +782,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 				>> PAGE_SHIFT;
 			new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
 						     v, level - 1,
-						     1, &table[index]);
+						     1, 0, &table[index]);
 			if (!new_table) {
 				pgprintk("nonpaging_map: ENOMEM\n");
 				return -ENOMEM;
@@ -821,7 +837,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
 		ASSERT(!VALID_PAGE(root));
 		page = kvm_mmu_get_page(vcpu, root_gfn, 0,
-					PT64_ROOT_LEVEL, 0, NULL);
+					PT64_ROOT_LEVEL, 0, 0, NULL);
 		root = page->page_hpa;
 		++page->root_count;
 		vcpu->mmu.root_hpa = root;
@@ -838,7 +854,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 			root_gfn = 0;
 		page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
 					PT32_ROOT_LEVEL, !is_paging(vcpu),
-					NULL);
+					0, NULL);
 		root = page->page_hpa;
 		++page->root_count;
 		vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
@@ -912,11 +928,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu)
 	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
 }
 
-static void mark_pagetable_nonglobal(void *shadow_pte)
-{
-	page_header(__pa(shadow_pte))->global = 0;
-}
-
 static inline void set_pte_common(struct kvm_vcpu *vcpu,
 			     u64 *shadow_pte,
 			     gpa_t gaddr,
@@ -934,9 +945,6 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
 
 	*shadow_pte |= access_bits;
 
-	if (!(*shadow_pte & PT_GLOBAL_MASK))
-		mark_pagetable_nonglobal(shadow_pte);
-
 	if (is_error_hpa(paddr)) {
 		*shadow_pte |= gaddr;
 		*shadow_pte |= PT_SHADOW_IO_MARK;
@@ -992,16 +1000,6 @@ static inline int fix_read_pf(u64 *shadow_ent)
 	return 0;
 }
 
-static int may_access(u64 pte, int write, int user)
-{
-
-	if (user && !(pte & PT_USER_MASK))
-		return 0;
-	if (write && !(pte & PT_WRITABLE_MASK))
-		return 0;
-	return 1;
-}
-
 static void paging_free(struct kvm_vcpu *vcpu)
 {
 	nonpaging_free(vcpu);
@@ -1097,22 +1095,40 @@ out:
 	return r;
 }
 
+static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu_page *page,
+				  u64 *spte)
+{
+	u64 pte;
+	struct kvm_mmu_page *child;
+
+	pte = *spte;
+	if (is_present_pte(pte)) {
+		if (page->role.level == PT_PAGE_TABLE_LEVEL)
+			rmap_remove(vcpu, spte);
+		else {
+			child = page_header(pte & PT64_BASE_ADDR_MASK);
+			mmu_page_remove_parent_pte(vcpu, child, spte);
+		}
+	}
+	*spte = 0;
+}
+
 void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	struct kvm_mmu_page *page;
-	struct kvm_mmu_page *child;
 	struct hlist_node *node, *n;
 	struct hlist_head *bucket;
 	unsigned index;
 	u64 *spte;
-	u64 pte;
 	unsigned offset = offset_in_page(gpa);
 	unsigned pte_size;
 	unsigned page_offset;
 	unsigned misaligned;
 	int level;
 	int flooded = 0;
+	int npte;
 
 	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
 	if (gfn == vcpu->last_pt_write_gfn) {
@@ -1148,22 +1164,27 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 		}
 		page_offset = offset;
 		level = page->role.level;
+		npte = 1;
 		if (page->role.glevels == PT32_ROOT_LEVEL) {
-			page_offset <<= 1;	/* 32->64 */
+			page_offset <<= 1;	/* 32->64 */
+			/*
+			 * A 32-bit pde maps 4MB while the shadow pdes map
+			 * only 2MB.  So we need to double the offset again
+			 * and zap two pdes instead of one.
+			 */
+			if (level == PT32_ROOT_LEVEL) {
+				page_offset &= ~7; /* kill rounding error */
+				page_offset <<= 1;
+				npte = 2;
+			}
 			page_offset &= ~PAGE_MASK;
 		}
 		spte = __va(page->page_hpa);
 		spte += page_offset / sizeof(*spte);
-		pte = *spte;
-		if (is_present_pte(pte)) {
-			if (level == PT_PAGE_TABLE_LEVEL)
-				rmap_remove(vcpu, spte);
-			else {
-				child = page_header(pte & PT64_BASE_ADDR_MASK);
-				mmu_page_remove_parent_pte(vcpu, child, spte);
-			}
+		while (npte--) {
+			mmu_pre_write_zap_pte(vcpu, page, spte);
+			++spte;
 		}
-		*spte = 0;
 	}
 }
 
@@ -1222,7 +1243,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 		INIT_LIST_HEAD(&page_header->link);
 		if ((page = alloc_page(GFP_KERNEL)) == NULL)
 			goto error_1;
-		page->private = (unsigned long)page_header;
+		set_page_private(page, (unsigned long)page_header);
 		page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
 		memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
 		list_add(&page_header->link, &vcpu->free_pages);
@@ -1341,7 +1362,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 
 static void audit_mappings(struct kvm_vcpu *vcpu)
 {
-	int i;
+	unsigned i;
 
 	if (vcpu->mmu.root_level == 4)
 		audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);