X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fkvm%2Fkvm.h;h=db2bc6f168cd968b1664269b982a2f0baa90eadb;hb=2cc51560aed0edb291341089d3475e1fbe8bfd04;hp=c2db4218d6ccfce71ce649fea9a7e28cef131a23;hpb=a9058ecd3cd72634cf548588ce79b3f225c9ca32;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index c2db421..db2bc6f 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -11,11 +11,14 @@ #include #include #include +#include #include "vmx.h" #include +#include #define CR0_PE_MASK (1ULL << 0) +#define CR0_MP_MASK (1ULL << 1) #define CR0_TS_MASK (1ULL << 3) #define CR0_NE_MASK (1ULL << 5) #define CR0_WP_MASK (1ULL << 16) @@ -40,7 +43,8 @@ (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \ | CR0_NW_MASK | CR0_CD_MASK) #define KVM_VM_CR0_ALWAYS_ON \ - (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK) + (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK | CR0_TS_MASK \ + | CR0_MP_MASK) #define KVM_GUEST_CR4_MASK \ (CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK) #define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK) @@ -50,14 +54,19 @@ #define UNMAPPED_GVA (~(gpa_t)0) #define KVM_MAX_VCPUS 1 +#define KVM_ALIAS_SLOTS 4 #define KVM_MEMORY_SLOTS 4 -#define KVM_NUM_MMU_PAGES 256 +#define KVM_NUM_MMU_PAGES 1024 +#define KVM_MIN_FREE_MMU_PAGES 5 +#define KVM_REFILL_PAGES 25 +#define KVM_MAX_CPUID_ENTRIES 40 #define FX_IMAGE_SIZE 512 #define FX_IMAGE_ALIGN 16 #define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN) #define DE_VECTOR 0 +#define NM_VECTOR 7 #define DF_VECTOR 8 #define TS_VECTOR 10 #define NP_VECTOR 11 @@ -70,6 +79,8 @@ #define IOPL_SHIFT 12 +#define KVM_PIO_PAGE_OFFSET 1 + /* * Address types: * @@ -89,14 +100,55 @@ typedef unsigned long hva_t; typedef u64 hpa_t; typedef unsigned long hfn_t; +#define NR_PTE_CHAIN_ENTRIES 5 + +struct kvm_pte_chain { + u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES]; + struct hlist_node link; +}; + +/* + * kvm_mmu_page_role, below, is defined as: + * + * bits 0:3 - total guest paging levels (2-4, or zero for real mode) + * bits 4:7 - page table level for this shadow (1-4) + * bits 8:9 - page table quadrant for 2-level guests + * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) + * bits 17:18 - "access" - the user and writable bits of a huge page pde + */ +union kvm_mmu_page_role { + unsigned word; + struct { + unsigned glevels : 4; + unsigned level : 4; + unsigned quadrant : 2; + unsigned pad_for_nice_hex_output : 6; + unsigned metaphysical : 1; + unsigned hugepage_access : 2; + }; +}; + struct kvm_mmu_page { struct list_head link; - hpa_t page_hpa; + struct hlist_node hash_link; + + /* + * The following two entries are used to key the shadow page in the + * hash table. + */ + gfn_t gfn; + union kvm_mmu_page_role role; + + u64 *spt; unsigned long slot_bitmap; /* One bit set per slot which has memory * in this shadow page. */ - int global; /* Set if all ptes in this page are global */ - u64 *parent_pte; + int multimapped; /* More than one parent_pte? */ + int root_count; /* Currently serving as active root */ + union { + u64 *parent_pte; /* !multimapped */ + struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ + }; }; struct vmcs { @@ -117,14 +169,26 @@ struct kvm_vcpu; struct kvm_mmu { void (*new_cr3)(struct kvm_vcpu *vcpu); int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); - void (*inval_page)(struct kvm_vcpu *vcpu, gva_t gva); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); hpa_t root_hpa; int root_level; int shadow_root_level; + + u64 *pae_root; }; +#define KVM_NR_MEM_OBJS 20 + +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. + */ struct kvm_guest_debug { int enabled; unsigned long bp[4]; @@ -164,6 +228,36 @@ enum { VCPU_SREG_LDTR, }; +struct kvm_pio_request { + unsigned long count; + int cur_count; + struct page *guest_pages[2]; + unsigned guest_page_offset; + int in; + int size; + int string; + int down; + int rep; +}; + +struct kvm_stat { + u32 pf_fixed; + u32 pf_guest; + u32 tlb_flush; + u32 invlpg; + + u32 exits; + u32 io_exits; + u32 mmio_exits; + u32 signal_exits; + u32 irq_window_exits; + u32 halt_exits; + u32 request_irq_exits; + u32 irq_exits; + u32 light_exits; + u32 efer_reload; +}; + struct kvm_vcpu { struct kvm *kvm; union { @@ -173,6 +267,9 @@ struct kvm_vcpu { struct mutex mutex; int cpu; int launched; + u64 host_tsc; + struct kvm_run *run; + int interrupt_window_open; unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) unsigned long irq_pending[NR_IRQ_WORDS]; @@ -182,11 +279,21 @@ struct kvm_vcpu { unsigned long cr0; unsigned long cr2; unsigned long cr3; + gpa_t para_state_gpa; + struct page *para_state_page; + gpa_t hypercall_gpa; unsigned long cr4; unsigned long cr8; + u64 pdptrs[4]; /* pae */ u64 shadow_efer; u64 apic_base; + u64 ia32_misc_enable_msr; int nmsrs; + int save_nmsrs; + int msr_offset_efer; +#ifdef CONFIG_X86_64 + int msr_offset_kernel_gs_base; +#endif struct vmx_msr_entry *guest_msrs; struct vmx_msr_entry *host_msrs; @@ -194,11 +301,24 @@ struct kvm_vcpu { struct kvm_mmu_page page_header_buf[KVM_NUM_MMU_PAGES]; struct kvm_mmu mmu; + struct kvm_mmu_memory_cache mmu_pte_chain_cache; + struct kvm_mmu_memory_cache mmu_rmap_desc_cache; + + gfn_t last_pt_write_gfn; + int last_pt_write_count; + struct kvm_guest_debug guest_debug; char fx_buf[FX_BUF_SIZE]; char *host_fx_image; char *guest_fx_image; + int fpu_active; + int guest_fpu_loaded; + struct vmx_host_state { + int loaded; + u16 fs_sel, gs_sel, ldt_sel; + int fs_gs_ldt_reload_needed; + } vmx_host_state; int mmio_needed; int mmio_read_completed; @@ -206,6 +326,14 @@ struct kvm_vcpu { int mmio_size; unsigned char mmio_data[8]; gpa_t mmio_phys_addr; + gva_t mmio_fault_cr2; + struct kvm_pio_request pio; + void *pio_data; + + int sigset_active; + sigset_t sigset; + + struct kvm_stat stat; struct { int active; @@ -217,6 +345,15 @@ struct kvm_vcpu { u32 ar; } tr, es, ds, fs, gs; } rmode; + + int cpuid_nent; + struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; +}; + +struct kvm_mem_alias { + gfn_t base_gfn; + unsigned long npages; + gfn_t target_gfn; }; struct kvm_memory_slot { @@ -229,25 +366,22 @@ struct kvm_memory_slot { struct kvm { spinlock_t lock; /* protects everything except vcpus */ + int naliases; + struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; int nmemslots; struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS]; + /* + * Hash table of struct kvm_mmu_page. + */ struct list_head active_mmu_pages; + int n_free_mmu_pages; + struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; int memory_config_version; int busy; -}; - -struct kvm_stat { - u32 pf_fixed; - u32 pf_guest; - u32 tlb_flush; - u32 invlpg; - - u32 exits; - u32 io_exits; - u32 mmio_exits; - u32 signal_exits; - u32 irq_exits; + unsigned long rmap_overflow; + struct list_head vm_list; + struct file *filp; }; struct descriptor_table { @@ -266,8 +400,9 @@ struct kvm_arch_ops { int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); - struct kvm_vcpu *(*vcpu_load)(struct kvm_vcpu *vcpu); + void (*vcpu_load)(struct kvm_vcpu *vcpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); + void (*vcpu_decache)(struct kvm_vcpu *vcpu); int (*set_guest_debug)(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg); @@ -279,9 +414,8 @@ struct kvm_arch_ops { void (*set_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); + void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); - void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu, - unsigned long cr0); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); @@ -307,9 +441,10 @@ struct kvm_arch_ops { int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); int (*vcpu_setup)(struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*patch_hypercall)(struct kvm_vcpu *vcpu, + unsigned char *hypercall_addr); }; -extern struct kvm_stat kvm_stat; extern struct kvm_arch_ops *kvm_arch_ops; #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) @@ -318,27 +453,29 @@ extern struct kvm_arch_ops *kvm_arch_ops; int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module); void kvm_exit_arch(void); +int kvm_mmu_module_init(void); +void kvm_mmu_module_exit(void); + void kvm_mmu_destroy(struct kvm_vcpu *vcpu); -int kvm_mmu_init(struct kvm_vcpu *vcpu); +int kvm_mmu_create(struct kvm_vcpu *vcpu); +int kvm_mmu_setup(struct kvm_vcpu *vcpu); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); -void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); +void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot); +void kvm_mmu_zap_all(struct kvm_vcpu *vcpu); hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); +struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); void kvm_emulator_want_group7_invlpg(void); extern hpa_t bad_page_address; -static inline struct page *gfn_to_page(struct kvm_memory_slot *slot, gfn_t gfn) -{ - return slot->phys_mem[gfn - slot->base_gfn]; -} - +struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); @@ -361,6 +498,10 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, struct x86_emulate_ctxt; +int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned long count, int string, int down, + gva_t address, int rep, unsigned port); +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int emulate_clts(struct kvm_vcpu *vcpu); int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, @@ -374,15 +515,16 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); -#ifdef CONFIG_X86_64 -void set_efer(struct kvm_vcpu *vcpu, u64 efer); -#endif +int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); +int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); void fx_init(struct kvm_vcpu *vcpu); void load_msrs(struct vmx_msr_entry *e, int n); void save_msrs(struct vmx_msr_entry *e, int n); void kvm_resched(struct kvm_vcpu *vcpu); +void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); +void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); int kvm_read_guest(struct kvm_vcpu *vcpu, gva_t addr, @@ -396,10 +538,19 @@ int kvm_write_guest(struct kvm_vcpu *vcpu, unsigned long segment_base(u16 selector); -static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn) +void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, + const u8 *old, const u8 *new, int bytes); +int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); +void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); + +int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run); + +static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, + u32 error_code) { - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); - return (slot) ? slot->phys_mem[gfn - slot->base_gfn] : NULL; + if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) + kvm_mmu_free_some_pages(vcpu); + return vcpu->mmu.page_fault(vcpu, gva, error_code); } static inline int is_long_mode(struct kvm_vcpu *vcpu) @@ -435,7 +586,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) { struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); - return (struct kvm_mmu_page *)page->private; + return (struct kvm_mmu_page *)page_private(page); } static inline u16 read_fs(void) @@ -472,7 +623,7 @@ static inline void load_gs(u16 sel) #ifndef load_ldt static inline void load_ldt(u16 sel) { - asm ("lldt %0" : : "g"(sel)); + asm ("lldt %0" : : "rm"(sel)); } #endif @@ -541,19 +692,4 @@ static inline u32 get_rdx_init_val(void) #define TSS_REDIRECTION_SIZE (256 / 8) #define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) -#ifdef CONFIG_X86_64 - -/* - * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. Therefore - * we need to allocate shadow page tables in the first 4GB of memory, which - * happens to fit the DMA32 zone. - */ -#define GFP_KVM_MMU (GFP_KERNEL | __GFP_DMA32) - -#else - -#define GFP_KVM_MMU GFP_KERNEL - -#endif - #endif