2 * s390host.c -- hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
15 #include <linux/compiler.h>
16 #include <linux/err.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/module.h>
22 #include <linux/slab.h>
23 #include <linux/timer.h>
24 #include <asm/lowcore.h>
25 #include <asm/pgtable.h>
/* Expand to the (offset, type) pair a kvm_stats_debugfs_item entry needs
 * for the per-vcpu counter named x inside struct kvm_vcpu's stat member. */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/* Per-vcpu statistics exported through debugfs: file name -> counter in
 * struct kvm_vcpu (see VCPU_STAT).
 * NOTE(review): the terminating { NULL } sentinel and closing brace are
 * missing from this de-duplicated listing -- restore from the full source. */
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "instruction_lctg", VCPU_STAT(instruction_lctg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
/* Section: not file related */

/*
 * Hooks required by the generic kvm module.  On s390 most of them have
 * nothing to do.
 * NOTE(review): this listing is de-duplicated -- the opening/closing
 * braces and bare return lines of these stubs are missing and must be
 * restored from the full source before compiling.
 */
void kvm_arch_hardware_enable(void *garbage)
	/* every s390 is virtualization enabled ;-) */

void kvm_arch_hardware_disable(void *garbage)

void decache_vcpus_on_cpu(int cpu)

int kvm_arch_hardware_setup(void)

void kvm_arch_hardware_unsetup(void)

void kvm_arch_check_processor_compat(void *rtn)

int kvm_arch_init(void *opaque)

void kvm_arch_exit(void)
/* Section: device related */

/*
 * /dev/kvm ioctls: only KVM_S390_ENABLE_SIE is understood here; it
 * prepares the calling process so SIE guests can be run (s390_enable_sie).
 * NOTE(review): braces and the trailing fallback return (-EINVAL in the
 * full source -- confirm) are missing from this listing.
 */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();

/* No optional capabilities are reported yet (body missing from listing;
 * presumably returns 0 -- confirm against the full source). */
int kvm_dev_ioctl_check_extension(long ext)
/* Section: vm related */

/*
 * Get (and clear) the dirty memory log for a memory slot.
 * NOTE(review): body missing from this listing -- dirty logging appears
 * unimplemented on s390 at this point; confirm against the full source.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
/*
 * VM-scope ioctls.  The only case visible here is KVM_S390_INTERRUPT,
 * which copies a kvm_s390_interrupt from userspace and injects it as a
 * floating interrupt into the VM.
 * NOTE(review): the switch header, -EFAULT path after copy_from_user,
 * default case and returns are missing from this de-duplicated listing.
 */
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;

	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
		r = kvm_s390_inject_vm(kvm, &s390int);
/*
 * Allocate and initialize a VM: enable SIE for the process, allocate the
 * struct kvm and the system control area (SCA) page, set up the s390
 * debug feature log and the floating-interrupt list.
 * NOTE(review): declarations, error checks/labels and returns are missing
 * from this de-duplicated listing; the final free_page() presumably
 * belongs to the error-unwind path -- confirm against the full source.
 */
struct kvm *kvm_arch_create_vm(void)
	rc = s390_enable_sie();

	kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);

	/* one zeroed page holds the SCA shared with the SIE hardware */
	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);

	/* per-VM s390 debug-feature log named after the creating pid */
	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));

	/* floating (not cpu-bound) interrupts live on this locked list */
	spin_lock_init(&kvm->arch.float_int.lock);
	INIT_LIST_HEAD(&kvm->arch.float_int.list);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "%s", "vm created");

	/* pin the module while a VM exists */
	try_module_get(THIS_MODULE);

	free_page((unsigned long)(kvm->arch.sca));
/*
 * Tear down a VM: release the debug-feature log and the SCA page, and
 * drop the module reference taken in kvm_arch_create_vm().
 * NOTE(review): braces (and any vcpu teardown) missing from this listing.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)(kvm->arch.sca));
	module_put(THIS_MODULE);
/* Section: vcpu related */

/* Generic-code hooks; nothing to do on s390 (bodies missing from this
 * de-duplicated listing). */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
	/* kvm common code refers to this, but doesn't call it */
/*
 * Called when the vcpu is scheduled in on a host cpu: save the host FPU
 * and access registers, then install the guest's (with the fpc masked to
 * valid bits).  A pending host signal requests a stop intercept so SIE
 * exits promptly.
 * NOTE(review): braces missing from this listing.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	save_fp_regs(&vcpu->arch.host_fpregs);
	save_access_regs(vcpu->arch.host_acrs);
	/* never load an invalid fp control value into the hardware */
	vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
	restore_fp_regs(&vcpu->arch.guest_fpregs);
	restore_access_regs(vcpu->arch.guest_acrs);

	if (signal_pending(current))
		atomic_set_mask(CPUSTAT_STOP_INT,
			&vcpu->arch.sie_block->cpuflags);
/*
 * Counterpart of kvm_arch_vcpu_load(): stash the guest FPU and access
 * registers and restore the host's on schedule-out.
 * NOTE(review): braces missing from this listing.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
	save_fp_regs(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->arch.guest_acrs);
	restore_fp_regs(&vcpu->arch.host_fpregs);
	restore_access_regs(vcpu->arch.host_acrs);
/*
 * Reset the vcpu to the architected initial-CPU-reset state (z/Arch
 * Principles of Operation), except that the guest stays in z/Arch mode.
 * NOTE(review): braces missing from this listing.
 */
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	vcpu->arch.sie_block->prefix = 0UL;
	/* 0xffff presumably marks the intercepted-instruction cache invalid
	 * -- confirm against SIE documentation */
	vcpu->arch.sie_block->ihcpu = 0xffff;
	vcpu->arch.sie_block->cputm = 0UL;	/* cpu timer */
	vcpu->arch.sie_block->ckc = 0UL;	/* clock comparator */
	vcpu->arch.sie_block->todpr = 0;	/* TOD programmable register */
	/* control registers: all zero except the architected reset values
	 * in cr0 and cr14 */
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* clear the guest fpc and load it into the hardware as well */
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
/*
 * One-time SIE control-block setup for a new vcpu: z/Arch mode, guest
 * memory window, execution controls, and the clock-comparator wakeup
 * timer used while the guest waits.
 * NOTE(review): braces and return missing from this listing; the gmslm /
 * gmsor / ecb / eca magic values are SIE-defined -- confirm their meaning
 * against the SIE documentation rather than this comment.
 */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
	vcpu->arch.sie_block->gmslm = 0xffffffffffUL;	/* guest mem limit */
	vcpu->arch.sie_block->gmsor = 0x000000000000;	/* guest mem origin */
	vcpu->arch.sie_block->ecb = 2;
	vcpu->arch.sie_block->eca = 0xC1002001U;
	/* fires kvm_s390_idle_wakeup when the guest's clock comparator hits
	 * while the vcpu is in wait state */
	setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
		    (unsigned long) vcpu);
/*
 * Allocate a vcpu and its SIE control block, publish the block in the
 * VM's SCA, and wire the vcpu's local interrupt structure into the VM's
 * floating-interrupt bookkeeping.
 * NOTE(review): allocation-failure checks, error labels and the final
 * return are missing from this de-duplicated listing.
 */
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
	struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);

	/* the SIE block must be a full zeroed page of its own */
	vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);

	if (!vcpu->arch.sie_block)

	vcpu->arch.sie_block->icpua = id;
	/* register the SIE block in the per-VM system control area; the
	 * 64-bit SCA address is split into high/low 32-bit halves */
	BUG_ON(!kvm->arch.sca);
	BUG_ON(kvm->arch.sca->cpu[id].sda);
	kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;

	/* make this vcpu visible to floating-interrupt delivery */
	spin_lock_init(&vcpu->arch.local_int.lock);
	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	spin_lock_bh(&kvm->arch.float_int.lock);
	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
	init_waitqueue_head(&vcpu->arch.local_int.wq);
	spin_unlock_bh(&kvm->arch.float_int.lock);

	rc = kvm_vcpu_init(vcpu, kvm, id);

	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);

	/* pin the module while the vcpu exists */
	try_module_get(THIS_MODULE);
/*
 * Free a vcpu's SIE control-block page and drop the module reference
 * taken in kvm_arch_vcpu_create().
 * NOTE(review): braces missing from this listing.
 */
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
	VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
	free_page((unsigned long)(vcpu->arch.sie_block));
	module_put(THIS_MODULE);
/* Required by generic code but unused on s390 (body missing from this
 * listing; presumably returns 0 -- confirm against the full source). */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
	/* kvm common code refers to this, but never calls it */

/* KVM_S390_INITIAL_RESET ioctl backend: perform the architected initial
 * CPU reset (locking/return lines missing from this listing). */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
	kvm_s390_vcpu_initial_reset(vcpu);
319 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
322 memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs));
327 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
330 memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
/* Copy access registers and control registers from userspace into the
 * guest (braces/locking/returns missing from this listing). */
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
	memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

/* Counterpart: copy access and control registers out to userspace. */
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
	memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/* Copy floating point registers and fp control word from userspace into
 * the guest (braces/locking/returns missing from this listing). */
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;

/* Counterpart: copy fp registers and fp control word out to userspace. */
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
/*
 * KVM_S390_SET_INITIAL_PSW backend: install a caller-supplied PSW, but
 * refuse while the vcpu is currently executing.
 * NOTE(review): braces and return values (presumably -EBUSY / 0) are
 * missing from this listing -- confirm against the full source.
 */
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
	if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
	vcpu->arch.sie_block->gpsw = psw;
/* Address translation ioctl: not implemented on s390. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
	return -EINVAL; /* not implemented yet */

/* Guest debugging ioctl: not implemented on s390. */
int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
				    struct kvm_debug_guest *dbg)
	return -EINVAL; /* not implemented yet */
/*
 * Perform one entry into SIE: mirror guest gprs 14/15 into the SIE
 * block's gg14 shadow (16 bytes covers both), clear the intercept code,
 * run the guest via sie64a(), then copy the shadow back.
 * NOTE(review): braces and intervening lines missing from this listing.
 */
static void __vcpu_run(struct kvm_vcpu *vcpu)
	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);

	/* a fresh entry has no intercept reason yet */
	vcpu->arch.sie_block->icptcode = 0;

	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
		   atomic_read(&vcpu->arch.sie_block->cpuflags));
	sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);

	memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
/*
 * KVM_RUN: enter SIE in a loop, delivering pending interrupts before each
 * entry and handling intercepts in-kernel, until a host signal arrives or
 * an intercept needs userspace (-ENOTSUPP / -EREMOTE from the handler).
 * NOTE(review): the do-loop header, locking, several returns and closing
 * braces are missing from this de-duplicated listing.
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
	/* run under the caller-supplied signal mask; restored on exit */
	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);

	/* vcpu_create must have published this vcpu's local_int */
	BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);

	/* re-load the PSW userspace may have edited in kvm_run */
	switch (kvm_run->exit_reason) {
	case KVM_EXIT_S390_SIEIC:
		vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
	case KVM_EXIT_UNKNOWN:
	case KVM_EXIT_S390_RESET:

		kvm_s390_deliver_pending_interrupts(vcpu);
		rc = kvm_handle_sie_intercept(vcpu);
	} while (!signal_pending(current) && !rc);

	/* interrupted by a signal with nothing else to report */
	if (signal_pending(current) && !rc)

	if (rc == -ENOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
		kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
		kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed
		 * kvm_run has been prepared by the handler */

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
/*
 * Copy n bytes into guest storage at guestdest.  When prefix is set the
 * destination goes through guest-real addressing (copy_to_guest, which
 * honours the prefix area); otherwise the copy targets guest absolute
 * storage.
 * NOTE(review): the "if (prefix) ... else ..." lines and braces are
 * missing from this de-duplicated listing.
 */
static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
		       unsigned long n, int prefix)
		return copy_to_guest(vcpu, guestdest, from, n);
		return copy_to_guest_absolute(vcpu, guestdest, from, n);
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
	const unsigned char archmode = 1;

	/* for the two special addresses, flag z/Arch mode in byte 163
	 * (absolute vs. prefixed, matching the requested addressing) and
	 * then store into the architected save area.
	 * NOTE(review): the "prefix" variable's declaration/assignments,
	 * the -EFAULT returns and the closing braces are missing from this
	 * de-duplicated listing. */
	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
		addr = SAVE_AREA_BASE;

	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
		addr = SAVE_AREA_BASE;

	/* 16 fp registers, 8 bytes each */
	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
			vcpu->arch.guest_fpregs.fprs, 128, prefix))

	/* 16 general purpose registers, 8 bytes each */
	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
			vcpu->arch.guest_gprs, 128, prefix))

	/* 16-byte guest PSW */
	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
			&vcpu->arch.sie_block->gpsw, 16, prefix))

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
			&vcpu->arch.sie_block->prefix, 4, prefix))

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
			&vcpu->arch.guest_fpregs.fpc, 4, prefix))

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
			&vcpu->arch.sie_block->todpr, 4, prefix))

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
			&vcpu->arch.sie_block->cputm, 8, prefix))

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
			&vcpu->arch.sie_block->ckc, 8, prefix))

	/* 16 access registers, 4 bytes each */
	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
			&vcpu->arch.guest_acrs, 64, prefix))

	/* 16 control registers, 8 bytes each */
	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area_s390x, ctrl_regs),
			&vcpu->arch.sie_block->gcr, 128, prefix))
/* Locking wrapper around __kvm_s390_vcpu_store_status() (the lock/unlock
 * and return lines are missing from this de-duplicated listing). */
static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
	rc = __kvm_s390_vcpu_store_status(vcpu, addr);
/*
 * Per-vcpu ioctls: inject an interrupt, store status, set the initial
 * PSW, or perform the initial CPU reset.
 * NOTE(review): the switch header, -EFAULT paths after copy_from_user,
 * the psw declaration, default case and closing braces are missing from
 * this de-duplicated listing.
 */
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
		return kvm_s390_inject_vcpu(vcpu, &s390int);

	case KVM_S390_STORE_STATUS:
		/* arg is the store address itself, not a user pointer */
		return kvm_s390_vcpu_store_status(vcpu, arg);
	case KVM_S390_SET_INITIAL_PSW: {
		if (copy_from_user(&psw, argp, sizeof(psw)))
		return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);

	case KVM_S390_INITIAL_RESET:
		return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
/* Section: memory related */

/*
 * Install the guest memory slot and record its origin/size in the arch
 * state.  s390 supports exactly one slot, anchored at guest physical 0.
 * NOTE(review): the remaining signature parameters, the -EINVAL returns
 * after each check and the closing brace are missing from this
 * de-duplicated listing.
 */
int kvm_arch_set_memory_region(struct kvm *kvm,
			       struct kvm_userspace_memory_region *mem,
			       struct kvm_memory_slot old,
	/* A few sanity checks. We can have exactly one memory slot which has
	   to start at guest virtual zero and which has to be located at a
	   page boundary in userland and which has to end at a page boundary.
	   The memory in userland is ok to be fragmented into various different
	   vmas. It is okay to mmap() and munmap() stuff in this slot after
	   doing this call at any time */

	/* the single slot must map guest physical address 0 */
	if (mem->guest_phys_addr)

	/* userland start and size must be page aligned */
	if (mem->userspace_addr & (PAGE_SIZE - 1))

	if (mem->memory_size & (PAGE_SIZE - 1))

	kvm->arch.guest_origin = mem->userspace_addr;
	kvm->arch.guest_memsize = mem->memory_size;

	/* FIXME: we do want to interrupt running CPUs and update their memory
	   configuration now to avoid race conditions. But hey, changing the
	   memory layout while virtual CPUs are running is usually bad
	   programming practice. */
/* Memory-alias hook required by generic code; s390 has no aliases, so the
 * gfn presumably maps to itself (body missing from this listing --
 * confirm against the full source). */
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
/* Module entry point: register with the generic kvm core, sizing each
 * vcpu allocation (braces missing from this listing). */
static int __init kvm_s390_init(void)
	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);

/* Module exit point (body missing from this listing; presumably calls
 * kvm_exit() -- confirm against the full source). */
static void __exit kvm_s390_exit(void)

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);