KVM: Add kvm trace userspace interface
[safe/jmp/linux-2.6] / virt / kvm / kvm_trace.c
/*
 * kvm trace
 *
 * It is designed to allow debugging traces of kvm to be generated
 * on UP / SMP machines.  Each trace entry can be timestamped so that
 * it's possible to reconstruct a chronological record of trace events.
 * The implementation is modeled on the blktrace kernel support.
 *
 * Copyright (c) 2008 Intel Corporation
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
 *
 * Date:    Feb 2008
 */
16
17 #include <linux/module.h>
18 #include <linux/relay.h>
19 #include <linux/debugfs.h>
20
21 #include <linux/kvm_host.h>
22
/* Values stored in kvm_trace.trace_state. */
#define KVM_TRACE_STATE_RUNNING         (1 << 0)        /* records are being written */
#define KVM_TRACE_STATE_PAUSE           (1 << 1)        /* set by kvm_trace_pause() */
#define KVM_TRACE_STATE_CLEARUP         (1 << 2)        /* set by kvm_trace_cleanup() */
26
27 struct kvm_trace {
28         int trace_state;
29         struct rchan *rchan;
30         struct dentry *lost_file;
31         atomic_t lost_records;
32 };
33 static struct kvm_trace *kvm_trace;
34
35 struct kvm_trace_probe {
36         const char *name;
37         const char *format;
38         u32 cycle_in;
39         marker_probe_func *probe_func;
40 };
41
42 static inline int calc_rec_size(int cycle, int extra)
43 {
44         int rec_size = KVM_TRC_HEAD_SIZE;
45
46         rec_size += extra;
47         return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
48 }
49
50 static void kvm_add_trace(void *probe_private, void *call_data,
51                           const char *format, va_list *args)
52 {
53         struct kvm_trace_probe *p = probe_private;
54         struct kvm_trace *kt = kvm_trace;
55         struct kvm_trace_rec rec;
56         struct kvm_vcpu *vcpu;
57         int    i, extra, size;
58
59         if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
60                 return;
61
62         rec.event       = va_arg(*args, u32);
63         vcpu            = va_arg(*args, struct kvm_vcpu *);
64         rec.pid         = current->tgid;
65         rec.vcpu_id     = vcpu->vcpu_id;
66
67         extra           = va_arg(*args, u32);
68         WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
69         extra           = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
70         rec.extra_u32   = extra;
71
72         rec.cycle_in    = p->cycle_in;
73
74         if (rec.cycle_in) {
75                 u64 cycle = 0;
76
77                 cycle = get_cycles();
78                 rec.u.cycle.cycle_lo = (u32)cycle;
79                 rec.u.cycle.cycle_hi = (u32)(cycle >> 32);
80
81                 for (i = 0; i < rec.extra_u32; i++)
82                         rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
83         } else {
84                 for (i = 0; i < rec.extra_u32; i++)
85                         rec.u.nocycle.extra_u32[i] = va_arg(*args, u32);
86         }
87
88         size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32));
89         relay_write(kt->rchan, &rec, size);
90 }
91
92 static struct kvm_trace_probe kvm_trace_probes[] = {
93         { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
94         { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
95 };
96
97 static int lost_records_get(void *data, u64 *val)
98 {
99         struct kvm_trace *kt = data;
100
101         *val = atomic_read(&kt->lost_records);
102         return 0;
103 }
104
105 DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
106
107 /*
108  *  The relay channel is used in "no-overwrite" mode, it keeps trace of how
109  *  many times we encountered a full subbuffer, to tell user space app the
110  *  lost records there were.
111  */
112 static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
113                                      void *prev_subbuf, size_t prev_padding)
114 {
115         struct kvm_trace *kt;
116
117         if (!relay_buf_full(buf))
118                 return 1;
119
120         kt = buf->chan->private_data;
121         atomic_inc(&kt->lost_records);
122
123         return 0;
124 }
125
126 static struct dentry *kvm_create_buf_file_callack(const char *filename,
127                                                  struct dentry *parent,
128                                                  int mode,
129                                                  struct rchan_buf *buf,
130                                                  int *is_global)
131 {
132         return debugfs_create_file(filename, mode, parent, buf,
133                                    &relay_file_operations);
134 }
135
/*
 * Relay callback: remove the debugfs file of a relay buffer.
 * Always returns 0.
 */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
        debugfs_remove(dentry);

        return 0;
}
141
142 static struct rchan_callbacks kvm_relay_callbacks = {
143         .subbuf_start           = kvm_subbuf_start_callback,
144         .create_buf_file        = kvm_create_buf_file_callack,
145         .remove_buf_file        = kvm_remove_buf_file_callback,
146 };
147
148 static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
149 {
150         struct kvm_trace *kt;
151         int i, r = -ENOMEM;
152
153         if (!kuts->buf_size || !kuts->buf_nr)
154                 return -EINVAL;
155
156         kt = kzalloc(sizeof(*kt), GFP_KERNEL);
157         if (!kt)
158                 goto err;
159
160         r = -EIO;
161         atomic_set(&kt->lost_records, 0);
162         kt->lost_file = debugfs_create_file("lost_records", 0444, debugfs_dir,
163                                             kt, &kvm_trace_lost_ops);
164         if (!kt->lost_file)
165                 goto err;
166
167         kt->rchan = relay_open("trace", debugfs_dir, kuts->buf_size,
168                                 kuts->buf_nr, &kvm_relay_callbacks, kt);
169         if (!kt->rchan)
170                 goto err;
171
172         kvm_trace = kt;
173
174         for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
175                 struct kvm_trace_probe *p = &kvm_trace_probes[i];
176
177                 r = marker_probe_register(p->name, p->format, p->probe_func, p);
178                 if (r)
179                         printk(KERN_INFO "Unable to register probe %s\n",
180                                p->name);
181         }
182
183         kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
184
185         return 0;
186 err:
187         if (kt) {
188                 if (kt->lost_file)
189                         debugfs_remove(kt->lost_file);
190                 if (kt->rchan)
191                         relay_close(kt->rchan);
192                 kfree(kt);
193         }
194         return r;
195 }
196
197 static int kvm_trace_enable(char __user *arg)
198 {
199         struct kvm_user_trace_setup kuts;
200         int ret;
201
202         ret = copy_from_user(&kuts, arg, sizeof(kuts));
203         if (ret)
204                 return -EFAULT;
205
206         ret = do_kvm_trace_enable(&kuts);
207         if (ret)
208                 return ret;
209
210         return 0;
211 }
212
213 static int kvm_trace_pause(void)
214 {
215         struct kvm_trace *kt = kvm_trace;
216         int r = -EINVAL;
217
218         if (kt == NULL)
219                 return r;
220
221         if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
222                 kt->trace_state = KVM_TRACE_STATE_PAUSE;
223                 relay_flush(kt->rchan);
224                 r = 0;
225         }
226
227         return r;
228 }
229
230 void kvm_trace_cleanup(void)
231 {
232         struct kvm_trace *kt = kvm_trace;
233         int i;
234
235         if (kt == NULL)
236                 return;
237
238         if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
239             kt->trace_state == KVM_TRACE_STATE_PAUSE) {
240
241                 kt->trace_state = KVM_TRACE_STATE_CLEARUP;
242
243                 for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
244                         struct kvm_trace_probe *p = &kvm_trace_probes[i];
245                         marker_probe_unregister(p->name, p->probe_func, p);
246                 }
247
248                 relay_close(kt->rchan);
249                 debugfs_remove(kt->lost_file);
250                 kfree(kt);
251         }
252 }
253
254 int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
255 {
256         void __user *argp = (void __user *)arg;
257         long r = -EINVAL;
258
259         if (!capable(CAP_SYS_ADMIN))
260                 return -EPERM;
261
262         switch (ioctl) {
263         case KVM_TRACE_ENABLE:
264                 r = kvm_trace_enable(argp);
265                 break;
266         case KVM_TRACE_PAUSE:
267                 r = kvm_trace_pause();
268                 break;
269         case KVM_TRACE_DISABLE:
270                 r = 0;
271                 kvm_trace_cleanup();
272                 break;
273         }
274
275         return r;
276 }