/* block/blktrace.c — block layer I/O tracing (blktrace) support */
1 /*
2  * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16  *
17  */
18 #include <linux/kernel.h>
19 #include <linux/blkdev.h>
20 #include <linux/blktrace_api.h>
21 #include <linux/percpu.h>
22 #include <linux/init.h>
23 #include <linux/mutex.h>
24 #include <linux/debugfs.h>
25 #include <linux/time.h>
26 #include <trace/block.h>
27 #include <asm/uaccess.h>
28
/*
 * Trace generation counter; bumped each time a trace is (re)started so
 * per-task PROCESS notes get re-emitted (see trace_note_tsk()).
 */
static unsigned int blktrace_seq __read_mostly = 1;

/* Global reference count of probes */
static DEFINE_MUTEX(blk_probe_mutex);
static atomic_t blk_probes_ref = ATOMIC_INIT(0);

/* first setup registers the tracepoints; last teardown unregisters them */
static int blk_register_tracepoints(void);
static void blk_unregister_tracepoints(void);
37
38 /*
39  * Send out a notify message.
40  */
41 static void trace_note(struct blk_trace *bt, pid_t pid, int action,
42                        const void *data, size_t len)
43 {
44         struct blk_io_trace *t;
45
46         t = relay_reserve(bt->rchan, sizeof(*t) + len);
47         if (t) {
48                 const int cpu = smp_processor_id();
49
50                 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
51                 t->time = ktime_to_ns(ktime_get());
52                 t->device = bt->dev;
53                 t->action = action;
54                 t->pid = pid;
55                 t->cpu = cpu;
56                 t->pdu_len = len;
57                 memcpy((void *) t + sizeof(*t), data, len);
58         }
59 }
60
/*
 * Send out a notify for this process, if we haven't done so since a trace
 * started
 */
static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
{
        /* record the current generation so we only note once per trace run */
        tsk->btrace_seq = blktrace_seq;
        trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm));
}
70
71 static void trace_note_time(struct blk_trace *bt)
72 {
73         struct timespec now;
74         unsigned long flags;
75         u32 words[2];
76
77         getnstimeofday(&now);
78         words[0] = now.tv_sec;
79         words[1] = now.tv_nsec;
80
81         local_irq_save(flags);
82         trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words));
83         local_irq_restore(flags);
84 }
85
86 void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
87 {
88         int n;
89         va_list args;
90         unsigned long flags;
91         char *buf;
92
93         local_irq_save(flags);
94         buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
95         va_start(args, fmt);
96         n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
97         va_end(args);
98
99         trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
100         local_irq_restore(flags);
101 }
102 EXPORT_SYMBOL_GPL(__trace_note_message);
103
104 static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
105                          pid_t pid)
106 {
107         if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
108                 return 1;
109         if (sector < bt->start_lba || sector > bt->end_lba)
110                 return 1;
111         if (bt->pid && pid != bt->pid)
112                 return 1;
113
114         return 0;
115 }
116
/*
 * Data direction bit lookup
 */
static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };

/*
 * Translate the BIO_RW_<name> flag bit in @rw into the corresponding
 * BLK_TC_<name> bit of the trace action field.
 */
/* The ilog2() calls fall out because they're constant */
#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \
          (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) )
125
/*
 * The worker for the various blk_add_trace*() types. Fills out a
 * blk_io_trace structure and places it in a per-cpu subbuffer.
 */
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
                     int rw, u32 what, int error, int pdu_len, void *pdu_data)
{
        struct task_struct *tsk = current;
        struct blk_io_trace *t;
        unsigned long flags;
        unsigned long *sequence;
        pid_t pid;
        int cpu;

        if (unlikely(bt->trace_state != Blktrace_running))
                return;

        /* fold the request's rw flags into BLK_TC_* action bits */
        what |= ddir_act[rw & WRITE];
        what |= MASK_TC_BIT(rw, BARRIER);
        what |= MASK_TC_BIT(rw, SYNCIO);
        what |= MASK_TC_BIT(rw, AHEAD);
        what |= MASK_TC_BIT(rw, META);
        what |= MASK_TC_BIT(rw, DISCARD);

        pid = tsk->pid;
        /* honor the user-configured action mask / LBA range / pid filter */
        if (unlikely(act_log_check(bt, what, sector, pid)))
                return;

        /*
         * A word about the locking here - we disable interrupts to reserve
         * some space in the relay per-cpu buffer, to prevent an irq
         * from coming in and stepping on our toes.
         */
        local_irq_save(flags);

        /* first event from this task since trace start? note its name first */
        if (unlikely(tsk->btrace_seq != blktrace_seq))
                trace_note_tsk(bt, tsk);

        t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
        if (t) {
                cpu = smp_processor_id();
                sequence = per_cpu_ptr(bt->sequence, cpu);

                t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
                /* per-cpu event sequence number */
                t->sequence = ++(*sequence);
                t->time = ktime_to_ns(ktime_get());
                t->sector = sector;
                t->bytes = bytes;
                t->action = what;
                t->pid = pid;
                t->device = bt->dev;
                t->cpu = cpu;
                t->error = error;
                t->pdu_len = pdu_len;

                /* optional payload (cdb, remap info, counts, ...) follows */
                if (pdu_len)
                        memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
        }

        local_irq_restore(flags);
}
187
/* root debugfs directory ("block") shared by all traced devices */
static struct dentry *blk_tree_root;
/* NOTE(review): presumably protects blk_tree_root; no user visible in this chunk — confirm */
static DEFINE_MUTEX(blk_tree_mutex);
190
/*
 * Tear down a non-running trace: remove its debugfs files, close the
 * relay channel, free per-cpu state, and drop the global probe
 * reference, unregistering the tracepoints when it reaches zero.
 */
static void blk_trace_cleanup(struct blk_trace *bt)
{
        debugfs_remove(bt->msg_file);
        debugfs_remove(bt->dropped_file);
        relay_close(bt->rchan);
        free_percpu(bt->sequence);
        free_percpu(bt->msg_data);
        kfree(bt);
        /* last trace gone? then detach all block tracepoint probes */
        mutex_lock(&blk_probe_mutex);
        if (atomic_dec_and_test(&blk_probes_ref))
                blk_unregister_tracepoints();
        mutex_unlock(&blk_probe_mutex);
}
204
205 int blk_trace_remove(struct request_queue *q)
206 {
207         struct blk_trace *bt;
208
209         bt = xchg(&q->blk_trace, NULL);
210         if (!bt)
211                 return -EINVAL;
212
213         if (bt->trace_state == Blktrace_setup ||
214             bt->trace_state == Blktrace_stopped)
215                 blk_trace_cleanup(bt);
216
217         return 0;
218 }
219 EXPORT_SYMBOL_GPL(blk_trace_remove);
220
/* stash the blk_trace pointer (debugfs i_private) for blk_dropped_read() */
static int blk_dropped_open(struct inode *inode, struct file *filp)
{
        filp->private_data = inode->i_private;

        return 0;
}
227
228 static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
229                                 size_t count, loff_t *ppos)
230 {
231         struct blk_trace *bt = filp->private_data;
232         char buf[16];
233
234         snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));
235
236         return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
237 }
238
/* read-only "dropped" debugfs file */
static const struct file_operations blk_dropped_fops = {
        .owner =        THIS_MODULE,
        .open =         blk_dropped_open,
        .read =         blk_dropped_read,
};
244
/* stash the blk_trace pointer (debugfs i_private) for blk_msg_write() */
static int blk_msg_open(struct inode *inode, struct file *filp)
{
        filp->private_data = inode->i_private;

        return 0;
}
251
252 static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
253                                 size_t count, loff_t *ppos)
254 {
255         char *msg;
256         struct blk_trace *bt;
257
258         if (count > BLK_TN_MAX_MSG)
259                 return -EINVAL;
260
261         msg = kmalloc(count, GFP_KERNEL);
262         if (msg == NULL)
263                 return -ENOMEM;
264
265         if (copy_from_user(msg, buffer, count)) {
266                 kfree(msg);
267                 return -EFAULT;
268         }
269
270         bt = filp->private_data;
271         __trace_note_message(bt, "%s", msg);
272         kfree(msg);
273
274         return count;
275 }
276
/* write-only "msg" debugfs file */
static const struct file_operations blk_msg_fops = {
        .owner =        THIS_MODULE,
        .open =         blk_msg_open,
        .write =        blk_msg_write,
};
282
283 /*
284  * Keep track of how many times we encountered a full subbuffer, to aid
285  * the user space app in telling how many lost events there were.
286  */
287 static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
288                                      void *prev_subbuf, size_t prev_padding)
289 {
290         struct blk_trace *bt;
291
292         if (!relay_buf_full(buf))
293                 return 1;
294
295         bt = buf->chan->private_data;
296         atomic_inc(&bt->dropped);
297         return 0;
298 }
299
300 static int blk_remove_buf_file_callback(struct dentry *dentry)
301 {
302         struct dentry *parent = dentry->d_parent;
303         debugfs_remove(dentry);
304
305         /*
306         * this will fail for all but the last file, but that is ok. what we
307         * care about is the top level buts->name directory going away, when
308         * the last trace file is gone. Then we don't have to rmdir() that
309         * manually on trace stop, so it nicely solves the issue with
310         * force killing of running traces.
311         */
312
313         debugfs_remove(parent);
314         return 0;
315 }
316
/* relay callback: create one per-cpu trace file under the device directory */
static struct dentry *blk_create_buf_file_callback(const char *filename,
                                                   struct dentry *parent,
                                                   int mode,
                                                   struct rchan_buf *buf,
                                                   int *is_global)
{
        return debugfs_create_file(filename, mode, parent, buf,
                                        &relay_file_operations);
}
326
/* relay channel hooks used by relay_open() in do_blk_trace_setup() */
static struct rchan_callbacks blk_relay_callbacks = {
        .subbuf_start           = blk_subbuf_start_callback,
        .create_buf_file        = blk_create_buf_file_callback,
        .remove_buf_file        = blk_remove_buf_file_callback,
};
332
333 /*
334  * Setup everything required to start tracing
335  */
336 int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
337                         struct blk_user_trace_setup *buts)
338 {
339         struct blk_trace *old_bt, *bt = NULL;
340         struct dentry *dir = NULL;
341         int ret, i;
342
343         if (!buts->buf_size || !buts->buf_nr)
344                 return -EINVAL;
345
346         strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
347         buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
348
349         /*
350          * some device names have larger paths - convert the slashes
351          * to underscores for this to work as expected
352          */
353         for (i = 0; i < strlen(buts->name); i++)
354                 if (buts->name[i] == '/')
355                         buts->name[i] = '_';
356
357         ret = -ENOMEM;
358         bt = kzalloc(sizeof(*bt), GFP_KERNEL);
359         if (!bt)
360                 goto err;
361
362         bt->sequence = alloc_percpu(unsigned long);
363         if (!bt->sequence)
364                 goto err;
365
366         bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG);
367         if (!bt->msg_data)
368                 goto err;
369
370         ret = -ENOENT;
371
372         if (!blk_tree_root) {
373                 blk_tree_root = debugfs_create_dir("block", NULL);
374                 if (!blk_tree_root)
375                         return -ENOMEM;
376         }
377
378         dir = debugfs_create_dir(buts->name, blk_tree_root);
379
380         if (!dir)
381                 goto err;
382
383         bt->dir = dir;
384         bt->dev = dev;
385         atomic_set(&bt->dropped, 0);
386
387         ret = -EIO;
388         bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
389         if (!bt->dropped_file)
390                 goto err;
391
392         bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
393         if (!bt->msg_file)
394                 goto err;
395
396         bt->rchan = relay_open("trace", dir, buts->buf_size,
397                                 buts->buf_nr, &blk_relay_callbacks, bt);
398         if (!bt->rchan)
399                 goto err;
400
401         bt->act_mask = buts->act_mask;
402         if (!bt->act_mask)
403                 bt->act_mask = (u16) -1;
404
405         bt->start_lba = buts->start_lba;
406         bt->end_lba = buts->end_lba;
407         if (!bt->end_lba)
408                 bt->end_lba = -1ULL;
409
410         bt->pid = buts->pid;
411         bt->trace_state = Blktrace_setup;
412
413         mutex_lock(&blk_probe_mutex);
414         if (atomic_add_return(1, &blk_probes_ref) == 1) {
415                 ret = blk_register_tracepoints();
416                 if (ret)
417                         goto probe_err;
418         }
419         mutex_unlock(&blk_probe_mutex);
420
421         ret = -EBUSY;
422         old_bt = xchg(&q->blk_trace, bt);
423         if (old_bt) {
424                 (void) xchg(&q->blk_trace, old_bt);
425                 goto err;
426         }
427
428         return 0;
429 probe_err:
430         atomic_dec(&blk_probes_ref);
431         mutex_unlock(&blk_probe_mutex);
432 err:
433         if (bt) {
434                 if (bt->msg_file)
435                         debugfs_remove(bt->msg_file);
436                 if (bt->dropped_file)
437                         debugfs_remove(bt->dropped_file);
438                 free_percpu(bt->sequence);
439                 free_percpu(bt->msg_data);
440                 if (bt->rchan)
441                         relay_close(bt->rchan);
442                 kfree(bt);
443         }
444         return ret;
445 }
446
447 int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
448                     char __user *arg)
449 {
450         struct blk_user_trace_setup buts;
451         int ret;
452
453         ret = copy_from_user(&buts, arg, sizeof(buts));
454         if (ret)
455                 return -EFAULT;
456
457         ret = do_blk_trace_setup(q, name, dev, &buts);
458         if (ret)
459                 return ret;
460
461         if (copy_to_user(arg, &buts, sizeof(buts)))
462                 return -EFAULT;
463
464         return 0;
465 }
466 EXPORT_SYMBOL_GPL(blk_trace_setup);
467
/*
 * Start (@start != 0) or stop a trace attached to @q. Returns 0 on a
 * valid transition, -EINVAL otherwise.
 */
int blk_trace_startstop(struct request_queue *q, int start)
{
        struct blk_trace *bt;
        int ret;

        if ((bt = q->blk_trace) == NULL)
                return -EINVAL;

        /*
         * For starting a trace, we can transition from a setup or stopped
         * trace. For stopping a trace, the state must be running
         */
        ret = -EINVAL;
        if (start) {
                if (bt->trace_state == Blktrace_setup ||
                    bt->trace_state == Blktrace_stopped) {
                        /* new generation: tasks will re-emit PROCESS notes */
                        blktrace_seq++;
                        /* make the seq bump visible before flipping to running */
                        smp_mb();
                        bt->trace_state = Blktrace_running;

                        trace_note_time(bt);
                        ret = 0;
                }
        } else {
                if (bt->trace_state == Blktrace_running) {
                        bt->trace_state = Blktrace_stopped;
                        /* push partially-filled subbuffers out to userspace */
                        relay_flush(bt->rchan);
                        ret = 0;
                }
        }

        return ret;
}
EXPORT_SYMBOL_GPL(blk_trace_startstop);
502
/**
 * blk_trace_ioctl: - handle the ioctls associated with tracing
 * @bdev:       the block device
 * @cmd:        the ioctl cmd
 * @arg:        the argument data, if any
 *
 **/
int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
{
        struct request_queue *q;
        int ret, start = 0;
        char b[BDEVNAME_SIZE];

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        /* serialize trace setup/start/stop/teardown against each other */
        mutex_lock(&bdev->bd_mutex);

        switch (cmd) {
        case BLKTRACESETUP:
                bdevname(bdev, b);
                ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
                break;
        case BLKTRACESTART:
                start = 1;
                /* fall through - start and stop share one handler */
        case BLKTRACESTOP:
                ret = blk_trace_startstop(q, start);
                break;
        case BLKTRACETEARDOWN:
                ret = blk_trace_remove(q);
                break;
        default:
                ret = -ENOTTY;
                break;
        }

        mutex_unlock(&bdev->bd_mutex);
        return ret;
}
543
544 /**
545  * blk_trace_shutdown: - stop and cleanup trace structures
546  * @q:    the request queue associated with the device
547  *
548  **/
549 void blk_trace_shutdown(struct request_queue *q)
550 {
551         if (q->blk_trace) {
552                 blk_trace_startstop(q, 0);
553                 blk_trace_remove(q);
554         }
555 }
556
/*
 * blktrace probes
 */

/**
 * blk_add_trace_rq - Add a trace for a request oriented action
 * @q:          queue the io is for
 * @rq:         the source request
 * @what:       the action
 *
 * Description:
 *     Records an action against a request. Will log the bio offset + size.
 *
 **/
static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
                                    u32 what)
{
        struct blk_trace *bt = q->blk_trace;
        /* low two cmd_flags bits carry the data direction */
        int rw = rq->cmd_flags & 0x03;

        if (likely(!bt))
                return;

        if (blk_discard_rq(rq))
                rw |= (1 << BIO_RW_DISCARD);

        if (blk_pc_request(rq)) {
                /* passthrough request: no sector, log the command block as pdu */
                what |= BLK_TC_ACT(BLK_TC_PC);
                __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
                                sizeof(rq->cmd), rq->cmd);
        } else  {
                /* fs request: log start sector and length in bytes */
                what |= BLK_TC_ACT(BLK_TC_FS);
                __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
                                rw, what, rq->errors, 0, NULL);
        }
}
593
/* tracepoint probes for the request lifecycle; each logs one action type */
static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_ABORT);
}

static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_INSERT);
}

static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
}

static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
}

static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
}
618
/**
 * blk_add_trace_bio - Add a trace for a bio oriented action
 * @q:          queue the io is for
 * @bio:        the source bio
 * @what:       the action
 *
 * Description:
 *     Records an action against a bio. Will log the bio offset + size.
 *
 **/
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
                                     u32 what)
{
        struct blk_trace *bt = q->blk_trace;

        if (likely(!bt))
                return;

        /* a bio that is not BIO_UPTODATE has failed — that is the error flag */
        __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
                        !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
}
640
/* tracepoint probes for the bio lifecycle; each logs one action type */
static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
}

static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
}

static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
}

static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
}

static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
}
665
666 static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw)
667 {
668         if (bio)
669                 blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
670         else {
671                 struct blk_trace *bt = q->blk_trace;
672
673                 if (bt)
674                         __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
675         }
676 }
677
678
679 static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw)
680 {
681         if (bio)
682                 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
683         else {
684                 struct blk_trace *bt = q->blk_trace;
685
686                 if (bt)
687                         __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL);
688         }
689 }
690
/* the queue was plugged (io held back for batching) */
static void blk_add_trace_plug(struct request_queue *q)
{
        struct blk_trace *bt = q->blk_trace;

        if (bt)
                __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
}
698
699 static void blk_add_trace_unplug_io(struct request_queue *q)
700 {
701         struct blk_trace *bt = q->blk_trace;
702
703         if (bt) {
704                 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
705                 __be64 rpdu = cpu_to_be64(pdu);
706
707                 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
708                                 sizeof(rpdu), &rpdu);
709         }
710 }
711
712 static void blk_add_trace_unplug_timer(struct request_queue *q)
713 {
714         struct blk_trace *bt = q->blk_trace;
715
716         if (bt) {
717                 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
718                 __be64 rpdu = cpu_to_be64(pdu);
719
720                 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
721                                 sizeof(rpdu), &rpdu);
722         }
723 }
724
/*
 * A bio was split; @pdu is logged big-endian as the event payload.
 * NOTE(review): the meaning of @pdu is set by the caller of the split
 * tracepoint — confirm against its callers.
 */
static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
                                unsigned int pdu)
{
        struct blk_trace *bt = q->blk_trace;

        if (bt) {
                __be64 rpdu = cpu_to_be64(pdu);

                __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
                                BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
                                sizeof(rpdu), &rpdu);
        }
}
738
/**
 * blk_add_trace_remap - Add a trace for a remap operation
 * @q:          queue the io is for
 * @bio:        the source bio
 * @dev:        target device
 * @from:       source sector
 * @to:         target sector
 *
 * Description:
 *     Device mapper or raid target sometimes need to split a bio because
 *     it spans a stripe (or similar). Add a trace for that action.
 *
 **/
static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
                                       dev_t dev, sector_t from, sector_t to)
{
        struct blk_trace *bt = q->blk_trace;
        struct blk_io_trace_remap r;

        if (likely(!bt))
                return;

        /*
         * NOTE(review): r.device gets the remap target @dev while
         * r.device_from gets the bio's current bdev — the field names
         * look inverted relative to the values. Userspace (blkparse)
         * presumably decodes them this way; confirm before changing.
         */
        r.device = cpu_to_be32(dev);
        r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
        r.sector = cpu_to_be64(to);

        __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
                        !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
}
768
/**
 * blk_add_driver_data - Add binary message with driver-specific data
 * @q:          queue the io is for
 * @rq:         io request
 * @data:       driver-specific data
 * @len:        length of driver-specific data
 *
 * Description:
 *     Some drivers might want to write driver-specific data per request.
 *
 **/
void blk_add_driver_data(struct request_queue *q,
                         struct request *rq,
                         void *data, size_t len)
{
        struct blk_trace *bt = q->blk_trace;

        if (likely(!bt))
                return;

        /* mirror blk_add_trace_rq(): passthrough requests have no sector */
        if (blk_pc_request(rq))
                __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
                                rq->errors, len, data);
        else
                __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
                                0, BLK_TA_DRV_DATA, rq->errors, len, data);
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
797
/*
 * Attach a probe to every block-layer tracepoint. Individual failures
 * are only WARNed about; the function always returns 0.
 */
static int blk_register_tracepoints(void)
{
        int ret;

        ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
        WARN_ON(ret);
        ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
        WARN_ON(ret);
        ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
        WARN_ON(ret);
        ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
        WARN_ON(ret);
        ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
        WARN_ON(ret);
        ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
        WARN_ON(ret);
        ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
        WARN_ON(ret);
        ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
        WARN_ON(ret);
        ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
        WARN_ON(ret);
        ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
        WARN_ON(ret);
        ret = register_trace_block_getrq(blk_add_trace_getrq);
        WARN_ON(ret);
        ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
        WARN_ON(ret);
        ret = register_trace_block_plug(blk_add_trace_plug);
        WARN_ON(ret);
        ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
        WARN_ON(ret);
        ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
        WARN_ON(ret);
        ret = register_trace_block_split(blk_add_trace_split);
        WARN_ON(ret);
        ret = register_trace_block_remap(blk_add_trace_remap);
        WARN_ON(ret);
        return 0;
}
838
/*
 * Detach all probes (in reverse order of registration), then wait for
 * in-flight probe invocations to finish so callers may safely free
 * trace state afterwards.
 */
static void blk_unregister_tracepoints(void)
{
        unregister_trace_block_remap(blk_add_trace_remap);
        unregister_trace_block_split(blk_add_trace_split);
        unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
        unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
        unregister_trace_block_plug(blk_add_trace_plug);
        unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
        unregister_trace_block_getrq(blk_add_trace_getrq);
        unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
        unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
        unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
        unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
        unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
        unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
        unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
        unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
        unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
        unregister_trace_block_rq_abort(blk_add_trace_rq_abort);

        /* wait for any probe still executing before state is torn down */
        tracepoint_synchronize_unregister();
}