trace: set max latency variable to zero on default
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 86dc353..9c1e73d 100644
 
 #include "trace.h"
 
-/* Global flag to disable all recording to ring buffers */
-static int ring_buffers_off __read_mostly;
+/*
+ * A fast way to enable or disable all ring buffers is to
+ * call tracing_on or tracing_off. Turning off the ring buffers
+ * prevents all ring buffers from being recorded to.
+ * Turning this switch on makes it OK to write to the
+ * ring buffer, if the ring buffer is enabled itself.
+ *
+ * There are three layers that must be on in order to write
+ * to the ring buffer.
+ *
+ * 1) This global flag must be set.
+ * 2) The ring buffer must be enabled for recording.
+ * 3) The per cpu buffer must be enabled for recording.
+ *
+ * In case of an anomaly, this global flag has a bit set that
+ * will permanently disable all ring buffers.
+ */
+
+/*
+ * Global flag to disable all recording to ring buffers
+ *  This has two bits: ON, DISABLED
+ *
+ *  ON   DISABLED
+ * ---- ----------
+ *   0      0        : ring buffers are off
+ *   1      0        : ring buffers are on
+ *   X      1        : ring buffers are permanently disabled
+ */
+
+enum {
+       RB_BUFFERS_ON_BIT       = 0,
+       RB_BUFFERS_DISABLED_BIT = 1,
+};
+
+enum {
+       RB_BUFFERS_ON           = 1 << RB_BUFFERS_ON_BIT,
+       RB_BUFFERS_DISABLED     = 1 << RB_BUFFERS_DISABLED_BIT,
+};
+
+static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
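For illustration, the three gates above are checked in this order on the
write path. A minimal sketch pieced together from the checks this patch
adds to ring_buffer_lock_reserve() and ring_buffer_write();
rb_example_can_write() itself is hypothetical:

static int rb_example_can_write(struct ring_buffer *buffer, int cpu)
{
	/* 1) global switch; also fails once the DISABLED bit is set */
	if (ring_buffer_flags != RB_BUFFERS_ON)
		return 0;
	/* 2) the ring buffer must be enabled for recording */
	if (atomic_read(&buffer->record_disabled))
		return 0;
	/* 3) the per cpu buffer must be enabled for recording */
	if (atomic_read(&buffer->buffers[cpu]->record_disabled))
		return 0;
	return 1;
}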
 
 /**
  * tracing_on - enable all tracing buffers
@@ -29,8 +67,9 @@ static int ring_buffers_off __read_mostly;
  */
 void tracing_on(void)
 {
-       ring_buffers_off = 0;
+       set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
 }
+EXPORT_SYMBOL_GPL(tracing_on);
 
 /**
  * tracing_off - turn off all tracing buffers
@@ -42,7 +81,19 @@ void tracing_on(void)
  */
 void tracing_off(void)
 {
-       ring_buffers_off = 1;
+       clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
+}
+EXPORT_SYMBOL_GPL(tracing_off);
+
+/**
+ * tracing_off_permanent - permanently disable ring buffers
+ *
+ * This function, once called, will disable all ring buffers
+ * permanently.
+ */
+void tracing_off_permanent(void)
+{
+       set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
 }
 
 #include "trace.h"
@@ -53,15 +104,23 @@ void tracing_off(void)
 /* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
+       u64 time;
+
+       preempt_disable_notrace();
        /* shift to debug/test normalization and TIME_EXTENTS */
-       return sched_clock() << DEBUG_SHIFT;
+       time = sched_clock() << DEBUG_SHIFT;
+       preempt_enable_no_resched_notrace();
+
+       return time;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
 
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
 {
        /* Just stupid testing the normalize function and deltas */
        *ts >>= DEBUG_SHIFT;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
 
 #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
 #define RB_ALIGNMENT_SHIFT     2
@@ -109,8 +168,15 @@ rb_event_length(struct ring_buffer_event *event)
  */
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
-       return rb_event_length(event);
+       unsigned length = rb_event_length(event);
+       if (event->type != RINGBUF_TYPE_DATA)
+               return length;
+       length -= RB_EVNT_HDR_SIZE;
+       if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
+               length -= sizeof(event->array[0]);
+       return length;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_event_length);
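A hedged walk-through of what this fix means for callers; the
relationships below follow from the patched code (rb_example_lengths()
is hypothetical):

static void rb_example_lengths(struct ring_buffer_event *event)
{
	unsigned total   = rb_event_length(event);          /* bytes used in the buffer  */
	unsigned payload = ring_buffer_event_length(event); /* bytes the writer reserved */

	/*
	 * small data events:  total == payload + RB_EVNT_HDR_SIZE
	 * large data events:  total == payload + RB_EVNT_HDR_SIZE
	 *                                      + sizeof(event->array[0])
	 * Previously callers got 'total' back and had to strip the
	 * header themselves.
	 */
}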
 
 /* inline for ring buffer fast paths */
 static inline void *
@@ -132,28 +198,33 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
 {
        return rb_event_data(event);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 
 #define for_each_buffer_cpu(buffer, cpu)               \
-       for_each_cpu_mask(cpu, buffer->cpumask)
+       for_each_cpu(cpu, buffer->cpumask)
 
 #define TS_SHIFT       27
 #define TS_MASK                ((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST  (~TS_MASK)
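These masks carry the 27-bit time-delta scheme used by
rb_add_time_stamp() further down: a delta that fits in TS_MASK rides in
the event header, anything wider is split into a TIME_EXTEND event. A
schematic sketch (rb_example_split_delta() is hypothetical):

static void rb_example_split_delta(struct ring_buffer_event *event, u64 delta)
{
	if (delta & TS_DELTA_TEST) {			/* wider than 27 bits */
		event->time_delta = delta & TS_MASK;	/* low 27 bits */
		event->array[0]   = delta >> TS_SHIFT;	/* the rest */
	}
	/* a reader reconstructs ((u64)event->array[0] << TS_SHIFT) | event->time_delta */
}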
 
-/*
- * This hack stolen from mm/slob.c.
- * We can store per page timing information in the page frame of the page.
- * Thanks to Peter Zijlstra for suggesting this idea.
- */
-struct buffer_page {
+struct buffer_data_page {
        u64              time_stamp;    /* page time stamp */
-       local_t          write;         /* index for next write */
+       local_t          commit;        /* write committed index */
+       unsigned char    data[];        /* data of buffer page */
+};
+
+struct buffer_page {
+       local_t          write;         /* index for next write */
        unsigned         read;          /* index for next read */
        struct list_head list;          /* list of free pages */
-       void *page;                     /* Actual data page */
+       struct buffer_data_page *page;  /* Actual data page */
 };
 
+static void rb_init_page(struct buffer_data_page *bpage)
+{
+       local_set(&bpage->commit, 0);
+}
+
 /*
  * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
  * this issue out.
@@ -175,7 +246,7 @@ static inline int test_time_stamp(u64 delta)
        return 0;
 }
 
-#define BUF_PAGE_SIZE PAGE_SIZE
+#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data))
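The point of the split: a page handed out to a reader is now
self-describing, carrying its own commit index and time stamp, at the
cost of BUF_PAGE_SIZE shrinking by the header size as above. A
hypothetical helper to show it:

static unsigned rb_example_page_bytes(struct buffer_data_page *dpage)
{
	/* committed payload bytes on this (possibly detached) page */
	return local_read(&dpage->commit);
}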
 
 /*
  * head_page == tail_page && head == tail then buffer is empty.
@@ -199,11 +270,10 @@ struct ring_buffer_per_cpu {
 };
 
 struct ring_buffer {
-       unsigned long                   size;
        unsigned                        pages;
        unsigned                        flags;
        int                             cpus;
-       cpumask_t                       cpumask;
+       cpumask_var_t                   cpumask;
        atomic_t                        record_disabled;
 
        struct mutex                    mutex;
@@ -239,19 +309,19 @@ struct ring_buffer_iter {
 static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 {
        struct list_head *head = &cpu_buffer->pages;
-       struct buffer_page *page, *tmp;
+       struct buffer_page *bpage, *tmp;
 
        if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
                return -1;
        if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
                return -1;
 
-       list_for_each_entry_safe(page, tmp, head, list) {
+       list_for_each_entry_safe(bpage, tmp, head, list) {
                if (RB_WARN_ON(cpu_buffer,
-                              page->list.next->prev != &page->list))
+                              bpage->list.next->prev != &bpage->list))
                        return -1;
                if (RB_WARN_ON(cpu_buffer,
-                              page->list.prev->next != &page->list))
+                              bpage->list.prev->next != &bpage->list))
                        return -1;
        }
 
@@ -262,22 +332,23 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
                             unsigned nr_pages)
 {
        struct list_head *head = &cpu_buffer->pages;
-       struct buffer_page *page, *tmp;
+       struct buffer_page *bpage, *tmp;
        unsigned long addr;
        LIST_HEAD(pages);
        unsigned i;
 
        for (i = 0; i < nr_pages; i++) {
-               page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+               bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
                                    GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
-               if (!page)
+               if (!bpage)
                        goto free_pages;
-               list_add(&page->list, &pages);
+               list_add(&bpage->list, &pages);
 
                addr = __get_free_page(GFP_KERNEL);
                if (!addr)
                        goto free_pages;
-               page->page = (void *)addr;
+               bpage->page = (void *)addr;
+               rb_init_page(bpage->page);
        }
 
        list_splice(&pages, head);
@@ -287,9 +358,9 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
        return 0;
 
  free_pages:
-       list_for_each_entry_safe(page, tmp, &pages, list) {
-               list_del_init(&page->list);
-               free_buffer_page(page);
+       list_for_each_entry_safe(bpage, tmp, &pages, list) {
+               list_del_init(&bpage->list);
+               free_buffer_page(bpage);
        }
        return -ENOMEM;
 }
@@ -298,7 +369,7 @@ static struct ring_buffer_per_cpu *
 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
-       struct buffer_page *page;
+       struct buffer_page *bpage;
        unsigned long addr;
        int ret;
 
@@ -313,16 +384,17 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
        cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
        INIT_LIST_HEAD(&cpu_buffer->pages);
 
-       page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+       bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
                            GFP_KERNEL, cpu_to_node(cpu));
-       if (!page)
+       if (!bpage)
                goto fail_free_buffer;
 
-       cpu_buffer->reader_page = page;
+       cpu_buffer->reader_page = bpage;
        addr = __get_free_page(GFP_KERNEL);
        if (!addr)
                goto fail_free_reader;
-       page->page = (void *)addr;
+       bpage->page = (void *)addr;
+       rb_init_page(bpage->page);
 
        INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
 
@@ -347,14 +419,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 {
        struct list_head *head = &cpu_buffer->pages;
-       struct buffer_page *page, *tmp;
+       struct buffer_page *bpage, *tmp;
 
        list_del_init(&cpu_buffer->reader_page->list);
        free_buffer_page(cpu_buffer->reader_page);
 
-       list_for_each_entry_safe(page, tmp, head, list) {
-               list_del_init(&page->list);
-               free_buffer_page(page);
+       list_for_each_entry_safe(bpage, tmp, head, list) {
+               list_del_init(&bpage->list);
+               free_buffer_page(bpage);
        }
        kfree(cpu_buffer);
 }
@@ -367,7 +439,7 @@ extern int ring_buffer_page_too_big(void);
 
 /**
  * ring_buffer_alloc - allocate a new ring_buffer
- * @size: the size in bytes that is needed.
+ * @size: the size in bytes per cpu that is needed.
  * @flags: attributes to set for the ring buffer.
  *
  * Currently the only flag that is available is the RB_FL_OVERWRITE
@@ -392,6 +464,9 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
        if (!buffer)
                return NULL;
 
+       if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
+               goto fail_free_buffer;
+
        buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
        buffer->flags = flags;
 
@@ -399,14 +474,14 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
        if (buffer->pages == 1)
                buffer->pages++;
 
-       buffer->cpumask = cpu_possible_map;
+       cpumask_copy(buffer->cpumask, cpu_possible_mask);
        buffer->cpus = nr_cpu_ids;
 
        bsize = sizeof(void *) * nr_cpu_ids;
        buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
                                  GFP_KERNEL);
        if (!buffer->buffers)
-               goto fail_free_buffer;
+               goto fail_free_cpumask;
 
        for_each_buffer_cpu(buffer, cpu) {
                buffer->buffers[cpu] =
@@ -426,10 +501,14 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
        }
        kfree(buffer->buffers);
 
+ fail_free_cpumask:
+       free_cpumask_var(buffer->cpumask);
+
  fail_free_buffer:
        kfree(buffer);
        return NULL;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_alloc);
 
 /**
  * ring_buffer_free - free a ring buffer.
@@ -443,15 +522,18 @@ ring_buffer_free(struct ring_buffer *buffer)
        for_each_buffer_cpu(buffer, cpu)
                rb_free_cpu_buffer(buffer->buffers[cpu]);
 
+       free_cpumask_var(buffer->cpumask);
+
        kfree(buffer);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_free);
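With the exports added through this series, modular code can drive a
buffer end to end. A minimal sketch, assuming module context and
eliding error handling beyond allocation:

static int __init rb_example_init(void)
{
	struct ring_buffer *rb;
	struct ring_buffer_event *event;
	char greeting[] = "hello";
	int cpu;

	rb = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);	/* size is per cpu */
	if (!rb)
		return -ENOMEM;

	ring_buffer_write(rb, sizeof(greeting), greeting);

	for_each_online_cpu(cpu)
		while ((event = ring_buffer_consume(rb, cpu, NULL)))
			pr_info("event of %u bytes\n",
				ring_buffer_event_length(event));

	ring_buffer_free(rb);
	return 0;
}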
 
 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
 
 static void
 rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 {
-       struct buffer_page *page;
+       struct buffer_page *bpage;
        struct list_head *p;
        unsigned i;
 
@@ -462,9 +544,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
                if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
                        return;
                p = cpu_buffer->pages.next;
-               page = list_entry(p, struct buffer_page, list);
-               list_del_init(&page->list);
-               free_buffer_page(page);
+               bpage = list_entry(p, struct buffer_page, list);
+               list_del_init(&bpage->list);
+               free_buffer_page(bpage);
        }
        if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
                return;
@@ -481,7 +563,7 @@ static void
 rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
                struct list_head *pages, unsigned nr_pages)
 {
-       struct buffer_page *page;
+       struct buffer_page *bpage;
        struct list_head *p;
        unsigned i;
 
@@ -492,9 +574,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
                if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
                        return;
                p = pages->next;
-               page = list_entry(p, struct buffer_page, list);
-               list_del_init(&page->list);
-               list_add_tail(&page->list, &cpu_buffer->pages);
+               bpage = list_entry(p, struct buffer_page, list);
+               list_del_init(&bpage->list);
+               list_add_tail(&bpage->list, &cpu_buffer->pages);
        }
        rb_reset_cpu(cpu_buffer);
 
@@ -521,12 +603,18 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
        unsigned nr_pages, rm_pages, new_pages;
-       struct buffer_page *page, *tmp;
+       struct buffer_page *bpage, *tmp;
        unsigned long buffer_size;
        unsigned long addr;
        LIST_HEAD(pages);
        int i, cpu;
 
+       /*
+        * Always succeed at resizing a non-existent buffer:
+        */
+       if (!buffer)
+               return size;
+
        size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
        size *= BUF_PAGE_SIZE;
        buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -576,16 +664,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 
        for_each_buffer_cpu(buffer, cpu) {
                for (i = 0; i < new_pages; i++) {
-                       page = kzalloc_node(ALIGN(sizeof(*page),
+                       bpage = kzalloc_node(ALIGN(sizeof(*bpage),
                                                  cache_line_size()),
                                            GFP_KERNEL, cpu_to_node(cpu));
-                       if (!page)
+                       if (!bpage)
                                goto free_pages;
-                       list_add(&page->list, &pages);
+                       list_add(&bpage->list, &pages);
                        addr = __get_free_page(GFP_KERNEL);
                        if (!addr)
                                goto free_pages;
-                       page->page = (void *)addr;
+                       bpage->page = (void *)addr;
+                       rb_init_page(bpage->page);
                }
        }
 
@@ -606,21 +695,29 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
        return size;
 
  free_pages:
-       list_for_each_entry_safe(page, tmp, &pages, list) {
-               list_del_init(&page->list);
-               free_buffer_page(page);
+       list_for_each_entry_safe(bpage, tmp, &pages, list) {
+               list_del_init(&bpage->list);
+               free_buffer_page(bpage);
        }
+       mutex_unlock(&buffer->mutex);
        return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_resize);
 
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
        return event->type == RINGBUF_TYPE_PADDING;
 }
 
-static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
+static inline void *
+__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
+{
+       return bpage->data + index;
+}
+
+static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
 {
-       return page->page + index;
+       return bpage->page->data + index;
 }
 
 static inline struct ring_buffer_event *
@@ -650,7 +747,7 @@ static inline unsigned rb_page_write(struct buffer_page *bpage)
 
 static inline unsigned rb_page_commit(struct buffer_page *bpage)
 {
-       return local_read(&bpage->commit);
+       return local_read(&bpage->page->commit);
 }
 
 /* Size is determined by what has been committed */
@@ -696,14 +793,14 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
 }
 
 static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
-                              struct buffer_page **page)
+                              struct buffer_page **bpage)
 {
-       struct list_head *p = (*page)->list.next;
+       struct list_head *p = (*bpage)->list.next;
 
        if (p == &cpu_buffer->pages)
                p = p->next;
 
-       *page = list_entry(p, struct buffer_page, list);
+       *bpage = list_entry(p, struct buffer_page, list);
 }
 
 static inline unsigned
@@ -742,14 +839,15 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
                if (RB_WARN_ON(cpu_buffer,
                          cpu_buffer->commit_page == cpu_buffer->tail_page))
                        return;
-               cpu_buffer->commit_page->commit =
+               cpu_buffer->commit_page->page->commit =
                        cpu_buffer->commit_page->write;
                rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
-               cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+               cpu_buffer->write_stamp =
+                       cpu_buffer->commit_page->page->time_stamp;
        }
 
        /* Now set the commit to the event's index */
-       local_set(&cpu_buffer->commit_page->commit, index);
+       local_set(&cpu_buffer->commit_page->page->commit, index);
 }
 
 static inline void
@@ -763,25 +861,38 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
         * back to us). This allows us to do a simple loop to
         * assign the commit to the tail.
         */
+ again:
        while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
-               cpu_buffer->commit_page->commit =
+               cpu_buffer->commit_page->page->commit =
                        cpu_buffer->commit_page->write;
                rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
-               cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+               cpu_buffer->write_stamp =
+                       cpu_buffer->commit_page->page->time_stamp;
                /* add barrier to keep gcc from optimizing too much */
                barrier();
        }
        while (rb_commit_index(cpu_buffer) !=
               rb_page_write(cpu_buffer->commit_page)) {
-               cpu_buffer->commit_page->commit =
+               cpu_buffer->commit_page->page->commit =
                        cpu_buffer->commit_page->write;
                barrier();
        }
+
+       /* again, keep gcc from optimizing */
+       barrier();
+
+       /*
+        * If an interrupt came in just after the first while loop
+        * and pushed the tail page forward, we will be left with
+        * a dangling commit that will never go forward.
+        */
+       if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
+               goto again;
 }
 
 static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
-       cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
+       cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
        cpu_buffer->reader_page->read = 0;
 }
 
@@ -800,7 +911,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
        else
                rb_inc_page(cpu_buffer, &iter->head_page);
 
-       iter->read_stamp = iter->head_page->time_stamp;
+       iter->read_stamp = iter->head_page->page->time_stamp;
        iter->head = 0;
 }
 
@@ -874,12 +985,15 @@ static struct ring_buffer_event *
 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
                  unsigned type, unsigned long length, u64 *ts)
 {
-       struct buffer_page *tail_page, *head_page, *reader_page;
+       struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
        unsigned long tail, write;
        struct ring_buffer *buffer = cpu_buffer->buffer;
        struct ring_buffer_event *event;
        unsigned long flags;
 
+       commit_page = cpu_buffer->commit_page;
+       /* we just need to protect against interrupts */
+       barrier();
        tail_page = cpu_buffer->tail_page;
        write = local_add_return(length, &tail_page->write);
        tail = write - length;
@@ -905,18 +1019,14 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
                 * it all the way around the buffer, bail, and warn
                 * about it.
                 */
-               if (unlikely(next_page == cpu_buffer->commit_page)) {
+               if (unlikely(next_page == commit_page)) {
                        WARN_ON_ONCE(1);
                        goto out_unlock;
                }
 
                if (next_page == head_page) {
-                       if (!(buffer->flags & RB_FL_OVERWRITE)) {
-                               /* reset write */
-                               if (tail <= BUF_PAGE_SIZE)
-                                       local_set(&tail_page->write, tail);
+                       if (!(buffer->flags & RB_FL_OVERWRITE))
                                goto out_unlock;
-                       }
 
                        /* tail_page has not moved yet? */
                        if (tail_page == cpu_buffer->tail_page) {
@@ -936,12 +1046,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
                 */
                if (tail_page == cpu_buffer->tail_page) {
                        local_set(&next_page->write, 0);
-                       local_set(&next_page->commit, 0);
+                       local_set(&next_page->page->commit, 0);
                        cpu_buffer->tail_page = next_page;
 
                        /* reread the time stamp */
                        *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
-                       cpu_buffer->tail_page->time_stamp = *ts;
+                       cpu_buffer->tail_page->page->time_stamp = *ts;
                }
 
                /*
@@ -986,11 +1096,15 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
         * this page's time stamp.
         */
        if (!tail && rb_is_commit(cpu_buffer, event))
-               cpu_buffer->commit_page->time_stamp = *ts;
+               cpu_buffer->commit_page->page->time_stamp = *ts;
 
        return event;
 
  out_unlock:
+       /* reset write */
+       if (tail <= BUF_PAGE_SIZE)
+               local_set(&tail_page->write, tail);
+
        __raw_spin_unlock(&cpu_buffer->lock);
        local_irq_restore(flags);
        return NULL;
@@ -1037,7 +1151,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
                        event->time_delta = *delta & TS_MASK;
                        event->array[0] = *delta >> TS_SHIFT;
                } else {
-                       cpu_buffer->commit_page->time_stamp = *ts;
+                       cpu_buffer->commit_page->page->time_stamp = *ts;
                        event->time_delta = 0;
                        event->array[0] = 0;
                }
@@ -1172,7 +1286,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
        struct ring_buffer_event *event;
        int cpu, resched;
 
-       if (ring_buffers_off)
+       if (ring_buffer_flags != RB_BUFFERS_ON)
                return NULL;
 
        if (atomic_read(&buffer->record_disabled))
@@ -1183,7 +1297,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 
        cpu = raw_smp_processor_id();
 
-       if (!cpu_isset(cpu, buffer->cpumask))
+       if (!cpumask_test_cpu(cpu, buffer->cpumask))
                goto out;
 
        cpu_buffer = buffer->buffers[cpu];
@@ -1213,6 +1327,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
        ftrace_preempt_enable(resched);
        return NULL;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
 
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
                      struct ring_buffer_event *event)
@@ -1259,6 +1374,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
 /**
  * ring_buffer_write - write data to the buffer without reserving
@@ -1284,7 +1400,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
        int ret = -EBUSY;
        int cpu, resched;
 
-       if (ring_buffers_off)
+       if (ring_buffer_flags != RB_BUFFERS_ON)
                return -EBUSY;
 
        if (atomic_read(&buffer->record_disabled))
@@ -1294,7 +1410,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
        cpu = raw_smp_processor_id();
 
-       if (!cpu_isset(cpu, buffer->cpumask))
+       if (!cpumask_test_cpu(cpu, buffer->cpumask))
                goto out;
 
        cpu_buffer = buffer->buffers[cpu];
@@ -1320,6 +1436,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_write);
 
 static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
 {
@@ -1346,6 +1463,7 @@ void ring_buffer_record_disable(struct ring_buffer *buffer)
 {
        atomic_inc(&buffer->record_disabled);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
 
 /**
  * ring_buffer_record_enable - enable writes to the buffer
@@ -1358,6 +1476,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer)
 {
        atomic_dec(&buffer->record_disabled);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
 
 /**
  * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
@@ -1373,12 +1492,13 @@ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
 
-       if (!cpu_isset(cpu, buffer->cpumask))
+       if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return;
 
        cpu_buffer = buffer->buffers[cpu];
        atomic_inc(&cpu_buffer->record_disabled);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
 
 /**
  * ring_buffer_record_enable_cpu - enable writes to the buffer
@@ -1392,12 +1512,13 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
 
-       if (!cpu_isset(cpu, buffer->cpumask))
+       if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return;
 
        cpu_buffer = buffer->buffers[cpu];
        atomic_dec(&cpu_buffer->record_disabled);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
 
 /**
  * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
@@ -1408,12 +1529,13 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
 
-       if (!cpu_isset(cpu, buffer->cpumask))
+       if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return 0;
 
        cpu_buffer = buffer->buffers[cpu];
        return cpu_buffer->entries;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
 
 /**
  * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
@@ -1424,12 +1546,13 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
 
-       if (!cpu_isset(cpu, buffer->cpumask))
+       if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return 0;
 
        cpu_buffer = buffer->buffers[cpu];
        return cpu_buffer->overrun;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
 
 /**
  * ring_buffer_entries - get the number of entries in a buffer
@@ -1452,6 +1575,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
 
        return entries;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_entries);
 
 /**
  * ring_buffer_overruns - get the number of overruns in the buffer
@@ -1474,6 +1598,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
 
        return overruns;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_overruns);
 
 static void rb_iter_reset(struct ring_buffer_iter *iter)
 {
@@ -1490,7 +1615,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
        if (iter->head)
                iter->read_stamp = cpu_buffer->read_stamp;
        else
-               iter->read_stamp = iter->head_page->time_stamp;
+               iter->read_stamp = iter->head_page->page->time_stamp;
 }
 
 /**
@@ -1509,6 +1634,7 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
        rb_iter_reset(iter);
        spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
 
 /**
  * ring_buffer_iter_empty - check if an iterator has no more to read
@@ -1523,6 +1649,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
        return iter->head_page == cpu_buffer->commit_page &&
                iter->head == rb_commit_index(cpu_buffer);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
 
 static void
 rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
@@ -1634,7 +1761,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
        cpu_buffer->reader_page->list.prev = reader->list.prev;
 
        local_set(&cpu_buffer->reader_page->write, 0);
-       local_set(&cpu_buffer->reader_page->commit, 0);
+       local_set(&cpu_buffer->reader_page->page->commit, 0);
 
        /* Make the reader page now replace the head */
        reader->list.prev->next = &cpu_buffer->reader_page->list;
@@ -1737,7 +1864,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
        struct buffer_page *reader;
        int nr_loops = 0;
 
-       if (!cpu_isset(cpu, buffer->cpumask))
+       if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return NULL;
 
        cpu_buffer = buffer->buffers[cpu];
@@ -1789,6 +1916,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 
        return NULL;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_peek);
 
 static struct ring_buffer_event *
 rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
@@ -1849,6 +1977,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
        return NULL;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
 
 /**
  * ring_buffer_peek - peek at the next event to be read
@@ -1910,7 +2039,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
        struct ring_buffer_event *event;
        unsigned long flags;
 
-       if (!cpu_isset(cpu, buffer->cpumask))
+       if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return NULL;
 
        spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -1926,6 +2055,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 
        return event;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_consume);
 
 /**
  * ring_buffer_read_start - start a non consuming read of the buffer
@@ -1946,7 +2076,7 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
        struct ring_buffer_iter *iter;
        unsigned long flags;
 
-       if (!cpu_isset(cpu, buffer->cpumask))
+       if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return NULL;
 
        iter = kmalloc(sizeof(*iter), GFP_KERNEL);
@@ -1968,6 +2098,7 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 
        return iter;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
 /**
  * ring_buffer_read_finish - finish reading the iterator of the buffer
@@ -1984,6 +2115,7 @@ ring_buffer_read_finish(struct ring_buffer_iter *iter)
        atomic_dec(&cpu_buffer->record_disabled);
        kfree(iter);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
 
 /**
  * ring_buffer_read - read the next item in the ring buffer by the iterator
@@ -2010,6 +2142,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
 
        return event;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_read);
 
 /**
  * ring_buffer_size - return the size of the ring buffer (in bytes)
@@ -2019,6 +2152,7 @@ unsigned long ring_buffer_size(struct ring_buffer *buffer)
 {
        return BUF_PAGE_SIZE * buffer->pages;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_size);
 
 static void
 rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
@@ -2026,7 +2160,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
        cpu_buffer->head_page
                = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
        local_set(&cpu_buffer->head_page->write, 0);
-       local_set(&cpu_buffer->head_page->commit, 0);
+       local_set(&cpu_buffer->head_page->page->commit, 0);
 
        cpu_buffer->head_page->read = 0;
 
@@ -2035,7 +2169,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 
        INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
        local_set(&cpu_buffer->reader_page->write, 0);
-       local_set(&cpu_buffer->reader_page->commit, 0);
+       local_set(&cpu_buffer->reader_page->page->commit, 0);
        cpu_buffer->reader_page->read = 0;
 
        cpu_buffer->overrun = 0;
@@ -2052,7 +2186,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
        struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
        unsigned long flags;
 
-       if (!cpu_isset(cpu, buffer->cpumask))
+       if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return;
 
        spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -2065,6 +2199,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 
        spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
 /**
  * ring_buffer_reset - reset a ring buffer
@@ -2077,6 +2212,7 @@ void ring_buffer_reset(struct ring_buffer *buffer)
        for_each_buffer_cpu(buffer, cpu)
                ring_buffer_reset_cpu(buffer, cpu);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_reset);
 
 /**
  * ring_buffer_empty - is the ring buffer empty?
@@ -2095,6 +2231,7 @@ int ring_buffer_empty(struct ring_buffer *buffer)
        }
        return 1;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_empty);
 
 /**
  * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
@@ -2105,12 +2242,13 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
 
-       if (!cpu_isset(cpu, buffer->cpumask))
+       if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return 1;
 
        cpu_buffer = buffer->buffers[cpu];
        return rb_per_cpu_empty(cpu_buffer);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
 
 /**
  * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
@@ -2128,13 +2266,12 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
        struct ring_buffer_per_cpu *cpu_buffer_a;
        struct ring_buffer_per_cpu *cpu_buffer_b;
 
-       if (!cpu_isset(cpu, buffer_a->cpumask) ||
-           !cpu_isset(cpu, buffer_b->cpumask))
+       if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
+           !cpumask_test_cpu(cpu, buffer_b->cpumask))
                return -EINVAL;
 
        /* At least make sure the two buffers are somewhat the same */
-       if (buffer_a->size != buffer_b->size ||
-           buffer_a->pages != buffer_b->pages)
+       if (buffer_a->pages != buffer_b->pages)
                return -EINVAL;
 
        cpu_buffer_a = buffer_a->buffers[cpu];
@@ -2160,17 +2297,180 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
+
+static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
+                             struct buffer_data_page *bpage)
+{
+       struct ring_buffer_event *event;
+       unsigned long head;
+
+       __raw_spin_lock(&cpu_buffer->lock);
+       for (head = 0; head < local_read(&bpage->commit);
+            head += rb_event_length(event)) {
+
+               event = __rb_data_page_index(bpage, head);
+               if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
+                       break;
+               /* Only count data entries */
+               if (event->type != RINGBUF_TYPE_DATA)
+                       continue;
+               cpu_buffer->entries--;
+       }
+       __raw_spin_unlock(&cpu_buffer->lock);
+}
+
+/**
+ * ring_buffer_alloc_read_page - allocate a page to read from buffer
+ * @buffer: the buffer to allocate for.
+ *
+ * This function is used in conjunction with ring_buffer_read_page.
+ * When reading a full page from the ring buffer, these functions
+ * can be used to speed up the process. The calling function should
+ * allocate a few pages first with this function. Then when it
+ * needs to get pages from the ring buffer, it passes the result
+ * of this function into ring_buffer_read_page, which will swap
+ * the page that was allocated, with the read page of the buffer.
+ *
+ * Returns:
+ *  The page allocated, or NULL on error.
+ */
+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
+{
+       unsigned long addr;
+       struct buffer_data_page *bpage;
+
+       addr = __get_free_page(GFP_KERNEL);
+       if (!addr)
+               return NULL;
+
+       bpage = (void *)addr;
+
+       return bpage;
+}
+
+/**
+ * ring_buffer_free_read_page - free an allocated read page
+ * @buffer: the buffer the page was allocated for
+ * @data: the page to free
+ *
+ * Free a page allocated from ring_buffer_alloc_read_page.
+ */
+void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
+{
+       free_page((unsigned long)data);
+}
+
+/**
+ * ring_buffer_read_page - extract a page from the ring buffer
+ * @buffer: buffer to extract from
+ * @data_page: the page to use allocated from ring_buffer_alloc_read_page
+ * @cpu: the cpu of the buffer to extract
+ * @full: should the extraction only happen when the page is full.
+ *
+ * This function will pull out a page from the ring buffer and consume it.
+ * @data_page must be the address of the variable that was returned
+ * from ring_buffer_alloc_read_page. This is because the page might be used
+ * to swap with a page in the ring buffer.
+ *
+ * for example:
+ *     rpage = ring_buffer_alloc_read_page(buffer);
+ *     if (!rpage)
+ *             return error;
+ *     ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
+ *     if (ret)
+ *             process_page(rpage);
+ *
+ * When @full is set, the function will not return true unless
+ * the writer is off the reader page.
+ *
+ * Note: it is up to the calling functions to handle sleeps and wakeups.
+ *  The ring buffer can be used anywhere in the kernel and can not
+ *  blindly call wake_up. The layer that uses the ring buffer must be
+ *  responsible for that.
+ *
+ * Returns:
+ *  1 if data has been transferred
+ *  0 if no data has been transferred.
+ */
+int ring_buffer_read_page(struct ring_buffer *buffer,
+                           void **data_page, int cpu, int full)
+{
+       struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+       struct ring_buffer_event *event;
+       struct buffer_data_page *bpage;
+       unsigned long flags;
+       int ret = 0;
+
+       if (!data_page)
+               return 0;
+
+       bpage = *data_page;
+       if (!bpage)
+               return 0;
+
+       spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+       /*
+        * rb_buffer_peek will advance to the next reader page if
+        * the current reader page is empty.
+        */
+       event = rb_buffer_peek(buffer, cpu, NULL);
+       if (!event)
+               goto out;
+
+       /* check for data */
+       if (!local_read(&cpu_buffer->reader_page->page->commit))
+               goto out;
+       /*
+        * If the writer is already off of the read page, then simply
+        * switch the read page with the given page. Otherwise
+        * we need to copy the data from the reader to the writer.
+        */
+       if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
+               unsigned int read = cpu_buffer->reader_page->read;
+               unsigned int commit =
+                       local_read(&cpu_buffer->reader_page->page->commit);
+
+               if (full)
+                       goto out;
+               /* The writer is still on the reader page, we must copy */
+               memcpy(bpage->data,
+                      cpu_buffer->reader_page->page->data + read,
+                      commit - read);
+               local_set(&bpage->commit, commit - read);
+               bpage->time_stamp = cpu_buffer->reader_page->page->time_stamp;
+
+               /* consume what was read */
+               cpu_buffer->reader_page->read = commit;
+
+       } else {
+               /* swap the pages */
+               rb_init_page(bpage);
+               bpage = cpu_buffer->reader_page->page;
+               cpu_buffer->reader_page->page = *data_page;
+               cpu_buffer->reader_page->read = 0;
+               *data_page = bpage;
+       }
+       ret = 1;
+
+       /* update the entry counter */
+       rb_remove_entries(cpu_buffer, bpage);
+ out:
+       spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+       return ret;
+}
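Putting the three new calls together, a consumer might loop like this
sketch; process_page() stands in for the caller's own handling and is
hypothetical:

	void *rpage = ring_buffer_alloc_read_page(buffer);

	if (rpage) {
		while (ring_buffer_read_page(buffer, &rpage, cpu, 0))
			process_page(rpage);
		ring_buffer_free_read_page(buffer, rpage);
	}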
 
 static ssize_t
 rb_simple_read(struct file *filp, char __user *ubuf,
               size_t cnt, loff_t *ppos)
 {
-       int *p = filp->private_data;
+       long *p = filp->private_data;
        char buf[64];
        int r;
 
-       /* !ring_buffers_off == tracing_on */
-       r = sprintf(buf, "%d\n", !*p);
+       if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
+               r = sprintf(buf, "permanently disabled\n");
+       else
+               r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
 
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 }
@@ -2179,7 +2479,7 @@ static ssize_t
 rb_simple_write(struct file *filp, const char __user *ubuf,
                size_t cnt, loff_t *ppos)
 {
-       int *p = filp->private_data;
+       long *p = filp->private_data;
        char buf[64];
        long val;
        int ret;
@@ -2196,8 +2496,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
        if (ret < 0)
                return ret;
 
-       /* !ring_buffers_off == tracing_on */
-       *p = !val;
+       if (val)
+               set_bit(RB_BUFFERS_ON_BIT, p);
+       else
+               clear_bit(RB_BUFFERS_ON_BIT, p);
 
        (*ppos)++;
 
@@ -2219,7 +2521,7 @@ static __init int rb_init_debugfs(void)
        d_tracer = tracing_init_dentry();
 
        entry = debugfs_create_file("tracing_on", 0644, d_tracer,
-                                   &ring_buffers_off, &rb_simple_fops);
+                                   &ring_buffer_flags, &rb_simple_fops);
        if (!entry)
                pr_warning("Could not create debugfs 'tracing_on' entry\n");