+/*
+ * Making the ring buffer lockless makes things tricky.
+ * Although writes only happen on the CPU that they are on,
+ * and they only need to worry about interrupts. Reads can
+ * happen on any CPU.
+ *
+ * The reader page is always off the ring buffer, but when the
+ * reader finishes with a page, it needs to swap its page with
+ * a new one from the buffer. The reader needs to take from
+ * the head (writes go to the tail). But if a writer is in overwrite
+ * mode and wraps, it must push the head page forward.
+ *
+ * Here lies the problem.
+ *
+ * The reader must be careful to replace only the head page, and
+ * not another one. As described at the top of the file in the
+ * ASCII art, the reader sets its old page to point to the next
+ * page after head. It then sets the page after head to point to
+ * the old reader page. But if the writer moves the head page
+ * during this operation, the reader could end up with the tail.
+ *
+ * We use cmpxchg to help prevent this race. We also do something
+ * special with the page before head. We set the LSB to 1.
+ *
+ * When the writer must push the page forward, it will clear the
+ * bit that points to the head page, move the head, and then set
+ * the bit that points to the new head page.
+ *
+ * We also don't want an interrupt coming in and moving the head
+ * page on another writer. Thus we use the second LSB to catch
+ * that too. Thus:
+ *
+ * head->list->prev->next bit 1 bit 0
+ * ------- -------
+ * Normal page 0 0
+ * Points to head page 0 1
+ * New head page 1 0
+ *
+ * Note we can not trust the prev pointer of the head page, because:
+ *
+ * +----+ +-----+ +-----+
+ * | |------>| T |---X--->| N |
+ * | |<------| | | |
+ * +----+ +-----+ +-----+
+ * ^ ^ |
+ * | +-----+ | |
+ * +----------| R |----------+ |
+ * | |<-----------+
+ * +-----+
+ *
+ * Key: ---X--> HEAD flag set in pointer
+ * T Tail page
+ * R Reader page
+ * N Next page
+ *
+ * (see __rb_reserve_next() to see where this happens)
+ *
+ * What the above shows is that the reader just swapped out
+ * the reader page with a page in the buffer, but before it
+ * could make the new header point back to the new page added
+ * it was preempted by a writer. The writer moved forward onto
+ * the new page added by the reader and is about to move forward
+ * again.
+ *
+ * You can see, it is legitimate for the previous pointer of
+ * the head (or any page) not to point back to itself. But only
+ * temporarially.
+ */
+
+#define RB_PAGE_NORMAL 0UL
+#define RB_PAGE_HEAD 1UL
+#define RB_PAGE_UPDATE 2UL
+
+
+#define RB_FLAG_MASK 3UL
+
+/* PAGE_MOVED is not part of the mask */
+#define RB_PAGE_MOVED 4UL
+
+/*
+ * rb_list_head - remove any bit
+ */
+static struct list_head *rb_list_head(struct list_head *list)
+{
+ unsigned long val = (unsigned long)list;
+
+ return (struct list_head *)(val & ~RB_FLAG_MASK);
+}
+
+/*
+ * rb_is_head_page - test if the given page is the head page
+ *
+ * Because the reader may move the head_page pointer, we can
+ * not trust what the head page is (it may be pointing to
+ * the reader page). But if the next page is a header page,
+ * its flags will be non zero.
+ */
+static int inline
+rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page *page, struct list_head *list)
+{
+ unsigned long val;
+
+ val = (unsigned long)list->next;
+
+ if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
+ return RB_PAGE_MOVED;
+
+ return val & RB_FLAG_MASK;
+}
+
+/*
+ * rb_is_reader_page
+ *
+ * The unique thing about the reader page, is that, if the
+ * writer is ever on it, the previous pointer never points
+ * back to the reader page.
+ */
+static int rb_is_reader_page(struct buffer_page *page)
+{
+ struct list_head *list = page->list.prev;
+
+ return rb_list_head(list->next) != &page->list;
+}
+
+/*
+ * rb_set_list_to_head - set a list_head to be pointing to head.
+ */
+static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
+ struct list_head *list)
+{
+ unsigned long *ptr;
+
+ ptr = (unsigned long *)&list->next;
+ *ptr |= RB_PAGE_HEAD;
+ *ptr &= ~RB_PAGE_UPDATE;
+}
+
+/*
+ * rb_head_page_activate - sets up head page
+ */
+static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct buffer_page *head;
+
+ head = cpu_buffer->head_page;
+ if (!head)
+ return;
+
+ /*
+ * Set the previous list pointer to have the HEAD flag.
+ */
+ rb_set_list_to_head(cpu_buffer, head->list.prev);
+}
+
+static void rb_list_head_clear(struct list_head *list)
+{
+ unsigned long *ptr = (unsigned long *)&list->next;
+
+ *ptr &= ~RB_FLAG_MASK;
+}
+
+/*
+ * rb_head_page_dactivate - clears head page ptr (for free list)
+ */
+static void
+rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct list_head *hd;
+
+ /* Go through the whole list and clear any pointers found. */
+ rb_list_head_clear(cpu_buffer->pages);
+
+ list_for_each(hd, cpu_buffer->pages)
+ rb_list_head_clear(hd);
+}
+
+static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page *head,
+ struct buffer_page *prev,
+ int old_flag, int new_flag)
+{
+ struct list_head *list;
+ unsigned long val = (unsigned long)&head->list;
+ unsigned long ret;
+
+ list = &prev->list;
+
+ val &= ~RB_FLAG_MASK;
+
+ ret = cmpxchg((unsigned long *)&list->next,
+ val | old_flag, val | new_flag);
+
+ /* check if the reader took the page */
+ if ((ret & ~RB_FLAG_MASK) != val)
+ return RB_PAGE_MOVED;
+
+ return ret & RB_FLAG_MASK;
+}
+
+static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page *head,
+ struct buffer_page *prev,
+ int old_flag)
+{
+ return rb_head_page_set(cpu_buffer, head, prev,
+ old_flag, RB_PAGE_UPDATE);
+}
+
+static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page *head,
+ struct buffer_page *prev,
+ int old_flag)
+{
+ return rb_head_page_set(cpu_buffer, head, prev,
+ old_flag, RB_PAGE_HEAD);
+}
+
+static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page *head,
+ struct buffer_page *prev,
+ int old_flag)
+{
+ return rb_head_page_set(cpu_buffer, head, prev,
+ old_flag, RB_PAGE_NORMAL);
+}
+
+static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page **bpage)
+{
+ struct list_head *p = rb_list_head((*bpage)->list.next);
+
+ *bpage = list_entry(p, struct buffer_page, list);
+}
+
+static struct buffer_page *
+rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct buffer_page *head;
+ struct buffer_page *page;
+ struct list_head *list;
+ int i;
+
+ if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
+ return NULL;
+
+ /* sanity check */
+ list = cpu_buffer->pages;
+ if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
+ return NULL;
+
+ page = head = cpu_buffer->head_page;
+ /*
+ * It is possible that the writer moves the header behind
+ * where we started, and we miss in one loop.
+ * A second loop should grab the header, but we'll do
+ * three loops just because I'm paranoid.
+ */
+ for (i = 0; i < 3; i++) {
+ do {
+ if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
+ cpu_buffer->head_page = page;
+ return page;
+ }
+ rb_inc_page(cpu_buffer, &page);
+ } while (page != head);
+ }
+
+ RB_WARN_ON(cpu_buffer, 1);
+
+ return NULL;
+}
+
+static int rb_head_page_replace(struct buffer_page *old,
+ struct buffer_page *new)
+{
+ unsigned long *ptr = (unsigned long *)&old->list.prev->next;
+ unsigned long val;
+ unsigned long ret;
+
+ val = *ptr & ~RB_FLAG_MASK;
+ val |= RB_PAGE_HEAD;
+
+ ret = cmpxchg(ptr, val, (unsigned long)&new->list);
+
+ return ret == val;
+}
+
+/*
+ * rb_tail_page_update - move the tail page forward
+ *
+ * Returns 1 if moved tail page, 0 if someone else did.
+ */
+static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page *tail_page,
+ struct buffer_page *next_page)
+{
+ struct buffer_page *old_tail;
+ unsigned long old_entries;
+ unsigned long old_write;
+ int ret = 0;
+
+ /*
+ * The tail page now needs to be moved forward.
+ *
+ * We need to reset the tail page, but without messing
+ * with possible erasing of data brought in by interrupts
+ * that have moved the tail page and are currently on it.
+ *
+ * We add a counter to the write field to denote this.
+ */
+ old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
+ old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
+
+ /*
+ * Just make sure we have seen our old_write and synchronize
+ * with any interrupts that come in.
+ */
+ barrier();
+
+ /*
+ * If the tail page is still the same as what we think
+ * it is, then it is up to us to update the tail
+ * pointer.
+ */
+ if (tail_page == cpu_buffer->tail_page) {
+ /* Zero the write counter */
+ unsigned long val = old_write & ~RB_WRITE_MASK;
+ unsigned long eval = old_entries & ~RB_WRITE_MASK;
+
+ /*
+ * This will only succeed if an interrupt did
+ * not come in and change it. In which case, we
+ * do not want to modify it.
+ *
+ * We add (void) to let the compiler know that we do not care
+ * about the return value of these functions. We use the
+ * cmpxchg to only update if an interrupt did not already
+ * do it for us. If the cmpxchg fails, we don't care.
+ */
+ (void)local_cmpxchg(&next_page->write, old_write, val);
+ (void)local_cmpxchg(&next_page->entries, old_entries, eval);
+
+ /*
+ * No need to worry about races with clearing out the commit.
+ * it only can increment when a commit takes place. But that
+ * only happens in the outer most nested commit.
+ */
+ local_set(&next_page->page->commit, 0);
+
+ old_tail = cmpxchg(&cpu_buffer->tail_page,
+ tail_page, next_page);
+
+ if (old_tail == tail_page)
+ ret = 1;
+ }
+
+ return ret;
+}
+
+static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page *bpage)
+{
+ unsigned long val = (unsigned long)bpage;
+
+ if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
+ return 1;
+
+ return 0;
+}
+
+/**
+ * rb_check_list - make sure a pointer to a list has the last bits zero
+ */
+static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
+ struct list_head *list)
+{
+ if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
+ return 1;
+ if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
+ return 1;
+ return 0;
+}
+