--- /dev/null
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * Jun 2010
+ * Jean-Marc Pigeon <jmp@safe.ca>
+ *
+ * This file groups all the procedures involved in
+ * system logging.
+ * The system log needs to be containerized so that
+ * critical data does not cross between the physical host
+ * layer and the container layer.
+ *
+ * The principle is to keep a containerized ring buffer
+ * to which the container's kernel data is redirected, and
+ * where it is kept and managed.
+ *
+ * The containerized syslog is activated when the CLONE_SYSLOG
+ * condition is true.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include <linux/kref.h>
+#include <linux/rcupdate.h>
+#include <linux/user_namespace.h>
+#include <linux/syslog.h>
+
+/*
+ * Statically allocated ring buffer, sized by CONFIG_LOG_BUF_SHIFT,
+ * that backs the syslog namespace assigned to the init process.
+ */
+#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
+static char __log_buf[__LOG_BUF_LEN];
+
+/*
+ * Syslog namespace of the init process, backed by __log_buf.
+ * Its reference count starts at 2.
+ */
+struct syslog_ns init_syslog_ns = {
+	.kref.refcount	= ATOMIC_INIT(2),
+	.handle		= 1,	/* pid of the kernel INIT process */
+	.logbuf_lock	= __SPIN_LOCK_INITIALIZER(logbuf_lock),
+	.buf_len	= __LOG_BUF_LEN,
+	.buf		= __log_buf,
+};
+
+/*
+ * Singly linked list of every syslog namespace currently allocated.
+ * The statically defined head entry refers to the kernel syslog
+ * (init_syslog_ns) and is never unlinked.
+ */
+struct log_list {
+	spinlock_t list_lock;		/* serializes access to this entry */
+	struct log_list *next;		/* following entry, NULL at the tail */
+	struct syslog_ns *syslog_ns;	/* namespace tracked by this entry */
+} log_list = {
+	.list_lock	= __SPIN_LOCK_INITIALIZER(list_lock),
+	.next		= NULL,
+	.syslog_ns	= &init_syslog_ns,
+};
+/*
+ * Unlink the list entry that tracks @syslog_ns and free that entry.
+ *
+ * The walk starts behind the static head, so the head entry itself is
+ * never removed.  Only the first matching entry is unlinked; nothing
+ * happens when no entry refers to @syslog_ns.  The namespace itself is
+ * not freed here, only its list entry.
+ */
+static void removing_syslog_ns(struct syslog_ns *syslog_ns)
+{
+	struct log_list *prev;
+
+	for (prev = &log_list; prev->next != NULL; prev = prev->next) {
+		struct log_list *entry;
+		unsigned long irq_state;
+		bool matched;
+
+		/* The lock taken is the one of the candidate entry. */
+		spin_lock_irqsave(&prev->next->list_lock, irq_state);
+		entry = prev->next;
+		matched = (entry->syslog_ns == syslog_ns);
+		if (matched)
+			prev->next = entry->next;
+		spin_unlock_irqrestore(&entry->list_lock, irq_state);
+
+		if (matched) {
+			kfree(entry);
+			return;
+		}
+	}
+}
+
+
+/*
+ * Append a new list entry tracking @syslog_ns at the tail of the list
+ * of known syslog namespaces.
+ *
+ * The entry is allocated with GFP_KERNEL, which may sleep, so the
+ * allocation is done up front, before any spinlock is taken and
+ * interrupts are disabled.  An allocation failure is fatal (BUG_ON),
+ * since this function has no way to report an error.
+ */
+static void adding_syslog_ns(struct syslog_ns *syslog_ns)
+{
+	struct log_list *entry;
+	struct log_list *pos;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	BUG_ON(!entry);
+	spin_lock_init(&entry->list_lock);
+	entry->syslog_ns = syslog_ns;
+
+	pos = &log_list;
+	while (pos) {
+		struct log_list *next;
+		unsigned long flags;
+
+		/*
+		 * ->next is sampled and, at the tail, rewritten under the
+		 * lock of the entry that owns it.
+		 */
+		spin_lock_irqsave(&pos->list_lock, flags);
+		next = pos->next;
+		if (!next)
+			pos->next = entry;
+		spin_unlock_irqrestore(&pos->list_lock, flags);
+
+		/* NULL once the entry has been linked: ends the walk. */
+		pos = next;
+	}
+}
+/*
+ * Release every resource tied to @syslog_ns: its entry in the list of
+ * known namespaces, its ring buffer and the namespace structure itself.
+ *
+ * A NULL @syslog_ns is accepted and ignored.  Always returns NULL, so
+ * the caller can overwrite its now stale pointer with the result.
+ */
+static struct syslog_ns *free_all_syslog_ns(struct syslog_ns *syslog_ns)
+{
+	if (!syslog_ns)
+		return NULL;
+
+	removing_syslog_ns(syslog_ns);
+	kfree(syslog_ns->buf);
+	kfree(syslog_ns);
+	return NULL;
+}
+
+
+/*
+ * Allocate a syslog namespace together with a zeroed ring buffer of
+ * @container_buf_len bytes, and register it in the list of known
+ * namespaces.
+ *
+ * The new namespace takes the pid of the calling task as its handle and
+ * starts with a freshly initialized reference count.
+ *
+ * Returns the new namespace, or NULL when either allocation fails.
+ */
+static struct syslog_ns *malloc_syslog_ns(unsigned container_buf_len)
+{
+	struct syslog_ns *fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
+	char *ring = kzalloc(container_buf_len, GFP_KERNEL);
+
+	if (!fresh || !ring)
+		goto out_free;
+
+	kref_init(&fresh->kref);
+	spin_lock_init(&fresh->logbuf_lock);
+	fresh->handle = current->pid;
+	fresh->buf_len = container_buf_len;
+	fresh->buf = ring;
+	adding_syslog_ns(fresh);
+	return fresh;
+
+out_free:
+	/* kfree() ignores NULL, so whichever half succeeded is released. */
+	kfree(ring);
+	kfree(fresh);
+	return NULL;
+}
+/*
+ * Look up the syslog namespace whose handle equals @pid.
+ *
+ * The list is scanned from the static head onwards, without taking any
+ * of the list locks.  Returns the first matching namespace, or NULL
+ * when no entry has that handle.
+ */
+static struct syslog_ns *find_its_syslog_ns(pid_t pid)
+{
+	struct log_list *entry;
+
+	for (entry = &log_list; entry != NULL; entry = entry->next) {
+		if (entry->syslog_ns->handle == pid)
+			return entry->syslog_ns;
+	}
+	return NULL;
+}
+/*
+ * Grow (never shrink) the ring buffer of a syslog namespace.
+ *
+ * @syslog_ns: namespace to resize; when NULL, a brand new namespace of
+ *             @container_buf_len bytes is allocated instead.
+ * @container_buf_len: requested buffer size in bytes.
+ *
+ * init_syslog_ns gets its larger buffer from bootmem; any other
+ * namespace gets it from kzalloc().  In both cases the old content is
+ * copied to the start of the new buffer under the namespace's
+ * logbuf_lock.
+ *
+ * Returns the namespace on success (for a NULL @syslog_ns: the new
+ * namespace, or NULL if it could not be allocated),
+ * ERR_PTR(-EINVAL) when the request is smaller than the current buffer,
+ * or ERR_PTR(-ENOMEM) when the larger buffer cannot be allocated.
+ */
+struct syslog_ns *resize_syslog_ns(struct syslog_ns *syslog_ns,
+				   unsigned container_buf_len)
+{
+	/* No namespace yet: hand back a freshly allocated one. */
+	if (!syslog_ns)
+		return malloc_syslog_ns(container_buf_len);
+
+	if ((syslog_ns == &init_syslog_ns) &&
+	    (container_buf_len > syslog_ns->buf_len)) {
+		int prev_len = syslog_ns->buf_len;
+		char *prev_buf = syslog_ns->buf;
+		char *bigger = alloc_bootmem(container_buf_len);
+		unsigned long irq_state;
+
+		if (!bigger) {
+			(void) printk(KERN_WARNING
+				      "log_buf_len: allocation failed\n");
+			return ERR_PTR(-ENOMEM);
+		}
+		spin_lock_irqsave(&syslog_ns->logbuf_lock, irq_state);
+		(void) memmove(bigger, prev_buf, prev_len);
+		syslog_ns->buf = bigger;
+		syslog_ns->buf_len = container_buf_len;
+		spin_unlock_irqrestore(&syslog_ns->logbuf_lock, irq_state);
+		/* The static __log_buf is never handed to free_bootmem(). */
+		if (prev_buf != __log_buf)
+			(void) free_bootmem((unsigned long)prev_buf,
+					    prev_len);
+	}
+
+	if (syslog_ns->buf_len > container_buf_len) {
+		(void) printk(KERN_WARNING "log_buf_len: Not allowed "
+			      "to decrease syslog buffer\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (syslog_ns->buf_len < container_buf_len) {
+		char *prev_buf = syslog_ns->buf;
+		char *bigger = kzalloc(container_buf_len, GFP_KERNEL);
+		unsigned long irq_state;
+
+		if (!bigger)
+			return ERR_PTR(-ENOMEM);
+		spin_lock_irqsave(&syslog_ns->logbuf_lock, irq_state);
+		(void) memmove(bigger, prev_buf, syslog_ns->buf_len);
+		syslog_ns->buf = bigger;
+		syslog_ns->buf_len = container_buf_len;
+		spin_unlock_irqrestore(&syslog_ns->logbuf_lock, irq_state);
+		kfree(prev_buf);
+	}
+
+	(void) printk(KERN_NOTICE "log_buf_len: %u\n", syslog_ns->buf_len);
+	return syslog_ns;
+}
+
+
+/*
+ * Pick the syslog namespace for a new task.
+ *
+ * Without CLONE_SYSLOG in @flags the namespace passed in is shared: its
+ * reference count is incremented and it is returned unchanged.  With
+ * CLONE_SYSLOG a new namespace with a CONTAINER_BUF_LEN byte buffer is
+ * allocated and returned instead; that result is NULL when the
+ * allocation fails.
+ *
+ * @current_syslog_ns must not be NULL (enforced with BUG_ON).
+ */
+struct syslog_ns *copy_syslog_ns(unsigned long flags,
+				 struct syslog_ns *current_syslog_ns)
+{
+/* 4096 should be enough for a container syslog */
+#define CONTAINER_BUF_LEN 4096
+
+	BUG_ON(!current_syslog_ns);
+
+	if ((flags & CLONE_SYSLOG) != CLONE_SYSLOG) {
+		/* Shared namespace: account for the additional user. */
+		kref_get(&current_syslog_ns->kref);
+		return current_syslog_ns;
+	}
+	return malloc_syslog_ns(CONTAINER_BUF_LEN);
+}
+
+
+/*
+ * kref release callback: map @kref back to the syslog namespace that
+ * embeds it and free everything tied to that namespace.
+ */
+void free_syslog_ns(struct kref *kref)
+{
+	(void) free_all_syslog_ns(container_of(kref, struct syslog_ns, kref));
+}
+
+
+/*
+ * Return the syslog namespace of the calling task, as recorded in its
+ * nsproxy.
+ *
+ * When the task has no nsproxy, or the nsproxy has no syslog namespace,
+ * the host kernel namespace (init_syslog_ns) is returned so that
+ * logging is still covered.  Never returns NULL.
+ */
+struct syslog_ns *current_syslog_ns(void)
+{
+	if (current->nsproxy && current->nsproxy->syslog_ns)
+		return current->nsproxy->syslog_ns;
+
+	/* let's try to cover the log anyway! */
+	return &init_syslog_ns;
+}
+/*
+ * Replace the syslog namespace of the calling task with @syslog_ns.
+ *
+ * The previous namespace is looked up exactly once, so the lock that is
+ * taken and the lock that is released are guaranteed to be the same
+ * one.  The switch is performed under that namespace's logbuf_lock.
+ *
+ * Nothing is switched when @syslog_ns is NULL, nor when the calling
+ * task has no nsproxy to store the new namespace in (in which case the
+ * value returned is init_syslog_ns, as reported by current_syslog_ns()).
+ *
+ * Returns the namespace that was current before the call.
+ */
+struct syslog_ns *switch_syslog_ns(struct syslog_ns *syslog_ns)
+{
+	struct syslog_ns *old = current_syslog_ns();
+	unsigned long flags;
+
+	spin_lock_irqsave(&old->logbuf_lock, flags);
+	/* current_syslog_ns() tolerates a NULL nsproxy; so must we. */
+	if (syslog_ns && current->nsproxy)
+		current->nsproxy->syslog_ns = syslog_ns;
+	spin_unlock_irqrestore(&old->logbuf_lock, flags);
+	return old;
+}
+
+
+/*
+ * Locate the syslog namespace owned by @pid or, failing that, by the
+ * closest task in its parent lineage that owns one.
+ *
+ * Starting from @pid, each pid is first matched against the handles of
+ * the known namespaces; on a miss the walk moves on to the pid of the
+ * task's real parent.  The walk stops when it reaches pid 1 (or below),
+ * when the task cannot be found, or when it has no real parent.
+ *
+ * The task lookup and the real_parent dereference are done inside an
+ * RCU read-side critical section, so that the task structures cannot be
+ * freed while they are being inspected.  RCU read-side sections nest,
+ * so callers already holding rcu_read_lock() are unaffected.
+ *
+ * NOTE(review): the lookup uses find_task_by_vpid() while the parent's
+ * pid is read from task_struct->pid; confirm both are meant to be in
+ * the same pid namespace.
+ *
+ * Returns the matching namespace, or the namespace of the list head
+ * (the kernel syslog) when the lineage owns none.
+ */
+struct syslog_ns *find_syslog_ns_bypid(pid_t pid)
+{
+	while (pid > 1) {
+		struct syslog_ns *its_ns;
+		struct task_struct *ns_task;
+		struct task_struct *parent;
+
+		its_ns = find_its_syslog_ns(pid);
+		if (its_ns)
+			return its_ns;
+
+		rcu_read_lock();
+		ns_task = find_task_by_vpid(pid);
+		parent = ns_task ? rcu_dereference(ns_task->real_parent) : NULL;
+		if (parent)
+			pid = parent->pid;
+		rcu_read_unlock();
+
+		/* Only tested, never dereferenced, outside the RCU section. */
+		if (!parent)
+			break;
+	}
+	return log_list.syslog_ns;
+}
+
+
+/*
+ * Drop one reference on @ns.  When the last reference goes away,
+ * free_syslog_ns() is invoked to release the namespace memory.
+ */
+void put_syslog_ns(struct syslog_ns *ns)
+{
+	struct kref *ref = &ns->kref;
+
+	kref_put(ref, free_syslog_ns);
+}