Containerized syslog working properly
[safe/jmp/linux-2.6] / kernel / syslog.c
diff --git a/kernel/syslog.c b/kernel/syslog.c
new file mode 100644 (file)
index 0000000..44b1d90
--- /dev/null
@@ -0,0 +1,355 @@
+/*
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation, version 2 of the
+ *  License.
+ *
+ *  Jun 2010
+ *  Jean-Marc Pigeon   <jmp@safe.ca>
+ *
+ *  Purpose is to regroup all procedures involved
+ *  in the system log.
+ *  The system log needs to be containerized to avoid
+ *  leaking critical data between the physical host layer
+ *  and the container layer.
+ *
+ *  The principle is to keep a containerized ring buffer
+ *  where container kernel data is redirected, kept and
+ *  managed.
+ *
+ *  Containerized syslog is activated if the CLONE_SYSLOG
+ *  condition is true.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include <linux/kref.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/user_namespace.h>
+#include <linux/syslog.h>
+#include <asm/page.h>
+
+/*
+ * Static buffer of (1 << CONFIG_LOG_BUF_SHIFT) bytes backing init_syslog_ns,
+ * the syslog assigned to the init process itself.
+ * NOTE(review): refcount starts at 2, presumably so it never drops to 0.
+ */
+#define __LOG_BUF_LEN   (1 << CONFIG_LOG_BUF_SHIFT)
+static char __log_buf[__LOG_BUF_LEN];
+
+struct syslog_ns init_syslog_ns = {
+       .kref = {
+               .refcount       = ATOMIC_INIT(2),
+       },
+       .handle = 1,    /* pid of the kernel INIT process */
+       .logbuf_lock = __SPIN_LOCK_INITIALIZER(logbuf_lock),
+       .buf_len = __LOG_BUF_LEN,       /* size of the static buffer */
+       .buf = __log_buf                /* the static buffer itself */
+};
+
+/*
+ * Singly linked list of every syslog namespace currently allocated.
+ * The head node is static and refers to the kernel's own syslog
+ * (init_syslog_ns); that first member can't be removed.
+ */
+struct log_list {
+       spinlock_t list_lock;           /* serialises list access       */
+       struct log_list *next;          /* following node, NULL at tail */
+       struct syslog_ns *syslog_ns;    /* namespace this node refers to */
+};
+
+struct log_list log_list = {
+       .list_lock = __SPIN_LOCK_INITIALIZER(list_lock),
+       .next = NULL,
+       .syslog_ns = &init_syslog_ns,
+};
+/*
+ * Unlink the list node that refers to syslog_ns and free that node.
+ *
+ * @syslog_ns: namespace whose node must leave log_list.
+ *
+ * The static head node is never examined as a removal candidate, so the
+ * kernel's own syslog stays on the list. Nothing happens when no node
+ * refers to syslog_ns. Only the list node is freed here, not the
+ * namespace itself.
+ */
+static void removing_syslog_ns(struct syslog_ns *syslog_ns)
+{
+       struct log_list *prev = &log_list;
+
+       while (prev) {
+               struct log_list *victim;
+               unsigned long flags;
+
+               /*
+                * prev->list_lock is the lock adding_syslog_ns() holds
+                * while it writes prev->next, so prev->next is read once
+                * and rewritten under that same lock.
+                */
+               spin_lock_irqsave(&prev->list_lock, flags);
+               victim = prev->next;
+               if (victim && victim->syslog_ns == syslog_ns) {
+                       /*
+                        * Also hold the victim's own lock so that nothing
+                        * is appended right behind it while it is being
+                        * unlinked. Locks are always taken in list order.
+                        */
+                       spin_lock_nested(&victim->list_lock,
+                                        SINGLE_DEPTH_NESTING);
+                       prev->next = victim->next;
+                       spin_unlock(&victim->list_lock);
+                       spin_unlock_irqrestore(&prev->list_lock, flags);
+                       kfree(victim);
+                       return;
+               }
+               spin_unlock_irqrestore(&prev->list_lock, flags);
+               /* victim was sampled under the lock; NULL ends the walk */
+               prev = victim;
+       }
+}
+
+/*
+ * Append a node referring to syslog_ns at the tail of log_list.
+ *
+ * @syslog_ns: namespace to record in the list of known syslogs.
+ *
+ * The node is allocated with GFP_KERNEL, so the caller must be allowed
+ * to sleep. Allocation failure is fatal (BUG_ON).
+ */
+static void adding_syslog_ns(struct syslog_ns *syslog_ns)
+{
+       struct log_list *node;
+       struct log_list *cur;
+
+       /*
+        * GFP_KERNEL may sleep, which is not allowed while a spinlock is
+        * held with interrupts off: build the node before locking.
+        */
+       node = kzalloc(sizeof(*node), GFP_KERNEL);
+       BUG_ON(!node);
+       spin_lock_init(&node->list_lock);
+       node->syslog_ns = syslog_ns;
+
+       cur = &log_list;
+       while (cur) {
+               struct log_list *next;
+               unsigned long flags;
+
+               spin_lock_irqsave(&cur->list_lock, flags);
+               next = cur->next;
+               if (!next)
+                       cur->next = node;       /* cur was the tail */
+               spin_unlock_irqrestore(&cur->list_lock, flags);
+               /* next was sampled under the lock; NULL means linked */
+               cur = next;
+       }
+}
+/*
+ * Release every resource tied to a syslog namespace: its list node,
+ * its log buffer and the namespace structure itself.
+ *
+ * @syslog_ns: namespace to destroy, may be NULL.
+ *
+ * Returns NULL once the namespace has been freed (or when NULL was
+ * passed in). init_syslog_ns is statically allocated and is left
+ * untouched; in that case the pointer is returned unchanged.
+ */
+static struct syslog_ns *free_all_syslog_ns(struct syslog_ns *syslog_ns)
+{
+       if (!syslog_ns)
+               return NULL;
+
+       /*
+        * The kernel's own syslog is a static object whose buffer is
+        * static or bootmem memory: handing either to kfree() would
+        * corrupt the allocator.
+        */
+       if (syslog_ns == &init_syslog_ns)
+               return syslog_ns;
+
+       removing_syslog_ns(syslog_ns);
+       kfree(syslog_ns->buf);
+       kfree(syslog_ns);
+       return NULL;
+}
+
+/*
+ * Allocate and initialise a syslog namespace together with its buffer.
+ *
+ * @container_buf_len: size in bytes of the log buffer to allocate.
+ *
+ * The new namespace takes the pid of the current task as its handle and
+ * is recorded in the list of known syslogs.
+ * Returns the namespace, or NULL when either allocation failed.
+ */
+static struct syslog_ns *malloc_syslog_ns(unsigned container_buf_len)
+{
+       struct syslog_ns *fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
+       char *area = kzalloc(container_buf_len, GFP_KERNEL);
+
+       if (fresh && area) {
+               kref_init(&fresh->kref);
+               spin_lock_init(&fresh->logbuf_lock);
+               fresh->handle = current->pid;
+               fresh->buf_len = container_buf_len;
+               fresh->buf = area;
+               adding_syslog_ns(fresh);
+               return fresh;
+       }
+
+       /* at least one allocation failed; kfree() accepts NULL */
+       kfree(area);
+       kfree(fresh);
+       return NULL;
+}
+/*
+ * Look up the syslog namespace whose handle equals the submitted pid.
+ *
+ * @pid: handle to search for.
+ *
+ * Returns the matching namespace, or NULL when no list entry carries
+ * that handle. The list is walked without taking any lock.
+ */
+static struct syslog_ns *find_its_syslog_ns(pid_t pid)
+{
+       struct log_list *node;
+
+       for (node = &log_list; node; node = node->next) {
+               struct syslog_ns *candidate = node->syslog_ns;
+
+               if (candidate->handle == pid)
+                       return candidate;
+       }
+       return NULL;
+}
+/*
+ * Grow (never shrink) the buffer of a syslog namespace.
+ *
+ * @syslog_ns:         namespace to resize, or NULL to get a brand new one.
+ * @container_buf_len: requested buffer size in bytes.
+ *
+ * init_syslog_ns is grown with bootmem memory, any other namespace with
+ * kzalloc(). The old content is copied to the start of the new buffer
+ * under logbuf_lock before the buffer pointer is switched.
+ *
+ * Returns:
+ *   - syslog_ns itself once its buffer is at least container_buf_len;
+ *   - the result of malloc_syslog_ns() when syslog_ns is NULL (this is
+ *     NULL, not an ERR_PTR, when that allocation fails);
+ *   - ERR_PTR(-EINVAL) when the request would shrink the buffer;
+ *   - ERR_PTR(-ENOMEM) when the larger buffer cannot be allocated.
+ */
+struct syslog_ns *resize_syslog_ns(struct syslog_ns *syslog_ns,
+                       unsigned container_buf_len)
+{
+       if ((syslog_ns == &init_syslog_ns) &&
+               (container_buf_len > syslog_ns->buf_len)) {
+               int old_buf_len;
+               char *old_buf;
+               char *new_buf;
+               unsigned long flags;
+
+               old_buf_len = syslog_ns->buf_len;
+               old_buf = syslog_ns->buf;
+               new_buf = alloc_bootmem(container_buf_len);
+               if (!new_buf) {
+                       (void) printk(KERN_WARNING
+                               "log_buf_len: allocation failed\n");
+                       return ERR_PTR(-ENOMEM);
+               }
+               spin_lock_irqsave(&(syslog_ns->logbuf_lock), flags);
+               (void) memmove(new_buf, old_buf, old_buf_len);
+               syslog_ns->buf = new_buf;
+               syslog_ns->buf_len = container_buf_len;
+               spin_unlock_irqrestore(&(syslog_ns->logbuf_lock), flags);
+               /*
+                * The static __log_buf can't be given back. A previous
+                * bootmem buffer can, but free_bootmem() expects a
+                * physical address, hence __pa().
+                */
+               if (old_buf != __log_buf)
+                       free_bootmem(__pa(old_buf), old_buf_len);
+       }
+       if (!syslog_ns)
+               return malloc_syslog_ns(container_buf_len);
+       if (syslog_ns->buf_len > container_buf_len) {
+               (void) printk(KERN_WARNING "log_buf_len: Not allowed "
+                                       "to decrease syslog buffer\n");
+               return ERR_PTR(-EINVAL);
+       }
+       /* not reached for init_syslog_ns: its buf_len was updated above */
+       if (syslog_ns->buf_len < container_buf_len) {
+               char *old_buf;
+               char *new_buf;
+               unsigned long flags;
+
+               old_buf = syslog_ns->buf;
+               new_buf = kzalloc(container_buf_len, GFP_KERNEL);
+               if (!new_buf)
+                       return ERR_PTR(-ENOMEM);
+               spin_lock_irqsave(&(syslog_ns->logbuf_lock), flags);
+               (void) memmove(new_buf, old_buf, syslog_ns->buf_len);
+               syslog_ns->buf = new_buf;
+               syslog_ns->buf_len = container_buf_len;
+               spin_unlock_irqrestore(&(syslog_ns->logbuf_lock), flags);
+               kfree(old_buf);
+       }
+       (void) printk(KERN_NOTICE "log_buf_len: %u\n", syslog_ns->buf_len);
+       return syslog_ns;
+}
+
+/*
+ * Select the syslog namespace for a new task.
+ *
+ * @flags:             clone flags of the new task.
+ * @current_syslog_ns: namespace in use so far, must not be NULL.
+ *
+ * Without CLONE_SYSLOG the current namespace is shared and gains one
+ * reference. With CLONE_SYSLOG a new namespace is allocated and
+ * returned instead (NULL when that allocation fails).
+ */
+
+/* buffer size of a container syslog; 4096 should be enough */
+#define CONTAINER_BUF_LEN 4096
+
+struct syslog_ns *copy_syslog_ns(unsigned long flags,
+                               struct syslog_ns *current_syslog_ns)
+{
+       BUG_ON(!current_syslog_ns);
+
+       if ((flags & CLONE_SYSLOG) != CLONE_SYSLOG) {
+               /* shared namespace: account for the additional user */
+               kref_get(&current_syslog_ns->kref);
+               return current_syslog_ns;
+       }
+       return malloc_syslog_ns(CONTAINER_BUF_LEN);
+}
+
+/*
+ * Release callback for the kref embedded in a syslog namespace.
+ *
+ * @kref: the kref member of the namespace to destroy.
+ *
+ * put_syslog_ns() hands this to kref_put(); it frees everything tied
+ * to the enclosing namespace.
+ */
+void free_syslog_ns(struct kref *kref)
+{
+       (void) free_all_syslog_ns(container_of(kref, struct syslog_ns, kref));
+}
+
+/*
+ * Return the syslog namespace of the current task.
+ *
+ * The namespace comes from current->nsproxy. When the task has no
+ * nsproxy, or the nsproxy carries no syslog namespace, the host
+ * kernel's own init_syslog_ns is reported so logging is still covered.
+ */
+struct syslog_ns *current_syslog_ns(void)
+{
+       if (current->nsproxy && current->nsproxy->syslog_ns)
+               return current->nsproxy->syslog_ns;
+
+       /* nothing usable attached to the task: use the host syslog */
+       return &init_syslog_ns;
+}
+/*
+ * Replace the syslog namespace of the current task with another one.
+ *
+ * @syslog_ns: namespace to install; NULL leaves the task unchanged.
+ *
+ * Returns the namespace that was current before the call. The switch
+ * is skipped when the task has no nsproxy to store the new namespace
+ * in; the return value is then init_syslog_ns, as reported by
+ * current_syslog_ns(). No reference count is changed here.
+ */
+struct syslog_ns *switch_syslog_ns(struct syslog_ns *syslog_ns)
+{
+       unsigned long flags;
+       struct syslog_ns *old;
+
+       /*
+        * Resolve the current namespace once, so that the lock taken
+        * and the lock released are guaranteed to be the same one.
+        */
+       old = current_syslog_ns();
+       spin_lock_irqsave(&old->logbuf_lock, flags);
+       /* current_syslog_ns() tolerates a missing nsproxy; so must we */
+       if (syslog_ns && current->nsproxy)
+               current->nsproxy->syslog_ns = syslog_ns;
+       spin_unlock_irqrestore(&old->logbuf_lock, flags);
+       return old;
+}
+
+/*
+ * Locate the syslog namespace owned by a given pid or by one of its
+ * ancestors.
+ *
+ * @pid: pid to start the search from.
+ *
+ * The pid is first looked up as a namespace handle; on a miss the
+ * search moves to the pid of the task's real parent, and so on while
+ * the pid stays above 1. Returns the first namespace found, or the
+ * namespace of the list head (the kernel's own syslog) when the walk
+ * ends without a match.
+ *
+ * NOTE(review): handles are stored from current->pid while tasks are
+ * looked up with find_task_by_vpid(); confirm both use the same pid
+ * numbering when pid namespaces are in play.
+ */
+struct syslog_ns *find_syslog_ns_bypid(pid_t pid)
+{
+       while (pid > 1) {
+               struct syslog_ns *its_ns;
+               struct task_struct *ns_task;
+               pid_t parent_pid = 0;   /* 0: no task or no parent */
+
+               its_ns = find_its_syslog_ns(pid);
+               if (its_ns)
+                       return its_ns;
+
+               /*
+                * find_task_by_vpid() must run under rcu_read_lock();
+                * the task and its parent are only guaranteed to stay
+                * around inside the read-side critical section, so the
+                * parent pid is copied out before leaving it.
+                */
+               rcu_read_lock();
+               ns_task = find_task_by_vpid(pid);
+               if (ns_task) {
+                       struct task_struct *parent;
+
+                       parent = rcu_dereference(ns_task->real_parent);
+                       if (parent)
+                               parent_pid = parent->pid;
+               }
+               rcu_read_unlock();
+
+               /* a pid of 0 fails the loop condition and ends the walk */
+               pid = parent_pid;
+       }
+       return log_list.syslog_ns;
+}
+
+/*
+ * Drop one reference on a syslog namespace.
+ *
+ * @ns: namespace to release; it is dereferenced, so it must not be NULL.
+ *
+ * When this was the last reference, free_syslog_ns() runs and the
+ * memory is freed.
+ */
+void put_syslog_ns(struct syslog_ns *ns)
+{
+       struct kref *ref = &ns->kref;
+
+       kref_put(ref, free_syslog_ns);
+}