Merge branch 'linus' into cont_syslog master cont_sys_log_2
author Jean-Marc Pigeon <jmp@kernel.safe.ca>
Thu, 10 Jun 2010 20:22:51 +0000 (16:22 -0400)
committer Jean-Marc Pigeon <jmp@kernel.safe.ca>
Thu, 10 Jun 2010 20:22:51 +0000 (16:22 -0400)
Makefile
include/linux/netdevice.h
include/linux/nsproxy.h
include/linux/syslog.h
kernel/Makefile
kernel/nsproxy.c
kernel/printk.c
kernel/syslog.c [new file with mode: 0644]
net/core/rtnetlink.c
net/ipv4/netfilter/ipt_LOG.c

index 654c31a..31c24d5 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 35
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc2-JMP-syslog-1
 NAME = Sheep on Meth
 
 # *DOCUMENTATION*
index 40291f3..75a3ef1 100644 (file)
@@ -1020,6 +1020,8 @@ struct net_device {
        /* Network namespace this network device is inside */
        struct net              *nd_net;
 #endif
+       /* to assign a syslog channel according to device ownership */
+       struct syslog_ns        *syslog_ns;
 
        /* mid-layer private */
        void                    *ml_priv;
index 7b370c7..cacd734 100644 (file)
@@ -29,6 +29,7 @@ struct nsproxy {
        struct mnt_namespace *mnt_ns;
        struct pid_namespace *pid_ns;
        struct net           *net_ns;
+       struct syslog_ns     *syslog_ns;
 };
 extern struct nsproxy init_nsproxy;
 
index 3891139..102908c 100644 (file)
 #define SYSLOG_FROM_CALL 0
 #define SYSLOG_FROM_FILE 1
 
+#define        CLONE_SYSLOG \
+       (CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | \
+        CLONE_NEWPID | CLONE_NEWNET)
+/* Syslog ring-buffer state used to route log data by container of origin */
+struct syslog_ns {
+       struct kref kref;       /* reference count for this syslog_ns      */
+       int handle;             /* handle number; vprintk shows it as ns_id */
+       struct syslog_ns *prvns;/* parent process's own syslog_ns          */
+       spinlock_t logbuf_lock; /* protects buf and the fields below       */
+       unsigned log_start;     /* Index: next char to be read by syslog() */
+       unsigned con_start;     /* Index: next char to be sent to consoles */
+       unsigned log_end;       /* Index: most-recently-written-char + 1   */
+       unsigned logged_chars;  /* Num chars produced since last read+clear */
+       unsigned buf_len;       /* size of buf; power of 2 (LOG_BUF_MASK)  */
+       char *buf;              /* ring buffer storage                     */
+};
+
+/*
+ * Static structure used by nsproxy
+ */
+extern struct syslog_ns init_syslog_ns;
+extern struct syslog_ns *resize_syslog_ns(struct syslog_ns *syslog_ns,
+                                       unsigned container_buf_len);
+extern struct syslog_ns *copy_syslog_ns(unsigned long flags,
+                                       struct syslog_ns *current_syslog_ns);
+extern void free_syslog_ns(struct kref *kref);
+extern struct syslog_ns *current_syslog_ns(void);
+extern struct syslog_ns *switch_syslog_ns(struct syslog_ns *syslog_ns);
+extern struct syslog_ns *find_syslog_ns_bypid(pid_t pid);
+extern void put_syslog_ns(struct syslog_ns *ns);
+
+
+
 int do_syslog(int type, char __user *buf, int count, bool from_file);
 
 #endif /* _LINUX_SYSLOG_H */
index 057472f..441afc7 100644 (file)
@@ -10,7 +10,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
            kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
            hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
            notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
-           async.o range.o
+           async.o range.o syslog.o
 obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o
 obj-y += groups.o
 
index f74e6c0..7ade5d2 100644 (file)
@@ -15,6 +15,7 @@
 
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/syslog.h>
 #include <linux/nsproxy.h>
 #include <linux/init_task.h>
 #include <linux/mnt_namespace.h>
@@ -93,8 +94,17 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
                goto out_net;
        }
 
+       new_nsp->syslog_ns = copy_syslog_ns(flags, tsk->nsproxy->syslog_ns);
+       if (IS_ERR(new_nsp->syslog_ns)) {
+               err = PTR_ERR(new_nsp->syslog_ns);
+               goto out_syslog;
+       }
+
        return new_nsp;
 
+out_syslog:
+       if (new_nsp->net_ns)
+               put_net(new_nsp->net_ns);
 out_net:
        if (new_nsp->pid_ns)
                put_pid_ns(new_nsp->pid_ns);
@@ -163,6 +173,8 @@ out:
 
 void free_nsproxy(struct nsproxy *ns)
 {
+       if (ns->syslog_ns)
+               put_syslog_ns(ns->syslog_ns);
        if (ns->mnt_ns)
                put_mnt_ns(ns->mnt_ns);
        if (ns->uts_ns)
index 444b770..7f60d8a 100644 (file)
@@ -33,7 +33,6 @@
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
-#include <linux/kdb.h>
 #include <linux/ratelimit.h>
 #include <linux/kmsg_dump.h>
 #include <linux/syslog.h>
@@ -53,8 +52,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
 {
 }
 
-#define __LOG_BUF_LEN  (1 << CONFIG_LOG_BUF_SHIFT)
-
 /* printk's without a loglevel use this.. */
 #define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */
 
@@ -97,23 +94,19 @@ EXPORT_SYMBOL_GPL(console_drivers);
  */
 static int console_locked, console_suspended;
 
-/*
- * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
- * It is also used in interesting ways to provide interlocking in
- * release_console_sem().
- */
-static DEFINE_SPINLOCK(logbuf_lock);
-
-#define LOG_BUF_MASK (log_buf_len-1)
-#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
+#define LOG_BUF_MASK(ns) ((ns)->buf_len-1)
+#define LOG_BUF(ns, idx) ((ns)->buf[(idx) & LOG_BUF_MASK(ns)])
 
 /*
- * The indices into log_buf are not constrained to log_buf_len - they
- * must be masked before subscripting
+ * Accessors for the ring buffer of the 'syslog_ns' pointer in scope
  */
-static unsigned log_start;     /* Index into log_buf: next char to be read by syslog() */
-static unsigned con_start;     /* Index into log_buf: next char to be sent to consoles */
-static unsigned log_end;       /* Index into log_buf: most-recently-written-char + 1 */
+#define sys_log_lock (syslog_ns->logbuf_lock)
+#define sys_log_start (syslog_ns->log_start)
+#define sys_log_end (syslog_ns->log_end)
+#define sys_log_con_start (syslog_ns->con_start)
+#define sys_log_buf_len (syslog_ns->buf_len)
+#define sys_log_logged_chars (syslog_ns->logged_chars)
+#define sys_log_buf (syslog_ns->buf)
 
 /*
  *     Array of consoles built from command line options (console=)
@@ -141,10 +134,6 @@ static int console_may_schedule;
 
 #ifdef CONFIG_PRINTK
 
-static char __log_buf[__LOG_BUF_LEN];
-static char *log_buf = __log_buf;
-static int log_buf_len = __LOG_BUF_LEN;
-static unsigned logged_chars; /* Number of chars produced since last read+clear operation */
 static int saved_console_loglevel = -1;
 
 #ifdef CONFIG_KEXEC
@@ -158,49 +147,23 @@ static int saved_console_loglevel = -1;
  */
 void log_buf_kexec_setup(void)
 {
-       VMCOREINFO_SYMBOL(log_buf);
-       VMCOREINFO_SYMBOL(log_end);
-       VMCOREINFO_SYMBOL(log_buf_len);
-       VMCOREINFO_SYMBOL(logged_chars);
+       struct syslog_ns *syslog_ns = current_syslog_ns();
+
+       VMCOREINFO_SYMBOL(sys_log_buf);
+       VMCOREINFO_SYMBOL(sys_log_end);
+       VMCOREINFO_SYMBOL(sys_log_buf_len);
+       VMCOREINFO_SYMBOL(sys_log_logged_chars);
 }
 #endif
 
 static int __init log_buf_len_setup(char *str)
 {
        unsigned size = memparse(str, &str);
-       unsigned long flags;
 
-       if (size)
+       if (size) {
                size = roundup_pow_of_two(size);
-       if (size > log_buf_len) {
-               unsigned start, dest_idx, offset;
-               char *new_log_buf;
-
-               new_log_buf = alloc_bootmem(size);
-               if (!new_log_buf) {
-                       printk(KERN_WARNING "log_buf_len: allocation failed\n");
-                       goto out;
-               }
-
-               spin_lock_irqsave(&logbuf_lock, flags);
-               log_buf_len = size;
-               log_buf = new_log_buf;
-
-               offset = start = min(con_start, log_start);
-               dest_idx = 0;
-               while (start != log_end) {
-                       log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)];
-                       start++;
-                       dest_idx++;
-               }
-               log_start -= offset;
-               con_start -= offset;
-               log_end -= offset;
-               spin_unlock_irqrestore(&logbuf_lock, flags);
-
-               printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len);
+               resize_syslog_ns(&init_syslog_ns, size);
        }
-out:
        return 1;
 }
 
@@ -265,6 +228,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
        int do_clear = 0;
        char c;
        int error = 0;
+       struct syslog_ns *syslog_ns = current_syslog_ns();
 
        error = security_syslog(type, from_file);
        if (error)
@@ -287,22 +251,22 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
                        goto out;
                }
                error = wait_event_interruptible(log_wait,
-                                                       (log_start - log_end));
+                                       (sys_log_start - sys_log_end));
                if (error)
                        goto out;
                i = 0;
-               spin_lock_irq(&logbuf_lock);
-               while (!error && (log_start != log_end) && i < len) {
-                       c = LOG_BUF(log_start);
-                       log_start++;
-                       spin_unlock_irq(&logbuf_lock);
+               spin_lock_irq(&sys_log_lock);
+               while (!error && (sys_log_start != sys_log_end) && i < len) {
+                       c = LOG_BUF(syslog_ns, sys_log_start);
+                       sys_log_start++;
+                       spin_unlock_irq(&sys_log_lock);
                        error = __put_user(c,buf);
                        buf++;
                        i++;
                        cond_resched();
-                       spin_lock_irq(&logbuf_lock);
+                       spin_lock_irq(&sys_log_lock);
                }
-               spin_unlock_irq(&logbuf_lock);
+               spin_unlock_irq(&sys_log_lock);
                if (!error)
                        error = i;
                break;
@@ -323,14 +287,14 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
                        goto out;
                }
                count = len;
-               if (count > log_buf_len)
-                       count = log_buf_len;
-               spin_lock_irq(&logbuf_lock);
-               if (count > logged_chars)
-                       count = logged_chars;
+               if (count > sys_log_buf_len)
+                       count = sys_log_buf_len;
+               spin_lock_irq(&sys_log_lock);
+               if (count > sys_log_logged_chars)
+                       count = sys_log_logged_chars;
                if (do_clear)
-                       logged_chars = 0;
-               limit = log_end;
+                       sys_log_logged_chars = 0;
+               limit = sys_log_end;
                /*
                 * __put_user() could sleep, and while we sleep
                 * printk() could overwrite the messages
@@ -339,15 +303,15 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
                 */
                for (i = 0; i < count && !error; i++) {
                        j = limit-1-i;
-                       if (j + log_buf_len < log_end)
+                       if (j + sys_log_buf_len < sys_log_end)
                                break;
-                       c = LOG_BUF(j);
-                       spin_unlock_irq(&logbuf_lock);
+                       c = LOG_BUF(syslog_ns, j);
+                       spin_unlock_irq(&sys_log_lock);
                        error = __put_user(c,&buf[count-1-i]);
                        cond_resched();
-                       spin_lock_irq(&logbuf_lock);
+                       spin_lock_irq(&sys_log_lock);
                }
-               spin_unlock_irq(&logbuf_lock);
+               spin_unlock_irq(&sys_log_lock);
                if (error)
                        break;
                error = i;
@@ -366,7 +330,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
                break;
        /* Clear ring buffer */
        case SYSLOG_ACTION_CLEAR:
-               logged_chars = 0;
+               sys_log_logged_chars = 0;
                break;
        /* Disable logging to console */
        case SYSLOG_ACTION_CONSOLE_OFF:
@@ -395,11 +359,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
                break;
        /* Number of chars in the log buffer */
        case SYSLOG_ACTION_SIZE_UNREAD:
-               error = log_end - log_start;
+               error = sys_log_end - sys_log_start;
                break;
        /* Size of the log buffer */
        case SYSLOG_ACTION_SIZE_BUFFER:
-               error = log_buf_len;
+               error = sys_log_buf_len;
                break;
        default:
                error = -EINVAL;
@@ -414,26 +378,11 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
        return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
 }
 
-#ifdef CONFIG_KGDB_KDB
-/* kdb dmesg command needs access to the syslog buffer.  do_syslog()
- * uses locks so it cannot be used during debugging.  Just tell kdb
- * where the start and end of the physical and logical logs are.  This
- * is equivalent to do_syslog(3).
- */
-void kdb_syslog_data(char *syslog_data[4])
-{
-       syslog_data[0] = log_buf;
-       syslog_data[1] = log_buf + log_buf_len;
-       syslog_data[2] = log_buf + log_end -
-               (logged_chars < log_buf_len ? logged_chars : log_buf_len);
-       syslog_data[3] = log_buf + log_end;
-}
-#endif /* CONFIG_KGDB_KDB */
-
 /*
  * Call the console drivers on a range of log_buf
  */
-static void __call_console_drivers(unsigned start, unsigned end)
+static void __call_console_drivers(struct syslog_ns *syslog_ns,
+                               unsigned start, unsigned end)
 {
        struct console *con;
 
@@ -441,7 +390,8 @@ static void __call_console_drivers(unsigned start, unsigned end)
                if ((con->flags & CON_ENABLED) && con->write &&
                                (cpu_online(smp_processor_id()) ||
                                (con->flags & CON_ANYTIME)))
-                       con->write(con, &LOG_BUF(start), end - start);
+                       con->write(con, &LOG_BUF(syslog_ns, start),
+                               end - start);
        }
 }
 
@@ -460,18 +410,21 @@ early_param("ignore_loglevel", ignore_loglevel_setup);
 /*
  * Write out chars from start to end - 1 inclusive
  */
-static void _call_console_drivers(unsigned start,
-                               unsigned end, int msg_log_level)
+static void _call_console_drivers(struct syslog_ns *syslog_ns, unsigned start,
+                                 unsigned end, int msg_log_level)
 {
        if ((msg_log_level < console_loglevel || ignore_loglevel) &&
                        console_drivers && start != end) {
-               if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
+               if ((start & LOG_BUF_MASK(syslog_ns)) >
+                       (end & LOG_BUF_MASK(syslog_ns))) {
                        /* wrapped write */
-                       __call_console_drivers(start & LOG_BUF_MASK,
-                                               log_buf_len);
-                       __call_console_drivers(0, end & LOG_BUF_MASK);
+                       __call_console_drivers(syslog_ns,
+                                       start & LOG_BUF_MASK(syslog_ns),
+                                       sys_log_buf_len);
+                       __call_console_drivers(syslog_ns, 0,
+                                       end & LOG_BUF_MASK(syslog_ns));
                } else {
-                       __call_console_drivers(start, end);
+                       __call_console_drivers(syslog_ns, start, end);
                }
        }
 }
@@ -481,7 +434,8 @@ static void _call_console_drivers(unsigned start,
  * log_buf[start] to log_buf[end - 1].
  * The console_sem must be held.
  */
-static void call_console_drivers(unsigned start, unsigned end)
+static void call_console_drivers(struct syslog_ns *syslog_ns,
+                               unsigned start, unsigned end)
 {
        unsigned cur_index, start_print;
        static int msg_level = -1;
@@ -492,16 +446,16 @@ static void call_console_drivers(unsigned start, unsigned end)
        start_print = start;
        while (cur_index != end) {
                if (msg_level < 0 && ((end - cur_index) > 2) &&
-                               LOG_BUF(cur_index + 0) == '<' &&
-                               LOG_BUF(cur_index + 1) >= '0' &&
-                               LOG_BUF(cur_index + 1) <= '7' &&
-                               LOG_BUF(cur_index + 2) == '>') {
-                       msg_level = LOG_BUF(cur_index + 1) - '0';
+                               LOG_BUF(syslog_ns, cur_index + 0) == '<' &&
+                               LOG_BUF(syslog_ns, cur_index + 1) >= '0' &&
+                               LOG_BUF(syslog_ns, cur_index + 1) <= '7' &&
+                               LOG_BUF(syslog_ns, cur_index + 2) == '>') {
+                       msg_level = LOG_BUF(syslog_ns, cur_index + 1) - '0';
                        cur_index += 3;
                        start_print = cur_index;
                }
                while (cur_index != end) {
-                       char c = LOG_BUF(cur_index);
+                       char c = LOG_BUF(syslog_ns, cur_index);
 
                        cur_index++;
                        if (c == '\n') {
@@ -514,26 +468,27 @@ static void call_console_drivers(unsigned start, unsigned end)
                                         */
                                        msg_level = default_message_loglevel;
                                }
-                               _call_console_drivers(start_print, cur_index, msg_level);
+                               _call_console_drivers(syslog_ns,
+                                       start_print, cur_index, msg_level);
                                msg_level = -1;
                                start_print = cur_index;
                                break;
                        }
                }
        }
-       _call_console_drivers(start_print, end, msg_level);
+       _call_console_drivers(syslog_ns, start_print, end, msg_level);
 }
 
-static void emit_log_char(char c)
+static void emit_log_char(struct syslog_ns *syslog_ns, char c)
 {
-       LOG_BUF(log_end) = c;
-       log_end++;
-       if (log_end - log_start > log_buf_len)
-               log_start = log_end - log_buf_len;
-       if (log_end - con_start > log_buf_len)
-               con_start = log_end - log_buf_len;
-       if (logged_chars < log_buf_len)
-               logged_chars++;
+       LOG_BUF(syslog_ns, sys_log_end) = c;
+       sys_log_end++;
+       if (sys_log_end - sys_log_start > sys_log_buf_len)
+               sys_log_start = sys_log_end - sys_log_buf_len;
+       if (sys_log_end - sys_log_con_start > sys_log_buf_len)
+               sys_log_con_start = sys_log_end - sys_log_buf_len;
+       if (sys_log_logged_chars < sys_log_buf_len)
+               sys_log_logged_chars++;
 }
 
 /*
@@ -541,7 +496,7 @@ static void emit_log_char(char c)
  * every 10 seconds, to leave time for slow consoles to print a
  * full oops.
  */
-static void zap_locks(void)
+static void zap_locks(struct syslog_ns *syslog_ns)
 {
        static unsigned long oops_timestamp;
 
@@ -552,7 +507,7 @@ static void zap_locks(void)
        oops_timestamp = jiffies;
 
        /* If a crash is occurring, make sure we can't deadlock */
-       spin_lock_init(&logbuf_lock);
+       spin_lock_init(&sys_log_lock);
        /* And make sure that we print immediately */
        init_MUTEX(&console_sem);
 }
@@ -603,14 +558,6 @@ asmlinkage int printk(const char *fmt, ...)
        va_list args;
        int r;
 
-#ifdef CONFIG_KGDB_KDB
-       if (unlikely(kdb_trap_printk)) {
-               va_start(args, fmt);
-               r = vkdb_printf(fmt, args);
-               va_end(args);
-               return r;
-       }
-#endif
        va_start(args, fmt);
        r = vprintk(fmt, args);
        va_end(args);
@@ -644,7 +591,8 @@ static inline int can_use_console(unsigned int cpu)
  * interrupts disabled. It should return with 'lockbuf_lock'
  * released but interrupts still disabled.
  */
-static int acquire_console_semaphore_for_printk(unsigned int cpu)
+static int acquire_console_semaphore_for_printk(
+               struct syslog_ns *syslog_ns, unsigned int cpu)
 {
        int retval = 0;
 
@@ -664,7 +612,7 @@ static int acquire_console_semaphore_for_printk(unsigned int cpu)
                }
        }
        printk_cpu = UINT_MAX;
-       spin_unlock(&logbuf_lock);
+       spin_unlock(&sys_log_lock);
        return retval;
 }
 static const char recursion_bug_msg [] =
@@ -691,6 +639,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 {
        int printed_len = 0;
        int current_log_level = default_message_loglevel;
+       struct syslog_ns *syslog_ns = current_syslog_ns();
        unsigned long flags;
        int this_cpu;
        char *p;
@@ -718,11 +667,11 @@ asmlinkage int vprintk(const char *fmt, va_list args)
                        recursion_bug = 1;
                        goto out_restore_irqs;
                }
-               zap_locks();
+               zap_locks(syslog_ns);
        }
 
        lockdep_off();
-       spin_lock(&logbuf_lock);
+       spin_lock(&sys_log_lock);
        printk_cpu = this_cpu;
 
        if (recursion_bug) {
@@ -747,7 +696,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
                        /* Fallthrough - make sure we're on a new line */
                        case 'd': /* KERN_DEFAULT */
                                if (!new_text_line) {
-                                       emit_log_char('\n');
+                                       emit_log_char(syslog_ns, '\n');
                                        new_text_line = 1;
                                }
                        /* Fallthrough - skip the loglevel */
@@ -765,9 +714,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
        for ( ; *p; p++) {
                if (new_text_line) {
                        /* Always output the token */
-                       emit_log_char('<');
-                       emit_log_char(current_log_level + '0');
-                       emit_log_char('>');
+                       emit_log_char(syslog_ns, '<');
+                       emit_log_char(syslog_ns, current_log_level + '0');
+                       emit_log_char(syslog_ns, '>');
                        printed_len += 3;
                        new_text_line = 0;
 
@@ -780,12 +729,13 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 
                                t = cpu_clock(printk_cpu);
                                nanosec_rem = do_div(t, 1000000000);
-                               tlen = sprintf(tbuf, "[%5lu.%06lu] ",
+                               tlen = sprintf(tbuf, "ns_id='%d' %5lu.%06lu] ",
+                                               syslog_ns->handle,
                                                (unsigned long) t,
                                                nanosec_rem / 1000);
 
                                for (tp = tbuf; tp < tbuf + tlen; tp++)
-                                       emit_log_char(*tp);
+                                       emit_log_char(syslog_ns, *tp);
                                printed_len += tlen;
                        }
 
@@ -793,7 +743,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
                                break;
                }
 
-               emit_log_char(*p);
+               emit_log_char(syslog_ns, *p);
                if (*p == '\n')
                        new_text_line = 1;
        }
@@ -808,7 +758,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
         * will release 'logbuf_lock' regardless of whether it
         * actually gets the semaphore or not.
         */
-       if (acquire_console_semaphore_for_printk(this_cpu))
+       if (acquire_console_semaphore_for_printk(syslog_ns, this_cpu))
                release_console_sem();
 
        lockdep_on();
@@ -821,12 +771,6 @@ out_restore_irqs:
 EXPORT_SYMBOL(printk);
 EXPORT_SYMBOL(vprintk);
 
-#else
-
-static void call_console_drivers(unsigned start, unsigned end)
-{
-}
-
 #endif
 
 static int __add_preferred_console(char *name, int idx, char *options,
@@ -1059,36 +1003,49 @@ void wake_up_klogd(void)
  */
 void release_console_sem(void)
 {
-       unsigned long flags;
-       unsigned _con_start, _log_end;
-       unsigned wake_klogd = 0;
-
        if (console_suspended) {
                up(&console_sem);
                return;
        }
 
        console_may_schedule = 0;
-
-       for ( ; ; ) {
-               spin_lock_irqsave(&logbuf_lock, flags);
-               wake_klogd |= log_start - log_end;
-               if (con_start == log_end)
-                       break;                  /* Nothing to print */
-               _con_start = con_start;
-               _log_end = log_end;
-               con_start = log_end;            /* Flush */
-               spin_unlock(&logbuf_lock);
-               stop_critical_timings();        /* don't trace print latency */
-               call_console_drivers(_con_start, _log_end);
-               start_critical_timings();
-               local_irq_restore(flags);
+#ifdef CONFIG_PRINTK
+       {
+               unsigned long flags;
+               unsigned _con_start, _log_end;
+               unsigned wake_klogd = 0;
+               struct syslog_ns *syslog_ns = current_syslog_ns();
+
+               for ( ; ; ) {
+                       spin_lock_irqsave(&sys_log_lock, flags);
+                       wake_klogd |= sys_log_start - sys_log_end;
+                       if (sys_log_con_start == sys_log_end)
+                               break;                  /* Nothing to print */
+                       _con_start = sys_log_con_start;
+                       _log_end = sys_log_end;
+                       sys_log_con_start = sys_log_end;        /* Flush */
+                       spin_unlock(&sys_log_lock);
+                       stop_critical_timings();/* don't trace print latency */
+                       call_console_drivers(syslog_ns, _con_start, _log_end);
+                       start_critical_timings();
+                       local_irq_restore(flags);
+               }
+               /*
+                * Release console_sem while still holding the log lock, as
+                * the original code did: vprintk() try-locks console_sem
+                * under the same lock, so dropping the lock first would let
+                * freshly logged text sit unprinted until the next printk.
+                */
+               console_locked = 0;
+               up(&console_sem);
+               spin_unlock_irqrestore(&sys_log_lock, flags);
+               if (wake_klogd)
+                       wake_up_klogd();
        }
+#else
        console_locked = 0;
        up(&console_sem);
-       spin_unlock_irqrestore(&logbuf_lock, flags);
-       if (wake_klogd)
-               wake_up_klogd();
+#endif
 }
 EXPORT_SYMBOL(release_console_sem);
 
@@ -1193,7 +1141,6 @@ EXPORT_SYMBOL(console_start);
 void register_console(struct console *newcon)
 {
        int i;
-       unsigned long flags;
        struct console *bcon = NULL;
 
        /*
@@ -1299,15 +1246,21 @@ void register_console(struct console *newcon)
                newcon->next = console_drivers->next;
                console_drivers->next = newcon;
        }
+#ifdef CONFIG_PRINTK
        if (newcon->flags & CON_PRINTBUFFER) {
+               unsigned long flags;
                /*
                 * release_console_sem() will print out the buffered messages
                 * for us.
                 */
-               spin_lock_irqsave(&logbuf_lock, flags);
-               con_start = log_start;
-               spin_unlock_irqrestore(&logbuf_lock, flags);
+
+               struct syslog_ns *syslog_ns = current_syslog_ns();
+
+               spin_lock_irqsave(&sys_log_lock, flags);
+               sys_log_con_start = sys_log_start;
+               spin_unlock_irqrestore(&sys_log_lock, flags);
        }
+#endif
        release_console_sem();
 
        /*
@@ -1511,27 +1464,28 @@ void kmsg_dump(enum kmsg_dump_reason reason)
        const char *s1, *s2;
        unsigned long l1, l2;
        unsigned long flags;
+       struct syslog_ns *syslog_ns = current_syslog_ns();
 
        /* Theoretically, the log could move on after we do this, but
           there's not a lot we can do about that. The new messages
           will overwrite the start of what we dump. */
-       spin_lock_irqsave(&logbuf_lock, flags);
-       end = log_end & LOG_BUF_MASK;
-       chars = logged_chars;
-       spin_unlock_irqrestore(&logbuf_lock, flags);
+       spin_lock_irqsave(&sys_log_lock, flags);
+       end = sys_log_end & LOG_BUF_MASK(syslog_ns);
+       chars = sys_log_logged_chars;
+       spin_unlock_irqrestore(&sys_log_lock, flags);
 
-       if (logged_chars > end) {
-               s1 = log_buf + log_buf_len - logged_chars + end;
-               l1 = logged_chars - end;
+       if (sys_log_logged_chars > end) {
+               s1 = sys_log_buf + sys_log_buf_len - sys_log_logged_chars + end;
+               l1 = sys_log_logged_chars - end;
 
-               s2 = log_buf;
+               s2 = sys_log_buf;
                l2 = end;
        } else {
                s1 = "";
                l1 = 0;
 
-               s2 = log_buf + end - logged_chars;
-               l2 = logged_chars;
+               s2 = sys_log_buf + end - sys_log_logged_chars;
+               l2 = sys_log_logged_chars;
        }
 
        if (!spin_trylock_irqsave(&dump_list_lock, flags)) {
@@ -1544,3 +1498,4 @@ void kmsg_dump(enum kmsg_dump_reason reason)
        spin_unlock_irqrestore(&dump_list_lock, flags);
 }
 #endif
+
diff --git a/kernel/syslog.c b/kernel/syslog.c
new file mode 100644 (file)
index 0000000..44b1d90
--- /dev/null
@@ -0,0 +1,355 @@
+/*
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation, version 2 of the
+ *  License.
+ *
+ *  Jun 2010
+ *  Jean-Marc Pigeon   <jmp@safe.ca>
+ *
+ *  This file groups together all the procedures involved
+ *  in the system log.
+ *  The system log needs to be containerized so that critical
+ *  data does not cross over between the physical host layer
+ *  and the container layer.
+ *
+ *  The principle is to keep one ring buffer per container,
+ *  into which that container's kernel messages are redirected,
+ *  kept and managed.
+ *
+ *  A containerized syslog is activated when all the
+ *  CLONE_SYSLOG flags are set.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include <linux/kref.h>
+#include <linux/rcupdate.h>
+#include <linux/user_namespace.h>
+#include <linux/syslog.h>
+
+/*
+ * Statically allocated syslog assigned to the init process itself;
+ * its buffer size is fixed at build time by CONFIG_LOG_BUF_SHIFT.
+ *
+ * NOTE(review): the reference count starts at 2, presumably so that
+ * this syslog is never released - confirm.
+ */
+#define __LOG_BUF_LEN   (1 << CONFIG_LOG_BUF_SHIFT)
+static char __log_buf[__LOG_BUF_LEN];
+
+struct syslog_ns init_syslog_ns = {
+       .buf            = __log_buf,
+       .buf_len        = __LOG_BUF_LEN,
+       .logbuf_lock    = __SPIN_LOCK_INITIALIZER(logbuf_lock),
+       .handle         = 1,            /* pid of the kernel INIT process */
+       .kref.refcount  = ATOMIC_INIT(2),
+};
+
+/*
+ * List of all syslog ns currently allocated
+ * first member of this list (kernel syslog)
+ * can't be removed.
+ */
+struct log_list        {
+       spinlock_t list_lock;           /*make sure about list access   */
+       struct log_list *next;          /*next syslog_ns in the list    */
+       struct syslog_ns *syslog_ns;
+       }  log_list = {
+               .list_lock = __SPIN_LOCK_INITIALIZER(list_lock),
+               .next = (struct log_list *)0,
+               .syslog_ns = &init_syslog_ns
+               };
+/*
+ * Unlink and free the list entry that tracks syslog_ns.
+ *
+ * The walk starts behind the static head, so the head entry itself is
+ * never a candidate. Only the first matching entry is removed; nothing
+ * happens when syslog_ns is not on the list.
+ *
+ * NOTE(review): the lock taken is the one embedded in the entry being
+ * examined (and possibly freed), not one owned by its predecessor -
+ * confirm this is enough against concurrent add/remove.
+ */
+static void removing_syslog_ns(struct syslog_ns *syslog_ns)
+{
+       struct log_list *prev;
+
+       for (prev = &log_list; prev->next; prev = prev->next) {
+               struct log_list *victim;
+               unsigned long flags;
+               int unlinked = false;
+
+               spin_lock_irqsave(&prev->next->list_lock, flags);
+               victim = prev->next;
+               if (victim->syslog_ns == syslog_ns) {
+                       prev->next = victim->next;
+                       unlinked = true;
+               }
+               spin_unlock_irqrestore(&victim->list_lock, flags);
+
+               if (unlinked) {
+                       kfree(victim);
+                       return;
+               }
+       }
+}
+
+/*
+ * Append a syslog_ns to the tail of the list of known syslogs.
+ *
+ * The list entry is allocated before any lock is taken: a GFP_KERNEL
+ * allocation may sleep, which is not allowed while a spinlock is held
+ * with interrupts disabled. As before, an allocation failure is
+ * treated as fatal (BUG_ON).
+ */
+static void adding_syslog_ns(struct syslog_ns *syslog_ns)
+{
+       struct log_list *entry;
+       struct log_list *pos;
+
+       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+       BUG_ON(!entry);
+       spin_lock_init(&entry->list_lock);
+       entry->syslog_ns = syslog_ns;
+
+       for (pos = &log_list; pos; pos = pos->next) {
+               unsigned long flags;
+               int linked = false;
+
+               spin_lock_irqsave(&pos->list_lock, flags);
+               if (!pos->next) {
+                       /* pos is the current tail: hook the new entry here */
+                       pos->next = entry;
+                       linked = true;
+               }
+               spin_unlock_irqrestore(&pos->list_lock, flags);
+
+               if (linked)
+                       return;
+       }
+
+       /*
+        * The walk ran off the list without finding a tail (the list
+        * changed underneath us). Nothing was linked, so do not leak
+        * the entry prepared above.
+        */
+       kfree(entry);
+}
+/*
+ * Release every resource tied to a syslog_ns: its list entry,
+ * its buffer and the structure itself.
+ *
+ * Always returns NULL, so the caller can overwrite its pointer with
+ * the result. A NULL syslog_ns is accepted and left alone.
+ */
+static struct syslog_ns *free_all_syslog_ns(struct syslog_ns *syslog_ns)
+{
+       if (!syslog_ns)
+               return syslog_ns;
+
+       removing_syslog_ns(syslog_ns);
+       kfree(syslog_ns->buf);
+       kfree(syslog_ns);
+       return NULL;
+}
+
+/*
+ * Allocate a syslog_ns together with a zeroed buffer of
+ * container_buf_len bytes and register it on the list of known syslogs.
+ *
+ * The handle of the new syslog_ns is the pid of the calling task.
+ * Returns the new syslog_ns, or NULL when either allocation fails.
+ */
+static struct syslog_ns *malloc_syslog_ns(unsigned container_buf_len)
+{
+       struct syslog_ns *fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
+       char *ring = kzalloc(container_buf_len, GFP_KERNEL);
+
+       if (!fresh || !ring)
+               goto fail;
+
+       kref_init(&fresh->kref);
+       spin_lock_init(&fresh->logbuf_lock);
+       fresh->handle = current->pid;
+       fresh->buf_len = container_buf_len;
+       fresh->buf = ring;
+       adding_syslog_ns(fresh);
+       return fresh;
+
+fail:
+       /* kfree() accepts NULL, so whichever half succeeded is released */
+       kfree(ring);
+       kfree(fresh);
+       return NULL;
+}
+/*
+ * Look up the syslog_ns whose handle equals the submitted pid.
+ * The whole list is scanned, head entry included.
+ * Returns NULL when no syslog_ns carries that handle.
+ */
+static struct syslog_ns *find_its_syslog_ns(pid_t pid)
+{
+       struct log_list *entry;
+
+       for (entry = &log_list; entry; entry = entry->next) {
+               if (entry->syslog_ns->handle == pid)
+                       return entry->syslog_ns;
+       }
+       return NULL;
+}
+/*
+ * Grow the buffer of a syslog_ns (shrinking is refused), or create a
+ * brand new syslog_ns when syslog_ns is NULL.
+ *
+ * syslog_ns:         syslog to resize, or NULL to allocate a new one
+ *                    through malloc_syslog_ns().
+ * container_buf_len: requested buffer length in bytes.
+ *
+ * init_syslog_ns is grown with alloc_bootmem(); its previous buffer is
+ * handed back with free_bootmem() unless it is the static __log_buf.
+ * Any other syslog_ns is grown with kzalloc() and its previous buffer
+ * is released with kfree(). In both cases the old contents are copied
+ * to the start of the new buffer and the buf/buf_len pair is swapped
+ * while holding the logbuf_lock of that syslog_ns.
+ *
+ * Returns the resized (or unchanged) syslog_ns, the result of
+ * malloc_syslog_ns() when syslog_ns was NULL, ERR_PTR(-ENOMEM) when the
+ * new buffer cannot be allocated, or ERR_PTR(-EINVAL) when
+ * container_buf_len is smaller than the current buffer length.
+ *
+ * NOTE(review): only buf and buf_len are updated here, no ring index is
+ * adjusted after the copy - confirm readers of the buffer cope with that.
+ * NOTE(review): alloc_bootmem() is presumably only usable during early
+ * boot - confirm the init_syslog_ns path is never reached later.
+ */
+struct syslog_ns *resize_syslog_ns(struct syslog_ns *syslog_ns,
+                       unsigned container_buf_len)
+
+{
+       if ((syslog_ns == &init_syslog_ns) &&
+               (container_buf_len > syslog_ns->buf_len)) {
+               int old_buf_len;
+               char *old_buf;
+               char *new_buf;
+               unsigned long flags;
+
+               old_buf_len = syslog_ns->buf_len;
+               old_buf = syslog_ns->buf;
+               new_buf = alloc_bootmem(container_buf_len);
+               if (!new_buf) {
+                       (void) printk(KERN_WARNING
+                               "log_buf_len: allocation failed\n");
+                       return ERR_PTR(-ENOMEM);
+                       }
+               spin_lock_irqsave(&(syslog_ns->logbuf_lock), flags);
+               (void) memmove(new_buf, old_buf, old_buf_len);
+               syslog_ns->buf = new_buf;
+               syslog_ns->buf_len = container_buf_len;
+               spin_unlock_irqrestore(&(syslog_ns->logbuf_lock), flags);
+               if (old_buf != __log_buf)       /* static buffer is never freed */
+                       (void) free_bootmem((unsigned long)old_buf,
+                                           old_buf_len);
+               }
+       if (!syslog_ns)         /* nothing to resize: create a new syslog */
+               return malloc_syslog_ns(container_buf_len);
+       if (syslog_ns->buf_len > container_buf_len) {
+               (void) printk(KERN_WARNING "log_buf_len: Not allowed "
+                                       "to decrease syslog buffer\n");
+               return ERR_PTR(-EINVAL);
+               }
+       if (syslog_ns->buf_len < container_buf_len) {
+               char *old_buf;
+               char *new_buf;
+               unsigned long flags;
+
+               old_buf = syslog_ns->buf;
+               new_buf = kzalloc(container_buf_len, GFP_KERNEL);
+               if (!new_buf)
+                       return ERR_PTR(-ENOMEM);
+               spin_lock_irqsave(&(syslog_ns->logbuf_lock), flags);
+               (void) memmove(new_buf, old_buf, syslog_ns->buf_len);
+               syslog_ns->buf = new_buf;
+               syslog_ns->buf_len = container_buf_len;
+               spin_unlock_irqrestore(&(syslog_ns->logbuf_lock), flags);
+               (void) kfree(old_buf);
+               }
+       (void) printk(KERN_NOTICE "log_buf_len: %u\n", syslog_ns->buf_len);
+       return syslog_ns;
+}
+
+/*
+ * Choose the syslog_ns for a new task.
+ *
+ * When every flag of CLONE_SYSLOG is present in flags, a new syslog
+ * area is allocated and returned (NULL if that allocation fails).
+ * Otherwise the syslog_ns passed in is shared: its usage count is
+ * incremented and the same pointer is returned.
+ */
+
+/* 4096 should be enough for a container syslog */
+#define        CONTAINER_BUF_LEN       4096
+
+struct syslog_ns *copy_syslog_ns(unsigned long flags,
+                               struct syslog_ns *current_syslog_ns)
+{
+       BUG_ON(!current_syslog_ns);
+
+       if ((flags & CLONE_SYSLOG) != CLONE_SYSLOG) {
+               /* sharing: account for one more user */
+               kref_get(&current_syslog_ns->kref);
+               return current_syslog_ns;
+       }
+       return malloc_syslog_ns(CONTAINER_BUF_LEN);
+}
+
+/*
+ * Release callback handed to kref_put() by put_syslog_ns():
+ * maps the kref back to its enclosing syslog_ns and frees
+ * everything tied to it.
+ */
+void free_syslog_ns(struct kref *kref)
+{
+       free_all_syslog_ns(container_of(kref, struct syslog_ns, kref));
+}
+
+/*
+ * Return the syslog area attached to the current task through
+ * its nsproxy.
+ * If the task has no nsproxy, or the nsproxy has no syslog_ns,
+ * fall back to the host kernel's own syslog_ns so that logging
+ * is still covered.
+ */
+struct syslog_ns *current_syslog_ns(void)
+{
+       struct syslog_ns *ns;
+
+       ns = current->nsproxy ? current->nsproxy->syslog_ns : NULL;
+       return ns ? ns : &init_syslog_ns;
+}
+/*
+ * Replace the syslog namespace of the current task with another one.
+ *
+ * syslog_ns: namespace to install; NULL leaves the current one in place.
+ *
+ * Returns the syslog_ns that was current before the call, so the caller
+ * can restore it later with a second call.
+ *
+ * The previous namespace is looked up once, so the lock that is taken
+ * is always the one that is released. A task without nsproxy has
+ * nowhere to store the new namespace: as in current_syslog_ns(), that
+ * case is tolerated, the switch is skipped and the host syslog_ns is
+ * reported.
+ */
+struct syslog_ns *switch_syslog_ns(struct syslog_ns *syslog_ns)
+{
+       unsigned long flags;
+       struct syslog_ns *old;
+
+       old = current_syslog_ns();
+       spin_lock_irqsave(&old->logbuf_lock, flags);
+       if (syslog_ns && current->nsproxy)
+               current->nsproxy->syslog_ns = syslog_ns;
+       spin_unlock_irqrestore(&old->logbuf_lock, flags);
+       return old;
+}
+
+/*
+ * Locate the syslog_ns owned by a given pid or, failing that, by the
+ * closest of its parents that owns one.
+ *
+ * The walk stops at pid 1 or as soon as a task (or its parent) cannot
+ * be found; in those cases the head of the list, i.e. the kernel
+ * syslog, is returned. The result is never NULL.
+ *
+ * find_task_by_vpid() and the real_parent pointer are only safe to use
+ * inside an RCU read-side critical section, so the whole walk runs
+ * under rcu_read_lock().
+ *
+ * NOTE(review): handles are recorded from current->pid while the task
+ * lookup uses a pid relative to the caller's pid namespace - confirm
+ * both numbering schemes agree for every caller.
+ */
+struct syslog_ns *find_syslog_ns_bypid(pid_t pid)
+{
+       struct syslog_ns *found = NULL;
+
+       rcu_read_lock();
+       while (pid > 1) {
+               struct task_struct *task;
+               struct task_struct *parent;
+
+               found = find_its_syslog_ns(pid);
+               if (found)
+                       break;
+
+               task = find_task_by_vpid(pid);
+               if (!task)
+                       break;
+               parent = rcu_dereference(task->real_parent);
+               if (!parent)
+                       break;
+               pid = parent->pid;
+       }
+       rcu_read_unlock();
+
+       return found ? found : log_list.syslog_ns;
+}
+
+/*
+ * Drop one reference on a syslog_ns. Once no reference is left,
+ * free_syslog_ns() is invoked and the memory is freed.
+ */
+void put_syslog_ns(struct syslog_ns *ns)
+{
+       struct kref *usage = &ns->kref;
+
+       kref_put(usage, free_syslog_ns);
+}
index 1a2af24..5f7cf87 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/kernel.h>
+#include <linux/syslog.h>
 #include <linux/timer.h>
 #include <linux/string.h>
 #include <linux/sockios.h>
@@ -1082,6 +1083,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
                        err = PTR_ERR(net);
                        goto errout;
                }
+               if (dev) {
+                       register pid_t net_ns_pid;
+
+                       net_ns_pid = nla_get_u32(tb[IFLA_NET_NS_PID]);
+                       dev->syslog_ns = find_syslog_ns_bypid(net_ns_pid);
+               }
+
                err = dev_change_net_namespace(dev, net, ifname);
                put_net(net);
                if (err)
index 5234f4f..74761de 100644 (file)
@@ -11,6 +11,7 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
+#include <linux/syslog.h>
 #include <linux/spinlock.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
@@ -382,6 +383,14 @@ ipt_log_packet(u_int8_t pf,
               const struct nf_loginfo *loginfo,
               const char *prefix)
 {
+       register struct syslog_ns *syslog_ns;
+
+
+       syslog_ns = (struct syslog_ns *)0;
+       if (skb->dev)   /*another syslog_ns possible?   */
+               syslog_ns = skb->dev->syslog_ns;
+       syslog_ns = switch_syslog_ns(syslog_ns);
+
        if (!loginfo)
                loginfo = &default_loginfo;
 
@@ -422,6 +431,7 @@ ipt_log_packet(u_int8_t pf,
        dump_packet(loginfo, skb, 0);
        printk("\n");
        spin_unlock_bh(&log_lock);
+       (void) switch_syslog_ns(syslog_ns);
 }
 
 static unsigned int