From 7f95d48056f1569a9e97e78e6f1557c7172bf6ac Mon Sep 17 00:00:00 2001 From: Jean-Marc Pigeon Date: Thu, 10 Jun 2010 15:30:15 -0400 Subject: [PATCH] Containerized syslog working properly Mainly, ipt_LOG traces generated for a container are now stored in that container's own syslog. Signed-off-by: Jean-Marc Pigeon --- Makefile | 2 +- include/linux/netdevice.h | 2 + include/linux/nsproxy.h | 1 + include/linux/syslog.h | 33 ++++ kernel/Makefile | 2 +- kernel/nsproxy.c | 12 ++ kernel/printk.c | 323 +++++++++++++++++---------------------- kernel/syslog.c | 355 +++++++++++++++++++++++++++++++++++++++++++ net/core/rtnetlink.c | 8 + net/ipv4/netfilter/ipt_LOG.c | 10 ++ 10 files changed, 562 insertions(+), 186 deletions(-) create mode 100644 kernel/syslog.c diff --git a/Makefile b/Makefile index 654c31a..31c24d5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 35 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc2-JMP-syslog-1 NAME = Sheep on Meth # *DOCUMENTATION* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 40291f3..75a3ef1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1020,6 +1020,8 @@ struct net_device { /* Network namespace this network device is inside */ struct net *nd_net; #endif + /* to assign a syslog channel according to device ownership */ + struct syslog_ns *syslog_ns; /* mid-layer private */ void *ml_priv; diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 7b370c7..cacd734 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -29,6 +29,7 @@ struct nsproxy { struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns; struct net *net_ns; + struct syslog_ns *syslog_ns; }; extern struct nsproxy init_nsproxy; diff --git a/include/linux/syslog.h b/include/linux/syslog.h index 3891139..102908c 100644 --- a/include/linux/syslog.h +++ b/include/linux/syslog.h @@ -47,6 +47,39 @@ #define SYSLOG_FROM_CALL 0 #define SYSLOG_FROM_FILE 1 +#define CLONE_SYSLOG \ + 
(CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | \ + CLONE_NEWPID | CLONE_NEWNET) +/*structure used to dispatch syslog data according container of origin */ +struct syslog_ns { + struct kref kref; /*syslog_ns reference count & control */ + int handle; /*syslog handle number */ + struct syslog_ns *prvns;/*parent process own syslog */ + spinlock_t logbuf_lock; /* access conflict locker */ + unsigned log_start; /* Index: next char to be read by syslog() */ + unsigned con_start; /* Index: next char to be sent to consoles */ + unsigned log_end; /* Index: most-recently-written-char + 1 */ + unsigned logged_chars; /* Num chars produced since last read+clear*/ + unsigned buf_len; /* buffer available space size */ + char *buf; /* allocated ring buffer */ +}; + +/* + * Static structure used by nsproxy + */ +extern struct syslog_ns init_syslog_ns; +extern struct syslog_ns *resize_syslog_ns(struct syslog_ns *syslog_ns, + unsigned container_buf_len); +extern struct syslog_ns *copy_syslog_ns(unsigned long flags, + struct syslog_ns *current_syslog_ns); +extern void free_syslog_ns(struct kref *kref); +extern struct syslog_ns *current_syslog_ns(void); +extern struct syslog_ns *switch_syslog_ns(struct syslog_ns *syslog_ns); +extern struct syslog_ns *find_syslog_ns_bypid(pid_t pid); +extern void put_syslog_ns(struct syslog_ns *ns); + + + int do_syslog(int type, char __user *buf, int count, bool from_file); #endif /* _LINUX_SYSLOG_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 057472f..441afc7 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ - async.o range.o + async.o range.o syslog.o obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o obj-y += groups.o diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index f74e6c0..7ade5d2 100644 --- 
a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -93,8 +94,17 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_net; } + new_nsp->syslog_ns = copy_syslog_ns(flags, tsk->nsproxy->syslog_ns); + if (IS_ERR(new_nsp->syslog_ns)) { + err = PTR_ERR(new_nsp->syslog_ns); + goto out_syslog; + } + return new_nsp; +out_syslog: + if (new_nsp->net_ns) + put_net(new_nsp->net_ns); out_net: if (new_nsp->pid_ns) put_pid_ns(new_nsp->pid_ns); @@ -163,6 +173,8 @@ out: void free_nsproxy(struct nsproxy *ns) { + if (ns->syslog_ns) + put_syslog_ns(ns->syslog_ns); if (ns->mnt_ns) put_mnt_ns(ns->mnt_ns); if (ns->uts_ns) diff --git a/kernel/printk.c b/kernel/printk.c index 444b770..7f60d8a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -53,8 +52,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) { } -#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) - /* printk's without a loglevel use this.. */ #define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */ @@ -97,23 +94,19 @@ EXPORT_SYMBOL_GPL(console_drivers); */ static int console_locked, console_suspended; -/* - * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars - * It is also used in interesting ways to provide interlocking in - * release_console_sem(). 
- */ -static DEFINE_SPINLOCK(logbuf_lock); - -#define LOG_BUF_MASK (log_buf_len-1) -#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) +#define LOG_BUF_MASK(ns) ((ns)->buf_len-1) +#define LOG_BUF(ns, idx) ((ns)->buf[(idx) & LOG_BUF_MASK(ns)]) /* - * The indices into log_buf are not constrained to log_buf_len - they - * must be masked before subscripting + * To access container syslog ring buffer */ -static unsigned log_start; /* Index into log_buf: next char to be read by syslog() */ -static unsigned con_start; /* Index into log_buf: next char to be sent to consoles */ -static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ +#define sys_log_lock (syslog_ns->logbuf_lock) +#define sys_log_start (syslog_ns->log_start) +#define sys_log_end (syslog_ns->log_end) +#define sys_log_con_start (syslog_ns->con_start) +#define sys_log_buf_len (syslog_ns->buf_len) +#define sys_log_logged_chars (syslog_ns->logged_chars) +#define sys_log_buf (syslog_ns->buf) /* * Array of consoles built from command line options (console=) @@ -141,10 +134,6 @@ static int console_may_schedule; #ifdef CONFIG_PRINTK -static char __log_buf[__LOG_BUF_LEN]; -static char *log_buf = __log_buf; -static int log_buf_len = __LOG_BUF_LEN; -static unsigned logged_chars; /* Number of chars produced since last read+clear operation */ static int saved_console_loglevel = -1; #ifdef CONFIG_KEXEC @@ -158,49 +147,23 @@ static int saved_console_loglevel = -1; */ void log_buf_kexec_setup(void) { - VMCOREINFO_SYMBOL(log_buf); - VMCOREINFO_SYMBOL(log_end); - VMCOREINFO_SYMBOL(log_buf_len); - VMCOREINFO_SYMBOL(logged_chars); + struct syslog_ns *syslog_ns = current_syslog_ns(); + + VMCOREINFO_SYMBOL(sys_log_buf); + VMCOREINFO_SYMBOL(sys_log_end); + VMCOREINFO_SYMBOL(sys_log_buf_len); + VMCOREINFO_SYMBOL(sys_log_logged_chars); } #endif static int __init log_buf_len_setup(char *str) { unsigned size = memparse(str, &str); - unsigned long flags; - if (size) + if (size) { size = 
roundup_pow_of_two(size); - if (size > log_buf_len) { - unsigned start, dest_idx, offset; - char *new_log_buf; - - new_log_buf = alloc_bootmem(size); - if (!new_log_buf) { - printk(KERN_WARNING "log_buf_len: allocation failed\n"); - goto out; - } - - spin_lock_irqsave(&logbuf_lock, flags); - log_buf_len = size; - log_buf = new_log_buf; - - offset = start = min(con_start, log_start); - dest_idx = 0; - while (start != log_end) { - log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)]; - start++; - dest_idx++; - } - log_start -= offset; - con_start -= offset; - log_end -= offset; - spin_unlock_irqrestore(&logbuf_lock, flags); - - printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len); + resize_syslog_ns(&init_syslog_ns, size); } -out: return 1; } @@ -265,6 +228,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) int do_clear = 0; char c; int error = 0; + struct syslog_ns *syslog_ns = current_syslog_ns(); error = security_syslog(type, from_file); if (error) @@ -287,22 +251,22 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) goto out; } error = wait_event_interruptible(log_wait, - (log_start - log_end)); + (sys_log_start - sys_log_end)); if (error) goto out; i = 0; - spin_lock_irq(&logbuf_lock); - while (!error && (log_start != log_end) && i < len) { - c = LOG_BUF(log_start); - log_start++; - spin_unlock_irq(&logbuf_lock); + spin_lock_irq(&sys_log_lock); + while (!error && (sys_log_start != sys_log_end) && i < len) { + c = LOG_BUF(syslog_ns, sys_log_start); + sys_log_start++; + spin_unlock_irq(&sys_log_lock); error = __put_user(c,buf); buf++; i++; cond_resched(); - spin_lock_irq(&logbuf_lock); + spin_lock_irq(&sys_log_lock); } - spin_unlock_irq(&logbuf_lock); + spin_unlock_irq(&sys_log_lock); if (!error) error = i; break; @@ -323,14 +287,14 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) goto out; } count = len; - if (count > log_buf_len) - count = log_buf_len; - spin_lock_irq(&logbuf_lock); - if (count > 
logged_chars) - count = logged_chars; + if (count > sys_log_buf_len) + count = sys_log_buf_len; + spin_lock_irq(&sys_log_lock); + if (count > sys_log_logged_chars) + count = sys_log_logged_chars; if (do_clear) - logged_chars = 0; - limit = log_end; + sys_log_logged_chars = 0; + limit = sys_log_end; /* * __put_user() could sleep, and while we sleep * printk() could overwrite the messages @@ -339,15 +303,15 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) */ for (i = 0; i < count && !error; i++) { j = limit-1-i; - if (j + log_buf_len < log_end) + if (j + sys_log_buf_len < sys_log_end) break; - c = LOG_BUF(j); - spin_unlock_irq(&logbuf_lock); + c = LOG_BUF(syslog_ns, j); + spin_unlock_irq(&sys_log_lock); error = __put_user(c,&buf[count-1-i]); cond_resched(); - spin_lock_irq(&logbuf_lock); + spin_lock_irq(&sys_log_lock); } - spin_unlock_irq(&logbuf_lock); + spin_unlock_irq(&sys_log_lock); if (error) break; error = i; @@ -366,7 +330,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) break; /* Clear ring buffer */ case SYSLOG_ACTION_CLEAR: - logged_chars = 0; + sys_log_logged_chars = 0; break; /* Disable logging to console */ case SYSLOG_ACTION_CONSOLE_OFF: @@ -395,11 +359,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) break; /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: - error = log_end - log_start; + error = sys_log_end - sys_log_start; break; /* Size of the log buffer */ case SYSLOG_ACTION_SIZE_BUFFER: - error = log_buf_len; + error = sys_log_buf_len; break; default: error = -EINVAL; @@ -414,26 +378,11 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) return do_syslog(type, buf, len, SYSLOG_FROM_CALL); } -#ifdef CONFIG_KGDB_KDB -/* kdb dmesg command needs access to the syslog buffer. do_syslog() - * uses locks so it cannot be used during debugging. Just tell kdb - * where the start and end of the physical and logical logs are. 
This - * is equivalent to do_syslog(3). - */ -void kdb_syslog_data(char *syslog_data[4]) -{ - syslog_data[0] = log_buf; - syslog_data[1] = log_buf + log_buf_len; - syslog_data[2] = log_buf + log_end - - (logged_chars < log_buf_len ? logged_chars : log_buf_len); - syslog_data[3] = log_buf + log_end; -} -#endif /* CONFIG_KGDB_KDB */ - /* * Call the console drivers on a range of log_buf */ -static void __call_console_drivers(unsigned start, unsigned end) +static void __call_console_drivers(struct syslog_ns *syslog_ns, + unsigned start, unsigned end) { struct console *con; @@ -441,7 +390,8 @@ static void __call_console_drivers(unsigned start, unsigned end) if ((con->flags & CON_ENABLED) && con->write && (cpu_online(smp_processor_id()) || (con->flags & CON_ANYTIME))) - con->write(con, &LOG_BUF(start), end - start); + con->write(con, &LOG_BUF(syslog_ns, start), + end - start); } } @@ -460,18 +410,21 @@ early_param("ignore_loglevel", ignore_loglevel_setup); /* * Write out chars from start to end - 1 inclusive */ -static void _call_console_drivers(unsigned start, - unsigned end, int msg_log_level) +static void _call_console_drivers(struct syslog_ns *syslog_ns, unsigned start, + unsigned end, int msg_log_level) { if ((msg_log_level < console_loglevel || ignore_loglevel) && console_drivers && start != end) { - if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { + if ((start & LOG_BUF_MASK(syslog_ns)) > + (end & LOG_BUF_MASK(syslog_ns))) { /* wrapped write */ - __call_console_drivers(start & LOG_BUF_MASK, - log_buf_len); - __call_console_drivers(0, end & LOG_BUF_MASK); + __call_console_drivers(syslog_ns, + start & LOG_BUF_MASK(syslog_ns), + sys_log_buf_len); + __call_console_drivers(syslog_ns, 0, + end & LOG_BUF_MASK(syslog_ns)); } else { - __call_console_drivers(start, end); + __call_console_drivers(syslog_ns, start, end); } } } @@ -481,7 +434,8 @@ static void _call_console_drivers(unsigned start, * log_buf[start] to log_buf[end - 1]. * The console_sem must be held. 
*/ -static void call_console_drivers(unsigned start, unsigned end) +static void call_console_drivers(struct syslog_ns *syslog_ns, + unsigned start, unsigned end) { unsigned cur_index, start_print; static int msg_level = -1; @@ -492,16 +446,16 @@ static void call_console_drivers(unsigned start, unsigned end) start_print = start; while (cur_index != end) { if (msg_level < 0 && ((end - cur_index) > 2) && - LOG_BUF(cur_index + 0) == '<' && - LOG_BUF(cur_index + 1) >= '0' && - LOG_BUF(cur_index + 1) <= '7' && - LOG_BUF(cur_index + 2) == '>') { - msg_level = LOG_BUF(cur_index + 1) - '0'; + LOG_BUF(syslog_ns, cur_index + 0) == '<' && + LOG_BUF(syslog_ns, cur_index + 1) >= '0' && + LOG_BUF(syslog_ns, cur_index + 1) <= '7' && + LOG_BUF(syslog_ns, cur_index + 2) == '>') { + msg_level = LOG_BUF(syslog_ns, cur_index + 1) - '0'; cur_index += 3; start_print = cur_index; } while (cur_index != end) { - char c = LOG_BUF(cur_index); + char c = LOG_BUF(syslog_ns, cur_index); cur_index++; if (c == '\n') { @@ -514,26 +468,27 @@ static void call_console_drivers(unsigned start, unsigned end) */ msg_level = default_message_loglevel; } - _call_console_drivers(start_print, cur_index, msg_level); + _call_console_drivers(syslog_ns, + start_print, cur_index, msg_level); msg_level = -1; start_print = cur_index; break; } } } - _call_console_drivers(start_print, end, msg_level); + _call_console_drivers(syslog_ns, start_print, end, msg_level); } -static void emit_log_char(char c) +static void emit_log_char(struct syslog_ns *syslog_ns, char c) { - LOG_BUF(log_end) = c; - log_end++; - if (log_end - log_start > log_buf_len) - log_start = log_end - log_buf_len; - if (log_end - con_start > log_buf_len) - con_start = log_end - log_buf_len; - if (logged_chars < log_buf_len) - logged_chars++; + LOG_BUF(syslog_ns, sys_log_end) = c; + sys_log_end++; + if (sys_log_end - sys_log_start > sys_log_buf_len) + sys_log_start = sys_log_end - sys_log_buf_len; + if (sys_log_end - sys_log_con_start > sys_log_buf_len) + 
sys_log_con_start = sys_log_end - sys_log_buf_len; + if (sys_log_logged_chars < sys_log_buf_len) + sys_log_logged_chars++; } /* @@ -541,7 +496,7 @@ static void emit_log_char(char c) * every 10 seconds, to leave time for slow consoles to print a * full oops. */ -static void zap_locks(void) +static void zap_locks(struct syslog_ns *syslog_ns) { static unsigned long oops_timestamp; @@ -552,7 +507,7 @@ static void zap_locks(void) oops_timestamp = jiffies; /* If a crash is occurring, make sure we can't deadlock */ - spin_lock_init(&logbuf_lock); + spin_lock_init(&sys_log_lock); /* And make sure that we print immediately */ init_MUTEX(&console_sem); } @@ -603,14 +558,6 @@ asmlinkage int printk(const char *fmt, ...) va_list args; int r; -#ifdef CONFIG_KGDB_KDB - if (unlikely(kdb_trap_printk)) { - va_start(args, fmt); - r = vkdb_printf(fmt, args); - va_end(args); - return r; - } -#endif va_start(args, fmt); r = vprintk(fmt, args); va_end(args); @@ -644,7 +591,8 @@ static inline int can_use_console(unsigned int cpu) * interrupts disabled. It should return with 'lockbuf_lock' * released but interrupts still disabled. 
*/ -static int acquire_console_semaphore_for_printk(unsigned int cpu) +static int acquire_console_semaphore_for_printk( + struct syslog_ns *syslog_ns, unsigned int cpu) { int retval = 0; @@ -664,7 +612,7 @@ static int acquire_console_semaphore_for_printk(unsigned int cpu) } } printk_cpu = UINT_MAX; - spin_unlock(&logbuf_lock); + spin_unlock(&sys_log_lock); return retval; } static const char recursion_bug_msg [] = @@ -691,6 +639,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) { int printed_len = 0; int current_log_level = default_message_loglevel; + struct syslog_ns *syslog_ns = current_syslog_ns(); unsigned long flags; int this_cpu; char *p; @@ -718,11 +667,11 @@ asmlinkage int vprintk(const char *fmt, va_list args) recursion_bug = 1; goto out_restore_irqs; } - zap_locks(); + zap_locks(syslog_ns); } lockdep_off(); - spin_lock(&logbuf_lock); + spin_lock(&sys_log_lock); printk_cpu = this_cpu; if (recursion_bug) { @@ -747,7 +696,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) /* Fallthrough - make sure we're on a new line */ case 'd': /* KERN_DEFAULT */ if (!new_text_line) { - emit_log_char('\n'); + emit_log_char(syslog_ns, '\n'); new_text_line = 1; } /* Fallthrough - skip the loglevel */ @@ -765,9 +714,9 @@ asmlinkage int vprintk(const char *fmt, va_list args) for ( ; *p; p++) { if (new_text_line) { /* Always output the token */ - emit_log_char('<'); - emit_log_char(current_log_level + '0'); - emit_log_char('>'); + emit_log_char(syslog_ns, '<'); + emit_log_char(syslog_ns, current_log_level + '0'); + emit_log_char(syslog_ns, '>'); printed_len += 3; new_text_line = 0; @@ -780,12 +729,13 @@ asmlinkage int vprintk(const char *fmt, va_list args) t = cpu_clock(printk_cpu); nanosec_rem = do_div(t, 1000000000); - tlen = sprintf(tbuf, "[%5lu.%06lu] ", + tlen = sprintf(tbuf, "ns_id='%d' %5lu.%06lu] ", + syslog_ns->handle, (unsigned long) t, nanosec_rem / 1000); for (tp = tbuf; tp < tbuf + tlen; tp++) - emit_log_char(*tp); + emit_log_char(syslog_ns, *tp); 
printed_len += tlen; } @@ -793,7 +743,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) break; } - emit_log_char(*p); + emit_log_char(syslog_ns, *p); if (*p == '\n') new_text_line = 1; } @@ -808,7 +758,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) * will release 'logbuf_lock' regardless of whether it * actually gets the semaphore or not. */ - if (acquire_console_semaphore_for_printk(this_cpu)) + if (acquire_console_semaphore_for_printk(syslog_ns, this_cpu)) release_console_sem(); lockdep_on(); @@ -821,12 +771,6 @@ out_restore_irqs: EXPORT_SYMBOL(printk); EXPORT_SYMBOL(vprintk); -#else - -static void call_console_drivers(unsigned start, unsigned end) -{ -} - #endif static int __add_preferred_console(char *name, int idx, char *options, @@ -1059,36 +1003,48 @@ void wake_up_klogd(void) */ void release_console_sem(void) { - unsigned long flags; - unsigned _con_start, _log_end; - unsigned wake_klogd = 0; - if (console_suspended) { up(&console_sem); return; } console_may_schedule = 0; - - for ( ; ; ) { - spin_lock_irqsave(&logbuf_lock, flags); - wake_klogd |= log_start - log_end; - if (con_start == log_end) - break; /* Nothing to print */ - _con_start = con_start; - _log_end = log_end; - con_start = log_end; /* Flush */ - spin_unlock(&logbuf_lock); - stop_critical_timings(); /* don't trace print latency */ - call_console_drivers(_con_start, _log_end); - start_critical_timings(); - local_irq_restore(flags); +#ifdef CONFIG_PRINTK + { + unsigned long flags; + unsigned _con_start, _log_end; + unsigned wake_klogd = 0; + struct syslog_ns *syslog_ns = current_syslog_ns(); + + for ( ; ; ) { + spin_lock_irqsave(&sys_log_lock, flags); + wake_klogd |= sys_log_start - sys_log_end; + if (sys_log_con_start == sys_log_end) + break; /* Nothing to print */ + _con_start = sys_log_con_start; + _log_end = sys_log_end; + sys_log_con_start = sys_log_end; /* Flush */ + spin_unlock(&sys_log_lock); + stop_critical_timings();/* don't trace print latency */ + 
call_console_drivers(syslog_ns, _con_start, _log_end); + start_critical_timings(); + local_irq_restore(flags); + } + /* + * Release console_sem before dropping the log lock: a printk() on + * another CPU tries the semaphore while holding that lock, so it + * either finds the semaphore free or was already flushed above. + */ + console_locked = 0; + up(&console_sem); + spin_unlock_irqrestore(&sys_log_lock, flags); + if (wake_klogd) + wake_up_klogd(); } +#else console_locked = 0; up(&console_sem); - spin_unlock_irqrestore(&logbuf_lock, flags); - if (wake_klogd) - wake_up_klogd(); +#endif } EXPORT_SYMBOL(release_console_sem); @@ -1193,7 +1141,6 @@ EXPORT_SYMBOL(console_start); void register_console(struct console *newcon) { int i; - unsigned long flags; struct console *bcon = NULL; /* @@ -1299,15 +1246,21 @@ void register_console(struct console *newcon) newcon->next = console_drivers->next; console_drivers->next = newcon; } +#ifdef CONFIG_PRINTK if (newcon->flags & CON_PRINTBUFFER) { + unsigned long flags; /* * release_console_sem() will print out the buffered messages * for us. */ - spin_lock_irqsave(&logbuf_lock, flags); - con_start = log_start; - spin_unlock_irqrestore(&logbuf_lock, flags); + + struct syslog_ns *syslog_ns = current_syslog_ns(); + + spin_lock_irqsave(&sys_log_lock, flags); + sys_log_con_start = sys_log_start; + spin_unlock_irqrestore(&sys_log_lock, flags); } +#endif release_console_sem(); /* @@ -1511,27 +1464,28 @@ void kmsg_dump(enum kmsg_dump_reason reason) const char *s1, *s2; unsigned long l1, l2; unsigned long flags; + struct syslog_ns *syslog_ns = current_syslog_ns(); /* Theoretically, the log could move on after we do this, but there's not a lot we can do about that. The new messages will overwrite the start of what we dump. 
*/ - spin_lock_irqsave(&logbuf_lock, flags); - end = log_end & LOG_BUF_MASK; - chars = logged_chars; - spin_unlock_irqrestore(&logbuf_lock, flags); + spin_lock_irqsave(&sys_log_lock, flags); + end = sys_log_end & LOG_BUF_MASK(syslog_ns); + chars = sys_log_logged_chars; + spin_unlock_irqrestore(&sys_log_lock, flags); - if (logged_chars > end) { - s1 = log_buf + log_buf_len - logged_chars + end; - l1 = logged_chars - end; + if (sys_log_logged_chars > end) { + s1 = sys_log_buf + sys_log_buf_len - sys_log_logged_chars + end; + l1 = sys_log_logged_chars - end; - s2 = log_buf; + s2 = sys_log_buf; l2 = end; } else { s1 = ""; l1 = 0; - s2 = log_buf + end - logged_chars; - l2 = logged_chars; + s2 = sys_log_buf + end - sys_log_logged_chars; + l2 = sys_log_logged_chars; } if (!spin_trylock_irqsave(&dump_list_lock, flags)) { @@ -1544,3 +1498,4 @@ void kmsg_dump(enum kmsg_dump_reason reason) spin_unlock_irqrestore(&dump_list_lock, flags); } #endif + diff --git a/kernel/syslog.c b/kernel/syslog.c new file mode 100644 index 0000000..44b1d90 --- /dev/null +++ b/kernel/syslog.c @@ -0,0 +1,355 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + * Jun 2010 + * Jean-Marc Pigeon + * + * Purpose is to regroup all procedure involved + * in system log. + * System log need to be containerized to avoid + * crossing over critical data between physical host layer + * and container layer. + * + * The principle is to keep a containerized ring buffer + * where container kernel data are redirected, kept and + * managed. + * + * Containerized syslog is activated if CLONE_SYSLOG + * condition is true. 
+ *
+ */
+
+/*
+ * NOTE(review): the header names on the #include lines were lost when this
+ * patch was extracted; the list below is reconstructed from the symbols
+ * this file uses -- confirm against the original patch.
+ */
+#include <linux/bootmem.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/nsproxy.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/syslog.h>
+
+/*
+ * Static memory definition, used to assign a syslog
+ * to the init process itself
+ *
+ */
+#define __LOG_BUF_LEN	(1 << CONFIG_LOG_BUF_SHIFT)
+static char __log_buf[__LOG_BUF_LEN];
+
+struct syslog_ns init_syslog_ns = {
+	.kref = {
+		.refcount = ATOMIC_INIT(2),
+	},
+	.handle = 1,	/* kernel INIT process pid */
+	.logbuf_lock = __SPIN_LOCK_INITIALIZER(logbuf_lock),
+	.buf_len = __LOG_BUF_LEN,
+	.buf = __log_buf
+};
+
+/*
+ * List of all syslog ns currently allocated.
+ * The first member of this list (kernel syslog) is static and
+ * can't be removed.
+ *
+ * removing_syslog_ns() and adding_syslog_ns() serialise on the
+ * list_lock of this head element only; the list_lock of the other
+ * nodes is initialised but not taken by them.
+ */
+struct log_list {
+	spinlock_t list_lock;		/* head's lock guards list updates */
+	struct log_list *next;		/* next syslog_ns in the list */
+	struct syslog_ns *syslog_ns;
+	} log_list = {
+	.list_lock = __SPIN_LOCK_INITIALIZER(list_lock),
+	.next = NULL,
+	.syslog_ns = &init_syslog_ns
+	};
+
+/*
+ * Unlink and free the list node that refers to @syslog_ns, if any.
+ *
+ * The head element is never a candidate.  The whole walk runs under the
+ * head's list_lock so that the predecessor's ->next cannot change while
+ * it is being rewritten; the node is freed after the lock is dropped.
+ */
+static void removing_syslog_ns(struct syslog_ns *syslog_ns)
+{
+	struct log_list *prev;
+	struct log_list *victim = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&log_list.list_lock, flags);
+	for (prev = &log_list; prev->next; prev = prev->next) {
+		if (prev->next->syslog_ns == syslog_ns) {
+			victim = prev->next;
+			prev->next = victim->next;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&log_list.list_lock, flags);
+	kfree(victim);		/* kfree(NULL) is a no-op */
+}
+
+/*
+ * Append @syslog_ns to the list of known syslog namespaces.
+ *
+ * The node is allocated before the lock is taken: GFP_KERNEL may sleep,
+ * which is not allowed with a spinlock held and interrupts disabled.
+ *
+ * Returns 0 on success, -ENOMEM if the node could not be allocated.
+ */
+static int adding_syslog_ns(struct syslog_ns *syslog_ns)
+{
+	struct log_list *node;
+	struct log_list *tail;
+	unsigned long flags;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+	spin_lock_init(&node->list_lock);
+	node->syslog_ns = syslog_ns;
+
+	spin_lock_irqsave(&log_list.list_lock, flags);
+	for (tail = &log_list; tail->next; tail = tail->next)
+		;
+	tail->next = node;
+	spin_unlock_irqrestore(&log_list.list_lock, flags);
+	return 0;
+}
+
+/*
+ * Procedure to free all resources tied to a syslog_ns: its list node,
+ * its ring buffer and the structure itself.
+ * Accepts NULL; always returns NULL.
+ */
+static struct syslog_ns *free_all_syslog_ns(struct syslog_ns *syslog_ns)
+{
+	if (!syslog_ns)
+		return NULL;
+	removing_syslog_ns(syslog_ns);
+	kfree(syslog_ns->buf);
+	kfree(syslog_ns);
+	return NULL;
+}
+
+/*
+ * Procedure to assign memory for a syslog area.
+ *
+ * Allocates a syslog_ns with a zeroed ring buffer of @container_buf_len
+ * bytes, tags it with the pid of the calling task and registers it in
+ * the list.  Returns the new syslog_ns, or NULL if any allocation
+ * failed (nothing is left allocated in that case).
+ */
+static struct syslog_ns *malloc_syslog_ns(unsigned container_buf_len)
+{
+	struct syslog_ns *ns;
+	char *buf;
+
+	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+	buf = kzalloc(container_buf_len, GFP_KERNEL);
+	if (!ns || !buf)
+		goto fail;
+
+	kref_init(&ns->kref);
+	spin_lock_init(&ns->logbuf_lock);
+	ns->handle = current->pid;
+	ns->buf_len = container_buf_len;
+	ns->buf = buf;
+	if (adding_syslog_ns(ns))
+		goto fail;
+	return ns;
+
+fail:
+	kfree(buf);
+	kfree(ns);
+	return NULL;
+}
+/*
+ * Procedure to locate and return a syslog_ns with same handle as pid submitted.
+ * return a NULL pointer if not found;
+ *
+ * NOTE(review): the list is walked here without taking any lock.
+ */
+static struct syslog_ns *find_its_syslog_ns(pid_t pid)
+{
+	struct log_list *node;
+
+	for (node = &log_list; node; node = node->next)
+		if (node->syslog_ns->handle == pid)
+			return node->syslog_ns;
+	return NULL;
+}
+
+/*
+ * Move the live part of the ring into @new_buf and re-base the indices.
+ *
+ * The indices are free-running and only masked with (buf_len - 1) when
+ * the buffer is accessed, so a raw copy of the old array would put the
+ * data at the wrong offsets once buf_len changes.  Copy everything
+ * between the older of log_start/con_start and log_end to offset 0 and
+ * shift the three indices down by the same amount.
+ *
+ * The caller holds ns->logbuf_lock and guarantees new_len >= ns->buf_len.
+ */
+static void rebase_syslog_ring(struct syslog_ns *ns, char *new_buf,
+			       unsigned new_len)
+{
+	unsigned old_mask = ns->buf_len - 1;
+	unsigned first = min(ns->con_start, ns->log_start);
+	unsigned idx;
+
+	for (idx = first; idx != ns->log_end; idx++)
+		new_buf[idx - first] = ns->buf[idx & old_mask];
+	ns->log_start -= first;
+	ns->con_start -= first;
+	ns->log_end -= first;
+	ns->buf = new_buf;
+	ns->buf_len = new_len;
+}
+
+/*
+ * Procedure to ONLY increase syslog buffer size
+ * If syslog_ns is NULL, assign a brand new syslog_ns
+ *
+ * @container_buf_len must be a non-zero power of two because the ring
+ * is indexed with a (buf_len - 1) mask.
+ *
+ * Returns the (possibly new) syslog_ns, or ERR_PTR(-EINVAL) for a bad
+ * or smaller length and ERR_PTR(-ENOMEM) when allocation fails.
+ *
+ * NOTE(review): the init_syslog_ns path uses the bootmem allocator,
+ * which is presumably only usable during early boot -- confirm that no
+ * caller reaches it later.
+ */
+struct syslog_ns *resize_syslog_ns(struct syslog_ns *syslog_ns,
+				    unsigned container_buf_len)
+{
+	if (!container_buf_len ||
+	    (container_buf_len & (container_buf_len - 1)))
+		return ERR_PTR(-EINVAL);
+
+	if (!syslog_ns) {
+		syslog_ns = malloc_syslog_ns(container_buf_len);
+		return syslog_ns ? syslog_ns : ERR_PTR(-ENOMEM);
+	}
+	if (syslog_ns->buf_len > container_buf_len) {
+		printk(KERN_WARNING "log_buf_len: Not allowed "
+			"to decrease syslog buffer\n");
+		return ERR_PTR(-EINVAL);
+	}
+	if (syslog_ns->buf_len < container_buf_len) {
+		bool is_init = (syslog_ns == &init_syslog_ns);
+		unsigned old_buf_len;
+		char *old_buf;
+		char *new_buf;
+		unsigned long flags;
+
+		if (is_init)
+			new_buf = alloc_bootmem(container_buf_len);
+		else
+			new_buf = kzalloc(container_buf_len, GFP_KERNEL);
+		if (!new_buf) {
+			printk(KERN_WARNING
+				"log_buf_len: allocation failed\n");
+			return ERR_PTR(-ENOMEM);
+		}
+
+		spin_lock_irqsave(&syslog_ns->logbuf_lock, flags);
+		old_buf = syslog_ns->buf;
+		old_buf_len = syslog_ns->buf_len;
+		rebase_syslog_ring(syslog_ns, new_buf, container_buf_len);
+		spin_unlock_irqrestore(&syslog_ns->logbuf_lock, flags);
+
+		if (!is_init)
+			kfree(old_buf);
+		else if (old_buf != __log_buf)
+			/* free_bootmem() takes a physical address */
+			free_bootmem(__pa(old_buf), old_buf_len);
+	}
+	printk(KERN_NOTICE "log_buf_len: %u\n", syslog_ns->buf_len);
+	return syslog_ns;
+}
+
+/*
+ * Procedure to use current syslog unless a CLONE_SYSLOG is set
+ * such a new syslog area is defined and used
+ *
+ * Without the full CLONE_SYSLOG flag set, takes a reference on
+ * @current_syslog_ns and returns it.  Otherwise returns a freshly
+ * allocated syslog_ns, or ERR_PTR(-ENOMEM) so that the IS_ERR() check
+ * in create_new_namespaces() sees the failure.
+ */
+struct syslog_ns *copy_syslog_ns(unsigned long flags,
+				  struct syslog_ns *current_syslog_ns)
+{
+/*4096 should be enough for container syslog */
+#define CONTAINER_BUF_LEN 4096
+	struct syslog_ns *new_ns;
+
+	BUG_ON(!current_syslog_ns);
+	if ((flags & CLONE_SYSLOG) != CLONE_SYSLOG) {
+		/*incrementing usage ref count */
+		kref_get(&current_syslog_ns->kref);
+		return current_syslog_ns;
+	}
+	new_ns = malloc_syslog_ns(CONTAINER_BUF_LEN);
+	return new_ns ? new_ns : ERR_PTR(-ENOMEM);
+}
+
+/*
+ * kref release callback: frees the syslog_ns embedding @kref together
+ * with its buffer and list entry.
+ */
+void free_syslog_ns(struct kref *kref)
+{
+	free_all_syslog_ns(container_of(kref, struct syslog_ns, kref));
+}
+
+/*
+ * Procedure to get the current syslog area linked to a
+ * container (by CLONE_SYSLOG).
+ * If the current task has no nsproxy or no syslog_ns, report the
+ * host kernel's own syslog_ns.
+ */
+struct syslog_ns *current_syslog_ns(void)
+{
+	struct syslog_ns *ns = NULL;
+
+	if (current->nsproxy)
+		ns = current->nsproxy->syslog_ns;
+	return ns ? ns : &init_syslog_ns;	/*lets try to cover log anyway! */
+}
+
+/*
+ * Procedure to replace current syslog namespace with another.
+ * Return the original current syslog_ns.
+ *
+ * Nothing is replaced when @syslog_ns is NULL or when the current task
+ * has no nsproxy (the case current_syslog_ns() already allows for).
+ *
+ * NOTE(review): this rewrites the nsproxy of whichever task is current;
+ * the ipt_LOG caller added by this patch presumably runs in packet
+ * processing context, where that task is unrelated -- confirm intent.
+ */
+struct syslog_ns *switch_syslog_ns(struct syslog_ns *syslog_ns)
+{
+	unsigned long flags;
+	struct syslog_ns *old;
+
+	old = current_syslog_ns();
+	spin_lock_irqsave(&old->logbuf_lock, flags);
+	if (syslog_ns && current->nsproxy)
+		current->nsproxy->syslog_ns = syslog_ns;
+	spin_unlock_irqrestore(&old->logbuf_lock, flags);
+	return old;
+}
+
+/*
+ * Procedure to locate the syslog handle owned by a given
+ * pid or by one of its parents' lineage.
+ *
+ * Starting at @pid, look for a syslog_ns whose handle equals the pid;
+ * if there is none, move to the pid of the task's real_parent and try
+ * again.  Falls back to the first list entry (the kernel syslog) when
+ * the chain reaches pid 1 or a task cannot be found.
+ *
+ * The task lookup and the real_parent read are done inside an RCU
+ * read-side section, which find_task_by_vpid() requires.
+ *
+ * NOTE(review): no reference is taken on the returned syslog_ns, so a
+ * caller that stores it (do_setlink stores it in the net_device) may
+ * outlive it -- confirm lifetime handling.
+ * NOTE(review): @pid is looked up as a vpid while real_parent->pid is
+ * presumably a global pid; verify for callers outside the init pid
+ * namespace.
+ */
+struct syslog_ns *find_syslog_ns_bypid(pid_t pid)
+{
+	struct syslog_ns *found = log_list.syslog_ns;
+
+	rcu_read_lock();
+	while (pid > 1) {
+		struct syslog_ns *its_ns;
+		struct task_struct *ns_task;
+		struct task_struct *parent;
+
+		its_ns = find_its_syslog_ns(pid);
+		if (its_ns) {
+			found = its_ns;
+			break;
+		}
+		ns_task = find_task_by_vpid(pid);
+		if (!ns_task)
+			break;
+		parent = rcu_dereference(ns_task->real_parent);
+		if (!parent)
+			break;
+		pid = parent->pid;
+	}
+	rcu_read_unlock();
+	return found;
+}
+
+/*
+ * Procedure to dereference syslog_ns usage, if no reference
+ * anymore, memory is freed.
+ *
+ */
+void put_syslog_ns(struct syslog_ns *ns)
+{
+	kref_put(&ns->kref, free_syslog_ns);
+}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1a2af24..5f7cf87 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -1082,6 +1083,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, err = PTR_ERR(net); goto errout; } + if (dev) { + register pid_t net_ns_pid; + + net_ns_pid = nla_get_u32(tb[IFLA_NET_NS_PID]); + dev->syslog_ns = find_syslog_ns_bypid(net_ns_pid); + } + err = dev_change_net_namespace(dev, net, ifname); put_net(net); if (err) diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 5234f4f..74761de 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -11,6 +11,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -382,6 +383,14 @@ ipt_log_packet(u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { + register struct syslog_ns *syslog_ns; + + + syslog_ns = (struct syslog_ns *)0; + if (skb->dev) /*another syslog_ns possible? */ + syslog_ns = skb->dev->syslog_ns; + syslog_ns = switch_syslog_ns(syslog_ns); + if (!loginfo) loginfo = &default_loginfo; @@ -422,6 +431,7 @@ ipt_log_packet(u_int8_t pf, dump_packet(loginfo, skb, 0); printk("\n"); spin_unlock_bh(&log_lock); + (void) switch_syslog_ns(syslog_ns); } static unsigned int -- 1.8.2.3