#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
-#include <linux/capability.h>
+#include <linux/security.h>
#include <linux/ctype.h>
#include <linux/utsname.h>
#include <linux/smp_lock.h>
#include <linux/highuid.h>
#include <linux/writeback.h>
#include <linux/hugetlb.h>
-#include <linux/security.h>
#include <linux/initrd.h>
+#include <linux/key.h>
#include <linux/times.h>
#include <linux/limits.h>
#include <linux/dcache.h>
#include <linux/syscalls.h>
+#include <linux/vmstat.h>
#include <linux/nfs_fs.h>
#include <linux/acpi.h>
#include <linux/reboot.h>
+#include <linux/ftrace.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#ifdef CONFIG_X86
#include <asm/nmi.h>
#include <asm/stacktrace.h>
+#include <asm/io.h>
#endif
+static int deprecated_sysctl_warning(struct __sysctl_args *args);
+
#if defined(CONFIG_SYSCTL)
/* External variables not in a header file. */
extern int sysctl_overcommit_ratio;
extern int sysctl_panic_on_oom;
extern int sysctl_oom_kill_allocating_task;
+extern int sysctl_oom_dump_tasks;
extern int max_threads;
extern int core_uses_pid;
extern int suid_dumpable;
extern char core_pattern[];
extern int pid_max;
extern int min_free_kbytes;
-extern int printk_ratelimit_jiffies;
-extern int printk_ratelimit_burst;
extern int pid_max_min, pid_max_max;
extern int sysctl_drop_caches;
extern int percpu_pagelist_fraction;
extern int compat_log;
-extern int maps_protect;
-extern int sysctl_stat_interval;
-extern int audit_argv_kb;
+extern int latencytop_enabled;
+extern int sysctl_nr_open_min, sysctl_nr_open_max;
+#ifdef CONFIG_RCU_TORTURE_TEST
+extern int rcutorture_runnable;
+#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
/* Constants used for minimum and maximum */
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
static int one = 1;
+#endif
+
+#ifdef CONFIG_DETECT_SOFTLOCKUP
static int sixty = 60;
+static int neg_one = -1;
#endif
-#ifdef CONFIG_MMU
+#if defined(CONFIG_MMU) && defined(CONFIG_FILE_LOCKING)
static int two = 2;
#endif
static int ngroups_max = NGROUPS_MAX;
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
extern char modprobe_path[];
#endif
#ifdef CONFIG_CHR_DEV_SG
extern int sg_big_buff;
#endif
-#ifdef __sparc__
-extern char reboot_command [];
-extern int stop_a_enabled;
-extern int scons_pwroff;
+#ifdef CONFIG_SPARC
+#include <asm/system.h>
#endif
#ifdef __hppa__
extern int spin_retry;
#endif
-extern int sysctl_hz_timer;
-
#ifdef CONFIG_BSD_PROCESS_ACCT
extern int acct_parm[];
#endif
extern int max_lock_depth;
#endif
-#ifdef CONFIG_SYSCTL_SYSCALL
-static int parse_table(int __user *, int, void __user *, size_t __user *,
- void __user *, size_t, struct ctl_table *);
-#endif
-
-
#ifdef CONFIG_PROC_SYSCTL
static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos);
-static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
static struct ctl_table root_table[];
-static struct ctl_table_header root_table_header =
- { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
+static struct ctl_table_root sysctl_table_root;
+static struct ctl_table_header root_table_header = {
+ .count = 1,
+ .ctl_table = root_table,
+ .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
+ .root = &sysctl_table_root,
+ .set = &sysctl_table_root.default_set,
+};
+static struct ctl_table_root sysctl_table_root = {
+ .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
+ .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
+};
static struct ctl_table kern_table[];
static struct ctl_table vm_table[];
static struct ctl_table debug_table[];
static struct ctl_table dev_table[];
extern struct ctl_table random_table[];
-#ifdef CONFIG_UNIX98_PTYS
-extern struct ctl_table pty_table[];
-#endif
#ifdef CONFIG_INOTIFY_USER
extern struct ctl_table inotify_table[];
#endif
+#ifdef CONFIG_EPOLL
+extern struct ctl_table epoll_table[];
+#endif
#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
int sysctl_legacy_va_layout;
.mode = 0555,
.child = vm_table,
},
-#ifdef CONFIG_NET
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = net_table,
- },
-#endif
{
.ctl_name = CTL_FS,
.procname = "fs",
};
#ifdef CONFIG_SCHED_DEBUG
-static unsigned long min_sched_granularity_ns = 100000; /* 100 usecs */
-static unsigned long max_sched_granularity_ns = 1000000000; /* 1 second */
-static unsigned long min_wakeup_granularity_ns; /* 0 usecs */
-static unsigned long max_wakeup_granularity_ns = 1000000000; /* 1 second */
+static int min_sched_granularity_ns = 100000; /* 100 usecs */
+static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
+static int min_wakeup_granularity_ns; /* 0 usecs */
+static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
#endif
static struct ctl_table kern_table[] = {
#ifdef CONFIG_SCHED_DEBUG
{
.ctl_name = CTL_UNNUMBERED,
- .procname = "sched_nr_latency",
- .data = &sysctl_sched_nr_latency,
+ .procname = "sched_min_granularity_ns",
+ .data = &sysctl_sched_min_granularity,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &sched_nr_latency_handler,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_sched_granularity_ns,
+ .extra2 = &max_sched_granularity_ns,
},
{
.ctl_name = CTL_UNNUMBERED,
.data = &sysctl_sched_latency,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
+ .proc_handler = &sched_nr_latency_handler,
.strategy = &sysctl_intvec,
.extra1 = &min_sched_granularity_ns,
.extra2 = &max_sched_granularity_ns,
},
{
.ctl_name = CTL_UNNUMBERED,
- .procname = "sched_batch_wakeup_granularity_ns",
- .data = &sysctl_sched_batch_wakeup_granularity,
+ .procname = "sched_shares_ratelimit",
+ .data = &sysctl_sched_shares_ratelimit,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_shares_thresh",
+ .data = &sysctl_sched_shares_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
- .extra1 = &min_wakeup_granularity_ns,
- .extra2 = &max_wakeup_granularity_ns,
+ .extra1 = &zero,
},
{
.ctl_name = CTL_UNNUMBERED,
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_nr_migrate",
+ .data = &sysctl_sched_nr_migrate,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#endif
{
.ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_rt_period_us",
+ .data = &sysctl_sched_rt_period,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &sched_rt_handler,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_rt_runtime_us",
+ .data = &sysctl_sched_rt_runtime,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &sched_rt_handler,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
.procname = "sched_compat_yield",
.data = &sysctl_sched_compat_yield,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
-#ifdef CONFIG_AUDITSYSCALL
- {
- .ctl_name = CTL_UNNUMBERED,
- .procname = "audit_argv_kb",
- .data = &audit_argv_kb,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
-#endif
{
.ctl_name = KERN_CORE_PATTERN,
.procname = "core_pattern",
},
#ifdef CONFIG_PROC_SYSCTL
{
- .ctl_name = KERN_TAINTED,
.procname = "tainted",
- .data = &tainted,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = &proc_dointvec_taint,
+ .proc_handler = &proc_taint,
},
#endif
+#ifdef CONFIG_LATENCYTOP
{
- .ctl_name = KERN_CAP_BSET,
- .procname = "cap-bound",
- .data = &cap_bset,
- .maxlen = sizeof(kernel_cap_t),
- .mode = 0600,
- .proc_handler = &proc_dointvec_bset,
+ .procname = "latencytop",
+ .data = &latencytop_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
},
+#endif
#ifdef CONFIG_BLK_DEV_INITRD
{
.ctl_name = KERN_REALROOTDEV,
.mode = 0644,
.proc_handler = &proc_dointvec,
},
-#ifdef __sparc__
+#ifdef CONFIG_SPARC
{
.ctl_name = KERN_SPARC_REBOOT,
.procname = "reboot-cmd",
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+#ifdef CONFIG_FUNCTION_TRACER
{
- .ctl_name = KERN_PRINTK,
- .procname = "printk",
- .data = &console_loglevel,
- .maxlen = 4*sizeof(int),
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "ftrace_enabled",
+ .data = &ftrace_enabled,
+ .maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &ftrace_enable_sysctl,
},
-#ifdef CONFIG_KMOD
+#endif
+#ifdef CONFIG_MODULES
{
.ctl_name = KERN_MODPROBE,
.procname = "modprobe",
#endif
#ifdef CONFIG_PROC_SYSCTL
{
- .ctl_name = KERN_CADPID,
.procname = "cad_pid",
.data = NULL,
.maxlen = sizeof (int),
.mode = 0555,
.child = random_table,
},
-#ifdef CONFIG_UNIX98_PTYS
- {
- .ctl_name = KERN_PTY,
- .procname = "pty",
- .mode = 0555,
- .child = pty_table,
- },
-#endif
{
.ctl_name = KERN_OVERFLOWUID,
.procname = "overflowuid",
.proc_handler = &proc_dointvec,
},
#endif
-#ifdef CONFIG_NO_IDLE_HZ
- {
- .ctl_name = KERN_HZ_TIMER,
- .procname = "hz_timer",
- .data = &sysctl_hz_timer,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
-#endif
{
.ctl_name = KERN_S390_USER_DEBUG_LOGGING,
.procname = "userprocess_debug",
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+#if defined CONFIG_PRINTK
+ {
+ .ctl_name = KERN_PRINTK,
+ .procname = "printk",
+ .data = &console_loglevel,
+ .maxlen = 4*sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{
.ctl_name = KERN_PRINTK_RATELIMIT,
.procname = "printk_ratelimit",
- .data = &printk_ratelimit_jiffies,
+ .data = &printk_ratelimit_state.interval,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
{
.ctl_name = KERN_PRINTK_RATELIMIT_BURST,
.procname = "printk_ratelimit_burst",
- .data = &printk_ratelimit_burst,
+ .data = &printk_ratelimit_state.burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+#endif
{
.ctl_name = KERN_NGROUPS_MAX,
.procname = "ngroups_max",
.proc_handler = &proc_dointvec,
},
{
- .ctl_name = KERN_NMI_WATCHDOG,
.procname = "nmi_watchdog",
.data = &nmi_watchdog_enabled,
.maxlen = sizeof (int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "io_delay_type",
+ .data = &io_delay_type,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#endif
#if defined(CONFIG_MMU)
{
#endif
#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
{
- .ctl_name = KERN_ACPI_VIDEO_FLAGS,
.procname = "acpi_video_flags",
.data = &acpi_realmode_flags,
.maxlen = sizeof (unsigned long),
#ifdef CONFIG_DETECT_SOFTLOCKUP
{
.ctl_name = CTL_UNNUMBERED,
+ .procname = "softlockup_panic",
+ .data = &softlockup_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
.procname = "softlockup_thresh",
.data = &softlockup_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
- .extra1 = &one,
+ .extra1 = &neg_one,
.extra2 = &sixty,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "hung_task_check_count",
+ .data = &sysctl_hung_task_check_count,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &proc_doulongvec_minmax,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "hung_task_timeout_secs",
+ .data = &sysctl_hung_task_timeout_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &proc_doulongvec_minmax,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "hung_task_warnings",
+ .data = &sysctl_hung_task_warnings,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &proc_doulongvec_minmax,
+ .strategy = &sysctl_intvec,
+ },
#endif
#ifdef CONFIG_COMPAT
{
.proc_handler = &proc_dointvec,
},
#endif
-#ifdef CONFIG_PROC_FS
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "poweroff_cmd",
+ .data = &poweroff_cmd,
+ .maxlen = POWEROFF_CMD_PATH_LEN,
+ .mode = 0644,
+ .proc_handler = &proc_dostring,
+ .strategy = &sysctl_string,
+ },
+#ifdef CONFIG_KEYS
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "keys",
+ .mode = 0555,
+ .child = key_sysctls,
+ },
+#endif
+#ifdef CONFIG_RCU_TORTURE_TEST
{
.ctl_name = CTL_UNNUMBERED,
- .procname = "maps_protect",
- .data = &maps_protect,
+ .procname = "rcutorture_runnable",
+ .data = &rcutorture_runnable,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#endif
+#ifdef CONFIG_UNEVICTABLE_LRU
{
.ctl_name = CTL_UNNUMBERED,
- .procname = "poweroff_cmd",
- .data = &poweroff_cmd,
- .maxlen = POWEROFF_CMD_PATH_LEN,
+ .procname = "scan_unevictable_pages",
+ .data = &scan_unevictable_pages,
+ .maxlen = sizeof(scan_unevictable_pages),
.mode = 0644,
- .proc_handler = &proc_dostring,
- .strategy = &sysctl_string,
+ .proc_handler = &scan_unevictable_handler,
},
+#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
.proc_handler = &proc_dointvec,
},
{
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "oom_dump_tasks",
+ .data = &sysctl_oom_dump_tasks,
+ .maxlen = sizeof(sysctl_oom_dump_tasks),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
.ctl_name = VM_OVERCOMMIT_RATIO,
.procname = "overcommit_ratio",
.data = &sysctl_overcommit_ratio,
.extra2 = &one_hundred,
},
{
- .ctl_name = VM_DIRTY_WB_CS,
.procname = "dirty_writeback_centisecs",
.data = &dirty_writeback_interval,
.maxlen = sizeof(dirty_writeback_interval),
.proc_handler = &dirty_writeback_centisecs_handler,
},
{
- .ctl_name = VM_DIRTY_EXPIRE_CS,
.procname = "dirty_expire_centisecs",
.data = &dirty_expire_interval,
.maxlen = sizeof(dirty_expire_interval),
},
#ifdef CONFIG_HUGETLB_PAGE
{
- .ctl_name = VM_HUGETLB_PAGES,
.procname = "nr_hugepages",
- .data = &max_huge_pages,
+ .data = NULL,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = &hugetlb_sysctl_handler,
},
{
.ctl_name = CTL_UNNUMBERED,
- .procname = "hugetlb_dynamic_pool",
- .data = &hugetlb_dynamic_pool,
- .maxlen = sizeof(hugetlb_dynamic_pool),
+ .procname = "nr_overcommit_hugepages",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &hugetlb_overcommit_handler,
+ .extra1 = (void *)&hugetlb_zero,
+ .extra2 = (void *)&hugetlb_infinity,
},
#endif
{
.extra1 = &zero,
},
#endif
+#ifdef CONFIG_HIGHMEM
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "highmem_is_dirtyable",
+ .data = &vm_highmem_is_dirtyable,
+ .maxlen = sizeof(vm_highmem_is_dirtyable),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
.proc_handler = &proc_dointvec,
},
{
- .ctl_name = FS_NRFILE,
.procname = "file-nr",
.data = &files_stat,
.maxlen = 3*sizeof(int),
.proc_handler = &proc_dointvec,
},
{
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nr_open",
+ .data = &sysctl_nr_open,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = &sysctl_nr_open_min,
+ .extra2 = &sysctl_nr_open_max,
+ },
+ {
.ctl_name = FS_DENTRY,
.procname = "dentry-state",
.data = &dentry_stat,
.extra1 = &minolduid,
.extra2 = &maxolduid,
},
+#ifdef CONFIG_FILE_LOCKING
{
.ctl_name = FS_LEASES,
.procname = "leases-enable",
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+#endif
#ifdef CONFIG_DNOTIFY
{
.ctl_name = FS_DIR_NOTIFY,
},
#endif
#ifdef CONFIG_MMU
+#ifdef CONFIG_FILE_LOCKING
{
.ctl_name = FS_LEASE_TIME,
.procname = "lease-break-time",
.extra1 = &zero,
.extra2 = &two,
},
+#endif
+#ifdef CONFIG_AIO
{
- .ctl_name = FS_AIO_NR,
.procname = "aio-nr",
.data = &aio_nr,
.maxlen = sizeof(aio_nr),
.proc_handler = &proc_doulongvec_minmax,
},
{
- .ctl_name = FS_AIO_MAX_NR,
.procname = "aio-max-nr",
.data = &aio_max_nr,
.maxlen = sizeof(aio_max_nr),
.mode = 0644,
.proc_handler = &proc_doulongvec_minmax,
},
+#endif /* CONFIG_AIO */
#ifdef CONFIG_INOTIFY_USER
{
.ctl_name = FS_INOTIFY,
.child = inotify_table,
},
#endif
+#ifdef CONFIG_EPOLL
+ {
+ .procname = "epoll",
+ .mode = 0555,
+ .child = epoll_table,
+ },
+#endif
#endif
{
.ctl_name = KERN_SETUID_DUMPABLE,
spin_unlock(&sysctl_lock);
wait_for_completion(&wait);
spin_lock(&sysctl_lock);
+ } else {
+ /* anything non-NULL; we'll never dereference it */
+ p->unregistering = ERR_PTR(-EINVAL);
}
/*
* do not remove from the list until nobody holds it; walking the
list_del_init(&p->ctl_entry);
}
+void sysctl_head_get(struct ctl_table_header *head)
+{
+ spin_lock(&sysctl_lock);
+ head->count++;
+ spin_unlock(&sysctl_lock);
+}
+
+void sysctl_head_put(struct ctl_table_header *head)
+{
+ spin_lock(&sysctl_lock);
+ if (!--head->count)
+ kfree(head);
+ spin_unlock(&sysctl_lock);
+}
+
+struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
+{
+ if (!head)
+ BUG();
+ spin_lock(&sysctl_lock);
+ if (!use_table(head))
+ head = ERR_PTR(-ENOENT);
+ spin_unlock(&sysctl_lock);
+ return head;
+}
+
void sysctl_head_finish(struct ctl_table_header *head)
{
if (!head)
spin_unlock(&sysctl_lock);
}
-struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
+static struct ctl_table_set *
+lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
+{
+ struct ctl_table_set *set = &root->default_set;
+ if (root->lookup)
+ set = root->lookup(root, namespaces);
+ return set;
+}
+
+static struct list_head *
+lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
{
+ struct ctl_table_set *set = lookup_header_set(root, namespaces);
+ return &set->list;
+}
+
+struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
+ struct ctl_table_header *prev)
+{
+ struct ctl_table_root *root;
+ struct list_head *header_list;
struct ctl_table_header *head;
struct list_head *tmp;
+
spin_lock(&sysctl_lock);
if (prev) {
+ head = prev;
tmp = &prev->ctl_entry;
unuse_table(prev);
goto next;
spin_unlock(&sysctl_lock);
return head;
next:
+ root = head->root;
tmp = tmp->next;
- if (tmp == &root_table_header.ctl_entry)
- break;
+ header_list = lookup_header_list(root, namespaces);
+ if (tmp != header_list)
+ continue;
+
+ do {
+ root = list_entry(root->root_list.next,
+ struct ctl_table_root, root_list);
+ if (root == &sysctl_table_root)
+ goto out;
+ header_list = lookup_header_list(root, namespaces);
+ } while (list_empty(header_list));
+ tmp = header_list->next;
}
+out:
spin_unlock(&sysctl_lock);
return NULL;
}
+struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
+{
+ return __sysctl_head_next(current->nsproxy, prev);
+}
+
+void register_sysctl_root(struct ctl_table_root *root)
+{
+ spin_lock(&sysctl_lock);
+ list_add_tail(&root->root_list, &sysctl_table_root.root_list);
+ spin_unlock(&sysctl_lock);
+}
+
#ifdef CONFIG_SYSCTL_SYSCALL
+/* Perform the actual read/write of a sysctl table entry. */
+static int do_sysctl_strategy(struct ctl_table_root *root,
+ struct ctl_table *table,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+{
+ int op = 0, rc;
+
+ if (oldval)
+ op |= MAY_READ;
+ if (newval)
+ op |= MAY_WRITE;
+ if (sysctl_perm(root, table, op))
+ return -EPERM;
+
+ if (table->strategy) {
+ rc = table->strategy(table, oldval, oldlenp, newval, newlen);
+ if (rc < 0)
+ return rc;
+ if (rc > 0)
+ return 0;
+ }
+
+ /* If there is no strategy routine, or if the strategy returns
+ * zero, proceed with automatic r/w */
+ if (table->data && table->maxlen) {
+ rc = sysctl_data(table, oldval, oldlenp, newval, newlen);
+ if (rc < 0)
+ return rc;
+ }
+ return 0;
+}
+
+static int parse_table(int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen,
+ struct ctl_table_root *root,
+ struct ctl_table *table)
+{
+ int n;
+repeat:
+ if (!nlen)
+ return -ENOTDIR;
+ if (get_user(n, name))
+ return -EFAULT;
+ for ( ; table->ctl_name || table->procname; table++) {
+ if (!table->ctl_name)
+ continue;
+ if (n == table->ctl_name) {
+ int error;
+ if (table->child) {
+ if (sysctl_perm(root, table, MAY_EXEC))
+ return -EPERM;
+ name++;
+ nlen--;
+ table = table->child;
+ goto repeat;
+ }
+ error = do_sysctl_strategy(root, table,
+ oldval, oldlenp,
+ newval, newlen);
+ return error;
+ }
+ }
+ return -ENOTDIR;
+}
+
int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
for (head = sysctl_head_next(NULL); head;
head = sysctl_head_next(head)) {
error = parse_table(name, nlen, oldval, oldlenp,
- newval, newlen, head->ctl_table);
+ newval, newlen,
+ head->root, head->ctl_table);
if (error != -ENOTDIR) {
sysctl_head_finish(head);
break;
if (copy_from_user(&tmp, args, sizeof(tmp)))
return -EFAULT;
+ error = deprecated_sysctl_warning(&tmp);
+ if (error)
+ goto out;
+
lock_kernel();
error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
tmp.newval, tmp.newlen);
unlock_kernel();
+out:
return error;
}
#endif /* CONFIG_SYSCTL_SYSCALL */
static int test_perm(int mode, int op)
{
- if (!current->euid)
+ if (!current_euid())
mode >>= 6;
else if (in_egroup_p(0))
mode >>= 3;
- if ((mode & op & 0007) == op)
+ if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
return 0;
return -EACCES;
}
-int sysctl_perm(struct ctl_table *table, int op)
+int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
{
int error;
- error = security_sysctl(table, op);
+ int mode;
+
+ error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
if (error)
return error;
- return test_perm(table->mode, op);
-}
-#ifdef CONFIG_SYSCTL_SYSCALL
-static int parse_table(int __user *name, int nlen,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen,
- struct ctl_table *table)
-{
- int n;
-repeat:
- if (!nlen)
- return -ENOTDIR;
- if (get_user(n, name))
- return -EFAULT;
- for ( ; table->ctl_name || table->procname; table++) {
- if (!table->ctl_name)
- continue;
- if (n == table->ctl_name) {
- int error;
- if (table->child) {
- if (sysctl_perm(table, 001))
- return -EPERM;
- name++;
- nlen--;
- table = table->child;
- goto repeat;
- }
- error = do_sysctl_strategy(table, name, nlen,
- oldval, oldlenp,
- newval, newlen);
- return error;
- }
- }
- return -ENOTDIR;
-}
-
-/* Perform the actual read/write of a sysctl table entry. */
-int do_sysctl_strategy (struct ctl_table *table,
- int __user *name, int nlen,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- int op = 0, rc;
-
- if (oldval)
- op |= 004;
- if (newval)
- op |= 002;
- if (sysctl_perm(table, op))
- return -EPERM;
+ if (root->permissions)
+ mode = root->permissions(root, current->nsproxy, table);
+ else
+ mode = table->mode;
- if (table->strategy) {
- rc = table->strategy(table, name, nlen, oldval, oldlenp,
- newval, newlen);
- if (rc < 0)
- return rc;
- if (rc > 0)
- return 0;
- }
-
- /* If there is no strategy routine, or if the strategy returns
- * zero, proceed with automatic r/w */
- if (table->data && table->maxlen) {
- rc = sysctl_data(table, name, nlen, oldval, oldlenp,
- newval, newlen);
- if (rc < 0)
- return rc;
- }
- return 0;
+ return test_perm(mode, op);
}
-#endif /* CONFIG_SYSCTL_SYSCALL */
static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
{
static __init int sysctl_init(void)
{
sysctl_set_parent(NULL, root_table);
+#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
+ {
+ int err;
+ err = sysctl_check_table(current->nsproxy, root_table);
+ }
+#endif
return 0;
}
core_initcall(sysctl_init);
+static struct ctl_table *is_branch_in(struct ctl_table *branch,
+ struct ctl_table *table)
+{
+ struct ctl_table *p;
+ const char *s = branch->procname;
+
+ /* branch should have named subdirectory as its first element */
+ if (!s || !branch->child)
+ return NULL;
+
+ /* ... and nothing else */
+ if (branch[1].procname || branch[1].ctl_name)
+ return NULL;
+
+ /* table should contain subdirectory with the same name */
+ for (p = table; p->procname || p->ctl_name; p++) {
+ if (!p->child)
+ continue;
+ if (p->procname && strcmp(p->procname, s) == 0)
+ return p;
+ }
+ return NULL;
+}
+
+/* see if attaching q to p would be an improvement */
+static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
+{
+ struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
+ struct ctl_table *next;
+ int is_better = 0;
+ int not_in_parent = !p->attached_by;
+
+ while ((next = is_branch_in(by, to)) != NULL) {
+ if (by == q->attached_by)
+ is_better = 1;
+ if (to == p->attached_by)
+ not_in_parent = 1;
+ by = by->child;
+ to = next->child;
+ }
+
+ if (is_better && not_in_parent) {
+ q->attached_by = by;
+ q->attached_to = to;
+ q->parent = p;
+ }
+}
+
/**
- * register_sysctl_table - register a sysctl hierarchy
+ * __register_sysctl_paths - register a sysctl hierarchy
+ * @root: List of sysctl headers to register on
+ * @namespaces: Data to compute which lists of sysctl entries are visible
+ * @path: The path to the directory the sysctl table is in.
* @table: the top-level table structure
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
- * array. An entry with a ctl_name of 0 terminates the table.
+ * array. A completely 0 filled entry terminates the table.
*
* The members of the &struct ctl_table structure are used as follows:
*
* This routine returns %NULL on a failure to register, and a pointer
* to the table header on success.
*/
-struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
+struct ctl_table_header *__register_sysctl_paths(
+ struct ctl_table_root *root,
+ struct nsproxy *namespaces,
+ const struct ctl_path *path, struct ctl_table *table)
{
- struct ctl_table_header *tmp;
- tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
- if (!tmp)
+ struct ctl_table_header *header;
+ struct ctl_table *new, **prevp;
+ unsigned int n, npath;
+ struct ctl_table_set *set;
+
+ /* Count the path components */
+ for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath)
+ ;
+
+ /*
+ * For each path component, allocate a 2-element ctl_table array.
+ * The first array element will be filled with the sysctl entry
+ * for this, the second will be the sentinel (ctl_name == 0).
+ *
+ * We allocate everything in one go so that we don't have to
+ * worry about freeing additional memory in unregister_sysctl_table.
+ */
+ header = kzalloc(sizeof(struct ctl_table_header) +
+ (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
+ if (!header)
return NULL;
- tmp->ctl_table = table;
- INIT_LIST_HEAD(&tmp->ctl_entry);
- tmp->used = 0;
- tmp->unregistering = NULL;
- sysctl_set_parent(NULL, table);
+
+ new = (struct ctl_table *) (header + 1);
+
+ /* Now connect the dots */
+ prevp = &header->ctl_table;
+ for (n = 0; n < npath; ++n, ++path) {
+ /* Copy the procname */
+ new->procname = path->procname;
+ new->ctl_name = path->ctl_name;
+ new->mode = 0555;
+
+ *prevp = new;
+ prevp = &new->child;
+
+ new += 2;
+ }
+ *prevp = table;
+ header->ctl_table_arg = table;
+
+ INIT_LIST_HEAD(&header->ctl_entry);
+ header->used = 0;
+ header->unregistering = NULL;
+ header->root = root;
+ sysctl_set_parent(NULL, header->ctl_table);
+ header->count = 1;
+#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
+ if (sysctl_check_table(namespaces, header->ctl_table)) {
+ kfree(header);
+ return NULL;
+ }
+#endif
spin_lock(&sysctl_lock);
- list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
+ header->set = lookup_header_set(root, namespaces);
+ header->attached_by = header->ctl_table;
+ header->attached_to = root_table;
+ header->parent = &root_table_header;
+ for (set = header->set; set; set = set->parent) {
+ struct ctl_table_header *p;
+ list_for_each_entry(p, &set->list, ctl_entry) {
+ if (p->unregistering)
+ continue;
+ try_attach(p, header);
+ }
+ }
+ header->parent->count++;
+ list_add_tail(&header->ctl_entry, &header->set->list);
spin_unlock(&sysctl_lock);
- return tmp;
+
+ return header;
+}
+
+/**
+ * register_sysctl_table_path - register a sysctl table hierarchy
+ * @path: The path to the directory the sysctl table is in.
+ * @table: the top-level table structure
+ *
+ * Register a sysctl table hierarchy. @table should be a filled in ctl_table
+ * array. A completely 0 filled entry terminates the table.
+ *
+ * See __register_sysctl_paths for more details.
+ */
+struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
+ struct ctl_table *table)
+{
+ return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
+ path, table);
+}
+
+/**
+ * register_sysctl_table - register a sysctl table hierarchy
+ * @table: the top-level table structure
+ *
+ * Register a sysctl table hierarchy. @table should be a filled in ctl_table
+ * array. A completely 0 filled entry terminates the table.
+ *
+ * See register_sysctl_paths for more details.
+ */
+struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
+{
+ static const struct ctl_path null_path[] = { {} };
+
+ return register_sysctl_paths(null_path, table);
}
/**
void unregister_sysctl_table(struct ctl_table_header * header)
{
might_sleep();
+
+ if (header == NULL)
+ return;
+
spin_lock(&sysctl_lock);
start_unregistering(header);
+ if (!--header->parent->count) {
+ WARN_ON(1);
+ kfree(header->parent);
+ }
+ if (!--header->count)
+ kfree(header);
spin_unlock(&sysctl_lock);
- kfree(header);
+}
+
+int sysctl_is_seen(struct ctl_table_header *p)
+{
+ struct ctl_table_set *set = p->set;
+ int res;
+ spin_lock(&sysctl_lock);
+ if (p->unregistering)
+ res = 0;
+ else if (!set->is_seen)
+ res = 1;
+ else
+ res = set->is_seen(set);
+ spin_unlock(&sysctl_lock);
+ return res;
+}
+
+void setup_sysctl_set(struct ctl_table_set *p,
+ struct ctl_table_set *parent,
+ int (*is_seen)(struct ctl_table_set *))
+{
+ INIT_LIST_HEAD(&p->list);
+ p->parent = parent ? parent : &sysctl_table_root.default_set;
+ p->is_seen = is_seen;
}
#else /* !CONFIG_SYSCTL */
return NULL;
}
+struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
+ struct ctl_table *table)
+{
+ return NULL;
+}
+
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
+void setup_sysctl_set(struct ctl_table_set *p,
+ struct ctl_table_set *parent,
+ int (*is_seen)(struct ctl_table_set *))
+{
+}
+
+void sysctl_head_put(struct ctl_table_header *head)
+{
+}
+
#endif /* CONFIG_SYSCTL */
/*
NULL,NULL);
}
-#define OP_SET 0
-#define OP_AND 1
-#define OP_OR 2
-
-static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
- int *valp,
- int write, void *data)
-{
- int op = *(int *)data;
- if (write) {
- int val = *negp ? -*lvalp : *lvalp;
- switch(op) {
- case OP_SET: *valp = val; break;
- case OP_AND: *valp &= val; break;
- case OP_OR: *valp |= val; break;
- }
- } else {
- int val = *valp;
- if (val < 0) {
- *negp = -1;
- *lvalp = (unsigned long)-val;
- } else {
- *negp = 0;
- *lvalp = (unsigned long)val;
- }
- }
- return 0;
-}
-
-/*
- * init may raise the set.
- */
-
-int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int op;
-
- if (write && !capable(CAP_SYS_MODULE)) {
- return -EPERM;
- }
-
- op = is_init(current) ? OP_SET : OP_AND;
- return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
- do_proc_dointvec_bset_conv,&op);
-}
-
/*
- * Taint values can only be increased
+ * Taint values can only be increased
+ * This means we can safely use a temporary.
*/
-static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- int op;
+ struct ctl_table t;
+ unsigned long tmptaint = get_taint();
+ int err;
if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;
- op = OP_OR;
- return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
- do_proc_dointvec_bset_conv,&op);
+ t = *table;
+ t.data = &tmptaint;
+ err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos);
+ if (err < 0)
+ return err;
+
+ if (write) {
+ /*
+ * Poor man's atomic or. Not worth adding a primitive
+ * to everyone's atomic.h for this
+ */
+ int i;
+ for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
+ if ((tmptaint >> i) & 1)
+ add_taint(i);
+ }
+ }
+
+ return err;
}
struct do_proc_dointvec_minmax_conv_param {
pid_t tmp;
int r;
- tmp = pid_nr(cad_pid);
+ tmp = pid_vnr(cad_pid);
r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
lenp, ppos, NULL, NULL);
return -ENOSYS;
}
-int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return -ENOSYS;
-}
-
int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
*/
/* The generic sysctl data routine (used if no strategy routine supplied) */
-int sysctl_data(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_data(struct ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
}
/* The generic string strategy routine: */
-int sysctl_string(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_string(struct ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
* are between the minimum and maximum values given in the arrays
* table->extra1 and table->extra2, respectively.
*/
-int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_intvec(struct ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
}
/* Strategy function to convert jiffies to seconds */
-int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_jiffies(struct ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
}
/* Strategy function to convert jiffies to seconds */
-int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_ms_jiffies(struct ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
{
- static int msg_count;
struct __sysctl_args tmp;
- int name[CTL_MAXNAME];
- int i;
+ int error;
- /* Read in the sysctl name for better debug message logging */
if (copy_from_user(&tmp, args, sizeof(tmp)))
return -EFAULT;
- if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
- return -ENOTDIR;
- for (i = 0; i < tmp.nlen; i++)
- if (get_user(name[i], tmp.name + i))
- return -EFAULT;
- /* Ignore accesses to kernel.version */
- if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
- goto out;
+ error = deprecated_sysctl_warning(&tmp);
- if (msg_count < 5) {
- msg_count++;
- printk(KERN_INFO
- "warning: process `%s' used the removed sysctl "
- "system call with ", current->comm);
- for (i = 0; i < tmp.nlen; i++)
- printk("%d.", name[i]);
- printk("\n");
- }
-out:
- return -ENOSYS;
+ /* If no error reading the parameters then just -ENOSYS ... */
+ if (!error)
+ error = -ENOSYS;
+
+ return error;
}
-int sysctl_data(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_data(struct ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
return -ENOSYS;
}
-int sysctl_string(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_string(struct ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
return -ENOSYS;
}
-int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_intvec(struct ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
return -ENOSYS;
}
-int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_jiffies(struct ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
return -ENOSYS;
}
-int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_ms_jiffies(struct ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
#endif /* CONFIG_SYSCTL_SYSCALL */
+static int deprecated_sysctl_warning(struct __sysctl_args *args)
+{
+ static int msg_count;
+ int name[CTL_MAXNAME];
+ int i;
+
+ /* Check args->nlen. */
+ if (args->nlen < 0 || args->nlen > CTL_MAXNAME)
+ return -ENOTDIR;
+
+ /* Read in the sysctl name for better debug message logging */
+ for (i = 0; i < args->nlen; i++)
+ if (get_user(name[i], args->name + i))
+ return -EFAULT;
+
+ /* Ignore accesses to kernel.version */
+ if ((args->nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
+ return 0;
+
+ if (msg_count < 5) {
+ msg_count++;
+ printk(KERN_INFO
+ "warning: process `%s' used the deprecated sysctl "
+ "system call with ", current->comm);
+ for (i = 0; i < args->nlen; i++)
+ printk("%d.", name[i]);
+ printk("\n");
+ }
+ return 0;
+}
+
/*
* No sense putting this after each symbol definition, twice,
* exception granted :-)
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
+EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);