X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fsysctl.c;h=368d1638ee78229c87067c4d805f7145fe79c352;hb=1092307d582a7566d23779c304cf86f3075ac5f0;hp=2c0e6581944804eee1391161224afd89972b34e1;hpb=eab03ac7bd3e0da99eb9dc068772a85a5e3f3577;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2c0e658..368d163 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -18,18 +18,17 @@ * Removed it and replaced it with older style, 03/23/00, Bill Wendling */ -#include #include #include #include #include #include #include -#include +#include #include #include -#include #include +#include #include #include #include @@ -38,48 +37,72 @@ #include #include #include -#include #include +#include #include #include #include #include +#include #include #include +#include +#include #include #include -extern int proc_nr_files(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos); +#ifdef CONFIG_X86 +#include +#include +#include +#endif + +static int deprecated_sysctl_warning(struct __sysctl_args *args); #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ extern int C_A_D; +extern int print_fatal_signals; extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern int sysctl_panic_on_oom; +extern int sysctl_oom_kill_allocating_task; +extern int sysctl_oom_dump_tasks; extern int max_threads; -extern int sysrq_enabled; extern int core_uses_pid; extern int suid_dumpable; extern char core_pattern[]; -extern int cad_pid; extern int pid_max; extern int min_free_kbytes; -extern int printk_ratelimit_jiffies; -extern int printk_ratelimit_burst; extern int pid_max_min, pid_max_max; extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; +extern int compat_log; +extern int latencytop_enabled; +extern int sysctl_nr_open_min, sysctl_nr_open_max; +#ifndef CONFIG_MMU +extern int sysctl_nr_trim_pages; +#endif +#ifdef CONFIG_RCU_TORTURE_TEST +extern int rcutorture_runnable; +#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ + +/* Constants used for minimum and maximum */ +#ifdef CONFIG_DETECT_SOFTLOCKUP +static int sixty = 60; +static int neg_one = -1; +#endif -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) -int unknown_nmi_panic; -extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); +#if defined(CONFIG_MMU) && defined(CONFIG_FILE_LOCKING) +static int two = 2; #endif +static int zero; +static int one = 1; +static int one_hundred = 100; + /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid; @@ -87,26 +110,19 @@ static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES extern char modprobe_path[]; #endif #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; #endif -#ifdef CONFIG_SYSVIPC -extern size_t shm_ctlmax; -extern size_t shm_ctlall; -extern int shm_ctlmni; -extern int msg_ctlmax; -extern int msg_ctlmnb; -extern int msg_ctlmni; -extern int sem_ctls[]; + +#ifdef CONFIG_SPARC +#include #endif -#ifdef __sparc__ -extern char reboot_command []; -extern int stop_a_enabled; -extern int scons_pwroff; +#ifdef CONFIG_SPARC64 +extern int sysctl_tsb_ratio; #endif #ifdef __hppa__ @@ -122,65 +138,63 @@ extern int sysctl_userprocess_debug; extern int spin_retry; #endif -extern int sysctl_hz_timer; - #ifdef CONFIG_BSD_PROCESS_ACCT extern int acct_parm[]; #endif #ifdef CONFIG_IA64 extern int no_unaligned_warning; +extern int unaligned_dump_stack; #endif -static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, - ctl_table *, void **); -static int proc_doutsstring(ctl_table *table, int write, struct file *filp, +#ifdef CONFIG_RT_MUTEXES +extern int max_lock_depth; +#endif + +#ifdef CONFIG_PROC_SYSCTL +static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_taint(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); +#endif -static ctl_table root_table[]; -static struct ctl_table_header root_table_header = - { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) }; +static struct ctl_table root_table[]; +static struct ctl_table_root sysctl_table_root; +static struct ctl_table_header root_table_header = { + .count = 1, + .ctl_table = root_table, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), + .root = &sysctl_table_root, + .set = &sysctl_table_root.default_set, +}; +static struct ctl_table_root sysctl_table_root = { + .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), + .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), +}; -static ctl_table kern_table[]; -static ctl_table vm_table[]; -static ctl_table fs_table[]; -static ctl_table debug_table[]; -static ctl_table dev_table[]; -extern ctl_table random_table[]; -#ifdef CONFIG_UNIX98_PTYS -extern ctl_table pty_table[]; -#endif +static struct ctl_table kern_table[]; +static struct ctl_table vm_table[]; +static struct ctl_table fs_table[]; +static struct ctl_table debug_table[]; +static struct ctl_table dev_table[]; +extern struct ctl_table random_table[]; #ifdef CONFIG_INOTIFY_USER -extern ctl_table inotify_table[]; +extern struct ctl_table inotify_table[]; +#endif +#ifdef CONFIG_EPOLL +extern struct ctl_table epoll_table[]; #endif #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT int sysctl_legacy_va_layout; #endif -/* /proc declarations: */ - -#ifdef CONFIG_PROC_FS - -static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *); -static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); -static int proc_opensys(struct inode *, struct file *); - -struct file_operations proc_sys_file_operations = { - .open = proc_opensys, - .read = proc_readsys, - .write = proc_writesys, -}; - -extern struct proc_dir_entry *proc_sys_root; - -static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *); -static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); -#endif +extern int prove_locking; +extern int lock_stat; /* The default sysctl tables: */ -static ctl_table root_table[] = { +static struct ctl_table root_table[] = { { .ctl_name = CTL_KERN, .procname = "kernel", @@ -193,14 +207,6 @@ static ctl_table root_table[] = { .mode = 0555, .child = vm_table, }, -#ifdef CONFIG_NET - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = net_table, - }, -#endif { .ctl_name = CTL_FS, .procname = "fs", @@ -219,56 +225,150 @@ static ctl_table root_table[] = { .mode = 0555, .child = dev_table, }, - +/* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt + */ { .ctl_name = 0 } }; -static ctl_table kern_table[] = { +#ifdef CONFIG_SCHED_DEBUG +static int min_sched_granularity_ns = 100000; /* 100 usecs */ +static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ +static int min_wakeup_granularity_ns; /* 0 usecs */ +static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ +#endif + +static struct ctl_table kern_table[] = { +#ifdef CONFIG_SCHED_DEBUG { - .ctl_name = KERN_OSTYPE, - .procname = "ostype", - .data = system_utsname.sysname, - .maxlen = sizeof(system_utsname.sysname), - .mode = 0444, - .proc_handler = &proc_doutsstring, - .strategy = &sysctl_string, + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_min_granularity_ns", + .data = &sysctl_sched_min_granularity, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &sched_nr_latency_handler, + .strategy = &sysctl_intvec, + .extra1 = &min_sched_granularity_ns, + .extra2 = &max_sched_granularity_ns, }, { - .ctl_name = KERN_OSRELEASE, - .procname = "osrelease", - .data = system_utsname.release, - .maxlen = sizeof(system_utsname.release), - .mode = 0444, - .proc_handler = &proc_doutsstring, - .strategy = &sysctl_string, + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_latency_ns", + .data = &sysctl_sched_latency, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &sched_nr_latency_handler, + .strategy = &sysctl_intvec, + .extra1 = &min_sched_granularity_ns, + .extra2 = &max_sched_granularity_ns, }, { - .ctl_name = KERN_VERSION, - .procname = "version", - .data = system_utsname.version, - .maxlen = sizeof(system_utsname.version), - .mode = 0444, - .proc_handler = &proc_doutsstring, - .strategy = &sysctl_string, + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_wakeup_granularity_ns", + .data = &sysctl_sched_wakeup_granularity, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_wakeup_granularity_ns, + .extra2 = &max_wakeup_granularity_ns, }, { - .ctl_name = KERN_NODENAME, - .procname = "hostname", - .data = system_utsname.nodename, - .maxlen = sizeof(system_utsname.nodename), + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_shares_ratelimit", + .data = &sysctl_sched_shares_ratelimit, + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doutsstring, - .strategy = &sysctl_string, + .proc_handler = &proc_dointvec, }, { - .ctl_name = KERN_DOMAINNAME, - .procname = "domainname", - .data = system_utsname.domainname, - .maxlen = sizeof(system_utsname.domainname), + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_shares_thresh", + .data = &sysctl_sched_shares_thresh, + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doutsstring, - .strategy = &sysctl_string, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_child_runs_first", + .data = &sysctl_sched_child_runs_first, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_features", + .data = &sysctl_sched_features, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_migration_cost", + .data = &sysctl_sched_migration_cost, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_nr_migrate", + .data = &sysctl_sched_nr_migrate, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_rt_period_us", + .data = &sysctl_sched_rt_period, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &sched_rt_handler, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_rt_runtime_us", + .data = &sysctl_sched_rt_runtime, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &sched_rt_handler, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_compat_yield", + .data = &sysctl_sched_compat_yield, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#ifdef CONFIG_PROVE_LOCKING + { + .ctl_name = CTL_UNNUMBERED, + .procname = "prove_locking", + .data = &prove_locking, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_LOCK_STAT + { + .ctl_name = CTL_UNNUMBERED, + .procname = "lock_stat", + .data = &lock_stat, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, }, +#endif { .ctl_name = KERN_PANIC, .procname = "panic", @@ -289,27 +389,28 @@ static ctl_table kern_table[] = { .ctl_name = KERN_CORE_PATTERN, .procname = "core_pattern", .data = core_pattern, - .maxlen = 64, + .maxlen = CORENAME_MAX_SIZE, .mode = 0644, .proc_handler = &proc_dostring, .strategy = &sysctl_string, }, +#ifdef CONFIG_PROC_SYSCTL { - .ctl_name = KERN_TAINTED, .procname = "tainted", - .data = &tainted, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_taint, }, +#endif +#ifdef CONFIG_LATENCYTOP { - .ctl_name = KERN_CAP_BSET, - .procname = "cap-bound", - .data = &cap_bset, - .maxlen = sizeof(kernel_cap_t), - .mode = 0600, - .proc_handler = &proc_dointvec_bset, + .procname = "latencytop", + .data = &latencytop_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, }, +#endif #ifdef CONFIG_BLK_DEV_INITRD { .ctl_name = KERN_REALROOTDEV, @@ -320,7 +421,15 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif -#ifdef __sparc__ + { + .ctl_name = CTL_UNNUMBERED, + .procname = "print-fatal-signals", + .data = &print_fatal_signals, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#ifdef CONFIG_SPARC { .ctl_name = KERN_SPARC_REBOOT, .procname = "reboot-cmd", @@ -347,6 +456,16 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_SPARC64 + { + .ctl_name = CTL_UNNUMBERED, + .procname = "tsb-ratio", + .data = &sysctl_tsb_ratio, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif #ifdef __hppa__ { .ctl_name = KERN_HPPA_PWRSW, @@ -373,15 +492,37 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_FUNCTION_TRACER { - .ctl_name = KERN_PRINTK, - .procname = "printk", - .data = &console_loglevel, - .maxlen = 4*sizeof(int), + .ctl_name = CTL_UNNUMBERED, + .procname = "ftrace_enabled", + .data = &ftrace_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &ftrace_enable_sysctl, + }, +#endif +#ifdef CONFIG_STACK_TRACER + { + .ctl_name = CTL_UNNUMBERED, + .procname = "stack_tracer_enabled", + .data = &stack_tracer_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &stack_trace_sysctl, + }, +#endif +#ifdef CONFIG_TRACING + { + .ctl_name = CTL_UNNUMBERED, + .procname = "ftrace_dump_on_oops", + .data = &ftrace_dump_on_oops, + .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, }, -#ifdef CONFIG_KMOD +#endif +#ifdef CONFIG_MODULES { .ctl_name = KERN_MODPROBE, .procname = "modprobe", @@ -423,82 +564,25 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif -#ifdef CONFIG_SYSVIPC - { - .ctl_name = KERN_SHMMAX, - .procname = "shmmax", - .data = &shm_ctlmax, - .maxlen = sizeof (size_t), - .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, - }, - { - .ctl_name = KERN_SHMALL, - .procname = "shmall", - .data = &shm_ctlall, - .maxlen = sizeof (size_t), - .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, - }, - { - .ctl_name = KERN_SHMMNI, - .procname = "shmmni", - .data = &shm_ctlmni, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = KERN_MSGMAX, - .procname = "msgmax", - .data = &msg_ctlmax, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = KERN_MSGMNI, - .procname = "msgmni", - .data = &msg_ctlmni, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = KERN_MSGMNB, - .procname = "msgmnb", - .data = &msg_ctlmnb, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = KERN_SEM, - .procname = "sem", - .data = &sem_ctls, - .maxlen = 4*sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif #ifdef CONFIG_MAGIC_SYSRQ { .ctl_name = KERN_SYSRQ, .procname = "sysrq", - .data = &sysrq_enabled, + .data = &__sysrq_enabled, .maxlen = sizeof (int), .mode = 0644, .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_PROC_SYSCTL { - .ctl_name = KERN_CADPID, .procname = "cad_pid", - .data = &cad_pid, + .data = NULL, .maxlen = sizeof (int), .mode = 0600, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_do_cad_pid, }, +#endif { .ctl_name = KERN_MAX_THREADS, .procname = "threads-max", @@ -513,14 +597,6 @@ static ctl_table kern_table[] = { .mode = 0555, .child = random_table, }, -#ifdef CONFIG_UNIX98_PTYS - { - .ctl_name = KERN_PTY, - .procname = "pty", - .mode = 0555, - .child = pty_table, - }, -#endif { .ctl_name = KERN_OVERFLOWUID, .procname = "overflowuid", @@ -554,16 +630,6 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif -#ifdef CONFIG_NO_IDLE_HZ - { - .ctl_name = KERN_HZ_TIMER, - .procname = "hz_timer", - .data = &sysctl_hz_timer, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif { .ctl_name = KERN_S390_USER_DEBUG_LOGGING, .procname = "userprocess_debug", @@ -592,10 +658,19 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#if defined CONFIG_PRINTK + { + .ctl_name = KERN_PRINTK, + .procname = "printk", + .data = &console_loglevel, + .maxlen = 4*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = KERN_PRINTK_RATELIMIT, .procname = "printk_ratelimit", - .data = &printk_ratelimit_jiffies, + .data = &printk_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -604,11 +679,12 @@ static ctl_table kern_table[] = { { .ctl_name = KERN_PRINTK_RATELIMIT_BURST, .procname = "printk_ratelimit_burst", - .data = &printk_ratelimit_burst, + .data = &printk_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif { .ctl_name = KERN_NGROUPS_MAX, .procname = "ngroups_max", @@ -624,11 +700,26 @@ static ctl_table kern_table[] = { .data = &unknown_nmi_panic, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = &proc_unknown_nmi_panic, + .proc_handler = &proc_dointvec, + }, + { + .procname = "nmi_watchdog", + .data = &nmi_watchdog_enabled, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_nmi_enabled, }, #endif #if defined(CONFIG_X86) { + .ctl_name = KERN_PANIC_ON_NMI, + .procname = "panic_on_unrecovered_nmi", + .data = &panic_on_unrecovered_nmi, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = KERN_BOOTLOADER_TYPE, .procname = "bootloader_type", .data = &bootloader_type, @@ -636,6 +727,22 @@ static ctl_table kern_table[] = { .mode = 0444, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kstack_depth_to_print", + .data = &kstack_depth_to_print, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "io_delay_type", + .data = &io_delay_type, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #if defined(CONFIG_MMU) { @@ -657,11 +764,10 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif -#ifdef CONFIG_ACPI_SLEEP +#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) { - .ctl_name = KERN_ACPI_VIDEO_FLAGS, .procname = "acpi_video_flags", - .data = &acpi_video_flags, + .data = &acpi_realmode_flags, .maxlen = sizeof (unsigned long), .mode = 0644, .proc_handler = &proc_doulongvec_minmax, @@ -676,73 +782,222 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, -#endif - { .ctl_name = 0 } -}; - -/* Constants for minimum and maximum testing in vm_table. - We use these as one-element integer vectors. */ -static int zero; -static int one_hundred = 100; - - -static ctl_table vm_table[] = { { - .ctl_name = VM_OVERCOMMIT_MEMORY, - .procname = "overcommit_memory", - .data = &sysctl_overcommit_memory, - .maxlen = sizeof(sysctl_overcommit_memory), + .ctl_name = CTL_UNNUMBERED, + .procname = "unaligned-dump-stack", + .data = &unaligned_dump_stack, + .maxlen = sizeof (int), .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif +#ifdef CONFIG_DETECT_SOFTLOCKUP { - .ctl_name = VM_PANIC_ON_OOM, - .procname = "panic_on_oom", - .data = &sysctl_panic_on_oom, - .maxlen = sizeof(sysctl_panic_on_oom), + .ctl_name = CTL_UNNUMBERED, + .procname = "softlockup_panic", + .data = &softlockup_panic, + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, }, { - .ctl_name = VM_OVERCOMMIT_RATIO, - .procname = "overcommit_ratio", - .data = &sysctl_overcommit_ratio, - .maxlen = sizeof(sysctl_overcommit_ratio), + .ctl_name = CTL_UNNUMBERED, + .procname = "softlockup_thresh", + .data = &softlockup_thresh, + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &neg_one, + .extra2 = &sixty, }, { - .ctl_name = VM_PAGE_CLUSTER, - .procname = "page-cluster", - .data = &page_cluster, - .maxlen = sizeof(int), + .ctl_name = CTL_UNNUMBERED, + .procname = "hung_task_check_count", + .data = &sysctl_hung_task_check_count, + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_doulongvec_minmax, + .strategy = &sysctl_intvec, }, { - .ctl_name = VM_DIRTY_BACKGROUND, - .procname = "dirty_background_ratio", - .data = &dirty_background_ratio, - .maxlen = sizeof(dirty_background_ratio), + .ctl_name = CTL_UNNUMBERED, + .procname = "hung_task_timeout_secs", + .data = &sysctl_hung_task_timeout_secs, + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = &proc_doulongvec_minmax, .strategy = &sysctl_intvec, - .extra1 = &zero, - .extra2 = &one_hundred, }, { - .ctl_name = VM_DIRTY_RATIO, - .procname = "dirty_ratio", - .data = &vm_dirty_ratio, + .ctl_name = CTL_UNNUMBERED, + .procname = "hung_task_warnings", + .data = &sysctl_hung_task_warnings, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .strategy = &sysctl_intvec, + }, +#endif +#ifdef CONFIG_COMPAT + { + .ctl_name = KERN_COMPAT_LOG, + .procname = "compat-log", + .data = &compat_log, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_RT_MUTEXES + { + .ctl_name = KERN_MAX_LOCK_DEPTH, + .procname = "max_lock_depth", + .data = &max_lock_depth, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif + { + .ctl_name = CTL_UNNUMBERED, + .procname = "poweroff_cmd", + .data = &poweroff_cmd, + .maxlen = POWEROFF_CMD_PATH_LEN, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, +#ifdef CONFIG_KEYS + { + .ctl_name = CTL_UNNUMBERED, + .procname = "keys", + .mode = 0555, + .child = key_sysctls, + }, +#endif +#ifdef CONFIG_RCU_TORTURE_TEST + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rcutorture_runnable", + .data = &rcutorture_runnable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_UNEVICTABLE_LRU + { + .ctl_name = CTL_UNNUMBERED, + .procname = "scan_unevictable_pages", + .data = &scan_unevictable_pages, + .maxlen = sizeof(scan_unevictable_pages), + .mode = 0644, + .proc_handler = &scan_unevictable_handler, + }, +#endif +/* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt + */ + { .ctl_name = 0 } +}; + +static struct ctl_table vm_table[] = { + { + .ctl_name = VM_OVERCOMMIT_MEMORY, + .procname = "overcommit_memory", + .data = &sysctl_overcommit_memory, + .maxlen = sizeof(sysctl_overcommit_memory), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = VM_PANIC_ON_OOM, + .procname = "panic_on_oom", + .data = &sysctl_panic_on_oom, + .maxlen = sizeof(sysctl_panic_on_oom), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "oom_kill_allocating_task", + .data = &sysctl_oom_kill_allocating_task, + .maxlen = sizeof(sysctl_oom_kill_allocating_task), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "oom_dump_tasks", + .data = &sysctl_oom_dump_tasks, + .maxlen = sizeof(sysctl_oom_dump_tasks), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = VM_OVERCOMMIT_RATIO, + .procname = "overcommit_ratio", + .data = &sysctl_overcommit_ratio, + .maxlen = sizeof(sysctl_overcommit_ratio), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = VM_PAGE_CLUSTER, + .procname = "page-cluster", + .data = &page_cluster, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = VM_DIRTY_BACKGROUND, + .procname = "dirty_background_ratio", + .data = &dirty_background_ratio, + .maxlen = sizeof(dirty_background_ratio), + .mode = 0644, + .proc_handler = &dirty_background_ratio_handler, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one_hundred, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "dirty_background_bytes", + .data = &dirty_background_bytes, + .maxlen = sizeof(dirty_background_bytes), + .mode = 0644, + .proc_handler = &dirty_background_bytes_handler, + .strategy = &sysctl_intvec, + .extra1 = &one, + }, + { + .ctl_name = VM_DIRTY_RATIO, + .procname = "dirty_ratio", + .data = &vm_dirty_ratio, .maxlen = sizeof(vm_dirty_ratio), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = &dirty_ratio_handler, .strategy = &sysctl_intvec, .extra1 = &zero, .extra2 = &one_hundred, }, { - .ctl_name = VM_DIRTY_WB_CS, + .ctl_name = CTL_UNNUMBERED, + .procname = "dirty_bytes", + .data = &vm_dirty_bytes, + .maxlen = sizeof(vm_dirty_bytes), + .mode = 0644, + .proc_handler = &dirty_bytes_handler, + .strategy = &sysctl_intvec, + .extra1 = &one, + }, + { .procname = "dirty_writeback_centisecs", .data = &dirty_writeback_interval, .maxlen = sizeof(dirty_writeback_interval), @@ -750,7 +1005,6 @@ static ctl_table vm_table[] = { .proc_handler = &dirty_writeback_centisecs_handler, }, { - .ctl_name = VM_DIRTY_EXPIRE_CS, .procname = "dirty_expire_centisecs", .data = &dirty_expire_interval, .maxlen = sizeof(dirty_expire_interval), @@ -778,9 +1032,8 @@ static ctl_table vm_table[] = { }, #ifdef CONFIG_HUGETLB_PAGE { - .ctl_name = VM_HUGETLB_PAGES, .procname = "nr_hugepages", - .data = &max_huge_pages, + .data = NULL, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &hugetlb_sysctl_handler, @@ -795,6 +1048,24 @@ static ctl_table vm_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "hugepages_treat_as_movable", + .data = &hugepages_treat_as_movable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &hugetlb_treat_movable_handler, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nr_overcommit_hugepages", + .data = NULL, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &hugetlb_overcommit_handler, + .extra1 = (void *)&hugetlb_zero, + .extra2 = (void *)&hugetlb_infinity, + }, #endif { .ctl_name = VM_LOWMEM_RESERVE_RATIO, @@ -843,6 +1114,17 @@ static ctl_table vm_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, +#else + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nr_trim_pages", + .data = &sysctl_nr_trim_pages, + .maxlen = sizeof(sysctl_nr_trim_pages), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, #endif { .ctl_name = VM_LAPTOP_MODE, @@ -885,17 +1167,6 @@ static ctl_table vm_table[] = { .extra1 = &zero, }, #endif -#ifdef CONFIG_SWAP - { - .ctl_name = VM_SWAP_TOKEN_TIMEOUT, - .procname = "swap_token_timeout", - .data = &swap_token_default_timeout, - .maxlen = sizeof(swap_token_default_timeout), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, -#endif #ifdef CONFIG_NUMA { .ctl_name = VM_ZONE_RECLAIM_MODE, @@ -908,19 +1179,100 @@ static ctl_table vm_table[] = { .extra1 = &zero, }, { - .ctl_name = VM_ZONE_RECLAIM_INTERVAL, - .procname = "zone_reclaim_interval", - .data = &zone_reclaim_interval, - .maxlen = sizeof(zone_reclaim_interval), + .ctl_name = VM_MIN_UNMAPPED, + .procname = "min_unmapped_ratio", + .data = &sysctl_min_unmapped_ratio, + .maxlen = sizeof(sysctl_min_unmapped_ratio), + .mode = 0644, + .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one_hundred, + }, + { + .ctl_name = VM_MIN_SLAB, + .procname = "min_slab_ratio", + .data = &sysctl_min_slab_ratio, + .maxlen = sizeof(sysctl_min_slab_ratio), + .mode = 0644, + .proc_handler = &sysctl_min_slab_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one_hundred, + }, +#endif +#ifdef CONFIG_SMP + { + .ctl_name = CTL_UNNUMBERED, + .procname = "stat_interval", + .data = &sysctl_stat_interval, + .maxlen = sizeof(sysctl_stat_interval), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies, }, #endif +#ifdef CONFIG_SECURITY + { + .ctl_name = CTL_UNNUMBERED, + .procname = "mmap_min_addr", + .data = &mmap_min_addr, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + }, +#endif +#ifdef CONFIG_NUMA + { + .ctl_name = CTL_UNNUMBERED, + .procname = "numa_zonelist_order", + .data = &numa_zonelist_order, + .maxlen = NUMA_ZONELIST_ORDER_LEN, + .mode = 0644, + .proc_handler = &numa_zonelist_order_handler, + .strategy = &sysctl_string, + }, +#endif +#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \ + (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) + { + .ctl_name = VM_VDSO_ENABLED, + .procname = "vdso_enabled", + .data = &vdso_enabled, + .maxlen = sizeof(vdso_enabled), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, +#endif +#ifdef CONFIG_HIGHMEM + { + .ctl_name = CTL_UNNUMBERED, + .procname = "highmem_is_dirtyable", + .data = &vm_highmem_is_dirtyable, + .maxlen = sizeof(vm_highmem_is_dirtyable), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, + }, +#endif +/* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt + */ + { .ctl_name = 0 } +}; + +#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) +static struct ctl_table binfmt_misc_table[] = { { .ctl_name = 0 } }; +#endif -static ctl_table fs_table[] = { +static struct ctl_table fs_table[] = { { .ctl_name = FS_NRINODE, .procname = "inode-nr", @@ -938,7 +1290,6 @@ static ctl_table fs_table[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = FS_NRFILE, .procname = "file-nr", .data = &files_stat, .maxlen = 3*sizeof(int), @@ -954,6 +1305,16 @@ static ctl_table fs_table[] = { .proc_handler = &proc_dointvec, }, { + .ctl_name = CTL_UNNUMBERED, + .procname = "nr_open", + .data = &sysctl_nr_open, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &sysctl_nr_open_min, + .extra2 = &sysctl_nr_open_max, + }, + { .ctl_name = FS_DENTRY, .procname = "dentry-state", .data = &dentry_stat, @@ -983,6 +1344,7 @@ static ctl_table fs_table[] = { .extra1 = &minolduid, .extra2 = &maxolduid, }, +#ifdef CONFIG_FILE_LOCKING { .ctl_name = FS_LEASES, .procname = "leases-enable", @@ -991,6 +1353,7 @@ static ctl_table fs_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif #ifdef CONFIG_DNOTIFY { .ctl_name = FS_DIR_NOTIFY, @@ -1002,16 +1365,21 @@ static ctl_table fs_table[] = { }, #endif #ifdef CONFIG_MMU +#ifdef CONFIG_FILE_LOCKING { .ctl_name = FS_LEASE_TIME, .procname = "lease-break-time", .data = &lease_break_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &two, }, +#endif +#ifdef CONFIG_AIO { - .ctl_name = FS_AIO_NR, .procname = "aio-nr", .data = &aio_nr, .maxlen = sizeof(aio_nr), @@ -1019,13 +1387,13 @@ static ctl_table fs_table[] = { .proc_handler = &proc_doulongvec_minmax, }, { - .ctl_name = FS_AIO_MAX_NR, .procname = "aio-max-nr", .data = &aio_max_nr, .maxlen = sizeof(aio_max_nr), .mode = 0644, .proc_handler = &proc_doulongvec_minmax, }, +#endif /* CONFIG_AIO */ #ifdef CONFIG_INOTIFY_USER { .ctl_name = FS_INOTIFY, @@ -1034,6 +1402,13 @@ static ctl_table fs_table[] = { .child = inotify_table, }, #endif +#ifdef CONFIG_EPOLL + { + .procname = "epoll", + .mode = 0555, + .child = epoll_table, + }, +#endif #endif { .ctl_name = KERN_SETUID_DUMPABLE, @@ -1043,19 +1418,39 @@ static ctl_table fs_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) + { + .ctl_name = CTL_UNNUMBERED, + .procname = "binfmt_misc", + .mode = 0555, + .child = binfmt_misc_table, + }, +#endif +/* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt + */ { .ctl_name = 0 } }; -static ctl_table debug_table[] = { +static struct ctl_table debug_table[] = { +#if defined(CONFIG_X86) || defined(CONFIG_PPC) + { + .ctl_name = CTL_UNNUMBERED, + .procname = "exception-trace", + .data = &show_unhandled_signals, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +#endif { .ctl_name = 0 } }; -static ctl_table dev_table[] = { +static struct ctl_table dev_table[] = { { .ctl_name = 0 } }; -extern void init_irq_proc (void); - static DEFINE_SPINLOCK(sysctl_lock); /* called under sysctl_lock */ @@ -1089,6 +1484,9 @@ static void start_unregistering(struct ctl_table_header *p) spin_unlock(&sysctl_lock); wait_for_completion(&wait); spin_lock(&sysctl_lock); + } else { + /* anything non-NULL; we'll never dereference it */ + p->unregistering = ERR_PTR(-EINVAL); } /* * do not remove from the list until nobody holds it; walking the @@ -1097,18 +1495,185 @@ static void start_unregistering(struct ctl_table_header *p) list_del_init(&p->ctl_entry); } -void __init sysctl_init(void) +void sysctl_head_get(struct ctl_table_header *head) { -#ifdef CONFIG_PROC_FS - register_proc_table(root_table, proc_sys_root, &root_table_header); - init_irq_proc(); -#endif + spin_lock(&sysctl_lock); + head->count++; + spin_unlock(&sysctl_lock); +} + +void sysctl_head_put(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + if (!--head->count) + kfree(head); + spin_unlock(&sysctl_lock); +} + +struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) +{ + if (!head) + BUG(); + spin_lock(&sysctl_lock); + if (!use_table(head)) + head = ERR_PTR(-ENOENT); + spin_unlock(&sysctl_lock); + return head; +} + +void sysctl_head_finish(struct ctl_table_header *head) +{ + if (!head) + return; + spin_lock(&sysctl_lock); + unuse_table(head); + spin_unlock(&sysctl_lock); +} + +static struct ctl_table_set * +lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) +{ + struct ctl_table_set *set = &root->default_set; + if (root->lookup) + set = root->lookup(root, namespaces); + return set; +} + +static struct list_head * +lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) +{ + struct ctl_table_set *set = lookup_header_set(root, namespaces); + return &set->list; +} + +struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, + struct ctl_table_header *prev) +{ + struct ctl_table_root *root; + struct list_head *header_list; + struct ctl_table_header *head; + struct list_head *tmp; + + spin_lock(&sysctl_lock); + if (prev) { + head = prev; + tmp = &prev->ctl_entry; + unuse_table(prev); + goto next; + } + tmp = &root_table_header.ctl_entry; + for (;;) { + head = list_entry(tmp, struct ctl_table_header, ctl_entry); + + if (!use_table(head)) + goto next; + spin_unlock(&sysctl_lock); + return head; + next: + root = head->root; + tmp = tmp->next; + header_list = lookup_header_list(root, namespaces); + if (tmp != header_list) + continue; + + do { + root = list_entry(root->root_list.next, + struct ctl_table_root, root_list); + if (root == &sysctl_table_root) + goto out; + header_list = lookup_header_list(root, namespaces); + } while (list_empty(header_list)); + tmp = header_list->next; + } +out: + spin_unlock(&sysctl_lock); + return NULL; +} + +struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) +{ + return __sysctl_head_next(current->nsproxy, prev); +} + +void register_sysctl_root(struct ctl_table_root *root) +{ + spin_lock(&sysctl_lock); + list_add_tail(&root->root_list, &sysctl_table_root.root_list); + spin_unlock(&sysctl_lock); +} + +#ifdef CONFIG_SYSCTL_SYSCALL +/* Perform the actual read/write of a sysctl table entry. */ +static int do_sysctl_strategy(struct ctl_table_root *root, + struct ctl_table *table, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + int op = 0, rc; + + if (oldval) + op |= MAY_READ; + if (newval) + op |= MAY_WRITE; + if (sysctl_perm(root, table, op)) + return -EPERM; + + if (table->strategy) { + rc = table->strategy(table, oldval, oldlenp, newval, newlen); + if (rc < 0) + return rc; + if (rc > 0) + return 0; + } + + /* If there is no strategy routine, or if the strategy returns + * zero, proceed with automatic r/w */ + if (table->data && table->maxlen) { + rc = sysctl_data(table, oldval, oldlenp, newval, newlen); + if (rc < 0) + return rc; + } + return 0; +} + +static int parse_table(int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, + struct ctl_table_root *root, + struct ctl_table *table) +{ + int n; +repeat: + if (!nlen) + return -ENOTDIR; + if (get_user(n, name)) + return -EFAULT; + for ( ; table->ctl_name || table->procname; table++) { + if (!table->ctl_name) + continue; + if (n == table->ctl_name) { + int error; + if (table->child) { + if (sysctl_perm(root, table, MAY_EXEC)) + return -EPERM; + name++; + nlen--; + table = table->child; + goto repeat; + } + error = do_sysctl_strategy(root, table, + oldval, oldlenp, + newval, newlen); + return error; + } + } + return -ENOTDIR; } int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { - struct list_head *tmp; + struct ctl_table_header *head; int error = -ENOTDIR; if (nlen <= 0 || nlen >= CTL_MAXNAME) @@ -1118,33 +1683,21 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol if (!oldlenp || get_user(old_len, oldlenp)) return -EFAULT; } - spin_lock(&sysctl_lock); - tmp = &root_table_header.ctl_entry; - do { - struct ctl_table_header *head = - list_entry(tmp, struct ctl_table_header, ctl_entry); - void *context = NULL; - - if (!use_table(head)) - continue; - - spin_unlock(&sysctl_lock); + for (head = sysctl_head_next(NULL); head; + head = sysctl_head_next(head)) { error = parse_table(name, nlen, oldval, oldlenp, - newval, newlen, head->ctl_table, - &context); - kfree(context); - - spin_lock(&sysctl_lock); - unuse_table(head); - if (error != -ENOTDIR) + newval, newlen, + head->root, head->ctl_table); + if (error != -ENOTDIR) { + sysctl_head_finish(head); break; - } while ((tmp = tmp->next) != &root_table_header.ctl_entry); - spin_unlock(&sysctl_lock); + } + } return error; } -asmlinkage long sys_sysctl(struct __sysctl_args __user *args) +SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args) { struct __sysctl_args tmp; int error; @@ -1152,137 +1705,134 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args) if (copy_from_user(&tmp, args, sizeof(tmp))) return -EFAULT; + error = deprecated_sysctl_warning(&tmp); + if (error) + goto out; + lock_kernel(); error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp, tmp.newval, tmp.newlen); unlock_kernel(); +out: return error; } +#endif /* CONFIG_SYSCTL_SYSCALL */ /* - * ctl_perm does NOT grant the superuser all rights automatically, because + * sysctl_perm does NOT grant the superuser all rights automatically, because * some sysctl variables are readonly even to root. */ static int test_perm(int mode, int op) { - if (!current->euid) + if (!current_euid()) mode >>= 6; else if (in_egroup_p(0)) mode >>= 3; - if ((mode & op & 0007) == op) + if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) return 0; return -EACCES; } -static inline int ctl_perm(ctl_table *table, int op) +int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) { int error; - error = security_sysctl(table, op); + int mode; + + error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC)); if (error) return error; - return test_perm(table->mode, op); + + if (root->permissions) + mode = root->permissions(root, current->nsproxy, table); + else + mode = table->mode; + + return test_perm(mode, op); } -static int parse_table(int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - ctl_table *table, void **context) +static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) { - int n; -repeat: - if (!nlen) - return -ENOTDIR; - if (get_user(n, name)) - return -EFAULT; - for ( ; table->ctl_name; table++) { - if (n == table->ctl_name || table->ctl_name == CTL_ANY) { - int error; - if (table->child) { - if (ctl_perm(table, 001)) - return -EPERM; - if (table->strategy) { - error = table->strategy( - table, name, nlen, - oldval, oldlenp, - newval, newlen, context); - if (error) - return error; - } - name++; - nlen--; - table = table->child; - goto repeat; - } - error = do_sysctl_strategy(table, name, nlen, - oldval, oldlenp, - newval, newlen, context); - return error; - } + for (; table->ctl_name || table->procname; table++) { + table->parent = parent; + if (table->child) + sysctl_set_parent(table, table->child); } - return -ENOTDIR; } -/* Perform the actual read/write of a sysctl table entry. */ -int do_sysctl_strategy (ctl_table *table, - int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) +static __init int sysctl_init(void) { - int op = 0, rc; - size_t len; + sysctl_set_parent(NULL, root_table); +#ifdef CONFIG_SYSCTL_SYSCALL_CHECK + { + int err; + err = sysctl_check_table(current->nsproxy, root_table); + } +#endif + return 0; +} - if (oldval) - op |= 004; - if (newval) - op |= 002; - if (ctl_perm(table, op)) - return -EPERM; +core_initcall(sysctl_init); - if (table->strategy) { - rc = table->strategy(table, name, nlen, oldval, oldlenp, - newval, newlen, context); - if (rc < 0) - return rc; - if (rc > 0) - return 0; +static struct ctl_table *is_branch_in(struct ctl_table *branch, + struct ctl_table *table) +{ + struct ctl_table *p; + const char *s = branch->procname; + + /* branch should have named subdirectory as its first element */ + if (!s || !branch->child) + return NULL; + + /* ... and nothing else */ + if (branch[1].procname || branch[1].ctl_name) + return NULL; + + /* table should contain subdirectory with the same name */ + for (p = table; p->procname || p->ctl_name; p++) { + if (!p->child) + continue; + if (p->procname && strcmp(p->procname, s) == 0) + return p; } + return NULL; +} - /* If there is no strategy routine, or if the strategy returns - * zero, proceed with automatic r/w */ - if (table->data && table->maxlen) { - if (oldval && oldlenp) { - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if(copy_to_user(oldval, table->data, len)) - return -EFAULT; - if(put_user(len, oldlenp)) - return -EFAULT; - } - } - if (newval && newlen) { - len = newlen; - if (len > table->maxlen) - len = table->maxlen; - if(copy_from_user(table->data, newval, len)) - return -EFAULT; - } +/* see if attaching q to p would be an improvement */ +static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) +{ + struct ctl_table *to = p->ctl_table, *by = q->ctl_table; + struct ctl_table *next; + int is_better = 0; + int not_in_parent = !p->attached_by; + + while ((next = is_branch_in(by, to)) != NULL) { + if (by == q->attached_by) + is_better = 1; + if (to == p->attached_by) + not_in_parent = 1; + by = by->child; + to = next->child; + } + + if (is_better && not_in_parent) { + q->attached_by = by; + q->attached_to = to; + q->parent = p; } - return 0; } /** - * register_sysctl_table - register a sysctl hierarchy + * __register_sysctl_paths - register a sysctl hierarchy + * @root: List of sysctl headers to register on + * @namespaces: Data to compute which lists of sysctl entries are visible + * @path: The path to the directory the sysctl table is in. * @table: the top-level table structure - * @insert_at_head: whether the entry should be inserted in front or at the end * * Register a sysctl table hierarchy. @table should be a filled in ctl_table - * array. An entry with a ctl_name of 0 terminates the table. + * array. A completely 0 filled entry terminates the table. * - * The members of the &ctl_table structure are used as follows: + * The members of the &struct ctl_table structure are used as follows: * * ctl_name - This is the numeric sysctl value used by sysctl(2). The number * must be unique within that level of sysctl @@ -1343,234 +1893,212 @@ int do_sysctl_strategy (ctl_table *table, * This routine returns %NULL on a failure to register, and a pointer * to the table header on success. */ -struct ctl_table_header *register_sysctl_table(ctl_table * table, - int insert_at_head) +struct ctl_table_header *__register_sysctl_paths( + struct ctl_table_root *root, + struct nsproxy *namespaces, + const struct ctl_path *path, struct ctl_table *table) { - struct ctl_table_header *tmp; - tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); - if (!tmp) + struct ctl_table_header *header; + struct ctl_table *new, **prevp; + unsigned int n, npath; + struct ctl_table_set *set; + + /* Count the path components */ + for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath) + ; + + /* + * For each path component, allocate a 2-element ctl_table array. + * The first array element will be filled with the sysctl entry + * for this, the second will be the sentinel (ctl_name == 0). + * + * We allocate everything in one go so that we don't have to + * worry about freeing additional memory in unregister_sysctl_table. + */ + header = kzalloc(sizeof(struct ctl_table_header) + + (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); + if (!header) + return NULL; + + new = (struct ctl_table *) (header + 1); + + /* Now connect the dots */ + prevp = &header->ctl_table; + for (n = 0; n < npath; ++n, ++path) { + /* Copy the procname */ + new->procname = path->procname; + new->ctl_name = path->ctl_name; + new->mode = 0555; + + *prevp = new; + prevp = &new->child; + + new += 2; + } + *prevp = table; + header->ctl_table_arg = table; + + INIT_LIST_HEAD(&header->ctl_entry); + header->used = 0; + header->unregistering = NULL; + header->root = root; + sysctl_set_parent(NULL, header->ctl_table); + header->count = 1; +#ifdef CONFIG_SYSCTL_SYSCALL_CHECK + if (sysctl_check_table(namespaces, header->ctl_table)) { + kfree(header); return NULL; - tmp->ctl_table = table; - INIT_LIST_HEAD(&tmp->ctl_entry); - tmp->used = 0; - tmp->unregistering = NULL; + } +#endif spin_lock(&sysctl_lock); - if (insert_at_head) - list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); - else - list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); + header->set = lookup_header_set(root, namespaces); + header->attached_by = header->ctl_table; + header->attached_to = root_table; + header->parent = &root_table_header; + for (set = header->set; set; set = set->parent) { + struct ctl_table_header *p; + list_for_each_entry(p, &set->list, ctl_entry) { + if (p->unregistering) + continue; + try_attach(p, header); + } + } + header->parent->count++; + list_add_tail(&header->ctl_entry, &header->set->list); spin_unlock(&sysctl_lock); -#ifdef CONFIG_PROC_FS - register_proc_table(table, proc_sys_root, tmp); -#endif - return tmp; + + return header; } /** - * unregister_sysctl_table - unregister a sysctl table hierarchy - * @header: the header returned from register_sysctl_table + * register_sysctl_table_path - register a sysctl table hierarchy + * @path: The path to the directory the sysctl table is in. + * @table: the top-level table structure * - * Unregisters the sysctl table and all children. proc entries may not - * actually be removed until they are no longer used by anyone. + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See __register_sysctl_paths for more details. */ -void unregister_sysctl_table(struct ctl_table_header * header) +struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, + struct ctl_table *table) { - might_sleep(); - spin_lock(&sysctl_lock); - start_unregistering(header); -#ifdef CONFIG_PROC_FS - unregister_proc_table(header->ctl_table, proc_sys_root); -#endif - spin_unlock(&sysctl_lock); - kfree(header); + return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, + path, table); } -/* - * /proc/sys support +/** + * register_sysctl_table - register a sysctl table hierarchy + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See register_sysctl_paths for more details. */ - -#ifdef CONFIG_PROC_FS - -/* Scan the sysctl entries in table and add them all into /proc */ -static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) +struct ctl_table_header *register_sysctl_table(struct ctl_table *table) { - struct proc_dir_entry *de; - int len; - mode_t mode; - - for (; table->ctl_name; table++) { - /* Can't do anything without a proc name. */ - if (!table->procname) - continue; - /* Maybe we can't do anything with it... */ - if (!table->proc_handler && !table->child) { - printk(KERN_WARNING "SYSCTL: Can't register %s\n", - table->procname); - continue; - } - - len = strlen(table->procname); - mode = table->mode; - - de = NULL; - if (table->proc_handler) - mode |= S_IFREG; - else { - mode |= S_IFDIR; - for (de = root->subdir; de; de = de->next) { - if (proc_match(len, table->procname, de)) - break; - } - /* If the subdir exists already, de is non-NULL */ - } + static const struct ctl_path null_path[] = { {} }; - if (!de) { - de = create_proc_entry(table->procname, mode, root); - if (!de) - continue; - de->set = set; - de->data = (void *) table; - if (table->proc_handler) - de->proc_fops = &proc_sys_file_operations; - } - table->de = de; - if (de->mode & S_IFDIR) - register_proc_table(table->child, de, set); - } + return register_sysctl_paths(null_path, table); } -/* - * Unregister a /proc sysctl table and any subdirectories. - */ -static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root) -{ - struct proc_dir_entry *de; - for (; table->ctl_name; table++) { - if (!(de = table->de)) - continue; - if (de->mode & S_IFDIR) { - if (!table->child) { - printk (KERN_ALERT "Help - malformed sysctl tree on free\n"); - continue; - } - unregister_proc_table(table->child, de); - - /* Don't unregister directories which still have entries.. */ - if (de->subdir) - continue; - } - - /* - * In any case, mark the entry as goner; we'll keep it - * around if it's busy, but we'll know to do nothing with - * its fields. We are under sysctl_lock here. - */ - de->data = NULL; +/** + * unregister_sysctl_table - unregister a sysctl table hierarchy + * @header: the header returned from register_sysctl_table + * + * Unregisters the sysctl table and all children. proc entries may not + * actually be removed until they are no longer used by anyone. + */ +void unregister_sysctl_table(struct ctl_table_header * header) +{ + might_sleep(); - /* Don't unregister proc entries that are still being used.. */ - if (atomic_read(&de->count)) - continue; + if (header == NULL) + return; - table->de = NULL; - remove_proc_entry(table->procname, root); + spin_lock(&sysctl_lock); + start_unregistering(header); + if (!--header->parent->count) { + WARN_ON(1); + kfree(header->parent); } + if (!--header->count) + kfree(header); + spin_unlock(&sysctl_lock); } -static ssize_t do_rw_proc(int write, struct file * file, char __user * buf, - size_t count, loff_t *ppos) +int sysctl_is_seen(struct ctl_table_header *p) { - int op; - struct proc_dir_entry *de = PDE(file->f_dentry->d_inode); - struct ctl_table *table; - size_t res; - ssize_t error = -ENOTDIR; - + struct ctl_table_set *set = p->set; + int res; spin_lock(&sysctl_lock); - if (de && de->data && use_table(de->set)) { - /* - * at that point we know that sysctl was not unregistered - * and won't be until we finish - */ - spin_unlock(&sysctl_lock); - table = (struct ctl_table *) de->data; - if (!table || !table->proc_handler) - goto out; - error = -EPERM; - op = (write ? 002 : 004); - if (ctl_perm(table, op)) - goto out; - - /* careful: calling conventions are nasty here */ - res = count; - error = (*table->proc_handler)(table, write, file, - buf, &res, ppos); - if (!error) - error = res; - out: - spin_lock(&sysctl_lock); - unuse_table(de->set); - } + if (p->unregistering) + res = 0; + else if (!set->is_seen) + res = 1; + else + res = set->is_seen(set); spin_unlock(&sysctl_lock); - return error; + return res; } -static int proc_opensys(struct inode *inode, struct file *file) +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) { - if (file->f_mode & FMODE_WRITE) { - /* - * sysctl entries that are not writable, - * are _NOT_ writable, capabilities or not. - */ - if (!(inode->i_mode & S_IWUSR)) - return -EPERM; - } + INIT_LIST_HEAD(&p->list); + p->parent = parent ? parent : &sysctl_table_root.default_set; + p->is_seen = is_seen; +} - return 0; +#else /* !CONFIG_SYSCTL */ +struct ctl_table_header *register_sysctl_table(struct ctl_table * table) +{ + return NULL; } -static ssize_t proc_readsys(struct file * file, char __user * buf, - size_t count, loff_t *ppos) +struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, + struct ctl_table *table) { - return do_rw_proc(0, file, buf, count, ppos); + return NULL; } -static ssize_t proc_writesys(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) +void unregister_sysctl_table(struct ctl_table_header * table) { - return do_rw_proc(1, file, (char __user *) buf, count, ppos); } -/** - * proc_dostring - read a string sysctl - * @table: the sysctl table - * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure - * @buffer: the user buffer - * @lenp: the size of the user buffer - * @ppos: file position - * - * Reads/writes a string from/to the user buffer. If the kernel - * buffer provided is not large enough to hold the string, the - * string is truncated. The copied string is %NULL-terminated. - * If the string is being read by the user process, it is copied - * and a newline '\n' is added. It is truncated if the buffer is - * not large enough. - * - * Returns 0 on success. +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ +} + +void sysctl_head_put(struct ctl_table_header *head) +{ +} + +#endif /* CONFIG_SYSCTL */ + +/* + * /proc/sys support */ -int proc_dostring(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) + +#ifdef CONFIG_PROC_SYSCTL + +static int _proc_do_string(void* data, int maxlen, int write, + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos) { size_t len; char __user *p; char c; - - if (!table->data || !table->maxlen || !*lenp || - (*ppos && !write)) { + + if (!data || !maxlen || !*lenp) { *lenp = 0; return 0; } - + if (write) { len = 0; p = buffer; @@ -1581,20 +2109,29 @@ int proc_dostring(ctl_table *table, int write, struct file *filp, break; len++; } - if (len >= table->maxlen) - len = table->maxlen-1; - if(copy_from_user(table->data, buffer, len)) + if (len >= maxlen) + len = maxlen-1; + if(copy_from_user(data, buffer, len)) return -EFAULT; - ((char *) table->data)[len] = 0; + ((char *) data)[len] = 0; *ppos += *lenp; } else { - len = strlen(table->data); - if (len > table->maxlen) - len = table->maxlen; + len = strlen(data); + if (len > maxlen) + len = maxlen; + + if (*ppos > len) { + *lenp = 0; + return 0; + } + + data += *ppos; + len -= *ppos; + if (len > *lenp) len = *lenp; if (len) - if(copy_to_user(buffer, table->data, len)) + if(copy_to_user(buffer, data, len)) return -EFAULT; if (len < *lenp) { if(put_user('\n', ((char __user *) buffer) + len)) @@ -1607,28 +2144,32 @@ int proc_dostring(ctl_table *table, int write, struct file *filp, return 0; } -/* - * Special case of dostring for the UTS structure. This has locks - * to observe. Should this be in kernel/sys.c ???? +/** + * proc_dostring - read a string sysctl + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * @ppos: file position + * + * Reads/writes a string from/to the user buffer. If the kernel + * buffer provided is not large enough to hold the string, the + * string is truncated. The copied string is %NULL-terminated. + * If the string is being read by the user process, it is copied + * and a newline '\n' is added. It is truncated if the buffer is + * not large enough. + * + * Returns 0 on success. */ - -static int proc_doutsstring(ctl_table *table, int write, struct file *filp, +int proc_dostring(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - int r; - - if (!write) { - down_read(&uts_sem); - r=proc_dostring(table,0,filp,buffer,lenp, ppos); - up_read(&uts_sem); - } else { - down_write(&uts_sem); - r=proc_dostring(table,1,filp,buffer,lenp, ppos); - up_write(&uts_sem); - } - return r; + return _proc_do_string(table->data, table->maxlen, write, filp, + buffer, lenp, ppos); } + static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, int *valp, int write, void *data) @@ -1648,8 +2189,9 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, return 0; } -static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos, +static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, + int write, struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos, int (*conv)(int *negp, unsigned long *lvalp, int *valp, int write, void *data), void *data) @@ -1662,13 +2204,13 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, char buf[TMPBUFLEN], *p; char __user *s = buffer; - if (!table->data || !table->maxlen || !*lenp || + if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) { *lenp = 0; return 0; } - i = (int *) table->data; + i = (int *) tbl_data; vleft = table->maxlen / sizeof(*i); left = *lenp; @@ -1698,7 +2240,7 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, p = buf; if (*p == '-' && left > 1) { neg = 1; - left--, p++; + p++; } if (*p < '0' || *p > '9') break; @@ -1757,6 +2299,16 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, #undef TMPBUFLEN } +static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos, + int (*conv)(int *negp, unsigned long *lvalp, int *valp, + int write, void *data), + void *data) +{ + return __do_proc_dointvec(table->data, table, write, filp, + buffer, lenp, ppos, conv, data); +} + /** * proc_dointvec - read a vector of integers * @table: the sysctl table @@ -1771,66 +2323,46 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, * * Returns 0 on success. */ -int proc_dointvec(ctl_table *table, int write, struct file *filp, +int proc_dointvec(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, NULL,NULL); } -#define OP_SET 0 -#define OP_AND 1 -#define OP_OR 2 -#define OP_MAX 3 -#define OP_MIN 4 - -static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, - int *valp, - int write, void *data) -{ - int op = *(int *)data; - if (write) { - int val = *negp ? -*lvalp : *lvalp; - switch(op) { - case OP_SET: *valp = val; break; - case OP_AND: *valp &= val; break; - case OP_OR: *valp |= val; break; - case OP_MAX: if(*valp < val) - *valp = val; - break; - case OP_MIN: if(*valp > val) - *valp = val; - break; - } - } else { - int val = *valp; - if (val < 0) { - *negp = -1; - *lvalp = (unsigned long)-val; - } else { - *negp = 0; - *lvalp = (unsigned long)val; - } - } - return 0; -} - /* - * init may raise the set. + * Taint values can only be increased + * This means we can safely use a temporary. */ - -int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int proc_taint(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int op; + struct ctl_table t; + unsigned long tmptaint = get_taint(); + int err; - if (!capable(CAP_SYS_MODULE)) { + if (write && !capable(CAP_SYS_ADMIN)) return -EPERM; + + t = *table; + t.data = &tmptaint; + err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos); + if (err < 0) + return err; + + if (write) { + /* + * Poor man's atomic or. Not worth adding a primitive + * to everyone's atomic.h for this + */ + int i; + for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) { + if ((tmptaint >> i) & 1) + add_taint(i); + } } - op = (current->pid == 1) ? OP_SET : OP_AND; - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, - do_proc_dointvec_bset_conv,&op); + return err; } struct do_proc_dointvec_minmax_conv_param { @@ -1879,7 +2411,7 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, * * Returns 0 on success. */ -int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, +int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { struct do_proc_dointvec_minmax_conv_param param = { @@ -1890,7 +2422,7 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, do_proc_dointvec_minmax_conv, ¶m); } -static int do_proc_doulongvec_minmax(ctl_table *table, int write, +static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos, @@ -1904,13 +2436,13 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, char buf[TMPBUFLEN], *p; char __user *s = buffer; - if (!table->data || !table->maxlen || !*lenp || + if (!data || !table->maxlen || !*lenp || (*ppos && !write)) { *lenp = 0; return 0; } - i = (unsigned long *) table->data; + i = (unsigned long *) data; min = (unsigned long *) table->extra1; max = (unsigned long *) table->extra2; vleft = table->maxlen / sizeof(unsigned long); @@ -1939,7 +2471,7 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, p = buf; if (*p == '-' && left > 1) { neg = 1; - left--, p++; + p++; } if (*p < '0' || *p > '9') break; @@ -1995,6 +2527,17 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, #undef TMPBUFLEN } +static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, + struct file *filp, + void __user *buffer, + size_t *lenp, loff_t *ppos, + unsigned long convmul, + unsigned long convdiv) +{ + return __do_proc_doulongvec_minmax(table->data, table, write, + filp, buffer, lenp, ppos, convmul, convdiv); +} + /** * proc_doulongvec_minmax - read a vector of long integers with min/max values * @table: the sysctl table @@ -2012,7 +2555,7 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, * * Returns 0 on success. */ -int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, +int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l); @@ -2036,7 +2579,7 @@ int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, * * Returns 0 on success. */ -int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, +int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -2129,7 +2672,7 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, * * Returns 0 on success. */ -int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, +int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, @@ -2152,7 +2695,7 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, * * Returns 0 on success. */ -int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, +int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, @@ -2176,70 +2719,80 @@ int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, * * Returns 0 on success. */ -int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, +int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, do_proc_dointvec_ms_jiffies_conv, NULL); } -#else /* CONFIG_PROC_FS */ - -int proc_dostring(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) { - return -ENOSYS; -} + struct pid *new_pid; + pid_t tmp; + int r; -static int proc_doutsstring(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; + tmp = pid_vnr(cad_pid); + + r = __do_proc_dointvec(&tmp, table, write, filp, buffer, + lenp, ppos, NULL, NULL); + if (r || !write) + return r; + + new_pid = find_get_pid(tmp); + if (!new_pid) + return -ESRCH; + + put_pid(xchg(&cad_pid, new_pid)); + return 0; } -int proc_dointvec(ctl_table *table, int write, struct file *filp, +#else /* CONFIG_PROC_FS */ + +int proc_dostring(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) +int proc_dointvec(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, +int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, +int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, +int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, +int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, +int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, +int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -2251,14 +2804,49 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, #endif /* CONFIG_PROC_FS */ +#ifdef CONFIG_SYSCTL_SYSCALL /* * General sysctl support routines */ +/* The generic sysctl data routine (used if no strategy routine supplied) */ +int sysctl_data(struct ctl_table *table, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + size_t len; + + /* Get out of I don't have a variable */ + if (!table->data || !table->maxlen) + return -ENOTDIR; + + if (oldval && oldlenp) { + if (get_user(len, oldlenp)) + return -EFAULT; + if (len) { + if (len > table->maxlen) + len = table->maxlen; + if (copy_to_user(oldval, table->data, len)) + return -EFAULT; + if (put_user(len, oldlenp)) + return -EFAULT; + } + } + + if (newval && newlen) { + if (newlen > table->maxlen) + newlen = table->maxlen; + + if (copy_from_user(table->data, newval, newlen)) + return -EFAULT; + } + return 1; +} + /* The generic string strategy routine: */ -int sysctl_string(ctl_table *table, int __user *name, int nlen, +int sysctl_string(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { if (!table->data || !table->maxlen) return -ENOTDIR; @@ -2302,9 +2890,9 @@ int sysctl_string(ctl_table *table, int __user *name, int nlen, * are between the minimum and maximum values given in the arrays * table->extra1 and table->extra2, respectively. */ -int sysctl_intvec(ctl_table *table, int __user *name, int nlen, +int sysctl_intvec(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { if (newval && newlen) { @@ -2338,21 +2926,27 @@ int sysctl_intvec(ctl_table *table, int __user *name, int nlen, } /* Strategy function to convert jiffies to seconds */ -int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, +int sysctl_jiffies(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { - if (oldval) { + if (oldval && oldlenp) { size_t olen; - if (oldlenp) { - if (get_user(olen, oldlenp)) + + if (get_user(olen, oldlenp)) + return -EFAULT; + if (olen) { + int val; + + if (olen < sizeof(int)) + return -EINVAL; + + val = *(int *)(table->data) / HZ; + if (put_user(val, (int __user *)oldval)) + return -EFAULT; + if (put_user(sizeof(int), oldlenp)) return -EFAULT; - if (olen!=sizeof(int)) - return -EINVAL; } - if (put_user(*(int *)(table->data)/HZ, (int __user *)oldval) || - (oldlenp && put_user(sizeof(int),oldlenp))) - return -EFAULT; } if (newval && newlen) { int new; @@ -2366,21 +2960,27 @@ int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, } /* Strategy function to convert jiffies to seconds */ -int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, +int sysctl_ms_jiffies(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) + void __user *newval, size_t newlen) { - if (oldval) { + if (oldval && oldlenp) { size_t olen; - if (oldlenp) { - if (get_user(olen, oldlenp)) + + if (get_user(olen, oldlenp)) + return -EFAULT; + if (olen) { + int val; + + if (olen < sizeof(int)) + return -EINVAL; + + val = jiffies_to_msecs(*(int *)(table->data)); + if (put_user(val, (int __user *)oldval)) + return -EFAULT; + if (put_user(sizeof(int), oldlenp)) return -EFAULT; - if (olen!=sizeof(int)) - return -EINVAL; } - if (put_user(jiffies_to_msecs(*(int *)(table->data)), (int __user *)oldval) || - (oldlenp && put_user(sizeof(int),oldlenp))) - return -EFAULT; } if (newval && newlen) { int new; @@ -2393,109 +2993,95 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, return 1; } -#else /* CONFIG_SYSCTL */ -asmlinkage long sys_sysctl(struct __sysctl_args __user *args) -{ - return -ENOSYS; -} +#else /* CONFIG_SYSCTL_SYSCALL */ -int sysctl_string(ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) -{ - return -ENOSYS; -} -int sysctl_intvec(ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) +SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args) { - return -ENOSYS; -} + struct __sysctl_args tmp; + int error; -int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) -{ - return -ENOSYS; -} + if (copy_from_user(&tmp, args, sizeof(tmp))) + return -EFAULT; -int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) -{ - return -ENOSYS; -} + error = deprecated_sysctl_warning(&tmp); -int proc_dostring(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} + /* If no error reading the parameters then just -ENOSYS ... */ + if (!error) + error = -ENOSYS; -int proc_dointvec(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; + return error; } -int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) +int sysctl_data(struct ctl_table *table, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) { return -ENOSYS; } -int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) +int sysctl_string(struct ctl_table *table, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) { return -ENOSYS; } -int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) +int sysctl_intvec(struct ctl_table *table, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) { return -ENOSYS; } -int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) +int sysctl_jiffies(struct ctl_table *table, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) { return -ENOSYS; } -int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) +int sysctl_ms_jiffies(struct ctl_table *table, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) { return -ENOSYS; } -int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} +#endif /* CONFIG_SYSCTL_SYSCALL */ -int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, - struct file *filp, - void __user *buffer, - size_t *lenp, loff_t *ppos) +static int deprecated_sysctl_warning(struct __sysctl_args *args) { - return -ENOSYS; -} + static int msg_count; + int name[CTL_MAXNAME]; + int i; -struct ctl_table_header * register_sysctl_table(ctl_table * table, - int insert_at_head) -{ - return NULL; -} + /* Check args->nlen. */ + if (args->nlen < 0 || args->nlen > CTL_MAXNAME) + return -ENOTDIR; -void unregister_sysctl_table(struct ctl_table_header * table) -{ -} + /* Read in the sysctl name for better debug message logging */ + for (i = 0; i < args->nlen; i++) + if (get_user(name[i], args->name + i)) + return -EFAULT; -#endif /* CONFIG_SYSCTL */ + /* Ignore accesses to kernel.version */ + if ((args->nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION)) + return 0; + + if (msg_count < 5) { + msg_count++; + printk(KERN_INFO + "warning: process `%s' used the deprecated sysctl " + "system call with ", current->comm); + for (i = 0; i < args->nlen; i++) + printk("%d.", name[i]); + printk("\n"); + } + return 0; +} /* * No sense putting this after each symbol definition, twice, @@ -2510,8 +3096,10 @@ EXPORT_SYMBOL(proc_dostring); EXPORT_SYMBOL(proc_doulongvec_minmax); EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); EXPORT_SYMBOL(register_sysctl_table); +EXPORT_SYMBOL(register_sysctl_paths); EXPORT_SYMBOL(sysctl_intvec); EXPORT_SYMBOL(sysctl_jiffies); EXPORT_SYMBOL(sysctl_ms_jiffies); EXPORT_SYMBOL(sysctl_string); +EXPORT_SYMBOL(sysctl_data); EXPORT_SYMBOL(unregister_sysctl_table);