lockd: dont return EAGAIN for a permanent error
[safe/jmp/linux-2.6] / kernel / sysctl.c
index 7cb1ac3..35a50db 100644 (file)
 #include <linux/highuid.h>
 #include <linux/writeback.h>
 #include <linux/hugetlb.h>
-#include <linux/security.h>
 #include <linux/initrd.h>
+#include <linux/key.h>
 #include <linux/times.h>
 #include <linux/limits.h>
 #include <linux/dcache.h>
 #include <linux/syscalls.h>
+#include <linux/vmstat.h>
 #include <linux/nfs_fs.h>
 #include <linux/acpi.h>
 #include <linux/reboot.h>
+#include <linux/ftrace.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -67,26 +69,32 @@ extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
 extern int sysctl_panic_on_oom;
 extern int sysctl_oom_kill_allocating_task;
+extern int sysctl_oom_dump_tasks;
 extern int max_threads;
 extern int core_uses_pid;
 extern int suid_dumpable;
 extern char core_pattern[];
 extern int pid_max;
 extern int min_free_kbytes;
-extern int printk_ratelimit_jiffies;
-extern int printk_ratelimit_burst;
 extern int pid_max_min, pid_max_max;
 extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
 extern int compat_log;
 extern int maps_protect;
-extern int sysctl_stat_interval;
 extern int latencytop_enabled;
+extern int sysctl_nr_open_min, sysctl_nr_open_max;
+#ifdef CONFIG_RCU_TORTURE_TEST
+extern int rcutorture_runnable;
+#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 
 /* Constants used for minimum and  maximum */
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
 static int one = 1;
+#endif
+
+#ifdef CONFIG_DETECT_SOFTLOCKUP
 static int sixty = 60;
+static int neg_one = -1;
 #endif
 
 #ifdef CONFIG_MMU
@@ -103,7 +111,7 @@ static int min_percpu_pagelist_fract = 8;
 
 static int ngroups_max = NGROUPS_MAX;
 
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 extern char modprobe_path[];
 #endif
 #ifdef CONFIG_CHR_DEV_SG
@@ -129,8 +137,6 @@ extern int sysctl_userprocess_debug;
 extern int spin_retry;
 #endif
 
-extern int sysctl_hz_timer;
-
 #ifdef CONFIG_BSD_PROCESS_ACCT
 extern int acct_parm[];
 #endif
@@ -143,12 +149,6 @@ extern int no_unaligned_warning;
 extern int max_lock_depth;
 #endif
 
-#ifdef CONFIG_SYSCTL_SYSCALL
-static int parse_table(int __user *, int, void __user *, size_t __user *,
-               void __user *, size_t, struct ctl_table *);
-#endif
-
-
 #ifdef CONFIG_PROC_SYSCTL
 static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
                  void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -269,14 +269,11 @@ static struct ctl_table kern_table[] = {
        },
        {
                .ctl_name       = CTL_UNNUMBERED,
-               .procname       = "sched_batch_wakeup_granularity_ns",
-               .data           = &sysctl_sched_batch_wakeup_granularity,
+               .procname       = "sched_shares_ratelimit",
+               .data           = &sysctl_sched_shares_ratelimit,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_minmax,
-               .strategy       = &sysctl_intvec,
-               .extra1         = &min_wakeup_granularity_ns,
-               .extra2         = &max_wakeup_granularity_ns,
+               .proc_handler   = &proc_dointvec,
        },
        {
                .ctl_name       = CTL_UNNUMBERED,
@@ -310,41 +307,23 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
+#endif
        {
                .ctl_name       = CTL_UNNUMBERED,
-               .procname       = "sched_rt_period_ms",
+               .procname       = "sched_rt_period_us",
                .data           = &sysctl_sched_rt_period,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = &sched_rt_handler,
        },
        {
                .ctl_name       = CTL_UNNUMBERED,
-               .procname       = "sched_rt_ratio",
-               .data           = &sysctl_sched_rt_ratio,
-               .maxlen         = sizeof(unsigned int),
+               .procname       = "sched_rt_runtime_us",
+               .data           = &sysctl_sched_rt_runtime,
+               .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
-       },
-#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
-       {
-               .ctl_name       = CTL_UNNUMBERED,
-               .procname       = "sched_min_bal_int_shares",
-               .data           = &sysctl_sched_min_bal_int_shares,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
-       },
-       {
-               .ctl_name       = CTL_UNNUMBERED,
-               .procname       = "sched_max_bal_int_shares",
-               .data           = &sysctl_sched_max_bal_int_shares,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = &sched_rt_handler,
        },
-#endif
-#endif
        {
                .ctl_name       = CTL_UNNUMBERED,
                .procname       = "sched_compat_yield",
@@ -416,15 +395,6 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = &proc_dointvec,
        },
 #endif
-#ifdef CONFIG_SECURITY_CAPABILITIES
-       {
-               .procname       = "cap-bound",
-               .data           = &cap_bset,
-               .maxlen         = sizeof(kernel_cap_t),
-               .mode           = 0600,
-               .proc_handler   = &proc_dointvec_bset,
-       },
-#endif /* def CONFIG_SECURITY_CAPABILITIES */
 #ifdef CONFIG_BLK_DEV_INITRD
        {
                .ctl_name       = KERN_REALROOTDEV,
@@ -496,15 +466,17 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
+#ifdef CONFIG_FTRACE
        {
-               .ctl_name       = KERN_PRINTK,
-               .procname       = "printk",
-               .data           = &console_loglevel,
-               .maxlen         = 4*sizeof(int),
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "ftrace_enabled",
+               .data           = &ftrace_enabled,
+               .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = &ftrace_enable_sysctl,
        },
-#ifdef CONFIG_KMOD
+#endif
+#ifdef CONFIG_MODULES
        {
                .ctl_name       = KERN_MODPROBE,
                .procname       = "modprobe",
@@ -612,16 +584,6 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = &proc_dointvec,
        },
 #endif
-#ifdef CONFIG_NO_IDLE_HZ
-       {
-               .ctl_name       = KERN_HZ_TIMER,
-               .procname       = "hz_timer",
-               .data           = &sysctl_hz_timer,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
-       },
-#endif
        {
                .ctl_name       = KERN_S390_USER_DEBUG_LOGGING,
                .procname       = "userprocess_debug",
@@ -650,10 +612,19 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
+#if defined CONFIG_PRINTK
+       {
+               .ctl_name       = KERN_PRINTK,
+               .procname       = "printk",
+               .data           = &console_loglevel,
+               .maxlen         = 4*sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
        {
                .ctl_name       = KERN_PRINTK_RATELIMIT,
                .procname       = "printk_ratelimit",
-               .data           = &printk_ratelimit_jiffies,
+               .data           = &printk_ratelimit_state.interval,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
@@ -662,11 +633,12 @@ static struct ctl_table kern_table[] = {
        {
                .ctl_name       = KERN_PRINTK_RATELIMIT_BURST,
                .procname       = "printk_ratelimit_burst",
-               .data           = &printk_ratelimit_burst,
+               .data           = &printk_ratelimit_state.burst,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
+#endif
        {
                .ctl_name       = KERN_NGROUPS_MAX,
                .procname       = "ngroups_max",
@@ -768,13 +740,24 @@ static struct ctl_table kern_table[] = {
 #ifdef CONFIG_DETECT_SOFTLOCKUP
        {
                .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "softlockup_panic",
+               .data           = &softlockup_panic,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
                .procname       = "softlockup_thresh",
                .data           = &softlockup_thresh,
-               .maxlen         = sizeof(unsigned long),
+               .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_doulongvec_minmax,
+               .proc_handler   = &proc_dointvec_minmax,
                .strategy       = &sysctl_intvec,
-               .extra1         = &one,
+               .extra1         = &neg_one,
                .extra2         = &sixty,
        },
        {
@@ -844,6 +827,24 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = &proc_dostring,
                .strategy       = &sysctl_string,
        },
+#ifdef CONFIG_KEYS
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "keys",
+               .mode           = 0555,
+               .child          = key_sysctls,
+       },
+#endif
+#ifdef CONFIG_RCU_TORTURE_TEST
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "rcutorture_runnable",
+               .data           = &rcutorture_runnable,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
@@ -877,6 +878,14 @@ static struct ctl_table vm_table[] = {
                .proc_handler   = &proc_dointvec,
        },
        {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "oom_dump_tasks",
+               .data           = &sysctl_oom_dump_tasks,
+               .maxlen         = sizeof(sysctl_oom_dump_tasks),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
                .ctl_name       = VM_OVERCOMMIT_RATIO,
                .procname       = "overcommit_ratio",
                .data           = &sysctl_overcommit_ratio,
@@ -950,7 +959,7 @@ static struct ctl_table vm_table[] = {
 #ifdef CONFIG_HUGETLB_PAGE
         {
                .procname       = "nr_hugepages",
-               .data           = &max_huge_pages,
+               .data           = NULL,
                .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
                .proc_handler   = &hugetlb_sysctl_handler,
@@ -976,10 +985,12 @@ static struct ctl_table vm_table[] = {
        {
                .ctl_name       = CTL_UNNUMBERED,
                .procname       = "nr_overcommit_hugepages",
-               .data           = &nr_overcommit_huge_pages,
-               .maxlen         = sizeof(nr_overcommit_huge_pages),
+               .data           = NULL,
+               .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
-               .proc_handler   = &proc_doulongvec_minmax,
+               .proc_handler   = &hugetlb_overcommit_handler,
+               .extra1         = (void *)&hugetlb_zero,
+               .extra2         = (void *)&hugetlb_infinity,
        },
 #endif
        {
@@ -1150,6 +1161,19 @@ static struct ctl_table vm_table[] = {
                .extra1         = &zero,
        },
 #endif
+#ifdef CONFIG_HIGHMEM
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "highmem_is_dirtyable",
+               .data           = &vm_highmem_is_dirtyable,
+               .maxlen         = sizeof(vm_highmem_is_dirtyable),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
@@ -1196,6 +1220,16 @@ static struct ctl_table fs_table[] = {
                .proc_handler   = &proc_dointvec,
        },
        {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "nr_open",
+               .data           = &sysctl_nr_open,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .extra1         = &sysctl_nr_open_min,
+               .extra2         = &sysctl_nr_open_max,
+       },
+       {
                .ctl_name       = FS_DENTRY,
                .procname       = "dentry-state",
                .data           = &dentry_stat,
@@ -1436,6 +1470,76 @@ void register_sysctl_root(struct ctl_table_root *root)
 }
 
 #ifdef CONFIG_SYSCTL_SYSCALL
+/*
+ * Perform the actual read/write of a sysctl table entry.
+ *
+ * @root:    sysctl root of the entry; only handed on to sysctl_perm().
+ * @table:   the entry being accessed.
+ * @name, @nlen: name vector, passed unchanged to the handlers below.
+ * @oldval, @oldlenp: user buffer for the current value; a non-NULL
+ *           @oldval adds 004 to the access mask that is checked.
+ * @newval, @newlen: user buffer holding the replacement value; a
+ *           non-NULL @newval adds 002 to the access mask.
+ *
+ * The entry's ->strategy routine, if any, runs first; sysctl_data() is
+ * the fallback when there is no strategy or it returns zero.
+ *
+ * Returns -EPERM when sysctl_perm() refuses the access, a negative
+ * value propagated from ->strategy or sysctl_data(), and 0 otherwise.
+ */
+static int do_sysctl_strategy(struct ctl_table_root *root,
+                       struct ctl_table *table,
+                       int __user *name, int nlen,
+                       void __user *oldval, size_t __user *oldlenp,
+                       void __user *newval, size_t newlen)
+{
+       int op = 0, rc;         /* op: access mask given to sysctl_perm() */
+
+       if (oldval)
+               op |= 004;      /* old value requested (presumably the read bit) */
+       if (newval)
+               op |= 002;      /* new value supplied (presumably the write bit) */
+       if (sysctl_perm(root, table, op))
+               return -EPERM;  /* whatever sysctl_perm() returned becomes -EPERM */
+
+       if (table->strategy) {
+               rc = table->strategy(table, name, nlen, oldval, oldlenp,
+                                    newval, newlen);
+               if (rc < 0)
+                       return rc;      /* strategy failed: pass its error up */
+               if (rc > 0)
+                       return 0;       /* positive: done, skip sysctl_data() */
+       }
+
+       /* If there is no strategy routine, or if the strategy returns
+        * zero, proceed with automatic r/w */
+       if (table->data && table->maxlen) {     /* needs both a buffer and a size */
+               rc = sysctl_data(table, name, nlen, oldval, oldlenp,
+                                newval, newlen);
+               if (rc < 0)
+                       return rc;
+       }
+       return 0;       /* also reached when there is no data/maxlen to act on */
+}
+/*
+ * Resolve a sysctl name vector against one table and act on the match.
+ *
+ * Reads the next component of @name from user space and scans @table
+ * for an entry whose ctl_name equals it; entries whose ctl_name is zero
+ * are skipped.  A match that has a child table consumes one component
+ * and the scan restarts in that child; any other match is handed to
+ * do_sysctl_strategy() together with @root.
+ *
+ * Returns -ENOTDIR when the name is exhausted or nothing matches,
+ * -EFAULT when the component cannot be read, -EPERM when sysctl_perm()
+ * refuses the 001 check on a matched parent entry, and otherwise the
+ * result of do_sysctl_strategy().
+ */
+static int parse_table(int __user *name, int nlen,
+                      void __user *oldval, size_t __user *oldlenp,
+                      void __user *newval, size_t newlen,
+                      struct ctl_table_root *root,
+                      struct ctl_table *table)
+{
+       int n;          /* current name component, copied from user space */
+repeat:
+       if (!nlen)
+               return -ENOTDIR;        /* no components left to match */
+       if (get_user(n, name))
+               return -EFAULT;
+       for ( ; table->ctl_name || table->procname; table++) {
+               if (!table->ctl_name)
+                       continue;       /* entry has no number to compare with */
+               if (n == table->ctl_name) {
+                       int error;
+                       if (table->child) {
+                               if (sysctl_perm(root, table, 001))
+                                       return -EPERM;
+                               name++;         /* consume this component ... */
+                               nlen--;
+                               table = table->child;
+                               goto repeat;    /* ... and match the next one in the child */
+                       }
+                       error = do_sysctl_strategy(root, table, name, nlen,
+                                                  oldval, oldlenp,
+                                                  newval, newlen);
+                       return error;
+               }
+       }
+       return -ENOTDIR;        /* scanned the whole table without a match */
+}
+
 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
               void __user *newval, size_t newlen)
 {
@@ -1453,7 +1557,8 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol
        for (head = sysctl_head_next(NULL); head;
                        head = sysctl_head_next(head)) {
                error = parse_table(name, nlen, oldval, oldlenp, 
-                                       newval, newlen, head->ctl_table);
+                                       newval, newlen,
+                                       head->root, head->ctl_table);
                if (error != -ENOTDIR) {
                        sysctl_head_finish(head);
                        break;
@@ -1499,84 +1604,22 @@ static int test_perm(int mode, int op)
        return -EACCES;
 }
 
-int sysctl_perm(struct ctl_table *table, int op)
+int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
 {
        int error;
+       int mode;       /* mode bits that test_perm() is asked to check */
+
        error = security_sysctl(table, op);
        if (error)
                return error;
-       return test_perm(table->mode, op);
-}
 
-#ifdef CONFIG_SYSCTL_SYSCALL
-static int parse_table(int __user *name, int nlen,
-                      void __user *oldval, size_t __user *oldlenp,
-                      void __user *newval, size_t newlen,
-                      struct ctl_table *table)
-{
-       int n;
-repeat:
-       if (!nlen)
-               return -ENOTDIR;
-       if (get_user(n, name))
-               return -EFAULT;
-       for ( ; table->ctl_name || table->procname; table++) {
-               if (!table->ctl_name)
-                       continue;
-               if (n == table->ctl_name) {
-                       int error;
-                       if (table->child) {
-                               if (sysctl_perm(table, 001))
-                                       return -EPERM;
-                               name++;
-                               nlen--;
-                               table = table->child;
-                               goto repeat;
-                       }
-                       error = do_sysctl_strategy(table, name, nlen,
-                                                  oldval, oldlenp,
-                                                  newval, newlen);
-                       return error;
-               }
-       }
-       return -ENOTDIR;
-}
-
-/* Perform the actual read/write of a sysctl table entry. */
-int do_sysctl_strategy (struct ctl_table *table,
-                       int __user *name, int nlen,
-                       void __user *oldval, size_t __user *oldlenp,
-                       void __user *newval, size_t newlen)
-{
-       int op = 0, rc;
-
-       if (oldval)
-               op |= 004;
-       if (newval) 
-               op |= 002;
-       if (sysctl_perm(table, op))
-               return -EPERM;
-
-       if (table->strategy) {
-               rc = table->strategy(table, name, nlen, oldval, oldlenp,
-                                    newval, newlen);
-               if (rc < 0)
-                       return rc;
-               if (rc > 0)
-                       return 0;
-       }
+       if (root->permissions)  /* the root may supply its own mode for this entry */
+               mode = root->permissions(root, current->nsproxy, table);
+       else
+               mode = table->mode;     /* no hook: use the mode stored in the entry */
 
-       /* If there is no strategy routine, or if the strategy returns
-        * zero, proceed with automatic r/w */
-       if (table->data && table->maxlen) {
-               rc = sysctl_data(table, name, nlen, oldval, oldlenp,
-                                newval, newlen);
-               if (rc < 0)
-                       return rc;
-       }
-       return 0;
+       return test_perm(mode, op);     /* reached only if security_sysctl() returned 0 */
 }
-#endif /* CONFIG_SYSCTL_SYSCALL */
 
 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
 {
@@ -1589,9 +1632,13 @@ static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
 
 static __init int sysctl_init(void)
 {
-       int err;
        sysctl_set_parent(NULL, root_table);
-       err = sysctl_check_table(current->nsproxy, root_table);
+#ifdef CONFIG_SYSCTL_SYSCALL_CHECK /* table check is compiled in only with this option */
+       {       /* extra scope keeps err out of builds without the check */
+               int err;        /* NOTE(review): assigned below but never read */
+               err = sysctl_check_table(current->nsproxy, root_table);
+       }
+#endif /* CONFIG_SYSCTL_SYSCALL_CHECK */
        return 0;
 }
 
@@ -1718,10 +1765,12 @@ struct ctl_table_header *__register_sysctl_paths(
        header->unregistering = NULL;
        header->root = root;
        sysctl_set_parent(NULL, header->ctl_table);
+#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
        if (sysctl_check_table(namespaces, header->ctl_table)) {
                kfree(header);
                return NULL;
        }
+#endif
        spin_lock(&sysctl_lock);
        header_list = lookup_header_list(root, namespaces);
        list_add_tail(&header->ctl_entry, header_list);
@@ -2080,26 +2129,6 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
        return 0;
 }
 
-#ifdef CONFIG_SECURITY_CAPABILITIES
-/*
- *     init may raise the set.
- */
-
-int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
-                       void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-       int op;
-
-       if (write && !capable(CAP_SYS_MODULE)) {
-               return -EPERM;
-       }
-
-       op = is_global_init(current) ? OP_SET : OP_AND;
-       return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
-                               do_proc_dointvec_bset_conv,&op);
-}
-#endif /* def CONFIG_SECURITY_CAPABILITIES */
-
 /*
  *     Taint values can only be increased
  */
@@ -2484,7 +2513,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp
        pid_t tmp;
        int r;
 
-       tmp = pid_nr_ns(cad_pid, current->nsproxy->pid_ns);
+       tmp = pid_vnr(cad_pid);
 
        r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
                               lenp, ppos, NULL, NULL);
@@ -2513,12 +2542,6 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
        return -ENOSYS;
 }
 
-int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
-                       void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-       return -ENOSYS;
-}
-
 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {