sched: group scheduler, fix fairness of cpu bandwidth allocation for task groups
[safe/jmp/linux-2.6] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/security.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/smp_lock.h>
31 #include <linux/fs.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
48 #include <linux/reboot.h>
49
50 #include <asm/uaccess.h>
51 #include <asm/processor.h>
52
53 #ifdef CONFIG_X86
54 #include <asm/nmi.h>
55 #include <asm/stacktrace.h>
56 #endif
57
58 static int deprecated_sysctl_warning(struct __sysctl_args *args);
59
60 #if defined(CONFIG_SYSCTL)
61
62 /* External variables not in a header file. */
63 extern int C_A_D;
64 extern int print_fatal_signals;
65 extern int sysctl_overcommit_memory;
66 extern int sysctl_overcommit_ratio;
67 extern int sysctl_panic_on_oom;
68 extern int sysctl_oom_kill_allocating_task;
69 extern int max_threads;
70 extern int core_uses_pid;
71 extern int suid_dumpable;
72 extern char core_pattern[];
73 extern int pid_max;
74 extern int min_free_kbytes;
75 extern int printk_ratelimit_jiffies;
76 extern int printk_ratelimit_burst;
77 extern int pid_max_min, pid_max_max;
78 extern int sysctl_drop_caches;
79 extern int percpu_pagelist_fraction;
80 extern int compat_log;
81 extern int maps_protect;
82 extern int sysctl_stat_interval;
83 extern int audit_argv_kb;
84
85 /* Constants used for minimum and maximum */
86 #ifdef CONFIG_DETECT_SOFTLOCKUP
87 static int one = 1;
88 static int sixty = 60;
89 #endif
90
91 #ifdef CONFIG_MMU
92 static int two = 2;
93 #endif
94
95 static int zero;
96 static int one_hundred = 100;
97
98 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
99 static int maxolduid = 65535;
100 static int minolduid;
101 static int min_percpu_pagelist_fract = 8;
102
103 static int ngroups_max = NGROUPS_MAX;
104
105 #ifdef CONFIG_KMOD
106 extern char modprobe_path[];
107 #endif
108 #ifdef CONFIG_CHR_DEV_SG
109 extern int sg_big_buff;
110 #endif
111
112 #ifdef __sparc__
113 extern char reboot_command [];
114 extern int stop_a_enabled;
115 extern int scons_pwroff;
116 #endif
117
118 #ifdef __hppa__
119 extern int pwrsw_enabled;
120 extern int unaligned_enabled;
121 #endif
122
123 #ifdef CONFIG_S390
124 #ifdef CONFIG_MATHEMU
125 extern int sysctl_ieee_emulation_warnings;
126 #endif
127 extern int sysctl_userprocess_debug;
128 extern int spin_retry;
129 #endif
130
131 extern int sysctl_hz_timer;
132
133 #ifdef CONFIG_BSD_PROCESS_ACCT
134 extern int acct_parm[];
135 #endif
136
137 #ifdef CONFIG_IA64
138 extern int no_unaligned_warning;
139 #endif
140
141 #ifdef CONFIG_RT_MUTEXES
142 extern int max_lock_depth;
143 #endif
144
145 #ifdef CONFIG_SYSCTL_SYSCALL
146 static int parse_table(int __user *, int, void __user *, size_t __user *,
147                 void __user *, size_t, struct ctl_table *);
148 #endif
149
150
151 #ifdef CONFIG_PROC_SYSCTL
152 static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
153                   void __user *buffer, size_t *lenp, loff_t *ppos);
154 static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp,
155                                void __user *buffer, size_t *lenp, loff_t *ppos);
156 #endif
157
158 static struct ctl_table root_table[];
159 static struct ctl_table_header root_table_header =       /* header object wrapping root_table */
160         { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };   /* positional init: the table first, then a list head built from this header's own ctl_entry member */
161
162 static struct ctl_table kern_table[];
163 static struct ctl_table vm_table[];
164 static struct ctl_table fs_table[];
165 static struct ctl_table debug_table[];
166 static struct ctl_table dev_table[];
167 extern struct ctl_table random_table[];
168 #ifdef CONFIG_INOTIFY_USER
169 extern struct ctl_table inotify_table[];
170 #endif
171
172 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
173 int sysctl_legacy_va_layout;
174 #endif
175
176 extern int prove_locking;
177 extern int lock_stat;
178
179 /* The default sysctl tables: */
180
181 static struct ctl_table root_table[] = {       /* top-level entries; each one only names a child table */
182         {
183                 .ctl_name       = CTL_KERN,
184                 .procname       = "kernel",
185                 .mode           = 0555,         /* octal r-xr-xr-x: no write bit */
186                 .child          = kern_table,
187         },
188         {
189                 .ctl_name       = CTL_VM,
190                 .procname       = "vm",
191                 .mode           = 0555,
192                 .child          = vm_table,
193         },
194 #ifdef CONFIG_NET
195         {       /* "net" entry is compiled in only when CONFIG_NET is defined */
196                 .ctl_name       = CTL_NET,
197                 .procname       = "net",
198                 .mode           = 0555,
199                 .child          = net_table,    /* NOTE(review): net_table is not declared in the visible part of this file; presumably comes from a header -- confirm */
200         },
201 #endif
202         {
203                 .ctl_name       = CTL_FS,
204                 .procname       = "fs",
205                 .mode           = 0555,
206                 .child          = fs_table,
207         },
208         {
209                 .ctl_name       = CTL_DEBUG,
210                 .procname       = "debug",
211                 .mode           = 0555,
212                 .child          = debug_table,
213         },
214         {
215                 .ctl_name       = CTL_DEV,
216                 .procname       = "dev",
217                 .mode           = 0555,
218                 .child          = dev_table,
219         },
220 /*
221  * NOTE: do not add new entries to this table unless you have read
222  * Documentation/sysctl/ctl_unnumbered.txt
223  */
224         { .ctl_name = 0 }       /* final entry; presumably the end-of-table marker -- confirm against the code that walks the table */
225 };
226
227 #ifdef CONFIG_SCHED_DEBUG       /* bounds referenced via .extra1/.extra2 by the sched_*_ns entries in kern_table */
228 static int min_sched_granularity_ns = 100000;           /* 100 usecs; also the lower bound used by sched_latency_ns */
229 static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second; also the upper bound used by sched_latency_ns */
230 static int min_wakeup_granularity_ns;                   /* 0 usecs (static storage, so zero-initialised) */
231 static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second; shared by the wakeup and batch-wakeup entries */
232 #endif
233
234 static struct ctl_table kern_table[] = {
235 #ifdef CONFIG_SCHED_DEBUG
236         {
237                 .ctl_name       = CTL_UNNUMBERED,
238                 .procname       = "sched_min_granularity_ns",
239                 .data           = &sysctl_sched_min_granularity,
240                 .maxlen         = sizeof(unsigned int),
241                 .mode           = 0644,
242                 .proc_handler   = &sched_nr_latency_handler,
243                 .strategy       = &sysctl_intvec,
244                 .extra1         = &min_sched_granularity_ns,
245                 .extra2         = &max_sched_granularity_ns,
246         },
247         {
248                 .ctl_name       = CTL_UNNUMBERED,
249                 .procname       = "sched_latency_ns",
250                 .data           = &sysctl_sched_latency,
251                 .maxlen         = sizeof(unsigned int),
252                 .mode           = 0644,
253                 .proc_handler   = &sched_nr_latency_handler,
254                 .strategy       = &sysctl_intvec,
255                 .extra1         = &min_sched_granularity_ns,
256                 .extra2         = &max_sched_granularity_ns,
257         },
258         {
259                 .ctl_name       = CTL_UNNUMBERED,
260                 .procname       = "sched_wakeup_granularity_ns",
261                 .data           = &sysctl_sched_wakeup_granularity,
262                 .maxlen         = sizeof(unsigned int),
263                 .mode           = 0644,
264                 .proc_handler   = &proc_dointvec_minmax,
265                 .strategy       = &sysctl_intvec,
266                 .extra1         = &min_wakeup_granularity_ns,
267                 .extra2         = &max_wakeup_granularity_ns,
268         },
269         {
270                 .ctl_name       = CTL_UNNUMBERED,
271                 .procname       = "sched_batch_wakeup_granularity_ns",
272                 .data           = &sysctl_sched_batch_wakeup_granularity,
273                 .maxlen         = sizeof(unsigned int),
274                 .mode           = 0644,
275                 .proc_handler   = &proc_dointvec_minmax,
276                 .strategy       = &sysctl_intvec,
277                 .extra1         = &min_wakeup_granularity_ns,
278                 .extra2         = &max_wakeup_granularity_ns,
279         },
280         {
281                 .ctl_name       = CTL_UNNUMBERED,
282                 .procname       = "sched_child_runs_first",
283                 .data           = &sysctl_sched_child_runs_first,
284                 .maxlen         = sizeof(unsigned int),
285                 .mode           = 0644,
286                 .proc_handler   = &proc_dointvec,
287         },
288         {
289                 .ctl_name       = CTL_UNNUMBERED,
290                 .procname       = "sched_features",
291                 .data           = &sysctl_sched_features,
292                 .maxlen         = sizeof(unsigned int),
293                 .mode           = 0644,
294                 .proc_handler   = &proc_dointvec,
295         },
296         {
297                 .ctl_name       = CTL_UNNUMBERED,
298                 .procname       = "sched_migration_cost",
299                 .data           = &sysctl_sched_migration_cost,
300                 .maxlen         = sizeof(unsigned int),
301                 .mode           = 0644,
302                 .proc_handler   = &proc_dointvec,
303         },
304         {
305                 .ctl_name       = CTL_UNNUMBERED,
306                 .procname       = "sched_nr_migrate",
307                 .data           = &sysctl_sched_nr_migrate,
308                 .maxlen         = sizeof(unsigned int),
309                 .mode           = 644,
310                 .proc_handler   = &proc_dointvec,
311         },
312 #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
313         {
314                 .ctl_name       = CTL_UNNUMBERED,
315                 .procname       = "sched_min_bal_int_shares",
316                 .data           = &sysctl_sched_min_bal_int_shares,
317                 .maxlen         = sizeof(unsigned int),
318                 .mode           = 0644,
319                 .proc_handler   = &proc_dointvec,
320         },
321         {
322                 .ctl_name       = CTL_UNNUMBERED,
323                 .procname       = "sched_max_bal_int_shares",
324                 .data           = &sysctl_sched_max_bal_int_shares,
325                 .maxlen         = sizeof(unsigned int),
326                 .mode           = 0644,
327                 .proc_handler   = &proc_dointvec,
328         },
329 #endif
330 #endif
331         {
332                 .ctl_name       = CTL_UNNUMBERED,
333                 .procname       = "sched_compat_yield",
334                 .data           = &sysctl_sched_compat_yield,
335                 .maxlen         = sizeof(unsigned int),
336                 .mode           = 0644,
337                 .proc_handler   = &proc_dointvec,
338         },
339 #ifdef CONFIG_PROVE_LOCKING
340         {
341                 .ctl_name       = CTL_UNNUMBERED,
342                 .procname       = "prove_locking",
343                 .data           = &prove_locking,
344                 .maxlen         = sizeof(int),
345                 .mode           = 0644,
346                 .proc_handler   = &proc_dointvec,
347         },
348 #endif
349 #ifdef CONFIG_LOCK_STAT
350         {
351                 .ctl_name       = CTL_UNNUMBERED,
352                 .procname       = "lock_stat",
353                 .data           = &lock_stat,
354                 .maxlen         = sizeof(int),
355                 .mode           = 0644,
356                 .proc_handler   = &proc_dointvec,
357         },
358 #endif
359         {
360                 .ctl_name       = KERN_PANIC,
361                 .procname       = "panic",
362                 .data           = &panic_timeout,
363                 .maxlen         = sizeof(int),
364                 .mode           = 0644,
365                 .proc_handler   = &proc_dointvec,
366         },
367         {
368                 .ctl_name       = KERN_CORE_USES_PID,
369                 .procname       = "core_uses_pid",
370                 .data           = &core_uses_pid,
371                 .maxlen         = sizeof(int),
372                 .mode           = 0644,
373                 .proc_handler   = &proc_dointvec,
374         },
375 #ifdef CONFIG_AUDITSYSCALL
376         {
377                 .ctl_name       = CTL_UNNUMBERED,
378                 .procname       = "audit_argv_kb",
379                 .data           = &audit_argv_kb,
380                 .maxlen         = sizeof(int),
381                 .mode           = 0644,
382                 .proc_handler   = &proc_dointvec,
383         },
384 #endif
385         {
386                 .ctl_name       = KERN_CORE_PATTERN,
387                 .procname       = "core_pattern",
388                 .data           = core_pattern,
389                 .maxlen         = CORENAME_MAX_SIZE,
390                 .mode           = 0644,
391                 .proc_handler   = &proc_dostring,
392                 .strategy       = &sysctl_string,
393         },
394 #ifdef CONFIG_PROC_SYSCTL
395         {
396                 .procname       = "tainted",
397                 .data           = &tainted,
398                 .maxlen         = sizeof(int),
399                 .mode           = 0644,
400                 .proc_handler   = &proc_dointvec_taint,
401         },
402 #endif
403 #ifdef CONFIG_SECURITY_CAPABILITIES
404         {
405                 .procname       = "cap-bound",
406                 .data           = &cap_bset,
407                 .maxlen         = sizeof(kernel_cap_t),
408                 .mode           = 0600,
409                 .proc_handler   = &proc_dointvec_bset,
410         },
411 #endif /* def CONFIG_SECURITY_CAPABILITIES */
412 #ifdef CONFIG_BLK_DEV_INITRD
413         {
414                 .ctl_name       = KERN_REALROOTDEV,
415                 .procname       = "real-root-dev",
416                 .data           = &real_root_dev,
417                 .maxlen         = sizeof(int),
418                 .mode           = 0644,
419                 .proc_handler   = &proc_dointvec,
420         },
421 #endif
422         {
423                 .ctl_name       = CTL_UNNUMBERED,
424                 .procname       = "print-fatal-signals",
425                 .data           = &print_fatal_signals,
426                 .maxlen         = sizeof(int),
427                 .mode           = 0644,
428                 .proc_handler   = &proc_dointvec,
429         },
430 #ifdef __sparc__
431         {
432                 .ctl_name       = KERN_SPARC_REBOOT,
433                 .procname       = "reboot-cmd",
434                 .data           = reboot_command,
435                 .maxlen         = 256,
436                 .mode           = 0644,
437                 .proc_handler   = &proc_dostring,
438                 .strategy       = &sysctl_string,
439         },
440         {
441                 .ctl_name       = KERN_SPARC_STOP_A,
442                 .procname       = "stop-a",
443                 .data           = &stop_a_enabled,
444                 .maxlen         = sizeof (int),
445                 .mode           = 0644,
446                 .proc_handler   = &proc_dointvec,
447         },
448         {
449                 .ctl_name       = KERN_SPARC_SCONS_PWROFF,
450                 .procname       = "scons-poweroff",
451                 .data           = &scons_pwroff,
452                 .maxlen         = sizeof (int),
453                 .mode           = 0644,
454                 .proc_handler   = &proc_dointvec,
455         },
456 #endif
457 #ifdef __hppa__
458         {
459                 .ctl_name       = KERN_HPPA_PWRSW,
460                 .procname       = "soft-power",
461                 .data           = &pwrsw_enabled,
462                 .maxlen         = sizeof (int),
463                 .mode           = 0644,
464                 .proc_handler   = &proc_dointvec,
465         },
466         {
467                 .ctl_name       = KERN_HPPA_UNALIGNED,
468                 .procname       = "unaligned-trap",
469                 .data           = &unaligned_enabled,
470                 .maxlen         = sizeof (int),
471                 .mode           = 0644,
472                 .proc_handler   = &proc_dointvec,
473         },
474 #endif
475         {
476                 .ctl_name       = KERN_CTLALTDEL,
477                 .procname       = "ctrl-alt-del",
478                 .data           = &C_A_D,
479                 .maxlen         = sizeof(int),
480                 .mode           = 0644,
481                 .proc_handler   = &proc_dointvec,
482         },
483         {
484                 .ctl_name       = KERN_PRINTK,
485                 .procname       = "printk",
486                 .data           = &console_loglevel,
487                 .maxlen         = 4*sizeof(int),
488                 .mode           = 0644,
489                 .proc_handler   = &proc_dointvec,
490         },
491 #ifdef CONFIG_KMOD
492         {
493                 .ctl_name       = KERN_MODPROBE,
494                 .procname       = "modprobe",
495                 .data           = &modprobe_path,
496                 .maxlen         = KMOD_PATH_LEN,
497                 .mode           = 0644,
498                 .proc_handler   = &proc_dostring,
499                 .strategy       = &sysctl_string,
500         },
501 #endif
502 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
503         {
504                 .ctl_name       = KERN_HOTPLUG,
505                 .procname       = "hotplug",
506                 .data           = &uevent_helper,
507                 .maxlen         = UEVENT_HELPER_PATH_LEN,
508                 .mode           = 0644,
509                 .proc_handler   = &proc_dostring,
510                 .strategy       = &sysctl_string,
511         },
512 #endif
513 #ifdef CONFIG_CHR_DEV_SG
514         {
515                 .ctl_name       = KERN_SG_BIG_BUFF,
516                 .procname       = "sg-big-buff",
517                 .data           = &sg_big_buff,
518                 .maxlen         = sizeof (int),
519                 .mode           = 0444,
520                 .proc_handler   = &proc_dointvec,
521         },
522 #endif
523 #ifdef CONFIG_BSD_PROCESS_ACCT
524         {
525                 .ctl_name       = KERN_ACCT,
526                 .procname       = "acct",
527                 .data           = &acct_parm,
528                 .maxlen         = 3*sizeof(int),
529                 .mode           = 0644,
530                 .proc_handler   = &proc_dointvec,
531         },
532 #endif
533 #ifdef CONFIG_MAGIC_SYSRQ
534         {
535                 .ctl_name       = KERN_SYSRQ,
536                 .procname       = "sysrq",
537                 .data           = &__sysrq_enabled,
538                 .maxlen         = sizeof (int),
539                 .mode           = 0644,
540                 .proc_handler   = &proc_dointvec,
541         },
542 #endif
543 #ifdef CONFIG_PROC_SYSCTL
544         {
545                 .procname       = "cad_pid",
546                 .data           = NULL,
547                 .maxlen         = sizeof (int),
548                 .mode           = 0600,
549                 .proc_handler   = &proc_do_cad_pid,
550         },
551 #endif
552         {
553                 .ctl_name       = KERN_MAX_THREADS,
554                 .procname       = "threads-max",
555                 .data           = &max_threads,
556                 .maxlen         = sizeof(int),
557                 .mode           = 0644,
558                 .proc_handler   = &proc_dointvec,
559         },
560         {
561                 .ctl_name       = KERN_RANDOM,
562                 .procname       = "random",
563                 .mode           = 0555,
564                 .child          = random_table,
565         },
566         {
567                 .ctl_name       = KERN_OVERFLOWUID,
568                 .procname       = "overflowuid",
569                 .data           = &overflowuid,
570                 .maxlen         = sizeof(int),
571                 .mode           = 0644,
572                 .proc_handler   = &proc_dointvec_minmax,
573                 .strategy       = &sysctl_intvec,
574                 .extra1         = &minolduid,
575                 .extra2         = &maxolduid,
576         },
577         {
578                 .ctl_name       = KERN_OVERFLOWGID,
579                 .procname       = "overflowgid",
580                 .data           = &overflowgid,
581                 .maxlen         = sizeof(int),
582                 .mode           = 0644,
583                 .proc_handler   = &proc_dointvec_minmax,
584                 .strategy       = &sysctl_intvec,
585                 .extra1         = &minolduid,
586                 .extra2         = &maxolduid,
587         },
588 #ifdef CONFIG_S390
589 #ifdef CONFIG_MATHEMU
590         {
591                 .ctl_name       = KERN_IEEE_EMULATION_WARNINGS,
592                 .procname       = "ieee_emulation_warnings",
593                 .data           = &sysctl_ieee_emulation_warnings,
594                 .maxlen         = sizeof(int),
595                 .mode           = 0644,
596                 .proc_handler   = &proc_dointvec,
597         },
598 #endif
599 #ifdef CONFIG_NO_IDLE_HZ
600         {
601                 .ctl_name       = KERN_HZ_TIMER,
602                 .procname       = "hz_timer",
603                 .data           = &sysctl_hz_timer,
604                 .maxlen         = sizeof(int),
605                 .mode           = 0644,
606                 .proc_handler   = &proc_dointvec,
607         },
608 #endif
609         {
610                 .ctl_name       = KERN_S390_USER_DEBUG_LOGGING,
611                 .procname       = "userprocess_debug",
612                 .data           = &sysctl_userprocess_debug,
613                 .maxlen         = sizeof(int),
614                 .mode           = 0644,
615                 .proc_handler   = &proc_dointvec,
616         },
617 #endif
618         {
619                 .ctl_name       = KERN_PIDMAX,
620                 .procname       = "pid_max",
621                 .data           = &pid_max,
622                 .maxlen         = sizeof (int),
623                 .mode           = 0644,
624                 .proc_handler   = &proc_dointvec_minmax,
625                 .strategy       = sysctl_intvec,
626                 .extra1         = &pid_max_min,
627                 .extra2         = &pid_max_max,
628         },
629         {
630                 .ctl_name       = KERN_PANIC_ON_OOPS,
631                 .procname       = "panic_on_oops",
632                 .data           = &panic_on_oops,
633                 .maxlen         = sizeof(int),
634                 .mode           = 0644,
635                 .proc_handler   = &proc_dointvec,
636         },
637         {
638                 .ctl_name       = KERN_PRINTK_RATELIMIT,
639                 .procname       = "printk_ratelimit",
640                 .data           = &printk_ratelimit_jiffies,
641                 .maxlen         = sizeof(int),
642                 .mode           = 0644,
643                 .proc_handler   = &proc_dointvec_jiffies,
644                 .strategy       = &sysctl_jiffies,
645         },
646         {
647                 .ctl_name       = KERN_PRINTK_RATELIMIT_BURST,
648                 .procname       = "printk_ratelimit_burst",
649                 .data           = &printk_ratelimit_burst,
650                 .maxlen         = sizeof(int),
651                 .mode           = 0644,
652                 .proc_handler   = &proc_dointvec,
653         },
654         {
655                 .ctl_name       = KERN_NGROUPS_MAX,
656                 .procname       = "ngroups_max",
657                 .data           = &ngroups_max,
658                 .maxlen         = sizeof (int),
659                 .mode           = 0444,
660                 .proc_handler   = &proc_dointvec,
661         },
662 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
663         {
664                 .ctl_name       = KERN_UNKNOWN_NMI_PANIC,
665                 .procname       = "unknown_nmi_panic",
666                 .data           = &unknown_nmi_panic,
667                 .maxlen         = sizeof (int),
668                 .mode           = 0644,
669                 .proc_handler   = &proc_dointvec,
670         },
671         {
672                 .procname       = "nmi_watchdog",
673                 .data           = &nmi_watchdog_enabled,
674                 .maxlen         = sizeof (int),
675                 .mode           = 0644,
676                 .proc_handler   = &proc_nmi_enabled,
677         },
678 #endif
679 #if defined(CONFIG_X86)
680         {
681                 .ctl_name       = KERN_PANIC_ON_NMI,
682                 .procname       = "panic_on_unrecovered_nmi",
683                 .data           = &panic_on_unrecovered_nmi,
684                 .maxlen         = sizeof(int),
685                 .mode           = 0644,
686                 .proc_handler   = &proc_dointvec,
687         },
688         {
689                 .ctl_name       = KERN_BOOTLOADER_TYPE,
690                 .procname       = "bootloader_type",
691                 .data           = &bootloader_type,
692                 .maxlen         = sizeof (int),
693                 .mode           = 0444,
694                 .proc_handler   = &proc_dointvec,
695         },
696         {
697                 .ctl_name       = CTL_UNNUMBERED,
698                 .procname       = "kstack_depth_to_print",
699                 .data           = &kstack_depth_to_print,
700                 .maxlen         = sizeof(int),
701                 .mode           = 0644,
702                 .proc_handler   = &proc_dointvec,
703         },
704 #endif
705 #if defined(CONFIG_MMU)
706         {
707                 .ctl_name       = KERN_RANDOMIZE,
708                 .procname       = "randomize_va_space",
709                 .data           = &randomize_va_space,
710                 .maxlen         = sizeof(int),
711                 .mode           = 0644,
712                 .proc_handler   = &proc_dointvec,
713         },
714 #endif
715 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
716         {
717                 .ctl_name       = KERN_SPIN_RETRY,
718                 .procname       = "spin_retry",
719                 .data           = &spin_retry,
720                 .maxlen         = sizeof (int),
721                 .mode           = 0644,
722                 .proc_handler   = &proc_dointvec,
723         },
724 #endif
725 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
726         {
727                 .procname       = "acpi_video_flags",
728                 .data           = &acpi_realmode_flags,
729                 .maxlen         = sizeof (unsigned long),
730                 .mode           = 0644,
731                 .proc_handler   = &proc_doulongvec_minmax,
732         },
733 #endif
734 #ifdef CONFIG_IA64
735         {
736                 .ctl_name       = KERN_IA64_UNALIGNED,
737                 .procname       = "ignore-unaligned-usertrap",
738                 .data           = &no_unaligned_warning,
739                 .maxlen         = sizeof (int),
740                 .mode           = 0644,
741                 .proc_handler   = &proc_dointvec,
742         },
743 #endif
744 #ifdef CONFIG_DETECT_SOFTLOCKUP
745         {
746                 .ctl_name       = CTL_UNNUMBERED,
747                 .procname       = "softlockup_thresh",
748                 .data           = &softlockup_thresh,
749                 .maxlen         = sizeof(int),
750                 .mode           = 0644,
751                 .proc_handler   = &proc_dointvec_minmax,
752                 .strategy       = &sysctl_intvec,
753                 .extra1         = &one,
754                 .extra2         = &sixty,
755         },
756 #endif
757 #ifdef CONFIG_COMPAT
758         {
759                 .ctl_name       = KERN_COMPAT_LOG,
760                 .procname       = "compat-log",
761                 .data           = &compat_log,
762                 .maxlen         = sizeof (int),
763                 .mode           = 0644,
764                 .proc_handler   = &proc_dointvec,
765         },
766 #endif
767 #ifdef CONFIG_RT_MUTEXES
768         {
769                 .ctl_name       = KERN_MAX_LOCK_DEPTH,
770                 .procname       = "max_lock_depth",
771                 .data           = &max_lock_depth,
772                 .maxlen         = sizeof(int),
773                 .mode           = 0644,
774                 .proc_handler   = &proc_dointvec,
775         },
776 #endif
777 #ifdef CONFIG_PROC_FS
778         {
779                 .ctl_name       = CTL_UNNUMBERED,
780                 .procname       = "maps_protect",
781                 .data           = &maps_protect,
782                 .maxlen         = sizeof(int),
783                 .mode           = 0644,
784                 .proc_handler   = &proc_dointvec,
785         },
786 #endif
787         {
788                 .ctl_name       = CTL_UNNUMBERED,
789                 .procname       = "poweroff_cmd",
790                 .data           = &poweroff_cmd,
791                 .maxlen         = POWEROFF_CMD_PATH_LEN,
792                 .mode           = 0644,
793                 .proc_handler   = &proc_dostring,
794                 .strategy       = &sysctl_string,
795         },
796 /*
797  * NOTE: do not add new entries to this table unless you have read
798  * Documentation/sysctl/ctl_unnumbered.txt
799  */
800         { .ctl_name = 0 }
801 };
802
/*
 * vm_table - sysctl entries for the VM tunables listed below (overcommit,
 * dirty ratios, swappiness, hugepages, ...).
 *
 * Each initializer ties a procname to a backing variable (.data/.maxlen),
 * an access mode and a proc_handler; some entries additionally set a
 * .strategy routine and .extra1/.extra2 pointers for the handler's use.
 * Entries wrapped in #ifdef/#if are only compiled in for the matching
 * configuration.  The last entry ({ .ctl_name = 0 } with no procname) is
 * the terminator that the table walkers in this file stop on.
 *
 * NOTE(review): presumably attached as the child of a "vm" directory entry
 * in the root table; that reference is outside this excerpt -- confirm.
 */
803 static struct ctl_table vm_table[] = {
804         {
805                 .ctl_name       = VM_OVERCOMMIT_MEMORY,
806                 .procname       = "overcommit_memory",
807                 .data           = &sysctl_overcommit_memory,
808                 .maxlen         = sizeof(sysctl_overcommit_memory),
809                 .mode           = 0644,
810                 .proc_handler   = &proc_dointvec,
811         },
812         {
813                 .ctl_name       = VM_PANIC_ON_OOM,
814                 .procname       = "panic_on_oom",
815                 .data           = &sysctl_panic_on_oom,
816                 .maxlen         = sizeof(sysctl_panic_on_oom),
817                 .mode           = 0644,
818                 .proc_handler   = &proc_dointvec,
819         },
820         {
821                 .ctl_name       = CTL_UNNUMBERED,
822                 .procname       = "oom_kill_allocating_task",
823                 .data           = &sysctl_oom_kill_allocating_task,
824                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
825                 .mode           = 0644,
826                 .proc_handler   = &proc_dointvec,
827         },
828         {
829                 .ctl_name       = VM_OVERCOMMIT_RATIO,
830                 .procname       = "overcommit_ratio",
831                 .data           = &sysctl_overcommit_ratio,
832                 .maxlen         = sizeof(sysctl_overcommit_ratio),
833                 .mode           = 0644,
834                 .proc_handler   = &proc_dointvec,
835         },
836         {
837                 .ctl_name       = VM_PAGE_CLUSTER,
838                 .procname       = "page-cluster", 
839                 .data           = &page_cluster,
840                 .maxlen         = sizeof(int),
841                 .mode           = 0644,
842                 .proc_handler   = &proc_dointvec,
843         },
844         {
845                 .ctl_name       = VM_DIRTY_BACKGROUND,
846                 .procname       = "dirty_background_ratio",
847                 .data           = &dirty_background_ratio,
848                 .maxlen         = sizeof(dirty_background_ratio),
849                 .mode           = 0644,
850                 .proc_handler   = &proc_dointvec_minmax,
851                 .strategy       = &sysctl_intvec,
852                 .extra1         = &zero,
853                 .extra2         = &one_hundred,
854         },
855         {
856                 .ctl_name       = VM_DIRTY_RATIO,
857                 .procname       = "dirty_ratio",
858                 .data           = &vm_dirty_ratio,
859                 .maxlen         = sizeof(vm_dirty_ratio),
860                 .mode           = 0644,
861                 .proc_handler   = &dirty_ratio_handler,
862                 .strategy       = &sysctl_intvec,
863                 .extra1         = &zero,
864                 .extra2         = &one_hundred,
865         },
866         {
                /* .ctl_name is left zero here; parse_table() skips such entries. */
867                 .procname       = "dirty_writeback_centisecs",
868                 .data           = &dirty_writeback_interval,
869                 .maxlen         = sizeof(dirty_writeback_interval),
870                 .mode           = 0644,
871                 .proc_handler   = &dirty_writeback_centisecs_handler,
872         },
873         {
                /* .ctl_name is left zero here; parse_table() skips such entries. */
874                 .procname       = "dirty_expire_centisecs",
875                 .data           = &dirty_expire_interval,
876                 .maxlen         = sizeof(dirty_expire_interval),
877                 .mode           = 0644,
878                 .proc_handler   = &proc_dointvec_userhz_jiffies,
879         },
880         {
881                 .ctl_name       = VM_NR_PDFLUSH_THREADS,
882                 .procname       = "nr_pdflush_threads",
883                 .data           = &nr_pdflush_threads,
884                 .maxlen         = sizeof nr_pdflush_threads,
885                 .mode           = 0444 /* read-only*/,
886                 .proc_handler   = &proc_dointvec,
887         },
888         {
889                 .ctl_name       = VM_SWAPPINESS,
890                 .procname       = "swappiness",
891                 .data           = &vm_swappiness,
892                 .maxlen         = sizeof(vm_swappiness),
893                 .mode           = 0644,
894                 .proc_handler   = &proc_dointvec_minmax,
895                 .strategy       = &sysctl_intvec,
896                 .extra1         = &zero,
897                 .extra2         = &one_hundred,
898         },
899 #ifdef CONFIG_HUGETLB_PAGE
900          {
                /* .ctl_name is left zero here; parse_table() skips such entries. */
901                 .procname       = "nr_hugepages",
902                 .data           = &max_huge_pages,
903                 .maxlen         = sizeof(unsigned long),
904                 .mode           = 0644,
905                 .proc_handler   = &hugetlb_sysctl_handler,
906                 .extra1         = (void *)&hugetlb_zero,
907                 .extra2         = (void *)&hugetlb_infinity,
908          },
909          {
910                 .ctl_name       = VM_HUGETLB_GROUP,
911                 .procname       = "hugetlb_shm_group",
912                 .data           = &sysctl_hugetlb_shm_group,
913                 .maxlen         = sizeof(gid_t),
914                 .mode           = 0644,
915                 .proc_handler   = &proc_dointvec,
916          },
917          {
918                 .ctl_name       = CTL_UNNUMBERED,
919                 .procname       = "hugepages_treat_as_movable",
920                 .data           = &hugepages_treat_as_movable,
921                 .maxlen         = sizeof(int),
922                 .mode           = 0644,
923                 .proc_handler   = &hugetlb_treat_movable_handler,
924         },
925         {
926                 .ctl_name       = CTL_UNNUMBERED,
927                 .procname       = "nr_overcommit_hugepages",
928                 .data           = &nr_overcommit_huge_pages,
929                 .maxlen         = sizeof(nr_overcommit_huge_pages),
930                 .mode           = 0644,
931                 .proc_handler   = &proc_doulongvec_minmax,
932         },
933 #endif
934         {
935                 .ctl_name       = VM_LOWMEM_RESERVE_RATIO,
936                 .procname       = "lowmem_reserve_ratio",
937                 .data           = &sysctl_lowmem_reserve_ratio,
938                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
939                 .mode           = 0644,
940                 .proc_handler   = &lowmem_reserve_ratio_sysctl_handler,
941                 .strategy       = &sysctl_intvec,
942         },
943         {
944                 .ctl_name       = VM_DROP_PAGECACHE,
945                 .procname       = "drop_caches",
946                 .data           = &sysctl_drop_caches,
947                 .maxlen         = sizeof(int),
948                 .mode           = 0644,
949                 .proc_handler   = drop_caches_sysctl_handler,
950                 .strategy       = &sysctl_intvec,
951         },
952         {
953                 .ctl_name       = VM_MIN_FREE_KBYTES,
954                 .procname       = "min_free_kbytes",
955                 .data           = &min_free_kbytes,
956                 .maxlen         = sizeof(min_free_kbytes),
957                 .mode           = 0644,
958                 .proc_handler   = &min_free_kbytes_sysctl_handler,
959                 .strategy       = &sysctl_intvec,
960                 .extra1         = &zero,
961         },
962         {
963                 .ctl_name       = VM_PERCPU_PAGELIST_FRACTION,
964                 .procname       = "percpu_pagelist_fraction",
965                 .data           = &percpu_pagelist_fraction,
966                 .maxlen         = sizeof(percpu_pagelist_fraction),
967                 .mode           = 0644,
968                 .proc_handler   = &percpu_pagelist_fraction_sysctl_handler,
969                 .strategy       = &sysctl_intvec,
970                 .extra1         = &min_percpu_pagelist_fract,
971         },
972 #ifdef CONFIG_MMU
973         {
974                 .ctl_name       = VM_MAX_MAP_COUNT,
975                 .procname       = "max_map_count",
976                 .data           = &sysctl_max_map_count,
977                 .maxlen         = sizeof(sysctl_max_map_count),
978                 .mode           = 0644,
979                 .proc_handler   = &proc_dointvec
980         },
981 #endif
982         {
983                 .ctl_name       = VM_LAPTOP_MODE,
984                 .procname       = "laptop_mode",
985                 .data           = &laptop_mode,
986                 .maxlen         = sizeof(laptop_mode),
987                 .mode           = 0644,
988                 .proc_handler   = &proc_dointvec_jiffies,
989                 .strategy       = &sysctl_jiffies,
990         },
991         {
992                 .ctl_name       = VM_BLOCK_DUMP,
993                 .procname       = "block_dump",
994                 .data           = &block_dump,
995                 .maxlen         = sizeof(block_dump),
996                 .mode           = 0644,
997                 .proc_handler   = &proc_dointvec,
998                 .strategy       = &sysctl_intvec,
999                 .extra1         = &zero,
1000         },
1001         {
1002                 .ctl_name       = VM_VFS_CACHE_PRESSURE,
1003                 .procname       = "vfs_cache_pressure",
1004                 .data           = &sysctl_vfs_cache_pressure,
1005                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1006                 .mode           = 0644,
1007                 .proc_handler   = &proc_dointvec,
1008                 .strategy       = &sysctl_intvec,
1009                 .extra1         = &zero,
1010         },
1011 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1012         {
1013                 .ctl_name       = VM_LEGACY_VA_LAYOUT,
1014                 .procname       = "legacy_va_layout",
1015                 .data           = &sysctl_legacy_va_layout,
1016                 .maxlen         = sizeof(sysctl_legacy_va_layout),
1017                 .mode           = 0644,
1018                 .proc_handler   = &proc_dointvec,
1019                 .strategy       = &sysctl_intvec,
1020                 .extra1         = &zero,
1021         },
1022 #endif
1023 #ifdef CONFIG_NUMA
1024         {
1025                 .ctl_name       = VM_ZONE_RECLAIM_MODE,
1026                 .procname       = "zone_reclaim_mode",
1027                 .data           = &zone_reclaim_mode,
1028                 .maxlen         = sizeof(zone_reclaim_mode),
1029                 .mode           = 0644,
1030                 .proc_handler   = &proc_dointvec,
1031                 .strategy       = &sysctl_intvec,
1032                 .extra1         = &zero,
1033         },
1034         {
1035                 .ctl_name       = VM_MIN_UNMAPPED,
1036                 .procname       = "min_unmapped_ratio",
1037                 .data           = &sysctl_min_unmapped_ratio,
1038                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1039                 .mode           = 0644,
1040                 .proc_handler   = &sysctl_min_unmapped_ratio_sysctl_handler,
1041                 .strategy       = &sysctl_intvec,
1042                 .extra1         = &zero,
1043                 .extra2         = &one_hundred,
1044         },
1045         {
1046                 .ctl_name       = VM_MIN_SLAB,
1047                 .procname       = "min_slab_ratio",
1048                 .data           = &sysctl_min_slab_ratio,
1049                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1050                 .mode           = 0644,
1051                 .proc_handler   = &sysctl_min_slab_ratio_sysctl_handler,
1052                 .strategy       = &sysctl_intvec,
1053                 .extra1         = &zero,
1054                 .extra2         = &one_hundred,
1055         },
1056 #endif
1057 #ifdef CONFIG_SMP
1058         {
1059                 .ctl_name       = CTL_UNNUMBERED,
1060                 .procname       = "stat_interval",
1061                 .data           = &sysctl_stat_interval,
1062                 .maxlen         = sizeof(sysctl_stat_interval),
1063                 .mode           = 0644,
1064                 .proc_handler   = &proc_dointvec_jiffies,
1065                 .strategy       = &sysctl_jiffies,
1066         },
1067 #endif
1068 #ifdef CONFIG_SECURITY
1069         {
1070                 .ctl_name       = CTL_UNNUMBERED,
1071                 .procname       = "mmap_min_addr",
1072                 .data           = &mmap_min_addr,
1073                 .maxlen         = sizeof(unsigned long),
1074                 .mode           = 0644,
1075                 .proc_handler   = &proc_doulongvec_minmax,
1076         },
1077 #endif
1078 #ifdef CONFIG_NUMA
1079         {
1080                 .ctl_name       = CTL_UNNUMBERED,
1081                 .procname       = "numa_zonelist_order",
1082                 .data           = &numa_zonelist_order,
1083                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1084                 .mode           = 0644,
1085                 .proc_handler   = &numa_zonelist_order_handler,
1086                 .strategy       = &sysctl_string,
1087         },
1088 #endif
1089 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1090    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1091         {
1092                 .ctl_name       = VM_VDSO_ENABLED,
1093                 .procname       = "vdso_enabled",
1094                 .data           = &vdso_enabled,
1095                 .maxlen         = sizeof(vdso_enabled),
1096                 .mode           = 0644,
1097                 .proc_handler   = &proc_dointvec,
1098                 .strategy       = &sysctl_intvec,
1099                 .extra1         = &zero,
1100         },
1101 #endif
1102 /*
1103  * NOTE: do not add new entries to this table unless you have read
1104  * Documentation/sysctl/ctl_unnumbered.txt
1105  */
        /* Terminator: ctl_name 0 and no procname ends the table walk. */
1106         { .ctl_name = 0 }
1107 };
1108
1109 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Empty table (terminator only).  It is used as the .child of the
 * "binfmt_misc" directory entry in fs_table, so that directory starts out
 * with no sysctl files of its own.
 */
1110 static struct ctl_table binfmt_misc_table[] = {
1111         { .ctl_name = 0 }
1112 };
1113 #endif
1114
/*
 * fs_table - sysctl entries for filesystem-related values (inode, file and
 * dentry statistics, overflow uid/gid, leases, aio limits, ...).
 *
 * Leaf entries bind a procname to .data/.maxlen with a mode and a
 * proc_handler; the "inotify" and "binfmt_misc" entries are directories
 * (mode 0555 with a .child table and no .data).  #ifdef-guarded entries are
 * present only in matching configurations.  The trailing
 * { .ctl_name = 0 } entry with no procname terminates the table.
 *
 * NOTE(review): presumably attached under an "fs" directory of the root
 * table; that reference is outside this excerpt -- confirm.
 */
1115 static struct ctl_table fs_table[] = {
1116         {
1117                 .ctl_name       = FS_NRINODE,
1118                 .procname       = "inode-nr",
1119                 .data           = &inodes_stat,
1120                 .maxlen         = 2*sizeof(int),
1121                 .mode           = 0444,
1122                 .proc_handler   = &proc_dointvec,
1123         },
1124         {
                /* Same backing object as "inode-nr", exposed with a larger maxlen (7 ints). */
1125                 .ctl_name       = FS_STATINODE,
1126                 .procname       = "inode-state",
1127                 .data           = &inodes_stat,
1128                 .maxlen         = 7*sizeof(int),
1129                 .mode           = 0444,
1130                 .proc_handler   = &proc_dointvec,
1131         },
1132         {
                /* .ctl_name is left zero here; parse_table() skips such entries. */
1133                 .procname       = "file-nr",
1134                 .data           = &files_stat,
1135                 .maxlen         = 3*sizeof(int),
1136                 .mode           = 0444,
1137                 .proc_handler   = &proc_nr_files,
1138         },
1139         {
1140                 .ctl_name       = FS_MAXFILE,
1141                 .procname       = "file-max",
1142                 .data           = &files_stat.max_files,
1143                 .maxlen         = sizeof(int),
1144                 .mode           = 0644,
1145                 .proc_handler   = &proc_dointvec,
1146         },
1147         {
1148                 .ctl_name       = FS_DENTRY,
1149                 .procname       = "dentry-state",
1150                 .data           = &dentry_stat,
1151                 .maxlen         = 6*sizeof(int),
1152                 .mode           = 0444,
1153                 .proc_handler   = &proc_dointvec,
1154         },
1155         {
1156                 .ctl_name       = FS_OVERFLOWUID,
1157                 .procname       = "overflowuid",
1158                 .data           = &fs_overflowuid,
1159                 .maxlen         = sizeof(int),
1160                 .mode           = 0644,
1161                 .proc_handler   = &proc_dointvec_minmax,
1162                 .strategy       = &sysctl_intvec,
1163                 .extra1         = &minolduid,
1164                 .extra2         = &maxolduid,
1165         },
1166         {
1167                 .ctl_name       = FS_OVERFLOWGID,
1168                 .procname       = "overflowgid",
1169                 .data           = &fs_overflowgid,
1170                 .maxlen         = sizeof(int),
1171                 .mode           = 0644,
1172                 .proc_handler   = &proc_dointvec_minmax,
1173                 .strategy       = &sysctl_intvec,
1174                 .extra1         = &minolduid,
1175                 .extra2         = &maxolduid,
1176         },
1177         {
1178                 .ctl_name       = FS_LEASES,
1179                 .procname       = "leases-enable",
1180                 .data           = &leases_enable,
1181                 .maxlen         = sizeof(int),
1182                 .mode           = 0644,
1183                 .proc_handler   = &proc_dointvec,
1184         },
1185 #ifdef CONFIG_DNOTIFY
1186         {
1187                 .ctl_name       = FS_DIR_NOTIFY,
1188                 .procname       = "dir-notify-enable",
1189                 .data           = &dir_notify_enable,
1190                 .maxlen         = sizeof(int),
1191                 .mode           = 0644,
1192                 .proc_handler   = &proc_dointvec,
1193         },
1194 #endif
1195 #ifdef CONFIG_MMU
1196         {
1197                 .ctl_name       = FS_LEASE_TIME,
1198                 .procname       = "lease-break-time",
1199                 .data           = &lease_break_time,
1200                 .maxlen         = sizeof(int),
1201                 .mode           = 0644,
1202                 .proc_handler   = &proc_dointvec_minmax,
1203                 .strategy       = &sysctl_intvec,
1204                 .extra1         = &zero,
1205                 .extra2         = &two,
1206         },
1207         {
                /* .ctl_name is left zero here; parse_table() skips such entries. */
1208                 .procname       = "aio-nr",
1209                 .data           = &aio_nr,
1210                 .maxlen         = sizeof(aio_nr),
1211                 .mode           = 0444,
1212                 .proc_handler   = &proc_doulongvec_minmax,
1213         },
1214         {
                /* .ctl_name is left zero here; parse_table() skips such entries. */
1215                 .procname       = "aio-max-nr",
1216                 .data           = &aio_max_nr,
1217                 .maxlen         = sizeof(aio_max_nr),
1218                 .mode           = 0644,
1219                 .proc_handler   = &proc_doulongvec_minmax,
1220         },
1221 #ifdef CONFIG_INOTIFY_USER
1222         {
                /* Directory entry: no .data, children come from inotify_table. */
1223                 .ctl_name       = FS_INOTIFY,
1224                 .procname       = "inotify",
1225                 .mode           = 0555,
1226                 .child          = inotify_table,
1227         },
1228 #endif  
1229 #endif
1230         {
1231                 .ctl_name       = KERN_SETUID_DUMPABLE,
1232                 .procname       = "suid_dumpable",
1233                 .data           = &suid_dumpable,
1234                 .maxlen         = sizeof(int),
1235                 .mode           = 0644,
1236                 .proc_handler   = &proc_dointvec,
1237         },
1238 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1239         {
                /* Directory entry backed by the (empty) binfmt_misc_table. */
1240                 .ctl_name       = CTL_UNNUMBERED,
1241                 .procname       = "binfmt_misc",
1242                 .mode           = 0555,
1243                 .child          = binfmt_misc_table,
1244         },
1245 #endif
1246 /*
1247  * NOTE: do not add new entries to this table unless you have read
1248  * Documentation/sysctl/ctl_unnumbered.txt
1249  */
        /* Terminator: ctl_name 0 and no procname ends the table walk. */
1250         { .ctl_name = 0 }
1251 };
1252
/*
 * debug_table - debugging sysctls.  On X86 or PPC builds it carries a single
 * "exception-trace" entry exposing the int show_unhandled_signals through
 * proc_dointvec; on other builds only the terminator remains.
 */
1253 static struct ctl_table debug_table[] = {
1254 #if defined(CONFIG_X86) || defined(CONFIG_PPC)
1255         {
1256                 .ctl_name       = CTL_UNNUMBERED,
1257                 .procname       = "exception-trace",
1258                 .data           = &show_unhandled_signals,
1259                 .maxlen         = sizeof(int),
1260                 .mode           = 0644,
1261                 .proc_handler   = proc_dointvec
1262         },
1263 #endif
1264         { .ctl_name = 0 }
1265 };
1266
/* dev_table - intentionally empty here: it holds nothing but the terminator. */
1267 static struct ctl_table dev_table[] = {
1268         { .ctl_name = 0 }
1269 };
1270
/*
 * Spinlock held around ctl_table_header use counts (use_table/unuse_table)
 * and around changes to the list of registered headers (ctl_entry).
 */
1271 static DEFINE_SPINLOCK(sysctl_lock);
1272
1273 /* called under sysctl_lock */
1274 static int use_table(struct ctl_table_header *p)
1275 {
1276         if (unlikely(p->unregistering))
1277                 return 0;
1278         p->used++;
1279         return 1;
1280 }
1281
1282 /* called under sysctl_lock */
1283 static void unuse_table(struct ctl_table_header *p)
1284 {
1285         if (!--p->used)
1286                 if (unlikely(p->unregistering))
1287                         complete(p->unregistering);
1288 }
1289
1290 /* called under sysctl_lock, will reacquire if has to wait */
/*
 * Unlink header @p from the list of registered tables.
 *
 * If the header still has users (p->used != 0), an on-stack completion is
 * published through p->unregistering, sysctl_lock is dropped, and the caller
 * sleeps until unuse_table() completes it when the last user goes away.  The
 * lock is taken again before the entry is removed from the list, so the
 * function returns with sysctl_lock held just as it was entered.
 */
1291 static void start_unregistering(struct ctl_table_header *p)
1292 {
1293         /*
1294          * if p->used is 0, nobody will ever touch that entry again;
1295          * we'll eliminate all paths to it before dropping sysctl_lock
1296          */
1297         if (unlikely(p->used)) {
1298                 struct completion wait;
1299                 init_completion(&wait);
                /* From here on use_table() refuses new references to p. */
1300                 p->unregistering = &wait;
                /* Cannot sleep with the spinlock held: drop it around the wait. */
1301                 spin_unlock(&sysctl_lock);
1302                 wait_for_completion(&wait);
1303                 spin_lock(&sysctl_lock);
1304         }
1305         /*
1306          * do not remove from the list until nobody holds it; walking the
1307          * list in do_sysctl() relies on that.
1308          */
1309         list_del_init(&p->ctl_entry);
1310 }
1311
1312 void sysctl_head_finish(struct ctl_table_header *head)
1313 {
1314         if (!head)
1315                 return;
1316         spin_lock(&sysctl_lock);
1317         unuse_table(head);
1318         spin_unlock(&sysctl_lock);
1319 }
1320
/*
 * sysctl_head_next - step to the next usable registered table header
 * @prev: header returned by an earlier call, or NULL to start the walk
 *
 * With @prev == NULL the walk begins at root_table_header itself.  Otherwise
 * the reference held on @prev is dropped and the search resumes with the
 * entry following it.  Headers for which use_table() fails (they are being
 * unregistered) are skipped.
 *
 * Returns a header whose use count has been raised -- the caller releases it
 * either by passing it back in as @prev or via sysctl_head_finish() -- or
 * NULL once the walk has wrapped around to root_table_header.  The whole
 * step runs under sysctl_lock.
 */
1321 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1322 {
1323         struct ctl_table_header *head;
1324         struct list_head *tmp;
1325         spin_lock(&sysctl_lock);
1326         if (prev) {
1327                 tmp = &prev->ctl_entry;
1328                 unuse_table(prev);
                /* Jump into the loop body to advance past prev before testing anything. */
1329                 goto next;
1330         }
1331         tmp = &root_table_header.ctl_entry;
1332         for (;;) {
1333                 head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1334
1335                 if (!use_table(head))
1336                         goto next;
1337                 spin_unlock(&sysctl_lock);
1338                 return head;
1339         next:
1340                 tmp = tmp->next;
                /* Back at the list head: every entry has been visited. */
1341                 if (tmp == &root_table_header.ctl_entry)
1342                         break;
1343         }
1344         spin_unlock(&sysctl_lock);
1345         return NULL;
1346 }
1347
1348 #ifdef CONFIG_SYSCTL_SYSCALL
1349 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1350                void __user *newval, size_t newlen)
1351 {
1352         struct ctl_table_header *head;
1353         int error = -ENOTDIR;
1354
1355         if (nlen <= 0 || nlen >= CTL_MAXNAME)
1356                 return -ENOTDIR;
1357         if (oldval) {
1358                 int old_len;
1359                 if (!oldlenp || get_user(old_len, oldlenp))
1360                         return -EFAULT;
1361         }
1362
1363         for (head = sysctl_head_next(NULL); head;
1364                         head = sysctl_head_next(head)) {
1365                 error = parse_table(name, nlen, oldval, oldlenp, 
1366                                         newval, newlen, head->ctl_table);
1367                 if (error != -ENOTDIR) {
1368                         sysctl_head_finish(head);
1369                         break;
1370                 }
1371         }
1372         return error;
1373 }
1374
1375 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1376 {
1377         struct __sysctl_args tmp;
1378         int error;
1379
1380         if (copy_from_user(&tmp, args, sizeof(tmp)))
1381                 return -EFAULT;
1382
1383         error = deprecated_sysctl_warning(&tmp);
1384         if (error)
1385                 goto out;
1386
1387         lock_kernel();
1388         error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1389                           tmp.newval, tmp.newlen);
1390         unlock_kernel();
1391 out:
1392         return error;
1393 }
1394 #endif /* CONFIG_SYSCTL_SYSCALL */
1395
1396 /*
1397  * sysctl_perm does NOT grant the superuser all rights automatically, because
1398  * some sysctl variables are readonly even to root.
1399  */
1400
1401 static int test_perm(int mode, int op)
1402 {
1403         if (!current->euid)
1404                 mode >>= 6;
1405         else if (in_egroup_p(0))
1406                 mode >>= 3;
1407         if ((mode & op & 0007) == op)
1408                 return 0;
1409         return -EACCES;
1410 }
1411
1412 int sysctl_perm(struct ctl_table *table, int op)
1413 {
1414         int error;
1415         error = security_sysctl(table, op);
1416         if (error)
1417                 return error;
1418         return test_perm(table->mode, op);
1419 }
1420
1421 #ifdef CONFIG_SYSCTL_SYSCALL
1422 static int parse_table(int __user *name, int nlen,
1423                        void __user *oldval, size_t __user *oldlenp,
1424                        void __user *newval, size_t newlen,
1425                        struct ctl_table *table)
1426 {
1427         int n;
1428 repeat:
1429         if (!nlen)
1430                 return -ENOTDIR;
1431         if (get_user(n, name))
1432                 return -EFAULT;
1433         for ( ; table->ctl_name || table->procname; table++) {
1434                 if (!table->ctl_name)
1435                         continue;
1436                 if (n == table->ctl_name) {
1437                         int error;
1438                         if (table->child) {
1439                                 if (sysctl_perm(table, 001))
1440                                         return -EPERM;
1441                                 name++;
1442                                 nlen--;
1443                                 table = table->child;
1444                                 goto repeat;
1445                         }
1446                         error = do_sysctl_strategy(table, name, nlen,
1447                                                    oldval, oldlenp,
1448                                                    newval, newlen);
1449                         return error;
1450                 }
1451         }
1452         return -ENOTDIR;
1453 }
1454
1455 /* Perform the actual read/write of a sysctl table entry. */
1456 int do_sysctl_strategy (struct ctl_table *table,
1457                         int __user *name, int nlen,
1458                         void __user *oldval, size_t __user *oldlenp,
1459                         void __user *newval, size_t newlen)
1460 {
1461         int op = 0, rc;
1462
1463         if (oldval)
1464                 op |= 004;
1465         if (newval) 
1466                 op |= 002;
1467         if (sysctl_perm(table, op))
1468                 return -EPERM;
1469
1470         if (table->strategy) {
1471                 rc = table->strategy(table, name, nlen, oldval, oldlenp,
1472                                      newval, newlen);
1473                 if (rc < 0)
1474                         return rc;
1475                 if (rc > 0)
1476                         return 0;
1477         }
1478
1479         /* If there is no strategy routine, or if the strategy returns
1480          * zero, proceed with automatic r/w */
1481         if (table->data && table->maxlen) {
1482                 rc = sysctl_data(table, name, nlen, oldval, oldlenp,
1483                                  newval, newlen);
1484                 if (rc < 0)
1485                         return rc;
1486         }
1487         return 0;
1488 }
1489 #endif /* CONFIG_SYSCTL_SYSCALL */
1490
1491 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1492 {
1493         for (; table->ctl_name || table->procname; table++) {
1494                 table->parent = parent;
1495                 if (table->child)
1496                         sysctl_set_parent(table, table->child);
1497         }
1498 }
1499
1500 static __init int sysctl_init(void)
1501 {
1502         int err;
1503         sysctl_set_parent(NULL, root_table);
1504         err = sysctl_check_table(root_table);
1505         return 0;
1506 }
1507
1508 core_initcall(sysctl_init);
1509
1510 /**
1511  * register_sysctl_table - register a sysctl hierarchy
1512  * @table: the top-level table structure
1513  *
1514  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1515  * array. An entry with ctl_name 0 and procname %NULL terminates the table.
1516  *
1517  * The members of the &struct ctl_table structure are used as follows:
1518  *
1519  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1520  *            must be unique within that level of sysctl
1521  *
1522  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1523  *            enter a sysctl file
1524  *
1525  * data - a pointer to data for use by proc_handler
1526  *
1527  * maxlen - the maximum size in bytes of the data
1528  *
1529  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1530  *
1531  * child - a pointer to the child sysctl table if this entry is a directory, or
1532  *         %NULL.
1533  *
1534  * proc_handler - the text handler routine (described below)
1535  *
1536  * strategy - the strategy routine (described below)
1537  *
1538  * de - for internal use by the sysctl routines
1539  *
1540  * extra1, extra2 - extra pointers usable by the proc handler routines
1541  *
1542  * Leaf nodes in the sysctl tree will be represented by a single file
1543  * under /proc; non-leaf nodes will be represented by directories.
1544  *
1545  * sysctl(2) can automatically manage read and write requests through
1546  * the sysctl table.  The data and maxlen fields of the ctl_table
1547  * struct enable minimal validation of the values being written to be
1548  * performed, and the mode field allows minimal authentication.
1549  *
1550  * More sophisticated management can be enabled by the provision of a
1551  * strategy routine with the table entry.  This will be called before
1552  * any automatic read or write of the data is performed.
1553  *
1554  * The strategy routine may return
1555  *
1556  * < 0 - Error occurred (error is passed to user process)
1557  *
1558  * 0   - OK - proceed with automatic read or write.
1559  *
1560  * > 0 - OK - read or write has been done by the strategy routine, so
1561  *       return immediately.
1562  *
1563  * There must be a proc_handler routine for any terminal nodes
1564  * mirrored under /proc/sys (non-terminals are handled by a built-in
1565  * directory handler).  Several default handlers are available to
1566  * cover common cases -
1567  *
1568  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1569  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 
1570  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1571  *
1572  * It is the handler's job to read the input buffer from user memory
1573  * and process it. The handler should return 0 on success.
1574  *
1575  * This routine returns %NULL on a failure to register, and a pointer
1576  * to the table header on success.
1577  */
1578 struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
1579 {
1580         struct ctl_table_header *tmp;
1581         tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1582         if (!tmp)
1583                 return NULL;
1584         tmp->ctl_table = table;
1585         INIT_LIST_HEAD(&tmp->ctl_entry);
1586         tmp->used = 0;
1587         tmp->unregistering = NULL;
1588         sysctl_set_parent(NULL, table);
1589         if (sysctl_check_table(tmp->ctl_table)) {
1590                 kfree(tmp);
1591                 return NULL;
1592         }
1593         spin_lock(&sysctl_lock);
1594         list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1595         spin_unlock(&sysctl_lock);
1596         return tmp;
1597 }
1598
1599 /**
1600  * unregister_sysctl_table - unregister a sysctl table hierarchy
1601  * @header: the header returned from register_sysctl_table
1602  *
1603  * Unregisters the sysctl table and all children. proc entries may not
1604  * actually be removed until they are no longer used by anyone.
1605  */
1606 void unregister_sysctl_table(struct ctl_table_header * header)
1607 {
1608         might_sleep();
1609
1610         if (header == NULL)
1611                 return;
1612
1613         spin_lock(&sysctl_lock);
1614         start_unregistering(header);
1615         spin_unlock(&sysctl_lock);
1616         kfree(header);
1617 }
1618
1619 #else /* !CONFIG_SYSCTL */
/* Stub for kernels built without CONFIG_SYSCTL: always returns NULL. */
struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
{
	struct ctl_table_header *none = NULL;

	return none;
}
1624
/* Stub for kernels built without CONFIG_SYSCTL: does nothing. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
	return;
}
1628
1629 #endif /* CONFIG_SYSCTL */
1630
1631 /*
1632  * /proc/sys support
1633  */
1634
1635 #ifdef CONFIG_PROC_SYSCTL
1636
1637 static int _proc_do_string(void* data, int maxlen, int write,
1638                            struct file *filp, void __user *buffer,
1639                            size_t *lenp, loff_t *ppos)
1640 {
1641         size_t len;
1642         char __user *p;
1643         char c;
1644
1645         if (!data || !maxlen || !*lenp) {
1646                 *lenp = 0;
1647                 return 0;
1648         }
1649
1650         if (write) {
1651                 len = 0;
1652                 p = buffer;
1653                 while (len < *lenp) {
1654                         if (get_user(c, p++))
1655                                 return -EFAULT;
1656                         if (c == 0 || c == '\n')
1657                                 break;
1658                         len++;
1659                 }
1660                 if (len >= maxlen)
1661                         len = maxlen-1;
1662                 if(copy_from_user(data, buffer, len))
1663                         return -EFAULT;
1664                 ((char *) data)[len] = 0;
1665                 *ppos += *lenp;
1666         } else {
1667                 len = strlen(data);
1668                 if (len > maxlen)
1669                         len = maxlen;
1670
1671                 if (*ppos > len) {
1672                         *lenp = 0;
1673                         return 0;
1674                 }
1675
1676                 data += *ppos;
1677                 len  -= *ppos;
1678
1679                 if (len > *lenp)
1680                         len = *lenp;
1681                 if (len)
1682                         if(copy_to_user(buffer, data, len))
1683                                 return -EFAULT;
1684                 if (len < *lenp) {
1685                         if(put_user('\n', ((char __user *) buffer) + len))
1686                                 return -EFAULT;
1687                         len++;
1688                 }
1689                 *lenp = len;
1690                 *ppos += len;
1691         }
1692         return 0;
1693 }
1694
1695 /**
1696  * proc_dostring - read a string sysctl
1697  * @table: the sysctl table
1698  * @write: %TRUE if this is a write to the sysctl file
1699  * @filp: the file structure
1700  * @buffer: the user buffer
1701  * @lenp: the size of the user buffer
1702  * @ppos: file position
1703  *
1704  * Reads/writes a string from/to the user buffer. If the kernel
1705  * buffer provided is not large enough to hold the string, the
1706  * string is truncated. The copied string is %NULL-terminated.
1707  * If the string is being read by the user process, it is copied
1708  * and a newline '\n' is added. It is truncated if the buffer is
1709  * not large enough.
1710  *
1711  * Returns 0 on success.
1712  */
1713 int proc_dostring(struct ctl_table *table, int write, struct file *filp,
1714                   void __user *buffer, size_t *lenp, loff_t *ppos)
1715 {
1716         return _proc_do_string(table->data, table->maxlen, write, filp,
1717                                buffer, lenp, ppos);
1718 }
1719
1720
/*
 * Default int converter used by the dointvec machinery.
 *
 * write != 0: store the (sign, magnitude) pair *negp / *lvalp into *valp.
 * write == 0: split *valp into *negp (-1 if negative, else 0) and the
 *             magnitude *lvalp.
 *
 * @data is unused.  Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	int cur;

	if (write) {
		unsigned long magnitude = *lvalp;

		*valp = *negp ? -magnitude : magnitude;
		return 0;
	}

	cur = *valp;
	if (cur >= 0) {
		*negp = 0;
		*lvalp = (unsigned long)cur;
	} else {
		*negp = -1;
		*lvalp = (unsigned long)-cur;
	}
	return 0;
}
1739
1740 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
1741                   int write, struct file *filp, void __user *buffer,
1742                   size_t *lenp, loff_t *ppos,
1743                   int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1744                               int write, void *data),
1745                   void *data)
1746 {
1747 #define TMPBUFLEN 21
1748         int *i, vleft, first=1, neg, val;
1749         unsigned long lval;
1750         size_t left, len;
1751         
1752         char buf[TMPBUFLEN], *p;
1753         char __user *s = buffer;
1754         
1755         if (!tbl_data || !table->maxlen || !*lenp ||
1756             (*ppos && !write)) {
1757                 *lenp = 0;
1758                 return 0;
1759         }
1760         
1761         i = (int *) tbl_data;
1762         vleft = table->maxlen / sizeof(*i);
1763         left = *lenp;
1764
1765         if (!conv)
1766                 conv = do_proc_dointvec_conv;
1767
1768         for (; left && vleft--; i++, first=0) {
1769                 if (write) {
1770                         while (left) {
1771                                 char c;
1772                                 if (get_user(c, s))
1773                                         return -EFAULT;
1774                                 if (!isspace(c))
1775                                         break;
1776                                 left--;
1777                                 s++;
1778                         }
1779                         if (!left)
1780                                 break;
1781                         neg = 0;
1782                         len = left;
1783                         if (len > sizeof(buf) - 1)
1784                                 len = sizeof(buf) - 1;
1785                         if (copy_from_user(buf, s, len))
1786                                 return -EFAULT;
1787                         buf[len] = 0;
1788                         p = buf;
1789                         if (*p == '-' && left > 1) {
1790                                 neg = 1;
1791                                 p++;
1792                         }
1793                         if (*p < '0' || *p > '9')
1794                                 break;
1795
1796                         lval = simple_strtoul(p, &p, 0);
1797
1798                         len = p-buf;
1799                         if ((len < left) && *p && !isspace(*p))
1800                                 break;
1801                         if (neg)
1802                                 val = -val;
1803                         s += len;
1804                         left -= len;
1805
1806                         if (conv(&neg, &lval, i, 1, data))
1807                                 break;
1808                 } else {
1809                         p = buf;
1810                         if (!first)
1811                                 *p++ = '\t';
1812         
1813                         if (conv(&neg, &lval, i, 0, data))
1814                                 break;
1815
1816                         sprintf(p, "%s%lu", neg ? "-" : "", lval);
1817                         len = strlen(buf);
1818                         if (len > left)
1819                                 len = left;
1820                         if(copy_to_user(s, buf, len))
1821                                 return -EFAULT;
1822                         left -= len;
1823                         s += len;
1824                 }
1825         }
1826
1827         if (!write && !first && left) {
1828                 if(put_user('\n', s))
1829                         return -EFAULT;
1830                 left--, s++;
1831         }
1832         if (write) {
1833                 while (left) {
1834                         char c;
1835                         if (get_user(c, s++))
1836                                 return -EFAULT;
1837                         if (!isspace(c))
1838                                 break;
1839                         left--;
1840                 }
1841         }
1842         if (write && first)
1843                 return -EINVAL;
1844         *lenp -= left;
1845         *ppos += *lenp;
1846         return 0;
1847 #undef TMPBUFLEN
1848 }
1849
1850 static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp,
1851                   void __user *buffer, size_t *lenp, loff_t *ppos,
1852                   int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1853                               int write, void *data),
1854                   void *data)
1855 {
1856         return __do_proc_dointvec(table->data, table, write, filp,
1857                         buffer, lenp, ppos, conv, data);
1858 }
1859
1860 /**
1861  * proc_dointvec - read a vector of integers
1862  * @table: the sysctl table
1863  * @write: %TRUE if this is a write to the sysctl file
1864  * @filp: the file structure
1865  * @buffer: the user buffer
1866  * @lenp: the size of the user buffer
1867  * @ppos: file position
1868  *
1869  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1870  * values from/to the user buffer, treated as an ASCII string. 
1871  *
1872  * Returns 0 on success.
1873  */
1874 int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
1875                      void __user *buffer, size_t *lenp, loff_t *ppos)
1876 {
1877     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1878                             NULL,NULL);
1879 }
1880
/* How a written value is combined with the stored one. */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2

/*
 * Converter that combines a written value with the stored int.
 *
 * @data points to an int holding one of the OP_* codes above.  On write
 * the parsed value replaces (OP_SET), is ANDed into (OP_AND) or is ORed
 * into (OP_OR) *valp; any other code leaves *valp untouched.  On read
 * *valp is split into sign (*negp) and magnitude (*lvalp).
 *
 * Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	int v;

	if (!write) {
		v = *valp;
		if (v >= 0) {
			*negp = 0;
			*lvalp = (unsigned long)v;
		} else {
			*negp = -1;
			*lvalp = (unsigned long)-v;
		}
		return 0;
	}

	v = *negp ? -*lvalp : *lvalp;
	if (op == OP_SET)
		*valp = v;
	else if (op == OP_AND)
		*valp &= v;
	else if (op == OP_OR)
		*valp |= v;

	return 0;
}
1909
#ifdef CONFIG_SECURITY_CAPABILITIES
/*
 *	init may raise the set.
 *
 *	Writes need CAP_SYS_MODULE (-EPERM otherwise).  A write from the
 *	global init task replaces the stored value (OP_SET); a write from
 *	any other task is ANDed into it (OP_AND).
 */

int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int op;

	if (write && !capable(CAP_SYS_MODULE))
		return -EPERM;

	if (is_global_init(current))
		op = OP_SET;
	else
		op = OP_AND;

	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
				do_proc_dointvec_bset_conv, &op);
}
#endif /* def CONFIG_SECURITY_CAPABILITIES */
1929
1930 /*
1931  *      Taint values can only be increased
1932  */
1933 static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp,
1934                                void __user *buffer, size_t *lenp, loff_t *ppos)
1935 {
1936         int op;
1937
1938         if (write && !capable(CAP_SYS_ADMIN))
1939                 return -EPERM;
1940
1941         op = OP_OR;
1942         return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1943                                 do_proc_dointvec_bset_conv,&op);
1944 }
1945
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL means "none". */
struct do_proc_dointvec_minmax_conv_param {
	int *min;	/* lowest accepted value, or NULL */
	int *max;	/* highest accepted value, or NULL */
};

/*
 * Range-checking int converter.
 *
 * @data points to a struct do_proc_dointvec_minmax_conv_param.  On
 * write the parsed value is stored in *valp only if it lies within the
 * supplied bounds; otherwise -EINVAL is returned and *valp is left
 * alone.  On read *valp is split into sign (*negp) and magnitude
 * (*lvalp) and 0 is returned.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *bounds = data;
	int v;

	if (!write) {
		v = *valp;
		if (v >= 0) {
			*negp = 0;
			*lvalp = (unsigned long)v;
		} else {
			*negp = -1;
			*lvalp = (unsigned long)-v;
		}
		return 0;
	}

	v = *negp ? -*lvalp : *lvalp;
	if (bounds->min && *bounds->min > v)
		return -EINVAL;
	if (bounds->max && *bounds->max < v)
		return -EINVAL;

	*valp = v;
	return 0;
}
1974
1975 /**
1976  * proc_dointvec_minmax - read a vector of integers with min/max values
1977  * @table: the sysctl table
1978  * @write: %TRUE if this is a write to the sysctl file
1979  * @filp: the file structure
1980  * @buffer: the user buffer
1981  * @lenp: the size of the user buffer
1982  * @ppos: file position
1983  *
1984  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1985  * values from/to the user buffer, treated as an ASCII string.
1986  *
1987  * This routine will ensure the values are within the range specified by
1988  * table->extra1 (min) and table->extra2 (max).
1989  *
1990  * Returns 0 on success.
1991  */
1992 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
1993                   void __user *buffer, size_t *lenp, loff_t *ppos)
1994 {
1995         struct do_proc_dointvec_minmax_conv_param param = {
1996                 .min = (int *) table->extra1,
1997                 .max = (int *) table->extra2,
1998         };
1999         return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2000                                 do_proc_dointvec_minmax_conv, &param);
2001 }
2002
2003 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2004                                      struct file *filp,
2005                                      void __user *buffer,
2006                                      size_t *lenp, loff_t *ppos,
2007                                      unsigned long convmul,
2008                                      unsigned long convdiv)
2009 {
2010 #define TMPBUFLEN 21
2011         unsigned long *i, *min, *max, val;
2012         int vleft, first=1, neg;
2013         size_t len, left;
2014         char buf[TMPBUFLEN], *p;
2015         char __user *s = buffer;
2016         
2017         if (!data || !table->maxlen || !*lenp ||
2018             (*ppos && !write)) {
2019                 *lenp = 0;
2020                 return 0;
2021         }
2022         
2023         i = (unsigned long *) data;
2024         min = (unsigned long *) table->extra1;
2025         max = (unsigned long *) table->extra2;
2026         vleft = table->maxlen / sizeof(unsigned long);
2027         left = *lenp;
2028         
2029         for (; left && vleft--; i++, min++, max++, first=0) {
2030                 if (write) {
2031                         while (left) {
2032                                 char c;
2033                                 if (get_user(c, s))
2034                                         return -EFAULT;
2035                                 if (!isspace(c))
2036                                         break;
2037                                 left--;
2038                                 s++;
2039                         }
2040                         if (!left)
2041                                 break;
2042                         neg = 0;
2043                         len = left;
2044                         if (len > TMPBUFLEN-1)
2045                                 len = TMPBUFLEN-1;
2046                         if (copy_from_user(buf, s, len))
2047                                 return -EFAULT;
2048                         buf[len] = 0;
2049                         p = buf;
2050                         if (*p == '-' && left > 1) {
2051                                 neg = 1;
2052                                 p++;
2053                         }
2054                         if (*p < '0' || *p > '9')
2055                                 break;
2056                         val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2057                         len = p-buf;
2058                         if ((len < left) && *p && !isspace(*p))
2059                                 break;
2060                         if (neg)
2061                                 val = -val;
2062                         s += len;
2063                         left -= len;
2064
2065                         if(neg)
2066                                 continue;
2067                         if ((min && val < *min) || (max && val > *max))
2068                                 continue;
2069                         *i = val;
2070                 } else {
2071                         p = buf;
2072                         if (!first)
2073                                 *p++ = '\t';
2074                         sprintf(p, "%lu", convdiv * (*i) / convmul);
2075                         len = strlen(buf);
2076                         if (len > left)
2077                                 len = left;
2078                         if(copy_to_user(s, buf, len))
2079                                 return -EFAULT;
2080                         left -= len;
2081                         s += len;
2082                 }
2083         }
2084
2085         if (!write && !first && left) {
2086                 if(put_user('\n', s))
2087                         return -EFAULT;
2088                 left--, s++;
2089         }
2090         if (write) {
2091                 while (left) {
2092                         char c;
2093                         if (get_user(c, s++))
2094                                 return -EFAULT;
2095                         if (!isspace(c))
2096                                 break;
2097                         left--;
2098                 }
2099         }
2100         if (write && first)
2101                 return -EINVAL;
2102         *lenp -= left;
2103         *ppos += *lenp;
2104         return 0;
2105 #undef TMPBUFLEN
2106 }
2107
2108 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2109                                      struct file *filp,
2110                                      void __user *buffer,
2111                                      size_t *lenp, loff_t *ppos,
2112                                      unsigned long convmul,
2113                                      unsigned long convdiv)
2114 {
2115         return __do_proc_doulongvec_minmax(table->data, table, write,
2116                         filp, buffer, lenp, ppos, convmul, convdiv);
2117 }
2118
2119 /**
2120  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2121  * @table: the sysctl table
2122  * @write: %TRUE if this is a write to the sysctl file
2123  * @filp: the file structure
2124  * @buffer: the user buffer
2125  * @lenp: the size of the user buffer
2126  * @ppos: file position
2127  *
2128  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2129  * values from/to the user buffer, treated as an ASCII string.
2130  *
2131  * This routine will ensure the values are within the range specified by
2132  * table->extra1 (min) and table->extra2 (max).
2133  *
2134  * Returns 0 on success.
2135  */
2136 int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
2137                            void __user *buffer, size_t *lenp, loff_t *ppos)
2138 {
2139     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2140 }
2141
2142 /**
2143  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2144  * @table: the sysctl table
2145  * @write: %TRUE if this is a write to the sysctl file
2146  * @filp: the file structure
2147  * @buffer: the user buffer
2148  * @lenp: the size of the user buffer
2149  * @ppos: file position
2150  *
2151  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2152  * values from/to the user buffer, treated as an ASCII string. The values
2153  * are treated as milliseconds, and converted to jiffies when they are stored.
2154  *
2155  * This routine will ensure the values are within the range specified by
2156  * table->extra1 (min) and table->extra2 (max).
2157  *
2158  * Returns 0 on success.
2159  */
2160 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2161                                       struct file *filp,
2162                                       void __user *buffer,
2163                                       size_t *lenp, loff_t *ppos)
2164 {
2165     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2166                                      lenp, ppos, HZ, 1000l);
2167 }
2168
2169
2170 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2171                                          int *valp,
2172                                          int write, void *data)
2173 {
2174         if (write) {
2175                 if (*lvalp > LONG_MAX / HZ)
2176                         return 1;
2177                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2178         } else {
2179                 int val = *valp;
2180                 unsigned long lval;
2181                 if (val < 0) {
2182                         *negp = -1;
2183                         lval = (unsigned long)-val;
2184                 } else {
2185                         *negp = 0;
2186                         lval = (unsigned long)val;
2187                 }
2188                 *lvalp = lval / HZ;
2189         }
2190         return 0;
2191 }
2192
2193 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2194                                                 int *valp,
2195                                                 int write, void *data)
2196 {
2197         if (write) {
2198                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2199                         return 1;
2200                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2201         } else {
2202                 int val = *valp;
2203                 unsigned long lval;
2204                 if (val < 0) {
2205                         *negp = -1;
2206                         lval = (unsigned long)-val;
2207                 } else {
2208                         *negp = 0;
2209                         lval = (unsigned long)val;
2210                 }
2211                 *lvalp = jiffies_to_clock_t(lval);
2212         }
2213         return 0;
2214 }
2215
/*
 * Converter between user-visible milliseconds and stored jiffies.
 *
 * write != 0: store msecs_to_jiffies(+/- *lvalp) in *valp.
 * write == 0: split *valp into sign (*negp) and magnitude, and report
 *             jiffies_to_msecs(magnitude) in *lvalp.
 *
 * @data is unused.  Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	int cur;

	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
		return 0;
	}

	cur = *valp;
	if (cur >= 0) {
		*negp = 0;
		magnitude = (unsigned long)cur;
	} else {
		*negp = -1;
		magnitude = (unsigned long)-cur;
	}
	*lvalp = jiffies_to_msecs(magnitude);
	return 0;
}
2236
2237 /**
2238  * proc_dointvec_jiffies - read a vector of integers as seconds
2239  * @table: the sysctl table
2240  * @write: %TRUE if this is a write to the sysctl file
2241  * @filp: the file structure
2242  * @buffer: the user buffer
2243  * @lenp: the size of the user buffer
2244  * @ppos: file position
2245  *
2246  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2247  * values from/to the user buffer, treated as an ASCII string. 
2248  * The values read are assumed to be in seconds, and are converted into
2249  * jiffies.
2250  *
2251  * Returns 0 on success.
2252  */
2253 int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
2254                           void __user *buffer, size_t *lenp, loff_t *ppos)
2255 {
2256     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2257                             do_proc_dointvec_jiffies_conv,NULL);
2258 }
2259
2260 /**
2261  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2262  * @table: the sysctl table
2263  * @write: %TRUE if this is a write to the sysctl file
2264  * @filp: the file structure
2265  * @buffer: the user buffer
2266  * @lenp: the size of the user buffer
2267  * @ppos: pointer to the file position
2268  *
2269  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2270  * values from/to the user buffer, treated as an ASCII string. 
2271  * The values read are assumed to be in 1/USER_HZ seconds, and 
2272  * are converted into jiffies.
2273  *
2274  * Returns 0 on success.
2275  */
2276 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
2277                                  void __user *buffer, size_t *lenp, loff_t *ppos)
2278 {
2279     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2280                             do_proc_dointvec_userhz_jiffies_conv,NULL);
2281 }
2282
2283 /**
2284  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2285  * @table: the sysctl table
2286  * @write: %TRUE if this is a write to the sysctl file
2287  * @filp: the file structure
2288  * @buffer: the user buffer
2289  * @lenp: the size of the user buffer
2290  * @ppos: file position
2291  * @ppos: the current position in the file
2292  *
2293  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2294  * values from/to the user buffer, treated as an ASCII string. 
2295  * The values read are assumed to be in 1/1000 seconds, and 
2296  * are converted into jiffies.
2297  *
2298  * Returns 0 on success.
2299  */
2300 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
2301                              void __user *buffer, size_t *lenp, loff_t *ppos)
2302 {
2303         return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2304                                 do_proc_dointvec_ms_jiffies_conv, NULL);
2305 }
2306
2307 static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
2308                            void __user *buffer, size_t *lenp, loff_t *ppos)
2309 {
2310         struct pid *new_pid;
2311         pid_t tmp;
2312         int r;
2313
2314         tmp = pid_nr_ns(cad_pid, current->nsproxy->pid_ns);
2315
2316         r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2317                                lenp, ppos, NULL, NULL);
2318         if (r || !write)
2319                 return r;
2320
2321         new_pid = find_get_pid(tmp);
2322         if (!new_pid)
2323                 return -ESRCH;
2324
2325         put_pid(xchg(&cad_pid, new_pid));
2326         return 0;
2327 }
2328
2329 #else /* CONFIG_PROC_FS */
2330
2331 int proc_dostring(struct ctl_table *table, int write, struct file *filp,
2332                   void __user *buffer, size_t *lenp, loff_t *ppos)
2333 {
2334         return -ENOSYS;
2335 }
2336
2337 int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
2338                   void __user *buffer, size_t *lenp, loff_t *ppos)
2339 {
2340         return -ENOSYS;
2341 }
2342
2343 int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
2344                         void __user *buffer, size_t *lenp, loff_t *ppos)
2345 {
2346         return -ENOSYS;
2347 }
2348
2349 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
2350                     void __user *buffer, size_t *lenp, loff_t *ppos)
2351 {
2352         return -ENOSYS;
2353 }
2354
2355 int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
2356                     void __user *buffer, size_t *lenp, loff_t *ppos)
2357 {
2358         return -ENOSYS;
2359 }
2360
2361 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
2362                     void __user *buffer, size_t *lenp, loff_t *ppos)
2363 {
2364         return -ENOSYS;
2365 }
2366
2367 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
2368                              void __user *buffer, size_t *lenp, loff_t *ppos)
2369 {
2370         return -ENOSYS;
2371 }
2372
2373 int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
2374                     void __user *buffer, size_t *lenp, loff_t *ppos)
2375 {
2376         return -ENOSYS;
2377 }
2378
2379 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2380                                       struct file *filp,
2381                                       void __user *buffer,
2382                                       size_t *lenp, loff_t *ppos)
2383 {
2384     return -ENOSYS;
2385 }
2386
2387
2388 #endif /* CONFIG_PROC_FS */
2389
2390
2391 #ifdef CONFIG_SYSCTL_SYSCALL
2392 /*
2393  * General sysctl support routines 
2394  */
2395
2396 /* The generic sysctl data routine (used if no strategy routine supplied) */
2397 int sysctl_data(struct ctl_table *table, int __user *name, int nlen,
2398                 void __user *oldval, size_t __user *oldlenp,
2399                 void __user *newval, size_t newlen)
2400 {
2401         size_t len;
2402
2403         /* Get out of I don't have a variable */
2404         if (!table->data || !table->maxlen)
2405                 return -ENOTDIR;
2406
2407         if (oldval && oldlenp) {
2408                 if (get_user(len, oldlenp))
2409                         return -EFAULT;
2410                 if (len) {
2411                         if (len > table->maxlen)
2412                                 len = table->maxlen;
2413                         if (copy_to_user(oldval, table->data, len))
2414                                 return -EFAULT;
2415                         if (put_user(len, oldlenp))
2416                                 return -EFAULT;
2417                 }
2418         }
2419
2420         if (newval && newlen) {
2421                 if (newlen > table->maxlen)
2422                         newlen = table->maxlen;
2423
2424                 if (copy_from_user(table->data, newval, newlen))
2425                         return -EFAULT;
2426         }
2427         return 1;
2428 }
2429
2430 /* The generic string strategy routine: */
2431 int sysctl_string(struct ctl_table *table, int __user *name, int nlen,
2432                   void __user *oldval, size_t __user *oldlenp,
2433                   void __user *newval, size_t newlen)
2434 {
2435         if (!table->data || !table->maxlen) 
2436                 return -ENOTDIR;
2437         
2438         if (oldval && oldlenp) {
2439                 size_t bufsize;
2440                 if (get_user(bufsize, oldlenp))
2441                         return -EFAULT;
2442                 if (bufsize) {
2443                         size_t len = strlen(table->data), copied;
2444
2445                         /* This shouldn't trigger for a well-formed sysctl */
2446                         if (len > table->maxlen)
2447                                 len = table->maxlen;
2448
2449                         /* Copy up to a max of bufsize-1 bytes of the string */
2450                         copied = (len >= bufsize) ? bufsize - 1 : len;
2451
2452                         if (copy_to_user(oldval, table->data, copied) ||
2453                             put_user(0, (char __user *)(oldval + copied)))
2454                                 return -EFAULT;
2455                         if (put_user(len, oldlenp))
2456                                 return -EFAULT;
2457                 }
2458         }
2459         if (newval && newlen) {
2460                 size_t len = newlen;
2461                 if (len > table->maxlen)
2462                         len = table->maxlen;
2463                 if(copy_from_user(table->data, newval, len))
2464                         return -EFAULT;
2465                 if (len == table->maxlen)
2466                         len--;
2467                 ((char *) table->data)[len] = 0;
2468         }
2469         return 1;
2470 }
2471
2472 /*
2473  * This function makes sure that all of the integers in the vector
2474  * are between the minimum and maximum values given in the arrays
2475  * table->extra1 and table->extra2, respectively.
2476  */
2477 int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen,
2478                 void __user *oldval, size_t __user *oldlenp,
2479                 void __user *newval, size_t newlen)
2480 {
2481
2482         if (newval && newlen) {
2483                 int __user *vec = (int __user *) newval;
2484                 int *min = (int *) table->extra1;
2485                 int *max = (int *) table->extra2;
2486                 size_t length;
2487                 int i;
2488
2489                 if (newlen % sizeof(int) != 0)
2490                         return -EINVAL;
2491
2492                 if (!table->extra1 && !table->extra2)
2493                         return 0;
2494
2495                 if (newlen > table->maxlen)
2496                         newlen = table->maxlen;
2497                 length = newlen / sizeof(int);
2498
2499                 for (i = 0; i < length; i++) {
2500                         int value;
2501                         if (get_user(value, vec + i))
2502                                 return -EFAULT;
2503                         if (min && value < min[i])
2504                                 return -EINVAL;
2505                         if (max && value > max[i])
2506                                 return -EINVAL;
2507                 }
2508         }
2509         return 0;
2510 }
2511
2512 /* Strategy function to convert jiffies to seconds */ 
2513 int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen,
2514                 void __user *oldval, size_t __user *oldlenp,
2515                 void __user *newval, size_t newlen)
2516 {
2517         if (oldval && oldlenp) {
2518                 size_t olen;
2519
2520                 if (get_user(olen, oldlenp))
2521                         return -EFAULT;
2522                 if (olen) {
2523                         int val;
2524
2525                         if (olen < sizeof(int))
2526                                 return -EINVAL;
2527
2528                         val = *(int *)(table->data) / HZ;
2529                         if (put_user(val, (int __user *)oldval))
2530                                 return -EFAULT;
2531                         if (put_user(sizeof(int), oldlenp))
2532                                 return -EFAULT;
2533                 }
2534         }
2535         if (newval && newlen) { 
2536                 int new;
2537                 if (newlen != sizeof(int))
2538                         return -EINVAL; 
2539                 if (get_user(new, (int __user *)newval))
2540                         return -EFAULT;
2541                 *(int *)(table->data) = new*HZ; 
2542         }
2543         return 1;
2544 }
2545
2546 /* Strategy function to convert jiffies to seconds */ 
2547 int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen,
2548                 void __user *oldval, size_t __user *oldlenp,
2549                 void __user *newval, size_t newlen)
2550 {
2551         if (oldval && oldlenp) {
2552                 size_t olen;
2553
2554                 if (get_user(olen, oldlenp))
2555                         return -EFAULT;
2556                 if (olen) {
2557                         int val;
2558
2559                         if (olen < sizeof(int))
2560                                 return -EINVAL;
2561
2562                         val = jiffies_to_msecs(*(int *)(table->data));
2563                         if (put_user(val, (int __user *)oldval))
2564                                 return -EFAULT;
2565                         if (put_user(sizeof(int), oldlenp))
2566                                 return -EFAULT;
2567                 }
2568         }
2569         if (newval && newlen) { 
2570                 int new;
2571                 if (newlen != sizeof(int))
2572                         return -EINVAL; 
2573                 if (get_user(new, (int __user *)newval))
2574                         return -EFAULT;
2575                 *(int *)(table->data) = msecs_to_jiffies(new);
2576         }
2577         return 1;
2578 }
2579
2580
2581
2582 #else /* CONFIG_SYSCTL_SYSCALL */
2583
2584
2585 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2586 {
2587         struct __sysctl_args tmp;
2588         int error;
2589
2590         if (copy_from_user(&tmp, args, sizeof(tmp)))
2591                 return -EFAULT;
2592
2593         error = deprecated_sysctl_warning(&tmp);
2594
2595         /* If no error reading the parameters then just -ENOSYS ... */
2596         if (!error)
2597                 error = -ENOSYS;
2598
2599         return error;
2600 }
2601
2602 int sysctl_data(struct ctl_table *table, int __user *name, int nlen,
2603                   void __user *oldval, size_t __user *oldlenp,
2604                   void __user *newval, size_t newlen)
2605 {
2606         return -ENOSYS;
2607 }
2608
2609 int sysctl_string(struct ctl_table *table, int __user *name, int nlen,
2610                   void __user *oldval, size_t __user *oldlenp,
2611                   void __user *newval, size_t newlen)
2612 {
2613         return -ENOSYS;
2614 }
2615
2616 int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen,
2617                 void __user *oldval, size_t __user *oldlenp,
2618                 void __user *newval, size_t newlen)
2619 {
2620         return -ENOSYS;
2621 }
2622
2623 int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen,
2624                 void __user *oldval, size_t __user *oldlenp,
2625                 void __user *newval, size_t newlen)
2626 {
2627         return -ENOSYS;
2628 }
2629
2630 int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen,
2631                 void __user *oldval, size_t __user *oldlenp,
2632                 void __user *newval, size_t newlen)
2633 {
2634         return -ENOSYS;
2635 }
2636
2637 #endif /* CONFIG_SYSCTL_SYSCALL */
2638
2639 static int deprecated_sysctl_warning(struct __sysctl_args *args)
2640 {
2641         static int msg_count;
2642         int name[CTL_MAXNAME];
2643         int i;
2644
2645         /* Check args->nlen. */
2646         if (args->nlen < 0 || args->nlen > CTL_MAXNAME)
2647                 return -ENOTDIR;
2648
2649         /* Read in the sysctl name for better debug message logging */
2650         for (i = 0; i < args->nlen; i++)
2651                 if (get_user(name[i], args->name + i))
2652                         return -EFAULT;
2653
2654         /* Ignore accesses to kernel.version */
2655         if ((args->nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2656                 return 0;
2657
2658         if (msg_count < 5) {
2659                 msg_count++;
2660                 printk(KERN_INFO
2661                         "warning: process `%s' used the deprecated sysctl "
2662                         "system call with ", current->comm);
2663                 for (i = 0; i < args->nlen; i++)
2664                         printk("%d.", name[i]);
2665                 printk("\n");
2666         }
2667         return 0;
2668 }
2669
/*
 * Symbols in this list have more than one definition, selected by
 * configuration options, so the exports are gathered here once instead
 * of being repeated after every definition. Kept in alphabetical order.
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(sysctl_data);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(unregister_sysctl_table);