[PATCH] proc: reorder the functions in base.c
[safe/jmp/linux-2.6] / fs / proc / base.c
1 /*
2  *  linux/fs/proc/base.c
3  *
4  *  Copyright (C) 1991, 1992 Linus Torvalds
5  *
6  *  proc base directory handling functions
7  *
8  *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
9  *  Instead of using magical inumbers to determine the kind of object
10  *  we allocate and fill in-core inodes upon lookup. They don't even
11  *  go into icache. We cache the reference to task_struct upon lookup too.
12  *  Eventually it should become a filesystem in its own. We don't use the
13  *  rest of procfs anymore.
14  *
15  *
16  *  Changelog:
17  *  17-Jan-2005
18  *  Allan Bezerra
19  *  Bruna Moreira <bruna.moreira@indt.org.br>
20  *  Edjard Mota <edjard.mota@indt.org.br>
21  *  Ilias Biris <ilias.biris@indt.org.br>
22  *  Mauricio Lin <mauricio.lin@indt.org.br>
23  *
24  *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
25  *
26  *  A new process specific entry (smaps) included in /proc. It shows the
27  *  size of rss for each memory area. The maps entry lacks information
28  *  about physical memory size (rss) for each mapped file, i.e.,
29  *  rss information for executables and library files.
30  *  This additional information is useful for any tools that need to know
31  *  about physical memory consumption for a process specific library.
32  *
33  *  Changelog:
34  *  21-Feb-2005
35  *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
36  *  Pud inclusion in the page table walking.
37  *
38  *  ChangeLog:
39  *  10-Mar-2005
40  *  10LE Instituto Nokia de Tecnologia - INdT:
41  *  A better way to walk through the page table as suggested by Hugh Dickins.
42  *
43  *  Simo Piiroinen <simo.piiroinen@nokia.com>:
44  *  Smaps information related to shared, private, clean and dirty pages.
45  *
46  *  Paul Mundt <paul.mundt@nokia.com>:
47  *  Overall revision about smaps.
48  */
49
50 #include <asm/uaccess.h>
51
52 #include <linux/errno.h>
53 #include <linux/time.h>
54 #include <linux/proc_fs.h>
55 #include <linux/stat.h>
56 #include <linux/init.h>
57 #include <linux/capability.h>
58 #include <linux/file.h>
59 #include <linux/string.h>
60 #include <linux/seq_file.h>
61 #include <linux/namei.h>
62 #include <linux/namespace.h>
63 #include <linux/mm.h>
64 #include <linux/smp_lock.h>
65 #include <linux/rcupdate.h>
66 #include <linux/kallsyms.h>
67 #include <linux/mount.h>
68 #include <linux/security.h>
69 #include <linux/ptrace.h>
70 #include <linux/seccomp.h>
71 #include <linux/cpuset.h>
72 #include <linux/audit.h>
73 #include <linux/poll.h>
74 #include "internal.h"
75
76 /* NOTE:
77  *      Implementing inode permission operations in /proc is almost
78  *      certainly an error.  Permission checks need to happen during
79  *      each system call not at open time.  The reason is that most of
80  *      what we wish to check for permissions in /proc varies at runtime.
81  *
82  *      The classic example of a problem is opening file descriptors
83  *      in /proc for a task before it execs a suid executable.
84  */
85
86 /*
87  * For hysterical raisins we keep the same inumbers as in the old procfs.
88  * Feel free to change the macro below - just keep the range distinct from
89  * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
90  * As soon as we'll get a separate superblock we will be able to forget
91  * about magical ranges too.
92  */
93
/* Build an inode number: pid shifted into the bits above 16, entry id in the low bits. */
#define fake_ino(pid, ino) ((ino) | ((pid) << 16))
95
/*
 * Identifiers for the kinds of entries handled by this file.
 *
 * Values are assigned sequentially starting at 2, so the numeric value of
 * every entry after a CONFIG_*-guarded one depends on the kernel
 * configuration.  The PROC_TGID_* group comes first, followed by a
 * parallel PROC_TID_* group.  PROC_TID_FD_DIR is pinned at 0x8000 and,
 * per the comment on it, marks the start of the 0x8000-0xffff range.
 */
enum pid_directory_inos {
        /* PROC_TGID_* group */
        PROC_TGID_INO = 2,
        PROC_TGID_TASK,
        PROC_TGID_STATUS,
        PROC_TGID_MEM,
#ifdef CONFIG_SECCOMP
        PROC_TGID_SECCOMP,
#endif
        PROC_TGID_CWD,
        PROC_TGID_ROOT,
        PROC_TGID_EXE,
        PROC_TGID_FD,
        PROC_TGID_ENVIRON,
        PROC_TGID_AUXV,
        PROC_TGID_CMDLINE,
        PROC_TGID_STAT,
        PROC_TGID_STATM,
        PROC_TGID_MAPS,
        PROC_TGID_NUMA_MAPS,
        PROC_TGID_MOUNTS,
        PROC_TGID_MOUNTSTATS,
        PROC_TGID_WCHAN,
#ifdef CONFIG_MMU
        PROC_TGID_SMAPS,
#endif
#ifdef CONFIG_SCHEDSTATS
        PROC_TGID_SCHEDSTAT,
#endif
#ifdef CONFIG_CPUSETS
        PROC_TGID_CPUSET,
#endif
#ifdef CONFIG_SECURITY
        PROC_TGID_ATTR,
        PROC_TGID_ATTR_CURRENT,
        PROC_TGID_ATTR_PREV,
        PROC_TGID_ATTR_EXEC,
        PROC_TGID_ATTR_FSCREATE,
        PROC_TGID_ATTR_KEYCREATE,
        PROC_TGID_ATTR_SOCKCREATE,
#endif
#ifdef CONFIG_AUDITSYSCALL
        PROC_TGID_LOGINUID,
#endif
        PROC_TGID_OOM_SCORE,
        PROC_TGID_OOM_ADJUST,
        /* PROC_TID_* group; same entries as above except there is no TASK entry */
        PROC_TID_INO,
        PROC_TID_STATUS,
        PROC_TID_MEM,
#ifdef CONFIG_SECCOMP
        PROC_TID_SECCOMP,
#endif
        PROC_TID_CWD,
        PROC_TID_ROOT,
        PROC_TID_EXE,
        PROC_TID_FD,
        PROC_TID_ENVIRON,
        PROC_TID_AUXV,
        PROC_TID_CMDLINE,
        PROC_TID_STAT,
        PROC_TID_STATM,
        PROC_TID_MAPS,
        PROC_TID_NUMA_MAPS,
        PROC_TID_MOUNTS,
        PROC_TID_MOUNTSTATS,
        PROC_TID_WCHAN,
#ifdef CONFIG_MMU
        PROC_TID_SMAPS,
#endif
#ifdef CONFIG_SCHEDSTATS
        PROC_TID_SCHEDSTAT,
#endif
#ifdef CONFIG_CPUSETS
        PROC_TID_CPUSET,
#endif
#ifdef CONFIG_SECURITY
        PROC_TID_ATTR,
        PROC_TID_ATTR_CURRENT,
        PROC_TID_ATTR_PREV,
        PROC_TID_ATTR_EXEC,
        PROC_TID_ATTR_FSCREATE,
        PROC_TID_ATTR_KEYCREATE,
        PROC_TID_ATTR_SOCKCREATE,
#endif
#ifdef CONFIG_AUDITSYSCALL
        PROC_TID_LOGINUID,
#endif
        PROC_TID_OOM_SCORE,
        PROC_TID_OOM_ADJUST,

        /* Add new entries before this */
        PROC_TID_FD_DIR = 0x8000,       /* 0x8000-0xffff */
};
188
/*
 * Worst case buffer size needed for holding an integer.
 *
 * A 32-bit signed int printed in decimal needs up to 11 characters
 * ("-2147483648"); one more for a trailing newline and one for the
 * terminating NUL gives 13.  The previous value of 10 was too small for
 * that worst case.
 */
#define PROC_NUMBUF 13
191
/*
 * One named entry description.  Instances are built positionally by the
 * E() macro, so the field order here must not change.
 */
struct pid_entry {
        int type;       /* entry kind; presumably an enum pid_directory_inos value - TODO confirm */
        int len;        /* length of name without the NUL (E() stores sizeof(name)-1) */
        char *name;     /* entry name */
        mode_t mode;    /* mode bits for the entry; how they are applied is not visible here */
};
198
/*
 * Positional initialiser for struct pid_entry.  'name' must be a string
 * literal (or array) because its length is taken with sizeof.
 */
#define E(type, name, mode) { (type), sizeof(name) - 1, (name), (mode) }
200
201 static struct fs_struct *get_fs_struct(struct task_struct *task)
202 {
203         struct fs_struct *fs;
204         task_lock(task);
205         fs = task->fs;
206         if(fs)
207                 atomic_inc(&fs->count);
208         task_unlock(task);
209         return fs;
210 }
211
212 static int get_nr_threads(struct task_struct *tsk)
213 {
214         /* Must be called with the rcu_read_lock held */
215         unsigned long flags;
216         int count = 0;
217
218         if (lock_task_sighand(tsk, &flags)) {
219                 count = atomic_read(&tsk->signal->count);
220                 unlock_task_sighand(tsk, &flags);
221         }
222         return count;
223 }
224
225 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
226 {
227         struct task_struct *task = get_proc_task(inode);
228         struct fs_struct *fs = NULL;
229         int result = -ENOENT;
230
231         if (task) {
232                 fs = get_fs_struct(task);
233                 put_task_struct(task);
234         }
235         if (fs) {
236                 read_lock(&fs->lock);
237                 *mnt = mntget(fs->pwdmnt);
238                 *dentry = dget(fs->pwd);
239                 read_unlock(&fs->lock);
240                 result = 0;
241                 put_fs_struct(fs);
242         }
243         return result;
244 }
245
246 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
247 {
248         struct task_struct *task = get_proc_task(inode);
249         struct fs_struct *fs = NULL;
250         int result = -ENOENT;
251
252         if (task) {
253                 fs = get_fs_struct(task);
254                 put_task_struct(task);
255         }
256         if (fs) {
257                 read_lock(&fs->lock);
258                 *mnt = mntget(fs->rootmnt);
259                 *dentry = dget(fs->root);
260                 read_unlock(&fs->lock);
261                 result = 0;
262                 put_fs_struct(fs);
263         }
264         return result;
265 }
266
/*
 * True when 'task' is the caller itself, or is a child of the caller that
 * is being ptraced, is currently stopped or traced, and passes
 * security_ptrace().
 *
 * The argument is parenthesised at every use so that any expression can
 * be passed safely.  It is still evaluated several times, so it must not
 * have side effects.
 */
#define MAY_PTRACE(task) \
        ((task) == current || \
        ((task)->parent == current && \
        ((task)->ptrace & PT_PTRACED) && \
         ((task)->state == TASK_STOPPED || (task)->state == TASK_TRACED) && \
         security_ptrace(current, (task)) == 0))
273
274 static int proc_pid_environ(struct task_struct *task, char * buffer)
275 {
276         int res = 0;
277         struct mm_struct *mm = get_task_mm(task);
278         if (mm) {
279                 unsigned int len = mm->env_end - mm->env_start;
280                 if (len > PAGE_SIZE)
281                         len = PAGE_SIZE;
282                 res = access_process_vm(task, mm->env_start, buffer, len, 0);
283                 if (!ptrace_may_attach(task))
284                         res = -ESRCH;
285                 mmput(mm);
286         }
287         return res;
288 }
289
290 static int proc_pid_cmdline(struct task_struct *task, char * buffer)
291 {
292         int res = 0;
293         unsigned int len;
294         struct mm_struct *mm = get_task_mm(task);
295         if (!mm)
296                 goto out;
297         if (!mm->arg_end)
298                 goto out_mm;    /* Shh! No looking before we're done */
299
300         len = mm->arg_end - mm->arg_start;
301  
302         if (len > PAGE_SIZE)
303                 len = PAGE_SIZE;
304  
305         res = access_process_vm(task, mm->arg_start, buffer, len, 0);
306
307         // If the nul at the end of args has been overwritten, then
308         // assume application is using setproctitle(3).
309         if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
310                 len = strnlen(buffer, res);
311                 if (len < res) {
312                     res = len;
313                 } else {
314                         len = mm->env_end - mm->env_start;
315                         if (len > PAGE_SIZE - res)
316                                 len = PAGE_SIZE - res;
317                         res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
318                         res = strnlen(buffer, res);
319                 }
320         }
321 out_mm:
322         mmput(mm);
323 out:
324         return res;
325 }
326
327 static int proc_pid_auxv(struct task_struct *task, char *buffer)
328 {
329         int res = 0;
330         struct mm_struct *mm = get_task_mm(task);
331         if (mm) {
332                 unsigned int nwords = 0;
333                 do
334                         nwords += 2;
335                 while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
336                 res = nwords * sizeof(mm->saved_auxv[0]);
337                 if (res > PAGE_SIZE)
338                         res = PAGE_SIZE;
339                 memcpy(buffer, mm->saved_auxv, res);
340                 mmput(mm);
341         }
342         return res;
343 }
344
345
346 #ifdef CONFIG_KALLSYMS
347 /*
348  * Provides a wchan file via kallsyms in a proper one-value-per-file format.
349  * Returns the resolved symbol.  If that fails, simply return the address.
350  */
351 static int proc_pid_wchan(struct task_struct *task, char *buffer)
352 {
353         char *modname;
354         const char *sym_name;
355         unsigned long wchan, size, offset;
356         char namebuf[KSYM_NAME_LEN+1];
357
358         wchan = get_wchan(task);
359
360         sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf);
361         if (sym_name)
362                 return sprintf(buffer, "%s", sym_name);
363         return sprintf(buffer, "%lu", wchan);
364 }
365 #endif /* CONFIG_KALLSYMS */
366
367 #ifdef CONFIG_SCHEDSTATS
368 /*
369  * Provides /proc/PID/schedstat
370  */
371 static int proc_pid_schedstat(struct task_struct *task, char *buffer)
372 {
373         return sprintf(buffer, "%lu %lu %lu\n",
374                         task->sched_info.cpu_time,
375                         task->sched_info.run_delay,
376                         task->sched_info.pcnt);
377 }
378 #endif
379
380 /* The badness from the OOM killer */
381 unsigned long badness(struct task_struct *p, unsigned long uptime);
382 static int proc_oom_score(struct task_struct *task, char *buffer)
383 {
384         unsigned long points;
385         struct timespec uptime;
386
387         do_posix_clock_monotonic_gettime(&uptime);
388         points = badness(task, uptime.tv_sec);
389         return sprintf(buffer, "%lu\n", points);
390 }
391
392 /************************************************************************/
393 /*                       Here the fs part begins                        */
394 /************************************************************************/
395
396 /* permission checks */
/*
 * Allow access to a task's file descriptors if it is us or we may use
 * ptrace attach to the process and find out that information.
 *
 * Returns the result of ptrace_may_attach(), or 0 when the task behind
 * @inode no longer exists.
 */
static int proc_fd_access_allowed(struct inode *inode)
{
        struct task_struct *target = get_proc_task(inode);
        int verdict;

        if (!target)
                return 0;

        verdict = ptrace_may_attach(target);
        put_task_struct(target);
        return verdict;
}
412
413 static int proc_setattr(struct dentry *dentry, struct iattr *attr)
414 {
415         int error;
416         struct inode *inode = dentry->d_inode;
417
418         if (attr->ia_valid & ATTR_MODE)
419                 return -EPERM;
420
421         error = inode_change_ok(inode, attr);
422         if (!error) {
423                 error = security_inode_setattr(dentry, attr);
424                 if (!error)
425                         error = inode_setattr(inode, attr);
426         }
427         return error;
428 }
429
/* Default inode operations: only setattr is overridden, with proc_setattr(). */
static struct inode_operations proc_def_inode_operations = {
        .setattr        = proc_setattr,
};
433
extern struct seq_operations mounts_op;
/*
 * Per-open state for the mounts file.  mounts_open() stores &p->m in
 * file->private_data and mounts_poll() reads that pointer back as a
 * struct proc_mounts, so 'm' has to stay the first member.
 */
struct proc_mounts {
        struct seq_file m;      /* m.private holds the struct namespace reference */
        int event;              /* namespace->event value last seen by this open file */
};
439
440 static int mounts_open(struct inode *inode, struct file *file)
441 {
442         struct task_struct *task = get_proc_task(inode);
443         struct namespace *namespace = NULL;
444         struct proc_mounts *p;
445         int ret = -EINVAL;
446
447         if (task) {
448                 task_lock(task);
449                 namespace = task->namespace;
450                 if (namespace)
451                         get_namespace(namespace);
452                 task_unlock(task);
453                 put_task_struct(task);
454         }
455
456         if (namespace) {
457                 ret = -ENOMEM;
458                 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
459                 if (p) {
460                         file->private_data = &p->m;
461                         ret = seq_open(file, &mounts_op);
462                         if (!ret) {
463                                 p->m.private = namespace;
464                                 p->event = namespace->event;
465                                 return 0;
466                         }
467                         kfree(p);
468                 }
469                 put_namespace(namespace);
470         }
471         return ret;
472 }
473
474 static int mounts_release(struct inode *inode, struct file *file)
475 {
476         struct seq_file *m = file->private_data;
477         struct namespace *namespace = m->private;
478         put_namespace(namespace);
479         return seq_release(inode, file);
480 }
481
482 static unsigned mounts_poll(struct file *file, poll_table *wait)
483 {
484         struct proc_mounts *p = file->private_data;
485         struct namespace *ns = p->m.private;
486         unsigned res = 0;
487
488         poll_wait(file, &ns->poll, wait);
489
490         spin_lock(&vfsmount_lock);
491         if (p->event != ns->event) {
492                 p->event = ns->event;
493                 res = POLLERR;
494         }
495         spin_unlock(&vfsmount_lock);
496
497         return res;
498 }
499
500 static struct file_operations proc_mounts_operations = {
501         .open           = mounts_open,
502         .read           = seq_read,
503         .llseek         = seq_lseek,
504         .release        = mounts_release,
505         .poll           = mounts_poll,
506 };
507
508 extern struct seq_operations mountstats_op;
509 static int mountstats_open(struct inode *inode, struct file *file)
510 {
511         int ret = seq_open(file, &mountstats_op);
512
513         if (!ret) {
514                 struct seq_file *m = file->private_data;
515                 struct namespace *namespace = NULL;
516                 struct task_struct *task = get_proc_task(inode);
517
518                 if (task) {
519                         task_lock(task);
520                         namespace = task->namespace;
521                         if (namespace)
522                                 get_namespace(namespace);
523                         task_unlock(task);
524                         put_task_struct(task);
525                 }
526
527                 if (namespace)
528                         m->private = namespace;
529                 else {
530                         seq_release(inode, file);
531                         ret = -EINVAL;
532                 }
533         }
534         return ret;
535 }
536
537 static struct file_operations proc_mountstats_operations = {
538         .open           = mountstats_open,
539         .read           = seq_read,
540         .llseek         = seq_lseek,
541         .release        = mounts_release,
542 };
543
/* 4K page size but our output routines use some slack for overruns */
#define PROC_BLOCK_SIZE (3 * 1024)
545
546 static ssize_t proc_info_read(struct file * file, char __user * buf,
547                           size_t count, loff_t *ppos)
548 {
549         struct inode * inode = file->f_dentry->d_inode;
550         unsigned long page;
551         ssize_t length;
552         struct task_struct *task = get_proc_task(inode);
553
554         length = -ESRCH;
555         if (!task)
556                 goto out_no_task;
557
558         if (count > PROC_BLOCK_SIZE)
559                 count = PROC_BLOCK_SIZE;
560
561         length = -ENOMEM;
562         if (!(page = __get_free_page(GFP_KERNEL)))
563                 goto out;
564
565         length = PROC_I(inode)->op.proc_read(task, (char*)page);
566
567         if (length >= 0)
568                 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
569         free_page(page);
570 out:
571         put_task_struct(task);
572 out_no_task:
573         return length;
574 }
575
/* Read-only file operations backed by proc_info_read(). */
static struct file_operations proc_info_file_operations = {
        .read           = proc_info_read,
};
579
580 static int mem_open(struct inode* inode, struct file* file)
581 {
582         file->private_data = (void*)((long)current->self_exec_id);
583         return 0;
584 }
585
586 static ssize_t mem_read(struct file * file, char __user * buf,
587                         size_t count, loff_t *ppos)
588 {
589         struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
590         char *page;
591         unsigned long src = *ppos;
592         int ret = -ESRCH;
593         struct mm_struct *mm;
594
595         if (!task)
596                 goto out_no_task;
597
598         if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
599                 goto out;
600
601         ret = -ENOMEM;
602         page = (char *)__get_free_page(GFP_USER);
603         if (!page)
604                 goto out;
605
606         ret = 0;
607  
608         mm = get_task_mm(task);
609         if (!mm)
610                 goto out_free;
611
612         ret = -EIO;
613  
614         if (file->private_data != (void*)((long)current->self_exec_id))
615                 goto out_put;
616
617         ret = 0;
618  
619         while (count > 0) {
620                 int this_len, retval;
621
622                 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
623                 retval = access_process_vm(task, src, page, this_len, 0);
624                 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) {
625                         if (!ret)
626                                 ret = -EIO;
627                         break;
628                 }
629
630                 if (copy_to_user(buf, page, retval)) {
631                         ret = -EFAULT;
632                         break;
633                 }
634  
635                 ret += retval;
636                 src += retval;
637                 buf += retval;
638                 count -= retval;
639         }
640         *ppos = src;
641
642 out_put:
643         mmput(mm);
644 out_free:
645         free_page((unsigned long) page);
646 out:
647         put_task_struct(task);
648 out_no_task:
649         return ret;
650 }
651
/*
 * mem_write is defined as NULL, so the #ifndef block below is never
 * compiled and the .write slot of proc_mem_operations ends up NULL.
 * The function body is kept only as disabled reference code.
 */
#define mem_write NULL

#ifndef mem_write
/* This is a security hazard */
/*
 * Write counterpart of mem_read(): copies user data into the target
 * task's memory at offset *ppos through a one-page bounce buffer.
 * Returns the number of bytes written or -ESRCH / -ENOMEM / -EFAULT /
 * -EIO.
 */
static ssize_t mem_write(struct file * file, const char * buf,
                         size_t count, loff_t *ppos)
{
        int copied;
        char *page;
        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
        unsigned long dst = *ppos;

        copied = -ESRCH;
        if (!task)
                goto out_no_task;

        if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
                goto out;

        copied = -ENOMEM;
        page = (char *)__get_free_page(GFP_USER);
        if (!page)
                goto out;

        copied = 0;
        while (count > 0) {
                int this_len, retval;

                /* move at most one page per iteration */
                this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
                if (copy_from_user(page, buf, this_len)) {
                        copied = -EFAULT;
                        break;
                }
                retval = access_process_vm(task, dst, page, this_len, 1);
                if (!retval) {
                        /* report -EIO only if nothing at all was written */
                        if (!copied)
                                copied = -EIO;
                        break;
                }
                copied += retval;
                buf += retval;
                dst += retval;
                count -= retval;                        
        }
        *ppos = dst;
        free_page((unsigned long) page);
out:
        put_task_struct(task);
out_no_task:
        return copied;
}
#endif
704
705 static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
706 {
707         switch (orig) {
708         case 0:
709                 file->f_pos = offset;
710                 break;
711         case 1:
712                 file->f_pos += offset;
713                 break;
714         default:
715                 return -EINVAL;
716         }
717         force_successful_syscall_return();
718         return file->f_pos;
719 }
720
721 static struct file_operations proc_mem_operations = {
722         .llseek         = mem_lseek,
723         .read           = mem_read,
724         .write          = mem_write,
725         .open           = mem_open,
726 };
727
728 static ssize_t oom_adjust_read(struct file *file, char __user *buf,
729                                 size_t count, loff_t *ppos)
730 {
731         struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
732         char buffer[PROC_NUMBUF];
733         size_t len;
734         int oom_adjust;
735         loff_t __ppos = *ppos;
736
737         if (!task)
738                 return -ESRCH;
739         oom_adjust = task->oomkilladj;
740         put_task_struct(task);
741
742         len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
743         if (__ppos >= len)
744                 return 0;
745         if (count > len-__ppos)
746                 count = len-__ppos;
747         if (copy_to_user(buf, buffer + __ppos, count))
748                 return -EFAULT;
749         *ppos = __ppos + count;
750         return count;
751 }
752
753 static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
754                                 size_t count, loff_t *ppos)
755 {
756         struct task_struct *task;
757         char buffer[PROC_NUMBUF], *end;
758         int oom_adjust;
759
760         if (!capable(CAP_SYS_RESOURCE))
761                 return -EPERM;
762         memset(buffer, 0, sizeof(buffer));
763         if (count > sizeof(buffer) - 1)
764                 count = sizeof(buffer) - 1;
765         if (copy_from_user(buffer, buf, count))
766                 return -EFAULT;
767         oom_adjust = simple_strtol(buffer, &end, 0);
768         if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE)
769                 return -EINVAL;
770         if (*end == '\n')
771                 end++;
772         task = get_proc_task(file->f_dentry->d_inode);
773         if (!task)
774                 return -ESRCH;
775         task->oomkilladj = oom_adjust;
776         put_task_struct(task);
777         if (end - buffer == 0)
778                 return -EIO;
779         return end - buffer;
780 }
781
782 static struct file_operations proc_oom_adjust_operations = {
783         .read           = oom_adjust_read,
784         .write          = oom_adjust_write,
785 };
786
787 #ifdef CONFIG_AUDITSYSCALL
788 #define TMPBUFLEN 21
789 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
790                                   size_t count, loff_t *ppos)
791 {
792         struct inode * inode = file->f_dentry->d_inode;
793         struct task_struct *task = get_proc_task(inode);
794         ssize_t length;
795         char tmpbuf[TMPBUFLEN];
796
797         if (!task)
798                 return -ESRCH;
799         length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
800                                 audit_get_loginuid(task->audit_context));
801         put_task_struct(task);
802         return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
803 }
804
805 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
806                                    size_t count, loff_t *ppos)
807 {
808         struct inode * inode = file->f_dentry->d_inode;
809         char *page, *tmp;
810         ssize_t length;
811         uid_t loginuid;
812
813         if (!capable(CAP_AUDIT_CONTROL))
814                 return -EPERM;
815
816         if (current != pid_task(proc_pid(inode), PIDTYPE_PID))
817                 return -EPERM;
818
819         if (count >= PAGE_SIZE)
820                 count = PAGE_SIZE - 1;
821
822         if (*ppos != 0) {
823                 /* No partial writes. */
824                 return -EINVAL;
825         }
826         page = (char*)__get_free_page(GFP_USER);
827         if (!page)
828                 return -ENOMEM;
829         length = -EFAULT;
830         if (copy_from_user(page, buf, count))
831                 goto out_free_page;
832
833         page[count] = '\0';
834         loginuid = simple_strtoul(page, &tmp, 10);
835         if (tmp == page) {
836                 length = -EINVAL;
837                 goto out_free_page;
838
839         }
840         length = audit_set_loginuid(current, loginuid);
841         if (likely(length == 0))
842                 length = count;
843
844 out_free_page:
845         free_page((unsigned long) page);
846         return length;
847 }
848
849 static struct file_operations proc_loginuid_operations = {
850         .read           = proc_loginuid_read,
851         .write          = proc_loginuid_write,
852 };
853 #endif
854
855 #ifdef CONFIG_SECCOMP
856 static ssize_t seccomp_read(struct file *file, char __user *buf,
857                             size_t count, loff_t *ppos)
858 {
859         struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
860         char __buf[20];
861         loff_t __ppos = *ppos;
862         size_t len;
863
864         if (!tsk)
865                 return -ESRCH;
866         /* no need to print the trailing zero, so use only len */
867         len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
868         put_task_struct(tsk);
869         if (__ppos >= len)
870                 return 0;
871         if (count > len - __ppos)
872                 count = len - __ppos;
873         if (copy_to_user(buf, __buf + __ppos, count))
874                 return -EFAULT;
875         *ppos = __ppos + count;
876         return count;
877 }
878
879 static ssize_t seccomp_write(struct file *file, const char __user *buf,
880                              size_t count, loff_t *ppos)
881 {
882         struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
883         char __buf[20], *end;
884         unsigned int seccomp_mode;
885         ssize_t result;
886
887         result = -ESRCH;
888         if (!tsk)
889                 goto out_no_task;
890
891         /* can set it only once to be even more secure */
892         result = -EPERM;
893         if (unlikely(tsk->seccomp.mode))
894                 goto out;
895
896         result = -EFAULT;
897         memset(__buf, 0, sizeof(__buf));
898         count = min(count, sizeof(__buf) - 1);
899         if (copy_from_user(__buf, buf, count))
900                 goto out;
901
902         seccomp_mode = simple_strtoul(__buf, &end, 0);
903         if (*end == '\n')
904                 end++;
905         result = -EINVAL;
906         if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
907                 tsk->seccomp.mode = seccomp_mode;
908                 set_tsk_thread_flag(tsk, TIF_SECCOMP);
909         } else
910                 goto out;
911         result = -EIO;
912         if (unlikely(!(end - __buf)))
913                 goto out;
914         result = end - __buf;
915 out:
916         put_task_struct(tsk);
917 out_no_task:
918         return result;
919 }
920
921 static struct file_operations proc_seccomp_operations = {
922         .read           = seccomp_read,
923         .write          = seccomp_write,
924 };
925 #endif /* CONFIG_SECCOMP */
926
927 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
928 {
929         struct inode *inode = dentry->d_inode;
930         int error = -EACCES;
931
932         /* We don't need a base pointer in the /proc filesystem */
933         path_release(nd);
934
935         /* Are we allowed to snoop on the tasks file descriptors? */
936         if (!proc_fd_access_allowed(inode))
937                 goto out;
938
939         error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
940         nd->last_type = LAST_BIND;
941 out:
942         return ERR_PTR(error);
943 }
944
945 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
946                             char __user *buffer, int buflen)
947 {
948         struct inode * inode;
949         char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
950         int len;
951
952         if (!tmp)
953                 return -ENOMEM;
954                 
955         inode = dentry->d_inode;
956         path = d_path(dentry, mnt, tmp, PAGE_SIZE);
957         len = PTR_ERR(path);
958         if (IS_ERR(path))
959                 goto out;
960         len = tmp + PAGE_SIZE - 1 - path;
961
962         if (len > buflen)
963                 len = buflen;
964         if (copy_to_user(buffer, path, len))
965                 len = -EFAULT;
966  out:
967         free_page((unsigned long)tmp);
968         return len;
969 }
970
971 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
972 {
973         int error = -EACCES;
974         struct inode *inode = dentry->d_inode;
975         struct dentry *de;
976         struct vfsmount *mnt = NULL;
977
978         /* Are we allowed to snoop on the tasks file descriptors? */
979         if (!proc_fd_access_allowed(inode))
980                 goto out;
981
982         error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
983         if (error)
984                 goto out;
985
986         error = do_proc_readlink(de, mnt, buffer, buflen);
987         dput(de);
988         mntput(mnt);
989 out:
990         return error;
991 }
992
/*
 * Inode operations shared by the per-task symlinks (exe, cwd, root and the
 * fd/<n> entries); the actual target comes from ei->op.proc_get_link.
 */
static struct inode_operations proc_pid_link_inode_operations = {
	.readlink	= proc_pid_readlink,
	.follow_link	= proc_pid_follow_link,
	.setattr	= proc_setattr,
};
998
999
1000 /* building an inode */
1001
1002 static int task_dumpable(struct task_struct *task)
1003 {
1004         int dumpable = 0;
1005         struct mm_struct *mm;
1006
1007         task_lock(task);
1008         mm = task->mm;
1009         if (mm)
1010                 dumpable = mm->dumpable;
1011         task_unlock(task);
1012         if(dumpable == 1)
1013                 return 1;
1014         return 0;
1015 }
1016
1017
1018 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
1019 {
1020         struct inode * inode;
1021         struct proc_inode *ei;
1022
1023         /* We need a new inode */
1024
1025         inode = new_inode(sb);
1026         if (!inode)
1027                 goto out;
1028
1029         /* Common stuff */
1030         ei = PROC_I(inode);
1031         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1032         inode->i_ino = fake_ino(task->pid, ino);
1033         inode->i_op = &proc_def_inode_operations;
1034
1035         /*
1036          * grab the reference to task.
1037          */
1038         ei->pid = get_pid(task->pids[PIDTYPE_PID].pid);
1039         if (!ei->pid)
1040                 goto out_unlock;
1041
1042         inode->i_uid = 0;
1043         inode->i_gid = 0;
1044         if (task_dumpable(task)) {
1045                 inode->i_uid = task->euid;
1046                 inode->i_gid = task->egid;
1047         }
1048         security_task_to_inode(task, inode);
1049
1050 out:
1051         return inode;
1052
1053 out_unlock:
1054         iput(inode);
1055         return NULL;
1056 }
1057
1058 static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1059 {
1060         struct inode *inode = dentry->d_inode;
1061         struct task_struct *task;
1062         generic_fillattr(inode, stat);
1063
1064         rcu_read_lock();
1065         stat->uid = 0;
1066         stat->gid = 0;
1067         task = pid_task(proc_pid(inode), PIDTYPE_PID);
1068         if (task) {
1069                 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1070                     task_dumpable(task)) {
1071                         stat->uid = task->euid;
1072                         stat->gid = task->egid;
1073                 }
1074         }
1075         rcu_read_unlock();
1076         return 0;
1077 }
1078
1079 /* dentry stuff */
1080
1081 /*
1082  *      Exceptional case: normally we are not allowed to unhash a busy
1083  * directory. In this case, however, we can do it - no aliasing problems
1084  * due to the way we treat inodes.
1085  *
1086  * Rewrite the inode's ownerships here because the owning task may have
1087  * performed a setuid(), etc.
1088  *
1089  * Before the /proc/pid/status file was created the only way to read
1090  * the effective uid of a /process was to stat /proc/pid.  Reading
1091  * /proc/pid/status is slow enough that procps and other packages
1092  * kept stating /proc/pid.  To keep the rules in /proc simple I have
1093  * made this apply to all per process world readable and executable
1094  * directories.
1095  */
1096 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1097 {
1098         struct inode *inode = dentry->d_inode;
1099         struct task_struct *task = get_proc_task(inode);
1100         if (task) {
1101                 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1102                     task_dumpable(task)) {
1103                         inode->i_uid = task->euid;
1104                         inode->i_gid = task->egid;
1105                 } else {
1106                         inode->i_uid = 0;
1107                         inode->i_gid = 0;
1108                 }
1109                 inode->i_mode &= ~(S_ISUID | S_ISGID);
1110                 security_task_to_inode(task, inode);
1111                 put_task_struct(task);
1112                 return 1;
1113         }
1114         d_drop(dentry);
1115         return 0;
1116 }
1117
1118 static int pid_delete_dentry(struct dentry * dentry)
1119 {
1120         /* Is the task we represent dead?
1121          * If so, then don't put the dentry on the lru list,
1122          * kill it immediately.
1123          */
1124         return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1125 }
1126
/* Dentry operations installed on the named per-task entries by proc_pident_lookup(). */
static struct dentry_operations pid_dentry_operations =
{
	.d_revalidate	= pid_revalidate,
	.d_delete	= pid_delete_dentry,
};
1132
1133 /* Lookups */
1134
1135 static unsigned name_to_int(struct dentry *dentry)
1136 {
1137         const char *name = dentry->d_name.name;
1138         int len = dentry->d_name.len;
1139         unsigned n = 0;
1140
1141         if (len > 1 && *name == '0')
1142                 goto out;
1143         while (len-- > 0) {
1144                 unsigned c = *name++ - '0';
1145                 if (c > 9)
1146                         goto out;
1147                 if (n >= (~0U-9)/10)
1148                         goto out;
1149                 n *= 10;
1150                 n += c;
1151         }
1152         return n;
1153 out:
1154         return ~0U;
1155 }
1156
1157 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
1158 {
1159         struct task_struct *task = get_proc_task(inode);
1160         struct files_struct *files = NULL;
1161         struct file *file;
1162         int fd = proc_fd(inode);
1163
1164         if (task) {
1165                 files = get_files_struct(task);
1166                 put_task_struct(task);
1167         }
1168         if (files) {
1169                 /*
1170                  * We are not taking a ref to the file structure, so we must
1171                  * hold ->file_lock.
1172                  */
1173                 spin_lock(&files->file_lock);
1174                 file = fcheck_files(files, fd);
1175                 if (file) {
1176                         *mnt = mntget(file->f_vfsmnt);
1177                         *dentry = dget(file->f_dentry);
1178                         spin_unlock(&files->file_lock);
1179                         put_files_struct(files);
1180                         return 0;
1181                 }
1182                 spin_unlock(&files->file_lock);
1183                 put_files_struct(files);
1184         }
1185         return -ENOENT;
1186 }
1187
1188 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1189 {
1190         struct inode *inode = dentry->d_inode;
1191         struct task_struct *task = get_proc_task(inode);
1192         int fd = proc_fd(inode);
1193         struct files_struct *files;
1194
1195         if (task) {
1196                 files = get_files_struct(task);
1197                 if (files) {
1198                         rcu_read_lock();
1199                         if (fcheck_files(files, fd)) {
1200                                 rcu_read_unlock();
1201                                 put_files_struct(files);
1202                                 if (task_dumpable(task)) {
1203                                         inode->i_uid = task->euid;
1204                                         inode->i_gid = task->egid;
1205                                 } else {
1206                                         inode->i_uid = 0;
1207                                         inode->i_gid = 0;
1208                                 }
1209                                 inode->i_mode &= ~(S_ISUID | S_ISGID);
1210                                 security_task_to_inode(task, inode);
1211                                 put_task_struct(task);
1212                                 return 1;
1213                         }
1214                         rcu_read_unlock();
1215                         put_files_struct(files);
1216                 }
1217                 put_task_struct(task);
1218         }
1219         d_drop(dentry);
1220         return 0;
1221 }
1222
/* Dentry operations installed on the fd/<n> entries by proc_lookupfd(). */
static struct dentry_operations tid_fd_dentry_operations =
{
	.d_revalidate	= tid_fd_revalidate,
	.d_delete	= pid_delete_dentry,
};
1228
1229 /* SMP-safe */
1230 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1231 {
1232         struct task_struct *task = get_proc_task(dir);
1233         unsigned fd = name_to_int(dentry);
1234         struct dentry *result = ERR_PTR(-ENOENT);
1235         struct file * file;
1236         struct files_struct * files;
1237         struct inode *inode;
1238         struct proc_inode *ei;
1239
1240         if (!task)
1241                 goto out_no_task;
1242         if (fd == ~0U)
1243                 goto out;
1244
1245         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
1246         if (!inode)
1247                 goto out;
1248         ei = PROC_I(inode);
1249         ei->fd = fd;
1250         files = get_files_struct(task);
1251         if (!files)
1252                 goto out_unlock;
1253         inode->i_mode = S_IFLNK;
1254
1255         /*
1256          * We are not taking a ref to the file structure, so we must
1257          * hold ->file_lock.
1258          */
1259         spin_lock(&files->file_lock);
1260         file = fcheck_files(files, fd);
1261         if (!file)
1262                 goto out_unlock2;
1263         if (file->f_mode & 1)
1264                 inode->i_mode |= S_IRUSR | S_IXUSR;
1265         if (file->f_mode & 2)
1266                 inode->i_mode |= S_IWUSR | S_IXUSR;
1267         spin_unlock(&files->file_lock);
1268         put_files_struct(files);
1269         inode->i_op = &proc_pid_link_inode_operations;
1270         inode->i_size = 64;
1271         ei->op.proc_get_link = proc_fd_link;
1272         dentry->d_op = &tid_fd_dentry_operations;
1273         d_add(dentry, inode);
1274         /* Close the race of the process dying before we return the dentry */
1275         if (tid_fd_revalidate(dentry, NULL))
1276                 result = NULL;
1277 out:
1278         put_task_struct(task);
1279 out_no_task:
1280         return result;
1281
1282 out_unlock2:
1283         spin_unlock(&files->file_lock);
1284         put_files_struct(files);
1285 out_unlock:
1286         iput(inode);
1287         goto out;
1288 }
1289
/*
 * ->readdir for a task's "fd" directory.
 *
 * Positions 0 and 1 are "." and ".."; position n+2 corresponds to file
 * descriptor n. One DT_LNK entry, named by the decimal descriptor number,
 * is emitted for every descriptor that is currently open.
 *
 * Returns 0, or -ENOENT when the task no longer exists.
 */
static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
{
	struct dentry *dentry = filp->f_dentry;
	struct inode *inode = dentry->d_inode;
	struct task_struct *p = get_proc_task(inode);
	unsigned int fd, tid, ino;
	int retval;
	char buf[PROC_NUMBUF];
	struct files_struct * files;
	struct fdtable *fdt;

	retval = -ENOENT;
	if (!p)
		goto out_no_task;
	retval = 0;
	tid = p->pid;

	fd = filp->f_pos;
	switch (fd) {
		case 0:
			if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
				goto out;
			filp->f_pos++;
			/* fall through */
		case 1:
			ino = parent_ino(dentry);
			if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
				goto out;
			filp->f_pos++;
			/* fall through */
		default:
			files = get_files_struct(p);
			if (!files)
				goto out;
			/*
			 * The loop condition and fcheck_files() run under
			 * rcu_read_lock(); the lock is dropped while an entry
			 * is formatted and handed to filldir, and re-taken
			 * before the loop continues or exits.
			 */
			rcu_read_lock();
			fdt = files_fdtable(files);
			for (fd = filp->f_pos-2;
			     fd < fdt->max_fds;
			     fd++, filp->f_pos++) {
				unsigned int i,j;

				if (!fcheck_files(files, fd))
					continue;
				rcu_read_unlock();

				/*
				 * Write the decimal digits of fd backwards
				 * from the end of buf; the name is buf+j with
				 * length PROC_NUMBUF-j (not NUL terminated).
				 */
				j = PROC_NUMBUF;
				i = fd;
				do {
					j--;
					buf[j] = '0' + (i % 10);
					i /= 10;
				} while (i);

				ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
				if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
					/* re-take the lock so the unlock after the loop balances */
					rcu_read_lock();
					break;
				}
				rcu_read_lock();
			}
			rcu_read_unlock();
			put_files_struct(files);
	}
out:
	put_task_struct(p);
out_no_task:
	return retval;
}
1356
/* File operations of a task's "fd" directory. */
static struct file_operations proc_fd_operations = {
	.read		= generic_read_dir,
	.readdir	= proc_readfd,
};
1361
/*
 * proc directories can do almost nothing: the "fd" directory only
 * supports lookup and setattr.
 */
static struct inode_operations proc_fd_inode_operations = {
	.lookup		= proc_lookupfd,
	.setattr	= proc_setattr,
};
1369
/*
 * Forward declarations: proc_pident_lookup() below refers to these
 * operation tables before they are defined.
 */
static struct file_operations proc_task_operations;
static struct inode_operations proc_task_inode_operations;

#ifdef CONFIG_SECURITY
static struct file_operations proc_pid_attr_operations;
static struct file_operations proc_tid_attr_operations;
static struct inode_operations proc_tid_attr_inode_operations;
static struct file_operations proc_tgid_attr_operations;
static struct inode_operations proc_tgid_attr_inode_operations;
#endif
1380
1381 /* SMP-safe */
1382 static struct dentry *proc_pident_lookup(struct inode *dir, 
1383                                          struct dentry *dentry,
1384                                          struct pid_entry *ents)
1385 {
1386         struct inode *inode;
1387         struct dentry *error;
1388         struct task_struct *task = get_proc_task(dir);
1389         struct pid_entry *p;
1390         struct proc_inode *ei;
1391
1392         error = ERR_PTR(-ENOENT);
1393         inode = NULL;
1394
1395         if (!task)
1396                 goto out_no_task;
1397
1398         for (p = ents; p->name; p++) {
1399                 if (p->len != dentry->d_name.len)
1400                         continue;
1401                 if (!memcmp(dentry->d_name.name, p->name, p->len))
1402                         break;
1403         }
1404         if (!p->name)
1405                 goto out;
1406
1407         error = ERR_PTR(-EINVAL);
1408         inode = proc_pid_make_inode(dir->i_sb, task, p->type);
1409         if (!inode)
1410                 goto out;
1411
1412         ei = PROC_I(inode);
1413         inode->i_mode = p->mode;
1414         /*
1415          * Yes, it does not scale. And it should not. Don't add
1416          * new entries into /proc/<tgid>/ without very good reasons.
1417          */
1418         switch(p->type) {
1419                 case PROC_TGID_TASK:
1420                         inode->i_nlink = 2;
1421                         inode->i_op = &proc_task_inode_operations;
1422                         inode->i_fop = &proc_task_operations;
1423                         break;
1424                 case PROC_TID_FD:
1425                 case PROC_TGID_FD:
1426                         inode->i_nlink = 2;
1427                         inode->i_op = &proc_fd_inode_operations;
1428                         inode->i_fop = &proc_fd_operations;
1429                         break;
1430                 case PROC_TID_EXE:
1431                 case PROC_TGID_EXE:
1432                         inode->i_op = &proc_pid_link_inode_operations;
1433                         ei->op.proc_get_link = proc_exe_link;
1434                         break;
1435                 case PROC_TID_CWD:
1436                 case PROC_TGID_CWD:
1437                         inode->i_op = &proc_pid_link_inode_operations;
1438                         ei->op.proc_get_link = proc_cwd_link;
1439                         break;
1440                 case PROC_TID_ROOT:
1441                 case PROC_TGID_ROOT:
1442                         inode->i_op = &proc_pid_link_inode_operations;
1443                         ei->op.proc_get_link = proc_root_link;
1444                         break;
1445                 case PROC_TID_ENVIRON:
1446                 case PROC_TGID_ENVIRON:
1447                         inode->i_fop = &proc_info_file_operations;
1448                         ei->op.proc_read = proc_pid_environ;
1449                         break;
1450                 case PROC_TID_AUXV:
1451                 case PROC_TGID_AUXV:
1452                         inode->i_fop = &proc_info_file_operations;
1453                         ei->op.proc_read = proc_pid_auxv;
1454                         break;
1455                 case PROC_TID_STATUS:
1456                 case PROC_TGID_STATUS:
1457                         inode->i_fop = &proc_info_file_operations;
1458                         ei->op.proc_read = proc_pid_status;
1459                         break;
1460                 case PROC_TID_STAT:
1461                         inode->i_fop = &proc_info_file_operations;
1462                         ei->op.proc_read = proc_tid_stat;
1463                         break;
1464                 case PROC_TGID_STAT:
1465                         inode->i_fop = &proc_info_file_operations;
1466                         ei->op.proc_read = proc_tgid_stat;
1467                         break;
1468                 case PROC_TID_CMDLINE:
1469                 case PROC_TGID_CMDLINE:
1470                         inode->i_fop = &proc_info_file_operations;
1471                         ei->op.proc_read = proc_pid_cmdline;
1472                         break;
1473                 case PROC_TID_STATM:
1474                 case PROC_TGID_STATM:
1475                         inode->i_fop = &proc_info_file_operations;
1476                         ei->op.proc_read = proc_pid_statm;
1477                         break;
1478                 case PROC_TID_MAPS:
1479                 case PROC_TGID_MAPS:
1480                         inode->i_fop = &proc_maps_operations;
1481                         break;
1482 #ifdef CONFIG_NUMA
1483                 case PROC_TID_NUMA_MAPS:
1484                 case PROC_TGID_NUMA_MAPS:
1485                         inode->i_fop = &proc_numa_maps_operations;
1486                         break;
1487 #endif
1488                 case PROC_TID_MEM:
1489                 case PROC_TGID_MEM:
1490                         inode->i_fop = &proc_mem_operations;
1491                         break;
1492 #ifdef CONFIG_SECCOMP
1493                 case PROC_TID_SECCOMP:
1494                 case PROC_TGID_SECCOMP:
1495                         inode->i_fop = &proc_seccomp_operations;
1496                         break;
1497 #endif /* CONFIG_SECCOMP */
1498                 case PROC_TID_MOUNTS:
1499                 case PROC_TGID_MOUNTS:
1500                         inode->i_fop = &proc_mounts_operations;
1501                         break;
1502 #ifdef CONFIG_MMU
1503                 case PROC_TID_SMAPS:
1504                 case PROC_TGID_SMAPS:
1505                         inode->i_fop = &proc_smaps_operations;
1506                         break;
1507 #endif
1508                 case PROC_TID_MOUNTSTATS:
1509                 case PROC_TGID_MOUNTSTATS:
1510                         inode->i_fop = &proc_mountstats_operations;
1511                         break;
1512 #ifdef CONFIG_SECURITY
1513                 case PROC_TID_ATTR:
1514                         inode->i_nlink = 2;
1515                         inode->i_op = &proc_tid_attr_inode_operations;
1516                         inode->i_fop = &proc_tid_attr_operations;
1517                         break;
1518                 case PROC_TGID_ATTR:
1519                         inode->i_nlink = 2;
1520                         inode->i_op = &proc_tgid_attr_inode_operations;
1521                         inode->i_fop = &proc_tgid_attr_operations;
1522                         break;
1523                 case PROC_TID_ATTR_CURRENT:
1524                 case PROC_TGID_ATTR_CURRENT:
1525                 case PROC_TID_ATTR_PREV:
1526                 case PROC_TGID_ATTR_PREV:
1527                 case PROC_TID_ATTR_EXEC:
1528                 case PROC_TGID_ATTR_EXEC:
1529                 case PROC_TID_ATTR_FSCREATE:
1530                 case PROC_TGID_ATTR_FSCREATE:
1531                 case PROC_TID_ATTR_KEYCREATE:
1532                 case PROC_TGID_ATTR_KEYCREATE:
1533                 case PROC_TID_ATTR_SOCKCREATE:
1534                 case PROC_TGID_ATTR_SOCKCREATE:
1535                         inode->i_fop = &proc_pid_attr_operations;
1536                         break;
1537 #endif
1538 #ifdef CONFIG_KALLSYMS
1539                 case PROC_TID_WCHAN:
1540                 case PROC_TGID_WCHAN:
1541                         inode->i_fop = &proc_info_file_operations;
1542                         ei->op.proc_read = proc_pid_wchan;
1543                         break;
1544 #endif
1545 #ifdef CONFIG_SCHEDSTATS
1546                 case PROC_TID_SCHEDSTAT:
1547                 case PROC_TGID_SCHEDSTAT:
1548                         inode->i_fop = &proc_info_file_operations;
1549                         ei->op.proc_read = proc_pid_schedstat;
1550                         break;
1551 #endif
1552 #ifdef CONFIG_CPUSETS
1553                 case PROC_TID_CPUSET:
1554                 case PROC_TGID_CPUSET:
1555                         inode->i_fop = &proc_cpuset_operations;
1556                         break;
1557 #endif
1558                 case PROC_TID_OOM_SCORE:
1559                 case PROC_TGID_OOM_SCORE:
1560                         inode->i_fop = &proc_info_file_operations;
1561                         ei->op.proc_read = proc_oom_score;
1562                         break;
1563                 case PROC_TID_OOM_ADJUST:
1564                 case PROC_TGID_OOM_ADJUST:
1565                         inode->i_fop = &proc_oom_adjust_operations;
1566                         break;
1567 #ifdef CONFIG_AUDITSYSCALL
1568                 case PROC_TID_LOGINUID:
1569                 case PROC_TGID_LOGINUID:
1570                         inode->i_fop = &proc_loginuid_operations;
1571                         break;
1572 #endif
1573                 default:
1574                         printk("procfs: impossible type (%d)",p->type);
1575                         iput(inode);
1576                         error = ERR_PTR(-EINVAL);
1577                         goto out;
1578         }
1579         dentry->d_op = &pid_dentry_operations;
1580         d_add(dentry, inode);
1581         /* Close the race of the process dying before we return the dentry */
1582         if (pid_revalidate(dentry, NULL))
1583                 error = NULL;
1584 out:
1585         put_task_struct(task);
1586 out_no_task:
1587         return error;
1588 }
1589
1590 static int proc_pident_readdir(struct file *filp,
1591                 void *dirent, filldir_t filldir,
1592                 struct pid_entry *ents, unsigned int nents)
1593 {
1594         int i;
1595         int pid;
1596         struct dentry *dentry = filp->f_dentry;
1597         struct inode *inode = dentry->d_inode;
1598         struct task_struct *task = get_proc_task(inode);
1599         struct pid_entry *p;
1600         ino_t ino;
1601         int ret;
1602
1603         ret = -ENOENT;
1604         if (!task)
1605                 goto out;
1606
1607         ret = 0;
1608         pid = task->pid;
1609         put_task_struct(task);
1610         i = filp->f_pos;
1611         switch (i) {
1612         case 0:
1613                 ino = inode->i_ino;
1614                 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
1615                         goto out;
1616                 i++;
1617                 filp->f_pos++;
1618                 /* fall through */
1619         case 1:
1620                 ino = parent_ino(dentry);
1621                 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
1622                         goto out;
1623                 i++;
1624                 filp->f_pos++;
1625                 /* fall through */
1626         default:
1627                 i -= 2;
1628                 if (i >= nents) {
1629                         ret = 1;
1630                         goto out;
1631                 }
1632                 p = ents + i;
1633                 while (p->name) {
1634                         if (filldir(dirent, p->name, p->len, filp->f_pos,
1635                                     fake_ino(pid, p->type), p->mode >> 12) < 0)
1636                                 goto out;
1637                         filp->f_pos++;
1638                         p++;
1639                 }
1640         }
1641
1642         ret = 1;
1643 out:
1644         return ret;
1645 }
1646
1647 #ifdef CONFIG_SECURITY
1648 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1649                                   size_t count, loff_t *ppos)
1650 {
1651         struct inode * inode = file->f_dentry->d_inode;
1652         unsigned long page;
1653         ssize_t length;
1654         struct task_struct *task = get_proc_task(inode);
1655
1656         length = -ESRCH;
1657         if (!task)
1658                 goto out_no_task;
1659
1660         if (count > PAGE_SIZE)
1661                 count = PAGE_SIZE;
1662         length = -ENOMEM;
1663         if (!(page = __get_free_page(GFP_KERNEL)))
1664                 goto out;
1665
1666         length = security_getprocattr(task,
1667                                       (char*)file->f_dentry->d_name.name,
1668                                       (void*)page, count);
1669         if (length >= 0)
1670                 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
1671         free_page(page);
1672 out:
1673         put_task_struct(task);
1674 out_no_task:
1675         return length;
1676 }
1677
1678 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1679                                    size_t count, loff_t *ppos)
1680 {
1681         struct inode * inode = file->f_dentry->d_inode;
1682         char *page;
1683         ssize_t length;
1684         struct task_struct *task = get_proc_task(inode);
1685
1686         length = -ESRCH;
1687         if (!task)
1688                 goto out_no_task;
1689         if (count > PAGE_SIZE)
1690                 count = PAGE_SIZE;
1691
1692         /* No partial writes. */
1693         length = -EINVAL;
1694         if (*ppos != 0)
1695                 goto out;
1696
1697         length = -ENOMEM;
1698         page = (char*)__get_free_page(GFP_USER);
1699         if (!page)
1700                 goto out;
1701
1702         length = -EFAULT;
1703         if (copy_from_user(page, buf, count))
1704                 goto out_free;
1705
1706         length = security_setprocattr(task,
1707                                       (char*)file->f_dentry->d_name.name,
1708                                       (void*)page, count);
1709 out_free:
1710         free_page((unsigned long) page);
1711 out:
1712         put_task_struct(task);
1713 out_no_task:
1714         return length;
1715 }
1716
/* File operations shared by all security attribute files (tid and tgid flavours). */
static struct file_operations proc_pid_attr_operations = {
	.read		= proc_pid_attr_read,
	.write		= proc_pid_attr_write,
};
1721
/*
 * Entries of the thread-group "attr" directory. Only "prev" is read-only;
 * the table ends with an all-zero entry whose NULL name stops the walkers.
 */
static struct pid_entry tgid_attr_stuff[] = {
	E(PROC_TGID_ATTR_CURRENT,  "current",  S_IFREG|S_IRUGO|S_IWUGO),
	E(PROC_TGID_ATTR_PREV,     "prev",     S_IFREG|S_IRUGO),
	E(PROC_TGID_ATTR_EXEC,     "exec",     S_IFREG|S_IRUGO|S_IWUGO),
	E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
	E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
	E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
	{0,0,NULL,0}
};
/*
 * Entries of the per-thread "attr" directory; same names and modes as
 * tgid_attr_stuff, with the PROC_TID_* types. Terminated by a NULL name.
 */
static struct pid_entry tid_attr_stuff[] = {
	E(PROC_TID_ATTR_CURRENT,   "current",  S_IFREG|S_IRUGO|S_IWUGO),
	E(PROC_TID_ATTR_PREV,      "prev",     S_IFREG|S_IRUGO),
	E(PROC_TID_ATTR_EXEC,      "exec",     S_IFREG|S_IRUGO|S_IWUGO),
	E(PROC_TID_ATTR_FSCREATE,  "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
	E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
	E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
	{0,0,NULL,0}
};
1740
/* ->readdir of the thread-group "attr" directory: list tgid_attr_stuff. */
static int proc_tgid_attr_readdir(struct file * filp,
			     void * dirent, filldir_t filldir)
{
	/* ARRAY_SIZE counts the terminating entry as well. */
	return proc_pident_readdir(filp,dirent,filldir,
				   tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff));
}
1747
/* ->readdir of the per-thread "attr" directory: list tid_attr_stuff. */
static int proc_tid_attr_readdir(struct file * filp,
			     void * dirent, filldir_t filldir)
{
	/* ARRAY_SIZE counts the terminating entry as well. */
	return proc_pident_readdir(filp,dirent,filldir,
				   tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff));
}
1754
/* File operations of the thread-group "attr" directory. */
static struct file_operations proc_tgid_attr_operations = {
	.read		= generic_read_dir,
	.readdir	= proc_tgid_attr_readdir,
};
1759
/* File operations of the per-thread "attr" directory. */
static struct file_operations proc_tid_attr_operations = {
	.read		= generic_read_dir,
	.readdir	= proc_tid_attr_readdir,
};
1764
/* ->lookup of the thread-group "attr" directory; @nd is not used. */
static struct dentry *proc_tgid_attr_lookup(struct inode *dir,
				struct dentry *dentry, struct nameidata *nd)
{
	return proc_pident_lookup(dir, dentry, tgid_attr_stuff);
}
1770
/* ->lookup of the per-thread "attr" directory; @nd is not used. */
static struct dentry *proc_tid_attr_lookup(struct inode *dir,
				struct dentry *dentry, struct nameidata *nd)
{
	return proc_pident_lookup(dir, dentry, tid_attr_stuff);
}
1776
/* Inode operations of the thread-group "attr" directory. */
static struct inode_operations proc_tgid_attr_inode_operations = {
	.lookup		= proc_tgid_attr_lookup,
	.getattr	= pid_getattr,
	.setattr	= proc_setattr,
};
1782
/* Inode operations of the per-thread "attr" directory. */
static struct inode_operations proc_tid_attr_inode_operations = {
	.lookup		= proc_tid_attr_lookup,
	.getattr	= pid_getattr,
	.setattr	= proc_setattr,
};
1788 #endif
1789
1790 /*
1791  * /proc/self:
1792  */
1793 static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1794                               int buflen)
1795 {
1796         char tmp[PROC_NUMBUF];
1797         sprintf(tmp, "%d", current->tgid);
1798         return vfs_readlink(dentry,buffer,buflen,tmp);
1799 }
1800
1801 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1802 {
1803         char tmp[PROC_NUMBUF];
1804         sprintf(tmp, "%d", current->tgid);
1805         return ERR_PTR(vfs_follow_link(nd,tmp));
1806 }
1807
1808 static struct inode_operations proc_self_inode_operations = {
1809         .readlink       = proc_self_readlink,
1810         .follow_link    = proc_self_follow_link,
1811 };
1812
1813 /*
1814  * Thread groups
1815  */
1816 static struct pid_entry tgid_base_stuff[] = {
1817         E(PROC_TGID_TASK,      "task",    S_IFDIR|S_IRUGO|S_IXUGO),
1818         E(PROC_TGID_FD,        "fd",      S_IFDIR|S_IRUSR|S_IXUSR),
1819         E(PROC_TGID_ENVIRON,   "environ", S_IFREG|S_IRUSR),
1820         E(PROC_TGID_AUXV,      "auxv",    S_IFREG|S_IRUSR),
1821         E(PROC_TGID_STATUS,    "status",  S_IFREG|S_IRUGO),
1822         E(PROC_TGID_CMDLINE,   "cmdline", S_IFREG|S_IRUGO),
1823         E(PROC_TGID_STAT,      "stat",    S_IFREG|S_IRUGO),
1824         E(PROC_TGID_STATM,     "statm",   S_IFREG|S_IRUGO),
1825         E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUGO),
1826 #ifdef CONFIG_NUMA
1827         E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
1828 #endif
1829         E(PROC_TGID_MEM,       "mem",     S_IFREG|S_IRUSR|S_IWUSR),
1830 #ifdef CONFIG_SECCOMP
1831         E(PROC_TGID_SECCOMP,   "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
1832 #endif
1833         E(PROC_TGID_CWD,       "cwd",     S_IFLNK|S_IRWXUGO),
1834         E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
1835         E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
1836         E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
1837         E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
1838 #ifdef CONFIG_MMU
1839         E(PROC_TGID_SMAPS,     "smaps",   S_IFREG|S_IRUGO),
1840 #endif
1841 #ifdef CONFIG_SECURITY
1842         E(PROC_TGID_ATTR,      "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
1843 #endif
1844 #ifdef CONFIG_KALLSYMS
1845         E(PROC_TGID_WCHAN,     "wchan",   S_IFREG|S_IRUGO),
1846 #endif
1847 #ifdef CONFIG_SCHEDSTATS
1848         E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO),
1849 #endif
1850 #ifdef CONFIG_CPUSETS
1851         E(PROC_TGID_CPUSET,    "cpuset",  S_IFREG|S_IRUGO),
1852 #endif
1853         E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
1854         E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
1855 #ifdef CONFIG_AUDITSYSCALL
1856         E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
1857 #endif
1858         {0,0,NULL,0}
1859 };
1860
1861 static int proc_tgid_base_readdir(struct file * filp,
1862                              void * dirent, filldir_t filldir)
1863 {
1864         return proc_pident_readdir(filp,dirent,filldir,
1865                                    tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
1866 }
1867
1868 static struct file_operations proc_tgid_base_operations = {
1869         .read           = generic_read_dir,
1870         .readdir        = proc_tgid_base_readdir,
1871 };
1872
1873 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1874         return proc_pident_lookup(dir, dentry, tgid_base_stuff);
1875 }
1876
1877 static struct inode_operations proc_tgid_base_inode_operations = {
1878         .lookup         = proc_tgid_base_lookup,
1879         .getattr        = pid_getattr,
1880         .setattr        = proc_setattr,
1881 };
1882
1883 /**
1884  * proc_flush_task -  Remove dcache entries for @task from the /proc dcache.
1885  *
1886  * @task: task that should be flushed.
1887  *
1888  * Looks in the dcache for
1889  * /proc/@pid
1890  * /proc/@tgid/task/@pid
1891  * if either directory is present flushes it and all of it'ts children
1892  * from the dcache.
1893  *
1894  * It is safe and reasonable to cache /proc entries for a task until
1895  * that task exits.  After that they just clog up the dcache with
1896  * useless entries, possibly causing useful dcache entries to be
1897  * flushed instead.  This routine is proved to flush those useless
1898  * dcache entries at process exit time.
1899  *
1900  * NOTE: This routine is just an optimization so it does not guarantee
1901  *       that no dcache entries will exist at process exit time it
1902  *       just makes it very unlikely that any will persist.
1903  */
1904 void proc_flush_task(struct task_struct *task)
1905 {
1906         struct dentry *dentry, *leader, *dir;
1907         char buf[PROC_NUMBUF];
1908         struct qstr name;
1909
1910         name.name = buf;
1911         name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
1912         dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
1913         if (dentry) {
1914                 shrink_dcache_parent(dentry);
1915                 d_drop(dentry);
1916                 dput(dentry);
1917         }
1918
1919         if (thread_group_leader(task))
1920                 goto out;
1921
1922         name.name = buf;
1923         name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
1924         leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
1925         if (!leader)
1926                 goto out;
1927
1928         name.name = "task";
1929         name.len = strlen(name.name);
1930         dir = d_hash_and_lookup(leader, &name);
1931         if (!dir)
1932                 goto out_put_leader;
1933
1934         name.name = buf;
1935         name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
1936         dentry = d_hash_and_lookup(dir, &name);
1937         if (dentry) {
1938                 shrink_dcache_parent(dentry);
1939                 d_drop(dentry);
1940                 dput(dentry);
1941         }
1942
1943         dput(dir);
1944 out_put_leader:
1945         dput(leader);
1946 out:
1947         return;
1948 }
1949
1950 /* SMP-safe */
1951 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
1952 {
1953         struct dentry *result = ERR_PTR(-ENOENT);
1954         struct task_struct *task;
1955         struct inode *inode;
1956         struct proc_inode *ei;
1957         unsigned tgid;
1958
1959         if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
1960                 inode = new_inode(dir->i_sb);
1961                 if (!inode)
1962                         return ERR_PTR(-ENOMEM);
1963                 ei = PROC_I(inode);
1964                 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1965                 inode->i_ino = fake_ino(0, PROC_TGID_INO);
1966                 ei->pde = NULL;
1967                 inode->i_mode = S_IFLNK|S_IRWXUGO;
1968                 inode->i_uid = inode->i_gid = 0;
1969                 inode->i_size = 64;
1970                 inode->i_op = &proc_self_inode_operations;
1971                 d_add(dentry, inode);
1972                 return NULL;
1973         }
1974         tgid = name_to_int(dentry);
1975         if (tgid == ~0U)
1976                 goto out;
1977
1978         rcu_read_lock();
1979         task = find_task_by_pid(tgid);
1980         if (task)
1981                 get_task_struct(task);
1982         rcu_read_unlock();
1983         if (!task)
1984                 goto out;
1985
1986         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
1987         if (!inode)
1988                 goto out_put_task;
1989
1990         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1991         inode->i_op = &proc_tgid_base_inode_operations;
1992         inode->i_fop = &proc_tgid_base_operations;
1993         inode->i_flags|=S_IMMUTABLE;
1994 #ifdef CONFIG_SECURITY
1995         inode->i_nlink = 5;
1996 #else
1997         inode->i_nlink = 4;
1998 #endif
1999
2000         dentry->d_op = &pid_dentry_operations;
2001
2002         d_add(dentry, inode);
2003         /* Close the race of the process dying before we return the dentry */
2004         if (pid_revalidate(dentry, NULL))
2005                 result = NULL;
2006
2007 out_put_task:
2008         put_task_struct(task);
2009 out:
2010         return result;
2011 }
2012
2013 /*
2014  * Find the first task with tgid >= tgid
2015  *
2016  */
2017 static struct task_struct *next_tgid(unsigned int tgid)
2018 {
2019         struct task_struct *task;
2020         struct pid *pid;
2021
2022         rcu_read_lock();
2023 retry:
2024         task = NULL;
2025         pid = find_ge_pid(tgid);
2026         if (pid) {
2027                 tgid = pid->nr + 1;
2028                 task = pid_task(pid, PIDTYPE_PID);
2029                 /* What we to know is if the pid we have find is the
2030                  * pid of a thread_group_leader.  Testing for task
2031                  * being a thread_group_leader is the obvious thing
2032                  * todo but there is a window when it fails, due to
2033                  * the pid transfer logic in de_thread.
2034                  *
2035                  * So we perform the straight forward test of seeing
2036                  * if the pid we have found is the pid of a thread
2037                  * group leader, and don't worry if the task we have
2038                  * found doesn't happen to be a thread group leader.
2039                  * As we don't care in the case of readdir.
2040                  */
2041                 if (!task || !has_group_leader_pid(task))
2042                         goto retry;
2043                 get_task_struct(task);
2044         }
2045         rcu_read_unlock();
2046         return task;
2047 }
2048
2049 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + (1 /* /proc/self */))
2050
2051 /* for the /proc/ directory itself, after non-process stuff has been done */
2052 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2053 {
2054         char buf[PROC_NUMBUF];
2055         unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
2056         struct task_struct *task;
2057         int tgid;
2058
2059         if (!nr) {
2060                 ino_t ino = fake_ino(0,PROC_TGID_INO);
2061                 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
2062                         return 0;
2063                 filp->f_pos++;
2064                 nr++;
2065         }
2066
2067         tgid = filp->f_pos - TGID_OFFSET;
2068         for (task = next_tgid(tgid);
2069              task;
2070              put_task_struct(task), task = next_tgid(tgid + 1)) {
2071                 int len;
2072                 ino_t ino;
2073                 tgid = task->pid;
2074                 filp->f_pos = tgid + TGID_OFFSET;
2075                 len = snprintf(buf, sizeof(buf), "%d", tgid);
2076                 ino = fake_ino(tgid, PROC_TGID_INO);
2077                 if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) {
2078                         put_task_struct(task);
2079                         goto out;
2080                 }
2081         }
2082         filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
2083 out:
2084         return 0;
2085 }
2086
2087 /*
2088  * Tasks
2089  */
2090 static struct pid_entry tid_base_stuff[] = {
2091         E(PROC_TID_FD,         "fd",      S_IFDIR|S_IRUSR|S_IXUSR),
2092         E(PROC_TID_ENVIRON,    "environ", S_IFREG|S_IRUSR),
2093         E(PROC_TID_AUXV,       "auxv",    S_IFREG|S_IRUSR),
2094         E(PROC_TID_STATUS,     "status",  S_IFREG|S_IRUGO),
2095         E(PROC_TID_CMDLINE,    "cmdline", S_IFREG|S_IRUGO),
2096         E(PROC_TID_STAT,       "stat",    S_IFREG|S_IRUGO),
2097         E(PROC_TID_STATM,      "statm",   S_IFREG|S_IRUGO),
2098         E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUGO),
2099 #ifdef CONFIG_NUMA
2100         E(PROC_TID_NUMA_MAPS,  "numa_maps",    S_IFREG|S_IRUGO),
2101 #endif
2102         E(PROC_TID_MEM,        "mem",     S_IFREG|S_IRUSR|S_IWUSR),
2103 #ifdef CONFIG_SECCOMP
2104         E(PROC_TID_SECCOMP,    "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
2105 #endif
2106         E(PROC_TID_CWD,        "cwd",     S_IFLNK|S_IRWXUGO),
2107         E(PROC_TID_ROOT,       "root",    S_IFLNK|S_IRWXUGO),
2108         E(PROC_TID_EXE,        "exe",     S_IFLNK|S_IRWXUGO),
2109         E(PROC_TID_MOUNTS,     "mounts",  S_IFREG|S_IRUGO),
2110 #ifdef CONFIG_MMU
2111         E(PROC_TID_SMAPS,      "smaps",   S_IFREG|S_IRUGO),
2112 #endif
2113 #ifdef CONFIG_SECURITY
2114         E(PROC_TID_ATTR,       "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
2115 #endif
2116 #ifdef CONFIG_KALLSYMS
2117         E(PROC_TID_WCHAN,      "wchan",   S_IFREG|S_IRUGO),
2118 #endif
2119 #ifdef CONFIG_SCHEDSTATS
2120         E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO),
2121 #endif
2122 #ifdef CONFIG_CPUSETS
2123         E(PROC_TID_CPUSET,     "cpuset",  S_IFREG|S_IRUGO),
2124 #endif
2125         E(PROC_TID_OOM_SCORE,  "oom_score",S_IFREG|S_IRUGO),
2126         E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
2127 #ifdef CONFIG_AUDITSYSCALL
2128         E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
2129 #endif
2130         {0,0,NULL,0}
2131 };
2132
2133 static int proc_tid_base_readdir(struct file * filp,
2134                              void * dirent, filldir_t filldir)
2135 {
2136         return proc_pident_readdir(filp,dirent,filldir,
2137                                    tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
2138 }
2139
2140 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
2141         return proc_pident_lookup(dir, dentry, tid_base_stuff);
2142 }
2143
2144 static struct file_operations proc_tid_base_operations = {
2145         .read           = generic_read_dir,
2146         .readdir        = proc_tid_base_readdir,
2147 };
2148
2149 static struct inode_operations proc_tid_base_inode_operations = {
2150         .lookup         = proc_tid_base_lookup,
2151         .getattr        = pid_getattr,
2152         .setattr        = proc_setattr,
2153 };
2154
2155 /* SMP-safe */
2156 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2157 {
2158         struct dentry *result = ERR_PTR(-ENOENT);
2159         struct task_struct *task;
2160         struct task_struct *leader = get_proc_task(dir);
2161         struct inode *inode;
2162         unsigned tid;
2163
2164         if (!leader)
2165                 goto out_no_task;
2166
2167         tid = name_to_int(dentry);
2168         if (tid == ~0U)
2169                 goto out;
2170
2171         rcu_read_lock();
2172         task = find_task_by_pid(tid);
2173         if (task)
2174                 get_task_struct(task);
2175         rcu_read_unlock();
2176         if (!task)
2177                 goto out;
2178         if (leader->tgid != task->tgid)
2179                 goto out_drop_task;
2180
2181         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
2182
2183
2184         if (!inode)
2185                 goto out_drop_task;
2186         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2187         inode->i_op = &proc_tid_base_inode_operations;
2188         inode->i_fop = &proc_tid_base_operations;
2189         inode->i_flags|=S_IMMUTABLE;
2190 #ifdef CONFIG_SECURITY
2191         inode->i_nlink = 4;
2192 #else
2193         inode->i_nlink = 3;
2194 #endif
2195
2196         dentry->d_op = &pid_dentry_operations;
2197
2198         d_add(dentry, inode);
2199         /* Close the race of the process dying before we return the dentry */
2200         if (pid_revalidate(dentry, NULL))
2201                 result = NULL;
2202
2203 out_drop_task:
2204         put_task_struct(task);
2205 out:
2206         put_task_struct(leader);
2207 out_no_task:
2208         return result;
2209 }
2210
2211 /*
2212  * Find the first tid of a thread group to return to user space.
2213  *
2214  * Usually this is just the thread group leader, but if the users
2215  * buffer was too small or there was a seek into the middle of the
2216  * directory we have more work todo.
2217  *
2218  * In the case of a short read we start with find_task_by_pid.
2219  *
2220  * In the case of a seek we start with the leader and walk nr
2221  * threads past it.
2222  */
2223 static struct task_struct *first_tid(struct task_struct *leader,
2224                                         int tid, int nr)
2225 {
2226         struct task_struct *pos;
2227
2228         rcu_read_lock();
2229         /* Attempt to start with the pid of a thread */
2230         if (tid && (nr > 0)) {
2231                 pos = find_task_by_pid(tid);
2232                 if (pos && (pos->group_leader == leader))
2233                         goto found;
2234         }
2235
2236         /* If nr exceeds the number of threads there is nothing todo */
2237         pos = NULL;
2238         if (nr && nr >= get_nr_threads(leader))
2239                 goto out;
2240
2241         /* If we haven't found our starting place yet start
2242          * with the leader and walk nr threads forward.
2243          */
2244         for (pos = leader; nr > 0; --nr) {
2245                 pos = next_thread(pos);
2246                 if (pos == leader) {
2247                         pos = NULL;
2248                         goto out;
2249                 }
2250         }
2251 found:
2252         get_task_struct(pos);
2253 out:
2254         rcu_read_unlock();
2255         return pos;
2256 }
2257
2258 /*
2259  * Find the next thread in the thread list.
2260  * Return NULL if there is an error or no next thread.
2261  *
2262  * The reference to the input task_struct is released.
2263  */
2264 static struct task_struct *next_tid(struct task_struct *start)
2265 {
2266         struct task_struct *pos = NULL;
2267         rcu_read_lock();
2268         if (pid_alive(start)) {
2269                 pos = next_thread(start);
2270                 if (thread_group_leader(pos))
2271                         pos = NULL;
2272                 else
2273                         get_task_struct(pos);
2274         }
2275         rcu_read_unlock();
2276         put_task_struct(start);
2277         return pos;
2278 }
2279
2280 /* for the /proc/TGID/task/ directories */
2281 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
2282 {
2283         char buf[PROC_NUMBUF];
2284         struct dentry *dentry = filp->f_dentry;
2285         struct inode *inode = dentry->d_inode;
2286         struct task_struct *leader = get_proc_task(inode);
2287         struct task_struct *task;
2288         int retval = -ENOENT;
2289         ino_t ino;
2290         int tid;
2291         unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
2292
2293         if (!leader)
2294                 goto out_no_task;
2295         retval = 0;
2296
2297         switch (pos) {
2298         case 0:
2299                 ino = inode->i_ino;
2300                 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
2301                         goto out;
2302                 pos++;
2303                 /* fall through */
2304         case 1:
2305                 ino = parent_ino(dentry);
2306                 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
2307                         goto out;
2308                 pos++;
2309                 /* fall through */
2310         }
2311
2312         /* f_version caches the tgid value that the last readdir call couldn't
2313          * return. lseek aka telldir automagically resets f_version to 0.
2314          */
2315         tid = filp->f_version;
2316         filp->f_version = 0;
2317         for (task = first_tid(leader, tid, pos - 2);
2318              task;
2319              task = next_tid(task), pos++) {
2320                 int len;
2321                 tid = task->pid;
2322                 len = snprintf(buf, sizeof(buf), "%d", tid);
2323                 ino = fake_ino(tid, PROC_TID_INO);
2324                 if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) {
2325                         /* returning this tgid failed, save it as the first
2326                          * pid for the next readir call */
2327                         filp->f_version = tid;
2328                         put_task_struct(task);
2329                         break;
2330                 }
2331         }
2332 out:
2333         filp->f_pos = pos;
2334         put_task_struct(leader);
2335 out_no_task:
2336         return retval;
2337 }
2338
2339 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
2340 {
2341         struct inode *inode = dentry->d_inode;
2342         struct task_struct *p = get_proc_task(inode);
2343         generic_fillattr(inode, stat);
2344
2345         if (p) {
2346                 rcu_read_lock();
2347                 stat->nlink += get_nr_threads(p);
2348                 rcu_read_unlock();
2349                 put_task_struct(p);
2350         }
2351
2352         return 0;
2353 }
2354
2355 static struct inode_operations proc_task_inode_operations = {
2356         .lookup         = proc_task_lookup,
2357         .getattr        = proc_task_getattr,
2358         .setattr        = proc_setattr,
2359 };
2360
2361 static struct file_operations proc_task_operations = {
2362         .read           = generic_read_dir,
2363         .readdir        = proc_task_readdir,
2364 };