[PATCH] another round of fs/pipe.c cleanups
[safe/jmp/linux-2.6] / fs / pipe.c
1 /*
2  *  linux/fs/pipe.c
3  *
4  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
5  */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <linux/uio.h>
17 #include <linux/highmem.h>
18 #include <linux/pagemap.h>
19
20 #include <asm/uaccess.h>
21 #include <asm/ioctls.h>
22
23 /*
24  * We use a start+len construction, which provides full use of the 
25  * allocated memory.
26  * -- Florian Coosmann (FGC)
27  * 
28  * Reads with count = 0 should always return 0.
29  * -- Julian Bradfield 1999-06-07.
30  *
31  * FIFOs and Pipes now generate SIGIO for both readers and writers.
32  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
33  *
34  * pipe_read & write cleanup
35  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
36  */
37
38 /* Drop the inode semaphore and wait for a pipe event, atomically */
39 void pipe_wait(struct pipe_inode_info *pipe)
40 {
41         DEFINE_WAIT(wait);
42
43         /*
44          * Pipes are system-local resources, so sleeping on them
45          * is considered a noninteractive wait:
46          */
47         prepare_to_wait(&pipe->wait, &wait,
48                         TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
49         if (pipe->inode)
50                 mutex_unlock(&pipe->inode->i_mutex);
51         schedule();
52         finish_wait(&pipe->wait, &wait);
53         if (pipe->inode)
54                 mutex_lock(&pipe->inode->i_mutex);
55 }
56
57 static int
58 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
59 {
60         unsigned long copy;
61
62         while (len > 0) {
63                 while (!iov->iov_len)
64                         iov++;
65                 copy = min_t(unsigned long, len, iov->iov_len);
66
67                 if (copy_from_user(to, iov->iov_base, copy))
68                         return -EFAULT;
69                 to += copy;
70                 len -= copy;
71                 iov->iov_base += copy;
72                 iov->iov_len -= copy;
73         }
74         return 0;
75 }
76
77 static int
78 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
79 {
80         unsigned long copy;
81
82         while (len > 0) {
83                 while (!iov->iov_len)
84                         iov++;
85                 copy = min_t(unsigned long, len, iov->iov_len);
86
87                 if (copy_to_user(iov->iov_base, from, copy))
88                         return -EFAULT;
89                 from += copy;
90                 len -= copy;
91                 iov->iov_base += copy;
92                 iov->iov_len -= copy;
93         }
94         return 0;
95 }
96
97 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
98                                   struct pipe_buffer *buf)
99 {
100         struct page *page = buf->page;
101
102         buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
103
104         /*
105          * If nobody else uses this page, and we don't already have a
106          * temporary page, let's keep track of it as a one-deep
107          * allocation cache. (Otherwise just release our reference to it)
108          */
109         if (page_count(page) == 1 && !pipe->tmp_page)
110                 pipe->tmp_page = page;
111         else
112                 page_cache_release(page);
113 }
114
115 static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe,
116                                 struct pipe_buffer *buf)
117 {
118         return kmap(buf->page);
119 }
120
121 static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
122                                 struct pipe_buffer *buf)
123 {
124         kunmap(buf->page);
125 }
126
127 static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
128                                struct pipe_buffer *buf)
129 {
130         buf->flags |= PIPE_BUF_FLAG_STOLEN;
131         return 0;
132 }
133
134 static struct pipe_buf_operations anon_pipe_buf_ops = {
135         .can_merge = 1,
136         .map = anon_pipe_buf_map,
137         .unmap = anon_pipe_buf_unmap,
138         .release = anon_pipe_buf_release,
139         .steal = anon_pipe_buf_steal,
140 };
141
142 static ssize_t
143 pipe_readv(struct file *filp, const struct iovec *_iov,
144            unsigned long nr_segs, loff_t *ppos)
145 {
146         struct inode *inode = filp->f_dentry->d_inode;
147         struct pipe_inode_info *pipe;
148         int do_wakeup;
149         ssize_t ret;
150         struct iovec *iov = (struct iovec *)_iov;
151         size_t total_len;
152
153         total_len = iov_length(iov, nr_segs);
154         /* Null read succeeds. */
155         if (unlikely(total_len == 0))
156                 return 0;
157
158         do_wakeup = 0;
159         ret = 0;
160         mutex_lock(&inode->i_mutex);
161         pipe = inode->i_pipe;
162         for (;;) {
163                 int bufs = pipe->nrbufs;
164                 if (bufs) {
165                         int curbuf = pipe->curbuf;
166                         struct pipe_buffer *buf = pipe->bufs + curbuf;
167                         struct pipe_buf_operations *ops = buf->ops;
168                         void *addr;
169                         size_t chars = buf->len;
170                         int error;
171
172                         if (chars > total_len)
173                                 chars = total_len;
174
175                         addr = ops->map(filp, pipe, buf);
176                         if (IS_ERR(addr)) {
177                                 if (!ret)
178                                         ret = PTR_ERR(addr);
179                                 break;
180                         }
181                         error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
182                         ops->unmap(pipe, buf);
183                         if (unlikely(error)) {
184                                 if (!ret)
185                                         ret = -EFAULT;
186                                 break;
187                         }
188                         ret += chars;
189                         buf->offset += chars;
190                         buf->len -= chars;
191                         if (!buf->len) {
192                                 buf->ops = NULL;
193                                 ops->release(pipe, buf);
194                                 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
195                                 pipe->curbuf = curbuf;
196                                 pipe->nrbufs = --bufs;
197                                 do_wakeup = 1;
198                         }
199                         total_len -= chars;
200                         if (!total_len)
201                                 break;  /* common path: read succeeded */
202                 }
203                 if (bufs)       /* More to do? */
204                         continue;
205                 if (!pipe->writers)
206                         break;
207                 if (!pipe->waiting_writers) {
208                         /* syscall merging: Usually we must not sleep
209                          * if O_NONBLOCK is set, or if we got some data.
210                          * But if a writer sleeps in kernel space, then
211                          * we can wait for that data without violating POSIX.
212                          */
213                         if (ret)
214                                 break;
215                         if (filp->f_flags & O_NONBLOCK) {
216                                 ret = -EAGAIN;
217                                 break;
218                         }
219                 }
220                 if (signal_pending(current)) {
221                         if (!ret)
222                                 ret = -ERESTARTSYS;
223                         break;
224                 }
225                 if (do_wakeup) {
226                         wake_up_interruptible_sync(&pipe->wait);
227                         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
228                 }
229                 pipe_wait(pipe);
230         }
231         mutex_unlock(&inode->i_mutex);
232
233         /* Signal writers asynchronously that there is more room. */
234         if (do_wakeup) {
235                 wake_up_interruptible(&pipe->wait);
236                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
237         }
238         if (ret > 0)
239                 file_accessed(filp);
240         return ret;
241 }
242
243 static ssize_t
244 pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
245 {
246         struct iovec iov = { .iov_base = buf, .iov_len = count };
247
248         return pipe_readv(filp, &iov, 1, ppos);
249 }
250
251 static ssize_t
252 pipe_writev(struct file *filp, const struct iovec *_iov,
253             unsigned long nr_segs, loff_t *ppos)
254 {
255         struct inode *inode = filp->f_dentry->d_inode;
256         struct pipe_inode_info *pipe;
257         ssize_t ret;
258         int do_wakeup;
259         struct iovec *iov = (struct iovec *)_iov;
260         size_t total_len;
261         ssize_t chars;
262
263         total_len = iov_length(iov, nr_segs);
264         /* Null write succeeds. */
265         if (unlikely(total_len == 0))
266                 return 0;
267
268         do_wakeup = 0;
269         ret = 0;
270         mutex_lock(&inode->i_mutex);
271         pipe = inode->i_pipe;
272
273         if (!pipe->readers) {
274                 send_sig(SIGPIPE, current, 0);
275                 ret = -EPIPE;
276                 goto out;
277         }
278
279         /* We try to merge small writes */
280         chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
281         if (pipe->nrbufs && chars != 0) {
282                 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
283                                                         (PIPE_BUFFERS-1);
284                 struct pipe_buffer *buf = pipe->bufs + lastbuf;
285                 struct pipe_buf_operations *ops = buf->ops;
286                 int offset = buf->offset + buf->len;
287
288                 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
289                         void *addr;
290                         int error;
291
292                         addr = ops->map(filp, pipe, buf);
293                         if (IS_ERR(addr)) {
294                                 error = PTR_ERR(addr);
295                                 goto out;
296                         }
297                         error = pipe_iov_copy_from_user(offset + addr, iov,
298                                                         chars);
299                         ops->unmap(pipe, buf);
300                         ret = error;
301                         do_wakeup = 1;
302                         if (error)
303                                 goto out;
304                         buf->len += chars;
305                         total_len -= chars;
306                         ret = chars;
307                         if (!total_len)
308                                 goto out;
309                 }
310         }
311
312         for (;;) {
313                 int bufs;
314
315                 if (!pipe->readers) {
316                         send_sig(SIGPIPE, current, 0);
317                         if (!ret)
318                                 ret = -EPIPE;
319                         break;
320                 }
321                 bufs = pipe->nrbufs;
322                 if (bufs < PIPE_BUFFERS) {
323                         int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
324                         struct pipe_buffer *buf = pipe->bufs + newbuf;
325                         struct page *page = pipe->tmp_page;
326                         int error;
327
328                         if (!page) {
329                                 page = alloc_page(GFP_HIGHUSER);
330                                 if (unlikely(!page)) {
331                                         ret = ret ? : -ENOMEM;
332                                         break;
333                                 }
334                                 pipe->tmp_page = page;
335                         }
336                         /* Always wake up, even if the copy fails. Otherwise
337                          * we lock up (O_NONBLOCK-)readers that sleep due to
338                          * syscall merging.
339                          * FIXME! Is this really true?
340                          */
341                         do_wakeup = 1;
342                         chars = PAGE_SIZE;
343                         if (chars > total_len)
344                                 chars = total_len;
345
346                         error = pipe_iov_copy_from_user(kmap(page), iov, chars);
347                         kunmap(page);
348                         if (unlikely(error)) {
349                                 if (!ret)
350                                         ret = -EFAULT;
351                                 break;
352                         }
353                         ret += chars;
354
355                         /* Insert it into the buffer array */
356                         buf->page = page;
357                         buf->ops = &anon_pipe_buf_ops;
358                         buf->offset = 0;
359                         buf->len = chars;
360                         pipe->nrbufs = ++bufs;
361                         pipe->tmp_page = NULL;
362
363                         total_len -= chars;
364                         if (!total_len)
365                                 break;
366                 }
367                 if (bufs < PIPE_BUFFERS)
368                         continue;
369                 if (filp->f_flags & O_NONBLOCK) {
370                         if (!ret)
371                                 ret = -EAGAIN;
372                         break;
373                 }
374                 if (signal_pending(current)) {
375                         if (!ret)
376                                 ret = -ERESTARTSYS;
377                         break;
378                 }
379                 if (do_wakeup) {
380                         wake_up_interruptible_sync(&pipe->wait);
381                         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
382                         do_wakeup = 0;
383                 }
384                 pipe->waiting_writers++;
385                 pipe_wait(pipe);
386                 pipe->waiting_writers--;
387         }
388 out:
389         mutex_unlock(&inode->i_mutex);
390         if (do_wakeup) {
391                 wake_up_interruptible(&pipe->wait);
392                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
393         }
394         if (ret > 0)
395                 file_update_time(filp);
396         return ret;
397 }
398
399 static ssize_t
400 pipe_write(struct file *filp, const char __user *buf,
401            size_t count, loff_t *ppos)
402 {
403         struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
404
405         return pipe_writev(filp, &iov, 1, ppos);
406 }
407
408 static ssize_t
409 bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
410 {
411         return -EBADF;
412 }
413
414 static ssize_t
415 bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
416            loff_t *ppos)
417 {
418         return -EBADF;
419 }
420
421 static int
422 pipe_ioctl(struct inode *pino, struct file *filp,
423            unsigned int cmd, unsigned long arg)
424 {
425         struct inode *inode = filp->f_dentry->d_inode;
426         struct pipe_inode_info *pipe;
427         int count, buf, nrbufs;
428
429         switch (cmd) {
430                 case FIONREAD:
431                         mutex_lock(&inode->i_mutex);
432                         pipe = inode->i_pipe;
433                         count = 0;
434                         buf = pipe->curbuf;
435                         nrbufs = pipe->nrbufs;
436                         while (--nrbufs >= 0) {
437                                 count += pipe->bufs[buf].len;
438                                 buf = (buf+1) & (PIPE_BUFFERS-1);
439                         }
440                         mutex_unlock(&inode->i_mutex);
441
442                         return put_user(count, (int __user *)arg);
443                 default:
444                         return -EINVAL;
445         }
446 }
447
448 /* No kernel lock held - fine */
449 static unsigned int
450 pipe_poll(struct file *filp, poll_table *wait)
451 {
452         unsigned int mask;
453         struct inode *inode = filp->f_dentry->d_inode;
454         struct pipe_inode_info *pipe = inode->i_pipe;
455         int nrbufs;
456
457         poll_wait(filp, &pipe->wait, wait);
458
459         /* Reading only -- no need for acquiring the semaphore.  */
460         nrbufs = pipe->nrbufs;
461         mask = 0;
462         if (filp->f_mode & FMODE_READ) {
463                 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
464                 if (!pipe->writers && filp->f_version != pipe->w_counter)
465                         mask |= POLLHUP;
466         }
467
468         if (filp->f_mode & FMODE_WRITE) {
469                 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
470                 /*
471                  * Most Unices do not set POLLERR for FIFOs but on Linux they
472                  * behave exactly like pipes for poll().
473                  */
474                 if (!pipe->readers)
475                         mask |= POLLERR;
476         }
477
478         return mask;
479 }
480
481 static int
482 pipe_release(struct inode *inode, int decr, int decw)
483 {
484         struct pipe_inode_info *pipe;
485
486         mutex_lock(&inode->i_mutex);
487         pipe = inode->i_pipe;
488         pipe->readers -= decr;
489         pipe->writers -= decw;
490
491         if (!pipe->readers && !pipe->writers) {
492                 free_pipe_info(inode);
493         } else {
494                 wake_up_interruptible(&pipe->wait);
495                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
496                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
497         }
498         mutex_unlock(&inode->i_mutex);
499
500         return 0;
501 }
502
503 static int
504 pipe_read_fasync(int fd, struct file *filp, int on)
505 {
506         struct inode *inode = filp->f_dentry->d_inode;
507         int retval;
508
509         mutex_lock(&inode->i_mutex);
510         retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
511         mutex_unlock(&inode->i_mutex);
512
513         if (retval < 0)
514                 return retval;
515
516         return 0;
517 }
518
519
520 static int
521 pipe_write_fasync(int fd, struct file *filp, int on)
522 {
523         struct inode *inode = filp->f_dentry->d_inode;
524         int retval;
525
526         mutex_lock(&inode->i_mutex);
527         retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
528         mutex_unlock(&inode->i_mutex);
529
530         if (retval < 0)
531                 return retval;
532
533         return 0;
534 }
535
536
537 static int
538 pipe_rdwr_fasync(int fd, struct file *filp, int on)
539 {
540         struct inode *inode = filp->f_dentry->d_inode;
541         struct pipe_inode_info *pipe = inode->i_pipe;
542         int retval;
543
544         mutex_lock(&inode->i_mutex);
545
546         retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
547
548         if (retval >= 0)
549                 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
550
551         mutex_unlock(&inode->i_mutex);
552
553         if (retval < 0)
554                 return retval;
555
556         return 0;
557 }
558
559
/* Release of a read-only pipe file: drop its fasync entry, then one reader. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_read_fasync(-1, filp, 0);
	rc = pipe_release(inode, 1, 0);

	return rc;
}
566
/* Release of a write-only pipe file: drop its fasync entry, then one writer. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_write_fasync(-1, filp, 0);
	rc = pipe_release(inode, 0, 1);

	return rc;
}
573
574 static int
575 pipe_rdwr_release(struct inode *inode, struct file *filp)
576 {
577         int decr, decw;
578
579         pipe_rdwr_fasync(-1, filp, 0);
580         decr = (filp->f_mode & FMODE_READ) != 0;
581         decw = (filp->f_mode & FMODE_WRITE) != 0;
582         return pipe_release(inode, decr, decw);
583 }
584
585 static int
586 pipe_read_open(struct inode *inode, struct file *filp)
587 {
588         /* We could have perhaps used atomic_t, but this and friends
589            below are the only places.  So it doesn't seem worthwhile.  */
590         mutex_lock(&inode->i_mutex);
591         inode->i_pipe->readers++;
592         mutex_unlock(&inode->i_mutex);
593
594         return 0;
595 }
596
597 static int
598 pipe_write_open(struct inode *inode, struct file *filp)
599 {
600         mutex_lock(&inode->i_mutex);
601         inode->i_pipe->writers++;
602         mutex_unlock(&inode->i_mutex);
603
604         return 0;
605 }
606
607 static int
608 pipe_rdwr_open(struct inode *inode, struct file *filp)
609 {
610         mutex_lock(&inode->i_mutex);
611         if (filp->f_mode & FMODE_READ)
612                 inode->i_pipe->readers++;
613         if (filp->f_mode & FMODE_WRITE)
614                 inode->i_pipe->writers++;
615         mutex_unlock(&inode->i_mutex);
616
617         return 0;
618 }
619
620 /*
621  * The file_operations structs are not static because they
622  * are also used in linux/fs/fifo.c to do operations on FIFOs.
623  */
624 const struct file_operations read_fifo_fops = {
625         .llseek         = no_llseek,
626         .read           = pipe_read,
627         .readv          = pipe_readv,
628         .write          = bad_pipe_w,
629         .poll           = pipe_poll,
630         .ioctl          = pipe_ioctl,
631         .open           = pipe_read_open,
632         .release        = pipe_read_release,
633         .fasync         = pipe_read_fasync,
634 };
635
636 const struct file_operations write_fifo_fops = {
637         .llseek         = no_llseek,
638         .read           = bad_pipe_r,
639         .write          = pipe_write,
640         .writev         = pipe_writev,
641         .poll           = pipe_poll,
642         .ioctl          = pipe_ioctl,
643         .open           = pipe_write_open,
644         .release        = pipe_write_release,
645         .fasync         = pipe_write_fasync,
646 };
647
648 const struct file_operations rdwr_fifo_fops = {
649         .llseek         = no_llseek,
650         .read           = pipe_read,
651         .readv          = pipe_readv,
652         .write          = pipe_write,
653         .writev         = pipe_writev,
654         .poll           = pipe_poll,
655         .ioctl          = pipe_ioctl,
656         .open           = pipe_rdwr_open,
657         .release        = pipe_rdwr_release,
658         .fasync         = pipe_rdwr_fasync,
659 };
660
661 static struct file_operations read_pipe_fops = {
662         .llseek         = no_llseek,
663         .read           = pipe_read,
664         .readv          = pipe_readv,
665         .write          = bad_pipe_w,
666         .poll           = pipe_poll,
667         .ioctl          = pipe_ioctl,
668         .open           = pipe_read_open,
669         .release        = pipe_read_release,
670         .fasync         = pipe_read_fasync,
671 };
672
673 static struct file_operations write_pipe_fops = {
674         .llseek         = no_llseek,
675         .read           = bad_pipe_r,
676         .write          = pipe_write,
677         .writev         = pipe_writev,
678         .poll           = pipe_poll,
679         .ioctl          = pipe_ioctl,
680         .open           = pipe_write_open,
681         .release        = pipe_write_release,
682         .fasync         = pipe_write_fasync,
683 };
684
685 static struct file_operations rdwr_pipe_fops = {
686         .llseek         = no_llseek,
687         .read           = pipe_read,
688         .readv          = pipe_readv,
689         .write          = pipe_write,
690         .writev         = pipe_writev,
691         .poll           = pipe_poll,
692         .ioctl          = pipe_ioctl,
693         .open           = pipe_rdwr_open,
694         .release        = pipe_rdwr_release,
695         .fasync         = pipe_rdwr_fasync,
696 };
697
698 struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
699 {
700         struct pipe_inode_info *pipe;
701
702         pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
703         if (pipe) {
704                 init_waitqueue_head(&pipe->wait);
705                 pipe->r_counter = pipe->w_counter = 1;
706                 pipe->inode = inode;
707         }
708
709         return pipe;
710 }
711
712 void __free_pipe_info(struct pipe_inode_info *pipe)
713 {
714         int i;
715
716         for (i = 0; i < PIPE_BUFFERS; i++) {
717                 struct pipe_buffer *buf = pipe->bufs + i;
718                 if (buf->ops)
719                         buf->ops->release(pipe, buf);
720         }
721         if (pipe->tmp_page)
722                 __free_page(pipe->tmp_page);
723         kfree(pipe);
724 }
725
726 void free_pipe_info(struct inode *inode)
727 {
728         __free_pipe_info(inode->i_pipe);
729         inode->i_pipe = NULL;
730 }
731
732 static struct vfsmount *pipe_mnt __read_mostly;
733 static int pipefs_delete_dentry(struct dentry *dentry)
734 {
735         return 1;
736 }
737
738 static struct dentry_operations pipefs_dentry_operations = {
739         .d_delete       = pipefs_delete_dentry,
740 };
741
742 static struct inode * get_pipe_inode(void)
743 {
744         struct inode *inode = new_inode(pipe_mnt->mnt_sb);
745         struct pipe_inode_info *pipe;
746
747         if (!inode)
748                 goto fail_inode;
749
750         pipe = alloc_pipe_info(inode);
751         if (!pipe)
752                 goto fail_iput;
753         inode->i_pipe = pipe;
754
755         pipe->readers = pipe->writers = 1;
756         inode->i_fop = &rdwr_pipe_fops;
757
758         /*
759          * Mark the inode dirty from the very beginning,
760          * that way it will never be moved to the dirty
761          * list because "mark_inode_dirty()" will think
762          * that it already _is_ on the dirty list.
763          */
764         inode->i_state = I_DIRTY;
765         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
766         inode->i_uid = current->fsuid;
767         inode->i_gid = current->fsgid;
768         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
769         inode->i_blksize = PAGE_SIZE;
770
771         return inode;
772
773 fail_iput:
774         iput(inode);
775
776 fail_inode:
777         return NULL;
778 }
779
780 int do_pipe(int *fd)
781 {
782         struct qstr this;
783         char name[32];
784         struct dentry *dentry;
785         struct inode * inode;
786         struct file *f1, *f2;
787         int error;
788         int i, j;
789
790         error = -ENFILE;
791         f1 = get_empty_filp();
792         if (!f1)
793                 goto no_files;
794
795         f2 = get_empty_filp();
796         if (!f2)
797                 goto close_f1;
798
799         inode = get_pipe_inode();
800         if (!inode)
801                 goto close_f12;
802
803         error = get_unused_fd();
804         if (error < 0)
805                 goto close_f12_inode;
806         i = error;
807
808         error = get_unused_fd();
809         if (error < 0)
810                 goto close_f12_inode_i;
811         j = error;
812
813         error = -ENOMEM;
814         sprintf(name, "[%lu]", inode->i_ino);
815         this.name = name;
816         this.len = strlen(name);
817         this.hash = inode->i_ino; /* will go */
818         dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
819         if (!dentry)
820                 goto close_f12_inode_i_j;
821
822         dentry->d_op = &pipefs_dentry_operations;
823         d_add(dentry, inode);
824         f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
825         f1->f_dentry = f2->f_dentry = dget(dentry);
826         f1->f_mapping = f2->f_mapping = inode->i_mapping;
827
828         /* read file */
829         f1->f_pos = f2->f_pos = 0;
830         f1->f_flags = O_RDONLY;
831         f1->f_op = &read_pipe_fops;
832         f1->f_mode = FMODE_READ;
833         f1->f_version = 0;
834
835         /* write file */
836         f2->f_flags = O_WRONLY;
837         f2->f_op = &write_pipe_fops;
838         f2->f_mode = FMODE_WRITE;
839         f2->f_version = 0;
840
841         fd_install(i, f1);
842         fd_install(j, f2);
843         fd[0] = i;
844         fd[1] = j;
845
846         return 0;
847
848 close_f12_inode_i_j:
849         put_unused_fd(j);
850 close_f12_inode_i:
851         put_unused_fd(i);
852 close_f12_inode:
853         free_pipe_info(inode);
854         iput(inode);
855 close_f12:
856         put_filp(f2);
857 close_f1:
858         put_filp(f1);
859 no_files:
860         return error;   
861 }
862
863 /*
864  * pipefs should _never_ be mounted by userland - too much of security hassle,
865  * no real gain from having the whole whorehouse mounted. So we don't need
866  * any operations on the root directory. However, we need a non-trivial
867  * d_name - pipe: will go nicely and kill the special-casing in procfs.
868  */
869
870 static struct super_block *
871 pipefs_get_sb(struct file_system_type *fs_type, int flags,
872               const char *dev_name, void *data)
873 {
874         return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
875 }
876
877 static struct file_system_type pipe_fs_type = {
878         .name           = "pipefs",
879         .get_sb         = pipefs_get_sb,
880         .kill_sb        = kill_anon_super,
881 };
882
883 static int __init init_pipe_fs(void)
884 {
885         int err = register_filesystem(&pipe_fs_type);
886
887         if (!err) {
888                 pipe_mnt = kern_mount(&pipe_fs_type);
889                 if (IS_ERR(pipe_mnt)) {
890                         err = PTR_ERR(pipe_mnt);
891                         unregister_filesystem(&pipe_fs_type);
892                 }
893         }
894         return err;
895 }
896
897 static void __exit exit_pipe_fs(void)
898 {
899         unregister_filesystem(&pipe_fs_type);
900         mntput(pipe_mnt);
901 }
902
903 fs_initcall(init_pipe_fs);
904 module_exit(exit_pipe_fs);