[PATCH] splice: fix bugs with stealing regular pipe pages
[safe/jmp/linux-2.6] / fs / pipe.c
1 /*
2  *  linux/fs/pipe.c
3  *
4  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
5  */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <linux/uio.h>
17 #include <linux/highmem.h>
18 #include <linux/pagemap.h>
19
20 #include <asm/uaccess.h>
21 #include <asm/ioctls.h>
22
23 /*
24  * We use a start+len construction, which provides full use of the 
25  * allocated memory.
26  * -- Florian Coosmann (FGC)
27  * 
28  * Reads with count = 0 should always return 0.
29  * -- Julian Bradfield 1999-06-07.
30  *
31  * FIFOs and Pipes now generate SIGIO for both readers and writers.
32  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
33  *
34  * pipe_read & write cleanup
35  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
36  */
37
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
	DEFINE_WAIT(wait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(&pipe->wait, &wait,
			TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
	/* Release i_mutex while asleep so the other end can make progress.
	 * pipe->inode may be NULL for pipes not backed by an inode. */
	if (pipe->inode)
		mutex_unlock(&pipe->inode->i_mutex);
	schedule();
	finish_wait(&pipe->wait, &wait);
	/* Re-take i_mutex: callers rely on holding it again on return. */
	if (pipe->inode)
		mutex_lock(&pipe->inode->i_mutex);
}
56
57 static int
58 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
59 {
60         unsigned long copy;
61
62         while (len > 0) {
63                 while (!iov->iov_len)
64                         iov++;
65                 copy = min_t(unsigned long, len, iov->iov_len);
66
67                 if (copy_from_user(to, iov->iov_base, copy))
68                         return -EFAULT;
69                 to += copy;
70                 len -= copy;
71                 iov->iov_base += copy;
72                 iov->iov_len -= copy;
73         }
74         return 0;
75 }
76
77 static int
78 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
79 {
80         unsigned long copy;
81
82         while (len > 0) {
83                 while (!iov->iov_len)
84                         iov++;
85                 copy = min_t(unsigned long, len, iov->iov_len);
86
87                 if (copy_to_user(iov->iov_base, from, copy))
88                         return -EFAULT;
89                 from += copy;
90                 len -= copy;
91                 iov->iov_base += copy;
92                 iov->iov_len -= copy;
93         }
94         return 0;
95 }
96
97 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
98                                   struct pipe_buffer *buf)
99 {
100         struct page *page = buf->page;
101
102         buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
103
104         /*
105          * If nobody else uses this page, and we don't already have a
106          * temporary page, let's keep track of it as a one-deep
107          * allocation cache. (Otherwise just release our reference to it)
108          */
109         if (page_count(page) == 1 && !pipe->tmp_page)
110                 pipe->tmp_page = page;
111         else
112                 page_cache_release(page);
113 }
114
115 static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe,
116                                 struct pipe_buffer *buf)
117 {
118         return kmap(buf->page);
119 }
120
121 static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
122                                 struct pipe_buffer *buf)
123 {
124         kunmap(buf->page);
125 }
126
/*
 * Try to steal the page backing this buffer (used by splice to move
 * pipe pages elsewhere without copying).  Stealing only succeeds when
 * the pipe holds the sole reference to the page; on success the page
 * is returned locked and the buffer is flagged STOLEN so that
 * anon_pipe_buf_release() knows ownership has moved.
 *
 * Returns 0 on success, 1 if the page is shared and cannot be stolen.
 */
static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
			       struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	if (page_count(page) == 1) {
		buf->flags |= PIPE_BUF_FLAG_STOLEN;
		lock_page(page);
		return 0;
	}

	return 1;
}
140
141 static void anon_pipe_buf_get(struct pipe_inode_info *info,
142                               struct pipe_buffer *buf)
143 {
144         page_cache_get(buf->page);
145 }
146
/* Buffer operations for ordinary (anonymous) pipe pages. */
static struct pipe_buf_operations anon_pipe_buf_ops = {
	.can_merge = 1,	/* small writes may be appended to the last buffer */
	.map = anon_pipe_buf_map,
	.unmap = anon_pipe_buf_unmap,
	.release = anon_pipe_buf_release,
	.steal = anon_pipe_buf_steal,
	.get = anon_pipe_buf_get,
};
155
/*
 * Read from the pipe into a user iovec.
 *
 * Data is consumed from the oldest buffer of the circular
 * pipe->bufs[] ring, advancing curbuf/nrbufs as buffers drain.
 * Blocks (unless O_NONBLOCK) while the pipe is empty and writers
 * still exist.  Takes i_mutex internally; pipe_wait() drops and
 * re-takes it around sleeping.
 *
 * Returns the number of bytes read, 0 at EOF (no writers left), or a
 * negative errno.
 */
static ssize_t
pipe_readv(struct file *filp, const struct iovec *_iov,
	   unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *pipe;
	int do_wakeup;
	ssize_t ret;
	/* cast away const: pipe_iov_copy_to_user() advances the iovec */
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;
	for (;;) {
		int bufs = pipe->nrbufs;
		if (bufs) {
			int curbuf = pipe->curbuf;
			struct pipe_buffer *buf = pipe->bufs + curbuf;
			struct pipe_buf_operations *ops = buf->ops;
			void *addr;
			size_t chars = buf->len;
			int error;

			if (chars > total_len)
				chars = total_len;

			addr = ops->map(filp, pipe, buf);
			if (IS_ERR(addr)) {
				/* only report the error if nothing was read yet */
				if (!ret)
					ret = PTR_ERR(addr);
				break;
			}
			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
			ops->unmap(pipe, buf);
			if (unlikely(error)) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				/* buffer fully drained: release it and advance the ring */
				buf->ops = NULL;
				ops->release(pipe, buf);
				curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
				pipe->curbuf = curbuf;
				pipe->nrbufs = --bufs;
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		if (!pipe->writers)
			break;	/* EOF: no writers left */
		if (!pipe->waiting_writers) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			/* we freed buffer space: wake sleeping writers */
			wake_up_interruptible_sync(&pipe->wait);
			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
		}
		pipe_wait(pipe);	/* drops and re-takes i_mutex */
	}
	mutex_unlock(&inode->i_mutex);

	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}
256
257 static ssize_t
258 pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
259 {
260         struct iovec iov = { .iov_base = buf, .iov_len = count };
261
262         return pipe_readv(filp, &iov, 1, ppos);
263 }
264
265 static ssize_t
266 pipe_writev(struct file *filp, const struct iovec *_iov,
267             unsigned long nr_segs, loff_t *ppos)
268 {
269         struct inode *inode = filp->f_dentry->d_inode;
270         struct pipe_inode_info *pipe;
271         ssize_t ret;
272         int do_wakeup;
273         struct iovec *iov = (struct iovec *)_iov;
274         size_t total_len;
275         ssize_t chars;
276
277         total_len = iov_length(iov, nr_segs);
278         /* Null write succeeds. */
279         if (unlikely(total_len == 0))
280                 return 0;
281
282         do_wakeup = 0;
283         ret = 0;
284         mutex_lock(&inode->i_mutex);
285         pipe = inode->i_pipe;
286
287         if (!pipe->readers) {
288                 send_sig(SIGPIPE, current, 0);
289                 ret = -EPIPE;
290                 goto out;
291         }
292
293         /* We try to merge small writes */
294         chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
295         if (pipe->nrbufs && chars != 0) {
296                 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
297                                                         (PIPE_BUFFERS-1);
298                 struct pipe_buffer *buf = pipe->bufs + lastbuf;
299                 struct pipe_buf_operations *ops = buf->ops;
300                 int offset = buf->offset + buf->len;
301
302                 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
303                         void *addr;
304                         int error;
305
306                         addr = ops->map(filp, pipe, buf);
307                         if (IS_ERR(addr)) {
308                                 error = PTR_ERR(addr);
309                                 goto out;
310                         }
311                         error = pipe_iov_copy_from_user(offset + addr, iov,
312                                                         chars);
313                         ops->unmap(pipe, buf);
314                         ret = error;
315                         do_wakeup = 1;
316                         if (error)
317                                 goto out;
318                         buf->len += chars;
319                         total_len -= chars;
320                         ret = chars;
321                         if (!total_len)
322                                 goto out;
323                 }
324         }
325
326         for (;;) {
327                 int bufs;
328
329                 if (!pipe->readers) {
330                         send_sig(SIGPIPE, current, 0);
331                         if (!ret)
332                                 ret = -EPIPE;
333                         break;
334                 }
335                 bufs = pipe->nrbufs;
336                 if (bufs < PIPE_BUFFERS) {
337                         int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
338                         struct pipe_buffer *buf = pipe->bufs + newbuf;
339                         struct page *page = pipe->tmp_page;
340                         int error;
341
342                         if (!page) {
343                                 page = alloc_page(GFP_HIGHUSER);
344                                 if (unlikely(!page)) {
345                                         ret = ret ? : -ENOMEM;
346                                         break;
347                                 }
348                                 pipe->tmp_page = page;
349                         }
350                         /* Always wake up, even if the copy fails. Otherwise
351                          * we lock up (O_NONBLOCK-)readers that sleep due to
352                          * syscall merging.
353                          * FIXME! Is this really true?
354                          */
355                         do_wakeup = 1;
356                         chars = PAGE_SIZE;
357                         if (chars > total_len)
358                                 chars = total_len;
359
360                         error = pipe_iov_copy_from_user(kmap(page), iov, chars);
361                         kunmap(page);
362                         if (unlikely(error)) {
363                                 if (!ret)
364                                         ret = -EFAULT;
365                                 break;
366                         }
367                         ret += chars;
368
369                         /* Insert it into the buffer array */
370                         buf->page = page;
371                         buf->ops = &anon_pipe_buf_ops;
372                         buf->offset = 0;
373                         buf->len = chars;
374                         pipe->nrbufs = ++bufs;
375                         pipe->tmp_page = NULL;
376
377                         total_len -= chars;
378                         if (!total_len)
379                                 break;
380                 }
381                 if (bufs < PIPE_BUFFERS)
382                         continue;
383                 if (filp->f_flags & O_NONBLOCK) {
384                         if (!ret)
385                                 ret = -EAGAIN;
386                         break;
387                 }
388                 if (signal_pending(current)) {
389                         if (!ret)
390                                 ret = -ERESTARTSYS;
391                         break;
392                 }
393                 if (do_wakeup) {
394                         wake_up_interruptible_sync(&pipe->wait);
395                         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
396                         do_wakeup = 0;
397                 }
398                 pipe->waiting_writers++;
399                 pipe_wait(pipe);
400                 pipe->waiting_writers--;
401         }
402 out:
403         mutex_unlock(&inode->i_mutex);
404         if (do_wakeup) {
405                 wake_up_interruptible(&pipe->wait);
406                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
407         }
408         if (ret > 0)
409                 file_update_time(filp);
410         return ret;
411 }
412
413 static ssize_t
414 pipe_write(struct file *filp, const char __user *buf,
415            size_t count, loff_t *ppos)
416 {
417         struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
418
419         return pipe_writev(filp, &iov, 1, ppos);
420 }
421
/* read() on the write-only end of a pipe/FIFO: always fails. */
static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}
427
/* write() on the read-only end of a pipe/FIFO: always fails. */
static ssize_t
bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
	   loff_t *ppos)
{
	return -EBADF;
}
434
435 static int
436 pipe_ioctl(struct inode *pino, struct file *filp,
437            unsigned int cmd, unsigned long arg)
438 {
439         struct inode *inode = filp->f_dentry->d_inode;
440         struct pipe_inode_info *pipe;
441         int count, buf, nrbufs;
442
443         switch (cmd) {
444                 case FIONREAD:
445                         mutex_lock(&inode->i_mutex);
446                         pipe = inode->i_pipe;
447                         count = 0;
448                         buf = pipe->curbuf;
449                         nrbufs = pipe->nrbufs;
450                         while (--nrbufs >= 0) {
451                                 count += pipe->bufs[buf].len;
452                                 buf = (buf+1) & (PIPE_BUFFERS-1);
453                         }
454                         mutex_unlock(&inode->i_mutex);
455
456                         return put_user(count, (int __user *)arg);
457                 default:
458                         return -EINVAL;
459         }
460 }
461
/* No kernel lock held - fine */
static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
	unsigned int mask;
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *pipe = inode->i_pipe;
	int nrbufs;

	poll_wait(filp, &pipe->wait, wait);

	/* Reading only -- no need for acquiring the semaphore.  */
	nrbufs = pipe->nrbufs;
	mask = 0;
	if (filp->f_mode & FMODE_READ) {
		mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
		/* NOTE(review): f_version vs w_counter presumably suppresses
		 * POLLHUP until a writer has connected since this open —
		 * confirm against fifo_open()'s counter handling. */
		if (!pipe->writers && filp->f_version != pipe->w_counter)
			mask |= POLLHUP;
	}

	if (filp->f_mode & FMODE_WRITE) {
		mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
		/*
		 * Most Unices do not set POLLERR for FIFOs but on Linux they
		 * behave exactly like pipes for poll().
		 */
		if (!pipe->readers)
			mask |= POLLERR;
	}

	return mask;
}
494
495 static int
496 pipe_release(struct inode *inode, int decr, int decw)
497 {
498         struct pipe_inode_info *pipe;
499
500         mutex_lock(&inode->i_mutex);
501         pipe = inode->i_pipe;
502         pipe->readers -= decr;
503         pipe->writers -= decw;
504
505         if (!pipe->readers && !pipe->writers) {
506                 free_pipe_info(inode);
507         } else {
508                 wake_up_interruptible(&pipe->wait);
509                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
510                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
511         }
512         mutex_unlock(&inode->i_mutex);
513
514         return 0;
515 }
516
517 static int
518 pipe_read_fasync(int fd, struct file *filp, int on)
519 {
520         struct inode *inode = filp->f_dentry->d_inode;
521         int retval;
522
523         mutex_lock(&inode->i_mutex);
524         retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
525         mutex_unlock(&inode->i_mutex);
526
527         if (retval < 0)
528                 return retval;
529
530         return 0;
531 }
532
533
534 static int
535 pipe_write_fasync(int fd, struct file *filp, int on)
536 {
537         struct inode *inode = filp->f_dentry->d_inode;
538         int retval;
539
540         mutex_lock(&inode->i_mutex);
541         retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
542         mutex_unlock(&inode->i_mutex);
543
544         if (retval < 0)
545                 return retval;
546
547         return 0;
548 }
549
550
551 static int
552 pipe_rdwr_fasync(int fd, struct file *filp, int on)
553 {
554         struct inode *inode = filp->f_dentry->d_inode;
555         struct pipe_inode_info *pipe = inode->i_pipe;
556         int retval;
557
558         mutex_lock(&inode->i_mutex);
559
560         retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
561
562         if (retval >= 0)
563                 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
564
565         mutex_unlock(&inode->i_mutex);
566
567         if (retval < 0)
568                 return retval;
569
570         return 0;
571 }
572
573
/* Release a read-only pipe fd: detach fasync, drop one reader ref. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	pipe_read_fasync(-1, filp, 0);
	return pipe_release(inode, 1, 0);
}
580
/* Release a write-only pipe fd: detach fasync, drop one writer ref. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	pipe_write_fasync(-1, filp, 0);
	return pipe_release(inode, 0, 1);
}
587
588 static int
589 pipe_rdwr_release(struct inode *inode, struct file *filp)
590 {
591         int decr, decw;
592
593         pipe_rdwr_fasync(-1, filp, 0);
594         decr = (filp->f_mode & FMODE_READ) != 0;
595         decw = (filp->f_mode & FMODE_WRITE) != 0;
596         return pipe_release(inode, decr, decw);
597 }
598
/* Opening the read end: count one more reader, under i_mutex. */
static int
pipe_read_open(struct inode *inode, struct file *filp)
{
	/* We could have perhaps used atomic_t, but this and friends
	   below are the only places.  So it doesn't seem worthwhile.  */
	mutex_lock(&inode->i_mutex);
	inode->i_pipe->readers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}
610
/* Opening the write end: count one more writer, under i_mutex. */
static int
pipe_write_open(struct inode *inode, struct file *filp)
{
	mutex_lock(&inode->i_mutex);
	inode->i_pipe->writers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}
620
/* Opening for read/write: count a reader and/or writer per f_mode. */
static int
pipe_rdwr_open(struct inode *inode, struct file *filp)
{
	mutex_lock(&inode->i_mutex);
	if (filp->f_mode & FMODE_READ)
		inode->i_pipe->readers++;
	if (filp->f_mode & FMODE_WRITE)
		inode->i_pipe->writers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}
633
/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 */
/* FIFO opened read-only: writes fail with -EBADF. */
const struct file_operations read_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

/* FIFO opened write-only: reads fail with -EBADF. */
const struct file_operations write_fifo_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

/* FIFO opened read/write. */
const struct file_operations rdwr_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};
674
/* Anonymous pipe, read end (fd[0] of pipe(2)). */
static struct file_operations read_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

/* Anonymous pipe, write end (fd[1] of pipe(2)). */
static struct file_operations write_pipe_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

/* Anonymous pipe, read/write (installed on the shared inode). */
static struct file_operations rdwr_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};
711
712 struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
713 {
714         struct pipe_inode_info *pipe;
715
716         pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
717         if (pipe) {
718                 init_waitqueue_head(&pipe->wait);
719                 pipe->r_counter = pipe->w_counter = 1;
720                 pipe->inode = inode;
721         }
722
723         return pipe;
724 }
725
726 void __free_pipe_info(struct pipe_inode_info *pipe)
727 {
728         int i;
729
730         for (i = 0; i < PIPE_BUFFERS; i++) {
731                 struct pipe_buffer *buf = pipe->bufs + i;
732                 if (buf->ops)
733                         buf->ops->release(pipe, buf);
734         }
735         if (pipe->tmp_page)
736                 __free_page(pipe->tmp_page);
737         kfree(pipe);
738 }
739
/* Detach and free the pipe state hanging off 'inode'. */
void free_pipe_info(struct inode *inode)
{
	__free_pipe_info(inode->i_pipe);
	inode->i_pipe = NULL;
}
745
/* Internal mount of pipefs, set up in init_pipe_fs(). */
static struct vfsmount *pipe_mnt __read_mostly;
/* Always unhash pipefs dentries on final dput. */
static int pipefs_delete_dentry(struct dentry *dentry)
{
	return 1;
}

static struct dentry_operations pipefs_dentry_operations = {
	.d_delete	= pipefs_delete_dentry,
};
755
/*
 * Allocate a fresh pipefs inode with its pipe_inode_info attached and
 * its fields initialised for an anonymous pipe.  Returns NULL on
 * failure (inode or pipe allocation).
 */
static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode(pipe_mnt->mnt_sb);
	struct pipe_inode_info *pipe;

	if (!inode)
		goto fail_inode;

	pipe = alloc_pipe_info(inode);
	if (!pipe)
		goto fail_iput;
	inode->i_pipe = pipe;

	/* start with one reader and one writer: the two pipe(2) fds */
	pipe->readers = pipe->writers = 1;
	inode->i_fop = &rdwr_pipe_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	inode->i_blksize = PAGE_SIZE;

	return inode;

fail_iput:
	iput(inode);

fail_inode:
	return NULL;
}
793
/*
 * Create a pipe: two struct files (read end, write end) sharing one
 * pipefs inode and dentry.  On success the new fds are stored in
 * fd[0] (read) and fd[1] (write) and 0 is returned; on failure every
 * partially-acquired resource is unwound and a negative errno is
 * returned.
 */
int do_pipe(int *fd)
{
	struct qstr this;
	char name[32];
	struct dentry *dentry;
	struct inode * inode;
	struct file *f1, *f2;
	int error;
	int i, j;

	error = -ENFILE;
	f1 = get_empty_filp();
	if (!f1)
		goto no_files;

	f2 = get_empty_filp();
	if (!f2)
		goto close_f1;

	inode = get_pipe_inode();
	if (!inode)
		goto close_f12;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode;
	i = error;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode_i;
	j = error;

	error = -ENOMEM;
	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len = strlen(name);
	this.hash = inode->i_ino; /* will go */
	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
	if (!dentry)
		goto close_f12_inode_i_j;

	dentry->d_op = &pipefs_dentry_operations;
	d_add(dentry, inode);
	/* Both files share the dentry and mount: d_alloc's reference plus
	 * one dget gives one per file; two mntgets likewise. */
	f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
	f1->f_dentry = f2->f_dentry = dget(dentry);
	f1->f_mapping = f2->f_mapping = inode->i_mapping;

	/* read file */
	f1->f_pos = f2->f_pos = 0;
	f1->f_flags = O_RDONLY;
	f1->f_op = &read_pipe_fops;
	f1->f_mode = FMODE_READ;
	f1->f_version = 0;

	/* write file */
	f2->f_flags = O_WRONLY;
	f2->f_op = &write_pipe_fops;
	f2->f_mode = FMODE_WRITE;
	f2->f_version = 0;

	fd_install(i, f1);
	fd_install(j, f2);
	fd[0] = i;
	fd[1] = j;

	return 0;

	/* error unwind: undo acquisitions in reverse order */
close_f12_inode_i_j:
	put_unused_fd(j);
close_f12_inode_i:
	put_unused_fd(i);
close_f12_inode:
	free_pipe_info(inode);
	iput(inode);
close_f12:
	put_filp(f2);
close_f1:
	put_filp(f1);
no_files:
	return error;
}
876
877 /*
878  * pipefs should _never_ be mounted by userland - too much of security hassle,
879  * no real gain from having the whole whorehouse mounted. So we don't need
880  * any operations on the root directory. However, we need a non-trivial
881  * d_name - pipe: will go nicely and kill the special-casing in procfs.
882  */
883
/* Mount callback: pipefs is a pseudo filesystem named "pipe:". */
static struct super_block *
pipefs_get_sb(struct file_system_type *fs_type, int flags,
	      const char *dev_name, void *data)
{
	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
}
890
/* Registration record for the internal pipefs filesystem. */
static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.get_sb		= pipefs_get_sb,
	.kill_sb	= kill_anon_super,
};
896
897 static int __init init_pipe_fs(void)
898 {
899         int err = register_filesystem(&pipe_fs_type);
900
901         if (!err) {
902                 pipe_mnt = kern_mount(&pipe_fs_type);
903                 if (IS_ERR(pipe_mnt)) {
904                         err = PTR_ERR(pipe_mnt);
905                         unregister_filesystem(&pipe_fs_type);
906                 }
907         }
908         return err;
909 }
910
/* Unregister pipefs and drop the internal mount reference. */
static void __exit exit_pipe_fs(void)
{
	unregister_filesystem(&pipe_fs_type);
	mntput(pipe_mnt);
}

fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);