[PATCH] splice: improve writeback and clean up page stealing
[safe/jmp/linux-2.6] / fs / pipe.c
1 /*
2  *  linux/fs/pipe.c
3  *
4  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
5  */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <linux/uio.h>
17 #include <linux/highmem.h>
18 #include <linux/pagemap.h>
19
20 #include <asm/uaccess.h>
21 #include <asm/ioctls.h>
22
23 /*
24  * We use a start+len construction, which provides full use of the 
25  * allocated memory.
26  * -- Florian Coosmann (FGC)
27  * 
28  * Reads with count = 0 should always return 0.
29  * -- Julian Bradfield 1999-06-07.
30  *
31  * FIFOs and Pipes now generate SIGIO for both readers and writers.
32  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
33  *
34  * pipe_read & write cleanup
35  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
36  */
37
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct inode * inode)
{
	DEFINE_WAIT(wait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
	/*
	 * We queue ourselves on the wait queue *before* dropping the
	 * mutex, so a wakeup issued between the unlock and schedule()
	 * is not lost.
	 */
	mutex_unlock(PIPE_MUTEX(*inode));
	schedule();
	finish_wait(PIPE_WAIT(*inode), &wait);
	/* Caller expects to still hold the pipe mutex on return */
	mutex_lock(PIPE_MUTEX(*inode));
}
53
54 static int
55 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
56 {
57         unsigned long copy;
58
59         while (len > 0) {
60                 while (!iov->iov_len)
61                         iov++;
62                 copy = min_t(unsigned long, len, iov->iov_len);
63
64                 if (copy_from_user(to, iov->iov_base, copy))
65                         return -EFAULT;
66                 to += copy;
67                 len -= copy;
68                 iov->iov_base += copy;
69                 iov->iov_len -= copy;
70         }
71         return 0;
72 }
73
74 static int
75 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
76 {
77         unsigned long copy;
78
79         while (len > 0) {
80                 while (!iov->iov_len)
81                         iov++;
82                 copy = min_t(unsigned long, len, iov->iov_len);
83
84                 if (copy_to_user(iov->iov_base, from, copy))
85                         return -EFAULT;
86                 from += copy;
87                 len -= copy;
88                 iov->iov_base += copy;
89                 iov->iov_len -= copy;
90         }
91         return 0;
92 }
93
94 static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
95 {
96         struct page *page = buf->page;
97
98         /*
99          * If nobody else uses this page, and we don't already have a
100          * temporary page, let's keep track of it as a one-deep
101          * allocation cache
102          */
103         if (page_count(page) == 1 && !info->tmp_page) {
104                 info->tmp_page = page;
105                 return;
106         }
107
108         /*
109          * Otherwise just release our reference to it
110          */
111         page_cache_release(page);
112 }
113
/* Map the buffer's page into kernel address space for copying */
static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	return kmap(buf->page);
}
118
/* Undo anon_pipe_buf_map(): drop the kmap of the buffer's page */
static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	kunmap(buf->page);
}
123
/*
 * Steal callback for anonymous pipe pages.  Always returns 0 —
 * presumably "success, the page may be taken over", since anon pages
 * need no extra cleanup; NOTE(review): confirm against the
 * pipe_buf_operations ->steal contract.
 */
static int anon_pipe_buf_steal(struct pipe_inode_info *info,
			       struct pipe_buffer *buf)
{
	return 0;
}
129
/* Buffer operations for ordinary pipe(2) data pages */
static struct pipe_buf_operations anon_pipe_buf_ops = {
	.can_merge = 1,	/* pipe_writev may append small writes to the last page */
	.map = anon_pipe_buf_map,
	.unmap = anon_pipe_buf_unmap,
	.release = anon_pipe_buf_release,
	.steal = anon_pipe_buf_steal,
};
137
/*
 * Read up to iov_length(iov, nr_segs) bytes from the pipe into the
 * user iovec.  Walks the circular pipe_buffer array under the pipe
 * mutex, copying each buffer out and releasing it once drained.
 * Blocks while the pipe is empty and writers remain (unless
 * O_NONBLOCK or partial data was already copied).  Returns the number
 * of bytes read, 0 at EOF (no writers), or a negative errno.
 */
static ssize_t
pipe_readv(struct file *filp, const struct iovec *_iov,
	   unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *info;
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(PIPE_MUTEX(*inode));
	info = inode->i_pipe;
	for (;;) {
		int bufs = info->nrbufs;
		if (bufs) {
			int curbuf = info->curbuf;
			struct pipe_buffer *buf = info->bufs + curbuf;
			struct pipe_buf_operations *ops = buf->ops;
			void *addr;
			size_t chars = buf->len;
			int error;

			/* Don't copy more than the caller asked for */
			if (chars > total_len)
				chars = total_len;

			addr = ops->map(filp, info, buf);
			if (IS_ERR(addr)) {
				/* Report the error only if nothing was copied yet */
				if (!ret)
					ret = PTR_ERR(addr);
				break;
			}
			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
			ops->unmap(info, buf);
			if (unlikely(error)) {
				/* A partial read still returns the byte count */
				if (!ret) ret = -EFAULT;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				/* Buffer fully drained: release it and advance */
				buf->ops = NULL;
				ops->release(info, buf);
				curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
				info->curbuf = curbuf;
				info->nrbufs = --bufs;
				do_wakeup = 1;	/* space freed for writers */
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		if (!PIPE_WRITERS(*inode))
			break;	/* empty and no writers: EOF */
		if (!PIPE_WAITING_WRITERS(*inode)) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret) ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			/* Wake writers before sleeping so they can refill us */
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
		}
		pipe_wait(inode);
	}
	mutex_unlock(PIPE_MUTEX(*inode));
	/* Signal writers asynchronously that there is more room.  */
	if (do_wakeup) {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}
235
236 static ssize_t
237 pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
238 {
239         struct iovec iov = { .iov_base = buf, .iov_len = count };
240         return pipe_readv(filp, &iov, 1, ppos);
241 }
242
/*
 * Write up to iov_length(iov, nr_segs) bytes from the user iovec into
 * the pipe.  A small trailing chunk is first merged into the last
 * buffer when its ops->can_merge allows and the page has room;
 * otherwise fresh pages (or the cached ->tmp_page) are filled and
 * appended to the circular buffer array.  Blocks while the pipe is
 * full and readers remain (unless O_NONBLOCK).  Returns bytes
 * written or a negative errno; sends SIGPIPE on a readerless pipe.
 */
static ssize_t
pipe_writev(struct file *filp, const struct iovec *_iov,
	    unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *info;
	ssize_t ret;
	int do_wakeup;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;
	ssize_t chars;

	total_len = iov_length(iov, nr_segs);
	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(PIPE_MUTEX(*inode));
	info = inode->i_pipe;

	if (!PIPE_READERS(*inode)) {
		/* Writing with no readers: SIGPIPE + EPIPE, per POSIX */
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

	/* We try to merge small writes */
	chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
	if (info->nrbufs && chars != 0) {
		int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
		struct pipe_buffer *buf = info->bufs + lastbuf;
		struct pipe_buf_operations *ops = buf->ops;
		int offset = buf->offset + buf->len;
		/* Only merge if the buffer type allows it and the page has room */
		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
			void *addr;
			int error;

			addr = ops->map(filp, info, buf);
			if (IS_ERR(addr)) {
				error = PTR_ERR(addr);
				goto out;
			}
			error = pipe_iov_copy_from_user(offset + addr, iov,
							chars);
			ops->unmap(info, buf);
			ret = error;
			do_wakeup = 1;
			if (error)
				goto out;
			buf->len += chars;
			total_len -= chars;
			ret = chars;
			if (!total_len)
				goto out;	/* whole write fit in the merge */
		}
	}

	for (;;) {
		int bufs;
		/* Re-check on every iteration: readers can vanish while we sleep */
		if (!PIPE_READERS(*inode)) {
			send_sig(SIGPIPE, current, 0);
			if (!ret) ret = -EPIPE;
			break;
		}
		bufs = info->nrbufs;
		if (bufs < PIPE_BUFFERS) {
			int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
			struct pipe_buffer *buf = info->bufs + newbuf;
			struct page *page = info->tmp_page;
			int error;

			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
				info->tmp_page = page;
			}
			/* Always wakeup, even if the copy fails. Otherwise
			 * we lock up (O_NONBLOCK-)readers that sleep due to
			 * syscall merging.
			 * FIXME! Is this really true?
			 */
			do_wakeup = 1;
			chars = PAGE_SIZE;
			if (chars > total_len)
				chars = total_len;

			error = pipe_iov_copy_from_user(kmap(page), iov, chars);
			kunmap(page);
			if (unlikely(error)) {
				/* Partial writes still report the byte count */
				if (!ret) ret = -EFAULT;
				break;
			}
			ret += chars;

			/* Insert it into the buffer array */
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = chars;
			info->nrbufs = ++bufs;
			/* The cached page is now owned by the buffer array */
			info->tmp_page = NULL;

			total_len -= chars;
			if (!total_len)
				break;
		}
		if (bufs < PIPE_BUFFERS)
			continue;
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret) ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret) ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			/* Let readers drain the pipe before we sleep */
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
			do_wakeup = 0;
		}
		/* Advertise a sleeping writer: lets readers syscall-merge */
		PIPE_WAITING_WRITERS(*inode)++;
		pipe_wait(inode);
		PIPE_WAITING_WRITERS(*inode)--;
	}
out:
	mutex_unlock(PIPE_MUTEX(*inode));
	if (do_wakeup) {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
	}
	if (ret > 0)
		file_update_time(filp);
	return ret;
}
383
384 static ssize_t
385 pipe_write(struct file *filp, const char __user *buf,
386            size_t count, loff_t *ppos)
387 {
388         struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
389         return pipe_writev(filp, &iov, 1, ppos);
390 }
391
/* Read handler installed on the write-only end of a FIFO: always -EBADF */
static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}
397
/* Write handler installed on the read-only end of a FIFO: always -EBADF */
static ssize_t
bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}
403
404 static int
405 pipe_ioctl(struct inode *pino, struct file *filp,
406            unsigned int cmd, unsigned long arg)
407 {
408         struct inode *inode = filp->f_dentry->d_inode;
409         struct pipe_inode_info *info;
410         int count, buf, nrbufs;
411
412         switch (cmd) {
413                 case FIONREAD:
414                         mutex_lock(PIPE_MUTEX(*inode));
415                         info =  inode->i_pipe;
416                         count = 0;
417                         buf = info->curbuf;
418                         nrbufs = info->nrbufs;
419                         while (--nrbufs >= 0) {
420                                 count += info->bufs[buf].len;
421                                 buf = (buf+1) & (PIPE_BUFFERS-1);
422                         }
423                         mutex_unlock(PIPE_MUTEX(*inode));
424                         return put_user(count, (int __user *)arg);
425                 default:
426                         return -EINVAL;
427         }
428 }
429
430 /* No kernel lock held - fine */
431 static unsigned int
432 pipe_poll(struct file *filp, poll_table *wait)
433 {
434         unsigned int mask;
435         struct inode *inode = filp->f_dentry->d_inode;
436         struct pipe_inode_info *info = inode->i_pipe;
437         int nrbufs;
438
439         poll_wait(filp, PIPE_WAIT(*inode), wait);
440
441         /* Reading only -- no need for acquiring the semaphore.  */
442         nrbufs = info->nrbufs;
443         mask = 0;
444         if (filp->f_mode & FMODE_READ) {
445                 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
446                 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
447                         mask |= POLLHUP;
448         }
449
450         if (filp->f_mode & FMODE_WRITE) {
451                 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
452                 /*
453                  * Most Unices do not set POLLERR for FIFOs but on Linux they
454                  * behave exactly like pipes for poll().
455                  */
456                 if (!PIPE_READERS(*inode))
457                         mask |= POLLERR;
458         }
459
460         return mask;
461 }
462
463 static int
464 pipe_release(struct inode *inode, int decr, int decw)
465 {
466         mutex_lock(PIPE_MUTEX(*inode));
467         PIPE_READERS(*inode) -= decr;
468         PIPE_WRITERS(*inode) -= decw;
469         if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
470                 free_pipe_info(inode);
471         } else {
472                 wake_up_interruptible(PIPE_WAIT(*inode));
473                 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
474                 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
475         }
476         mutex_unlock(PIPE_MUTEX(*inode));
477
478         return 0;
479 }
480
481 static int
482 pipe_read_fasync(int fd, struct file *filp, int on)
483 {
484         struct inode *inode = filp->f_dentry->d_inode;
485         int retval;
486
487         mutex_lock(PIPE_MUTEX(*inode));
488         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
489         mutex_unlock(PIPE_MUTEX(*inode));
490
491         if (retval < 0)
492                 return retval;
493
494         return 0;
495 }
496
497
498 static int
499 pipe_write_fasync(int fd, struct file *filp, int on)
500 {
501         struct inode *inode = filp->f_dentry->d_inode;
502         int retval;
503
504         mutex_lock(PIPE_MUTEX(*inode));
505         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
506         mutex_unlock(PIPE_MUTEX(*inode));
507
508         if (retval < 0)
509                 return retval;
510
511         return 0;
512 }
513
514
515 static int
516 pipe_rdwr_fasync(int fd, struct file *filp, int on)
517 {
518         struct inode *inode = filp->f_dentry->d_inode;
519         int retval;
520
521         mutex_lock(PIPE_MUTEX(*inode));
522
523         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
524
525         if (retval >= 0)
526                 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
527
528         mutex_unlock(PIPE_MUTEX(*inode));
529
530         if (retval < 0)
531                 return retval;
532
533         return 0;
534 }
535
536
/* ->release for the read end: drop fasync state, then one reader ref */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	pipe_read_fasync(-1, filp, 0);
	return pipe_release(inode, 1, 0);
}
543
/* ->release for the write end: drop fasync state, then one writer ref */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	pipe_write_fasync(-1, filp, 0);
	return pipe_release(inode, 0, 1);
}
550
551 static int
552 pipe_rdwr_release(struct inode *inode, struct file *filp)
553 {
554         int decr, decw;
555
556         pipe_rdwr_fasync(-1, filp, 0);
557         decr = (filp->f_mode & FMODE_READ) != 0;
558         decw = (filp->f_mode & FMODE_WRITE) != 0;
559         return pipe_release(inode, decr, decw);
560 }
561
/* ->open for the read end: account one more reader under the mutex */
static int
pipe_read_open(struct inode *inode, struct file *filp)
{
	/* We could have perhaps used atomic_t, but this and friends
	   below are the only places.  So it doesn't seem worthwhile.  */
	mutex_lock(PIPE_MUTEX(*inode));
	PIPE_READERS(*inode)++;
	mutex_unlock(PIPE_MUTEX(*inode));

	return 0;
}
573
/* ->open for the write end: account one more writer under the mutex */
static int
pipe_write_open(struct inode *inode, struct file *filp)
{
	mutex_lock(PIPE_MUTEX(*inode));
	PIPE_WRITERS(*inode)++;
	mutex_unlock(PIPE_MUTEX(*inode));

	return 0;
}
583
/* ->open for a read-write open: bump counts matching the file mode */
static int
pipe_rdwr_open(struct inode *inode, struct file *filp)
{
	mutex_lock(PIPE_MUTEX(*inode));
	if (filp->f_mode & FMODE_READ)
		PIPE_READERS(*inode)++;
	if (filp->f_mode & FMODE_WRITE)
		PIPE_WRITERS(*inode)++;
	mutex_unlock(PIPE_MUTEX(*inode));

	return 0;
}
596
/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 */
/* FIFO opened read-only: writes fail with -EBADF */
const struct file_operations read_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};
612
/* FIFO opened write-only: reads fail with -EBADF */
const struct file_operations write_fifo_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};
624
/* FIFO opened read-write: both directions permitted */
const struct file_operations rdwr_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};
637
/* Anonymous pipe (pipe(2)), read end fd[0] */
static struct file_operations read_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};
649
/* Anonymous pipe (pipe(2)), write end fd[1] */
static struct file_operations write_pipe_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};
661
/* Anonymous pipe inode default fops (installed by get_pipe_inode) */
static struct file_operations rdwr_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};
674
675 void free_pipe_info(struct inode *inode)
676 {
677         int i;
678         struct pipe_inode_info *info = inode->i_pipe;
679
680         inode->i_pipe = NULL;
681         for (i = 0; i < PIPE_BUFFERS; i++) {
682                 struct pipe_buffer *buf = info->bufs + i;
683                 if (buf->ops)
684                         buf->ops->release(info, buf);
685         }
686         if (info->tmp_page)
687                 __free_page(info->tmp_page);
688         kfree(info);
689 }
690
691 struct inode* pipe_new(struct inode* inode)
692 {
693         struct pipe_inode_info *info;
694
695         info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
696         if (!info)
697                 goto fail_page;
698         inode->i_pipe = info;
699
700         init_waitqueue_head(PIPE_WAIT(*inode));
701         PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
702
703         return inode;
704 fail_page:
705         return NULL;
706 }
707
/* Kernel-internal pipefs mount backing all anonymous pipe inodes */
static struct vfsmount *pipe_mnt __read_mostly;
/*
 * Always returning 1 tells the dcache to delete pipe dentries on last
 * dput rather than keeping them cached.
 */
static int pipefs_delete_dentry(struct dentry *dentry)
{
	return 1;
}
/* Dentry ops for pipefs: only d_delete is overridden (see above) */
static struct dentry_operations pipefs_dentry_operations = {
	.d_delete	= pipefs_delete_dentry,
};
716
/*
 * Allocate a pipefs inode, attach pipe state to it, and initialize it
 * as an anonymous FIFO with one reader and one writer already
 * accounted for (do_pipe creates both file ends).  Returns the inode
 * or NULL on failure.
 */
static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode(pipe_mnt->mnt_sb);

	if (!inode)
		goto fail_inode;

	if(!pipe_new(inode))
		goto fail_iput;
	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
	inode->i_fop = &rdwr_pipe_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	inode->i_blksize = PAGE_SIZE;
	return inode;

fail_iput:
	iput(inode);
fail_inode:
	return NULL;
}
748
/*
 * Implement pipe(2): create the pipe inode plus two struct files and
 * install them as fd[0] (read end) and fd[1] (write end).  Returns 0
 * on success or a negative errno; on failure every resource acquired
 * so far is unwound through the goto chain below, in reverse order of
 * acquisition.
 */
int do_pipe(int *fd)
{
	struct qstr this;
	char name[32];
	struct dentry *dentry;
	struct inode * inode;
	struct file *f1, *f2;
	int error;
	int i,j;

	error = -ENFILE;
	f1 = get_empty_filp();
	if (!f1)
		goto no_files;

	f2 = get_empty_filp();
	if (!f2)
		goto close_f1;

	inode = get_pipe_inode();
	if (!inode)
		goto close_f12;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode;
	i = error;	/* descriptor for the read end */

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode_i;
	j = error;	/* descriptor for the write end */

	error = -ENOMEM;
	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len = strlen(name);
	this.hash = inode->i_ino; /* will go */
	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
	if (!dentry)
		goto close_f12_inode_i_j;
	dentry->d_op = &pipefs_dentry_operations;
	d_add(dentry, inode);
	/*
	 * Both files share the same mount and dentry, so two references
	 * of each are needed: the nested mntget takes two mount refs;
	 * for the dentry, one ref comes from d_alloc and the second
	 * from the dget below.
	 */
	f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
	f1->f_dentry = f2->f_dentry = dget(dentry);
	f1->f_mapping = f2->f_mapping = inode->i_mapping;

	/* read file */
	f1->f_pos = f2->f_pos = 0;
	f1->f_flags = O_RDONLY;
	f1->f_op = &read_pipe_fops;
	f1->f_mode = FMODE_READ;
	f1->f_version = 0;

	/* write file */
	f2->f_flags = O_WRONLY;
	f2->f_op = &write_pipe_fops;
	f2->f_mode = FMODE_WRITE;
	f2->f_version = 0;

	/* Point of no return: fds become visible to the process */
	fd_install(i, f1);
	fd_install(j, f2);
	fd[0] = i;
	fd[1] = j;
	return 0;

close_f12_inode_i_j:
	put_unused_fd(j);
close_f12_inode_i:
	put_unused_fd(i);
close_f12_inode:
	free_pipe_info(inode);
	iput(inode);
close_f12:
	put_filp(f2);
close_f1:
	put_filp(f1);
no_files:
	return error;	
}
829
830 /*
831  * pipefs should _never_ be mounted by userland - too much of security hassle,
832  * no real gain from having the whole whorehouse mounted. So we don't need
833  * any operations on the root directory. However, we need a non-trivial
834  * d_name - pipe: will go nicely and kill the special-casing in procfs.
835  */
836
/* get_sb callback: pipefs is an in-memory pseudo filesystem, "pipe:" root */
static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
}
842
/* Filesystem type for the internal pipefs mount (never user-mountable) */
static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.get_sb		= pipefs_get_sb,
	.kill_sb	= kill_anon_super,
};
848
849 static int __init init_pipe_fs(void)
850 {
851         int err = register_filesystem(&pipe_fs_type);
852         if (!err) {
853                 pipe_mnt = kern_mount(&pipe_fs_type);
854                 if (IS_ERR(pipe_mnt)) {
855                         err = PTR_ERR(pipe_mnt);
856                         unregister_filesystem(&pipe_fs_type);
857                 }
858         }
859         return err;
860 }
861
/* Module teardown: unregister first so no new pipes appear, then drop the mount */
static void __exit exit_pipe_fs(void)
{
	unregister_filesystem(&pipe_fs_type);
	mntput(pipe_mnt);
}
867
/* Bring pipefs up early in boot; tear it down on module unload */
fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);