[PATCH] splice: add direct fd <-> fd splicing support
[safe/jmp/linux-2.6] / fs / pipe.c
1 /*
2  *  linux/fs/pipe.c
3  *
4  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
5  */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <linux/uio.h>
17 #include <linux/highmem.h>
18 #include <linux/pagemap.h>
19
20 #include <asm/uaccess.h>
21 #include <asm/ioctls.h>
22
23 /*
24  * We use a start+len construction, which provides full use of the 
25  * allocated memory.
26  * -- Florian Coosmann (FGC)
27  * 
28  * Reads with count = 0 should always return 0.
29  * -- Julian Bradfield 1999-06-07.
30  *
31  * FIFOs and Pipes now generate SIGIO for both readers and writers.
32  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
33  *
34  * pipe_read & write cleanup
35  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
36  */
37
38 /* Drop the inode semaphore and wait for a pipe event, atomically */
39 void pipe_wait(struct pipe_inode_info *pipe)
40 {
41         DEFINE_WAIT(wait);
42
43         /*
44          * Pipes are system-local resources, so sleeping on them
45          * is considered a noninteractive wait:
46          */
47         prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
48         if (pipe->inode)
49                 mutex_unlock(&pipe->inode->i_mutex);
50         schedule();
51         finish_wait(&pipe->wait, &wait);
52         if (pipe->inode)
53                 mutex_lock(&pipe->inode->i_mutex);
54 }
55
56 static int
57 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
58 {
59         unsigned long copy;
60
61         while (len > 0) {
62                 while (!iov->iov_len)
63                         iov++;
64                 copy = min_t(unsigned long, len, iov->iov_len);
65
66                 if (copy_from_user(to, iov->iov_base, copy))
67                         return -EFAULT;
68                 to += copy;
69                 len -= copy;
70                 iov->iov_base += copy;
71                 iov->iov_len -= copy;
72         }
73         return 0;
74 }
75
76 static int
77 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
78 {
79         unsigned long copy;
80
81         while (len > 0) {
82                 while (!iov->iov_len)
83                         iov++;
84                 copy = min_t(unsigned long, len, iov->iov_len);
85
86                 if (copy_to_user(iov->iov_base, from, copy))
87                         return -EFAULT;
88                 from += copy;
89                 len -= copy;
90                 iov->iov_base += copy;
91                 iov->iov_len -= copy;
92         }
93         return 0;
94 }
95
96 static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
97 {
98         struct page *page = buf->page;
99
100         buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
101
102         /*
103          * If nobody else uses this page, and we don't already have a
104          * temporary page, let's keep track of it as a one-deep
105          * allocation cache
106          */
107         if (page_count(page) == 1 && !info->tmp_page) {
108                 info->tmp_page = page;
109                 return;
110         }
111
112         /*
113          * Otherwise just release our reference to it
114          */
115         page_cache_release(page);
116 }
117
118 static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
119 {
120         return kmap(buf->page);
121 }
122
123 static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
124 {
125         kunmap(buf->page);
126 }
127
128 static int anon_pipe_buf_steal(struct pipe_inode_info *info,
129                                struct pipe_buffer *buf)
130 {
131         buf->flags |= PIPE_BUF_FLAG_STOLEN;
132         return 0;
133 }
134
135 static struct pipe_buf_operations anon_pipe_buf_ops = {
136         .can_merge = 1,
137         .map = anon_pipe_buf_map,
138         .unmap = anon_pipe_buf_unmap,
139         .release = anon_pipe_buf_release,
140         .steal = anon_pipe_buf_steal,
141 };
142
143 static ssize_t
144 pipe_readv(struct file *filp, const struct iovec *_iov,
145            unsigned long nr_segs, loff_t *ppos)
146 {
147         struct inode *inode = filp->f_dentry->d_inode;
148         struct pipe_inode_info *info;
149         int do_wakeup;
150         ssize_t ret;
151         struct iovec *iov = (struct iovec *)_iov;
152         size_t total_len;
153
154         total_len = iov_length(iov, nr_segs);
155         /* Null read succeeds. */
156         if (unlikely(total_len == 0))
157                 return 0;
158
159         do_wakeup = 0;
160         ret = 0;
161         mutex_lock(PIPE_MUTEX(*inode));
162         info = inode->i_pipe;
163         for (;;) {
164                 int bufs = info->nrbufs;
165                 if (bufs) {
166                         int curbuf = info->curbuf;
167                         struct pipe_buffer *buf = info->bufs + curbuf;
168                         struct pipe_buf_operations *ops = buf->ops;
169                         void *addr;
170                         size_t chars = buf->len;
171                         int error;
172
173                         if (chars > total_len)
174                                 chars = total_len;
175
176                         addr = ops->map(filp, info, buf);
177                         if (IS_ERR(addr)) {
178                                 if (!ret)
179                                         ret = PTR_ERR(addr);
180                                 break;
181                         }
182                         error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
183                         ops->unmap(info, buf);
184                         if (unlikely(error)) {
185                                 if (!ret) ret = -EFAULT;
186                                 break;
187                         }
188                         ret += chars;
189                         buf->offset += chars;
190                         buf->len -= chars;
191                         if (!buf->len) {
192                                 buf->ops = NULL;
193                                 ops->release(info, buf);
194                                 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
195                                 info->curbuf = curbuf;
196                                 info->nrbufs = --bufs;
197                                 do_wakeup = 1;
198                         }
199                         total_len -= chars;
200                         if (!total_len)
201                                 break;  /* common path: read succeeded */
202                 }
203                 if (bufs)       /* More to do? */
204                         continue;
205                 if (!PIPE_WRITERS(*inode))
206                         break;
207                 if (!PIPE_WAITING_WRITERS(*inode)) {
208                         /* syscall merging: Usually we must not sleep
209                          * if O_NONBLOCK is set, or if we got some data.
210                          * But if a writer sleeps in kernel space, then
211                          * we can wait for that data without violating POSIX.
212                          */
213                         if (ret)
214                                 break;
215                         if (filp->f_flags & O_NONBLOCK) {
216                                 ret = -EAGAIN;
217                                 break;
218                         }
219                 }
220                 if (signal_pending(current)) {
221                         if (!ret) ret = -ERESTARTSYS;
222                         break;
223                 }
224                 if (do_wakeup) {
225                         wake_up_interruptible_sync(PIPE_WAIT(*inode));
226                         kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
227                 }
228                 pipe_wait(inode->i_pipe);
229         }
230         mutex_unlock(PIPE_MUTEX(*inode));
231         /* Signal writers asynchronously that there is more room.  */
232         if (do_wakeup) {
233                 wake_up_interruptible(PIPE_WAIT(*inode));
234                 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
235         }
236         if (ret > 0)
237                 file_accessed(filp);
238         return ret;
239 }
240
241 static ssize_t
242 pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
243 {
244         struct iovec iov = { .iov_base = buf, .iov_len = count };
245         return pipe_readv(filp, &iov, 1, ppos);
246 }
247
248 static ssize_t
249 pipe_writev(struct file *filp, const struct iovec *_iov,
250             unsigned long nr_segs, loff_t *ppos)
251 {
252         struct inode *inode = filp->f_dentry->d_inode;
253         struct pipe_inode_info *info;
254         ssize_t ret;
255         int do_wakeup;
256         struct iovec *iov = (struct iovec *)_iov;
257         size_t total_len;
258         ssize_t chars;
259
260         total_len = iov_length(iov, nr_segs);
261         /* Null write succeeds. */
262         if (unlikely(total_len == 0))
263                 return 0;
264
265         do_wakeup = 0;
266         ret = 0;
267         mutex_lock(PIPE_MUTEX(*inode));
268         info = inode->i_pipe;
269
270         if (!PIPE_READERS(*inode)) {
271                 send_sig(SIGPIPE, current, 0);
272                 ret = -EPIPE;
273                 goto out;
274         }
275
276         /* We try to merge small writes */
277         chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
278         if (info->nrbufs && chars != 0) {
279                 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
280                 struct pipe_buffer *buf = info->bufs + lastbuf;
281                 struct pipe_buf_operations *ops = buf->ops;
282                 int offset = buf->offset + buf->len;
283                 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
284                         void *addr;
285                         int error;
286
287                         addr = ops->map(filp, info, buf);
288                         if (IS_ERR(addr)) {
289                                 error = PTR_ERR(addr);
290                                 goto out;
291                         }
292                         error = pipe_iov_copy_from_user(offset + addr, iov,
293                                                         chars);
294                         ops->unmap(info, buf);
295                         ret = error;
296                         do_wakeup = 1;
297                         if (error)
298                                 goto out;
299                         buf->len += chars;
300                         total_len -= chars;
301                         ret = chars;
302                         if (!total_len)
303                                 goto out;
304                 }
305         }
306
307         for (;;) {
308                 int bufs;
309                 if (!PIPE_READERS(*inode)) {
310                         send_sig(SIGPIPE, current, 0);
311                         if (!ret) ret = -EPIPE;
312                         break;
313                 }
314                 bufs = info->nrbufs;
315                 if (bufs < PIPE_BUFFERS) {
316                         int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
317                         struct pipe_buffer *buf = info->bufs + newbuf;
318                         struct page *page = info->tmp_page;
319                         int error;
320
321                         if (!page) {
322                                 page = alloc_page(GFP_HIGHUSER);
323                                 if (unlikely(!page)) {
324                                         ret = ret ? : -ENOMEM;
325                                         break;
326                                 }
327                                 info->tmp_page = page;
328                         }
329                         /* Always wakeup, even if the copy fails. Otherwise
330                          * we lock up (O_NONBLOCK-)readers that sleep due to
331                          * syscall merging.
332                          * FIXME! Is this really true?
333                          */
334                         do_wakeup = 1;
335                         chars = PAGE_SIZE;
336                         if (chars > total_len)
337                                 chars = total_len;
338
339                         error = pipe_iov_copy_from_user(kmap(page), iov, chars);
340                         kunmap(page);
341                         if (unlikely(error)) {
342                                 if (!ret) ret = -EFAULT;
343                                 break;
344                         }
345                         ret += chars;
346
347                         /* Insert it into the buffer array */
348                         buf->page = page;
349                         buf->ops = &anon_pipe_buf_ops;
350                         buf->offset = 0;
351                         buf->len = chars;
352                         info->nrbufs = ++bufs;
353                         info->tmp_page = NULL;
354
355                         total_len -= chars;
356                         if (!total_len)
357                                 break;
358                 }
359                 if (bufs < PIPE_BUFFERS)
360                         continue;
361                 if (filp->f_flags & O_NONBLOCK) {
362                         if (!ret) ret = -EAGAIN;
363                         break;
364                 }
365                 if (signal_pending(current)) {
366                         if (!ret) ret = -ERESTARTSYS;
367                         break;
368                 }
369                 if (do_wakeup) {
370                         wake_up_interruptible_sync(PIPE_WAIT(*inode));
371                         kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
372                         do_wakeup = 0;
373                 }
374                 PIPE_WAITING_WRITERS(*inode)++;
375                 pipe_wait(inode->i_pipe);
376                 PIPE_WAITING_WRITERS(*inode)--;
377         }
378 out:
379         mutex_unlock(PIPE_MUTEX(*inode));
380         if (do_wakeup) {
381                 wake_up_interruptible(PIPE_WAIT(*inode));
382                 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
383         }
384         if (ret > 0)
385                 file_update_time(filp);
386         return ret;
387 }
388
389 static ssize_t
390 pipe_write(struct file *filp, const char __user *buf,
391            size_t count, loff_t *ppos)
392 {
393         struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
394         return pipe_writev(filp, &iov, 1, ppos);
395 }
396
397 static ssize_t
398 bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
399 {
400         return -EBADF;
401 }
402
403 static ssize_t
404 bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
405 {
406         return -EBADF;
407 }
408
409 static int
410 pipe_ioctl(struct inode *pino, struct file *filp,
411            unsigned int cmd, unsigned long arg)
412 {
413         struct inode *inode = filp->f_dentry->d_inode;
414         struct pipe_inode_info *info;
415         int count, buf, nrbufs;
416
417         switch (cmd) {
418                 case FIONREAD:
419                         mutex_lock(PIPE_MUTEX(*inode));
420                         info =  inode->i_pipe;
421                         count = 0;
422                         buf = info->curbuf;
423                         nrbufs = info->nrbufs;
424                         while (--nrbufs >= 0) {
425                                 count += info->bufs[buf].len;
426                                 buf = (buf+1) & (PIPE_BUFFERS-1);
427                         }
428                         mutex_unlock(PIPE_MUTEX(*inode));
429                         return put_user(count, (int __user *)arg);
430                 default:
431                         return -EINVAL;
432         }
433 }
434
435 /* No kernel lock held - fine */
436 static unsigned int
437 pipe_poll(struct file *filp, poll_table *wait)
438 {
439         unsigned int mask;
440         struct inode *inode = filp->f_dentry->d_inode;
441         struct pipe_inode_info *info = inode->i_pipe;
442         int nrbufs;
443
444         poll_wait(filp, PIPE_WAIT(*inode), wait);
445
446         /* Reading only -- no need for acquiring the semaphore.  */
447         nrbufs = info->nrbufs;
448         mask = 0;
449         if (filp->f_mode & FMODE_READ) {
450                 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
451                 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
452                         mask |= POLLHUP;
453         }
454
455         if (filp->f_mode & FMODE_WRITE) {
456                 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
457                 /*
458                  * Most Unices do not set POLLERR for FIFOs but on Linux they
459                  * behave exactly like pipes for poll().
460                  */
461                 if (!PIPE_READERS(*inode))
462                         mask |= POLLERR;
463         }
464
465         return mask;
466 }
467
468 static int
469 pipe_release(struct inode *inode, int decr, int decw)
470 {
471         mutex_lock(PIPE_MUTEX(*inode));
472         PIPE_READERS(*inode) -= decr;
473         PIPE_WRITERS(*inode) -= decw;
474         if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
475                 free_pipe_info(inode);
476         } else {
477                 wake_up_interruptible(PIPE_WAIT(*inode));
478                 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
479                 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
480         }
481         mutex_unlock(PIPE_MUTEX(*inode));
482
483         return 0;
484 }
485
486 static int
487 pipe_read_fasync(int fd, struct file *filp, int on)
488 {
489         struct inode *inode = filp->f_dentry->d_inode;
490         int retval;
491
492         mutex_lock(PIPE_MUTEX(*inode));
493         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
494         mutex_unlock(PIPE_MUTEX(*inode));
495
496         if (retval < 0)
497                 return retval;
498
499         return 0;
500 }
501
502
503 static int
504 pipe_write_fasync(int fd, struct file *filp, int on)
505 {
506         struct inode *inode = filp->f_dentry->d_inode;
507         int retval;
508
509         mutex_lock(PIPE_MUTEX(*inode));
510         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
511         mutex_unlock(PIPE_MUTEX(*inode));
512
513         if (retval < 0)
514                 return retval;
515
516         return 0;
517 }
518
519
520 static int
521 pipe_rdwr_fasync(int fd, struct file *filp, int on)
522 {
523         struct inode *inode = filp->f_dentry->d_inode;
524         int retval;
525
526         mutex_lock(PIPE_MUTEX(*inode));
527
528         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
529
530         if (retval >= 0)
531                 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
532
533         mutex_unlock(PIPE_MUTEX(*inode));
534
535         if (retval < 0)
536                 return retval;
537
538         return 0;
539 }
540
541
/*
 * release() for read-only ends: removes the file from the reader fasync
 * list, then drops one reader from the pipe.
 */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_read_fasync(-1, filp, 0);
	rc = pipe_release(inode, 1, 0);

	return rc;
}
548
/*
 * release() for write-only ends: removes the file from the writer fasync
 * list, then drops one writer from the pipe.
 */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_write_fasync(-1, filp, 0);
	rc = pipe_release(inode, 0, 1);

	return rc;
}
555
556 static int
557 pipe_rdwr_release(struct inode *inode, struct file *filp)
558 {
559         int decr, decw;
560
561         pipe_rdwr_fasync(-1, filp, 0);
562         decr = (filp->f_mode & FMODE_READ) != 0;
563         decw = (filp->f_mode & FMODE_WRITE) != 0;
564         return pipe_release(inode, decr, decw);
565 }
566
567 static int
568 pipe_read_open(struct inode *inode, struct file *filp)
569 {
570         /* We could have perhaps used atomic_t, but this and friends
571            below are the only places.  So it doesn't seem worthwhile.  */
572         mutex_lock(PIPE_MUTEX(*inode));
573         PIPE_READERS(*inode)++;
574         mutex_unlock(PIPE_MUTEX(*inode));
575
576         return 0;
577 }
578
579 static int
580 pipe_write_open(struct inode *inode, struct file *filp)
581 {
582         mutex_lock(PIPE_MUTEX(*inode));
583         PIPE_WRITERS(*inode)++;
584         mutex_unlock(PIPE_MUTEX(*inode));
585
586         return 0;
587 }
588
589 static int
590 pipe_rdwr_open(struct inode *inode, struct file *filp)
591 {
592         mutex_lock(PIPE_MUTEX(*inode));
593         if (filp->f_mode & FMODE_READ)
594                 PIPE_READERS(*inode)++;
595         if (filp->f_mode & FMODE_WRITE)
596                 PIPE_WRITERS(*inode)++;
597         mutex_unlock(PIPE_MUTEX(*inode));
598
599         return 0;
600 }
601
602 /*
603  * The file_operations structs are not static because they
604  * are also used in linux/fs/fifo.c to do operations on FIFOs.
605  */
606 const struct file_operations read_fifo_fops = {
607         .llseek         = no_llseek,
608         .read           = pipe_read,
609         .readv          = pipe_readv,
610         .write          = bad_pipe_w,
611         .poll           = pipe_poll,
612         .ioctl          = pipe_ioctl,
613         .open           = pipe_read_open,
614         .release        = pipe_read_release,
615         .fasync         = pipe_read_fasync,
616 };
617
618 const struct file_operations write_fifo_fops = {
619         .llseek         = no_llseek,
620         .read           = bad_pipe_r,
621         .write          = pipe_write,
622         .writev         = pipe_writev,
623         .poll           = pipe_poll,
624         .ioctl          = pipe_ioctl,
625         .open           = pipe_write_open,
626         .release        = pipe_write_release,
627         .fasync         = pipe_write_fasync,
628 };
629
630 const struct file_operations rdwr_fifo_fops = {
631         .llseek         = no_llseek,
632         .read           = pipe_read,
633         .readv          = pipe_readv,
634         .write          = pipe_write,
635         .writev         = pipe_writev,
636         .poll           = pipe_poll,
637         .ioctl          = pipe_ioctl,
638         .open           = pipe_rdwr_open,
639         .release        = pipe_rdwr_release,
640         .fasync         = pipe_rdwr_fasync,
641 };
642
643 static struct file_operations read_pipe_fops = {
644         .llseek         = no_llseek,
645         .read           = pipe_read,
646         .readv          = pipe_readv,
647         .write          = bad_pipe_w,
648         .poll           = pipe_poll,
649         .ioctl          = pipe_ioctl,
650         .open           = pipe_read_open,
651         .release        = pipe_read_release,
652         .fasync         = pipe_read_fasync,
653 };
654
655 static struct file_operations write_pipe_fops = {
656         .llseek         = no_llseek,
657         .read           = bad_pipe_r,
658         .write          = pipe_write,
659         .writev         = pipe_writev,
660         .poll           = pipe_poll,
661         .ioctl          = pipe_ioctl,
662         .open           = pipe_write_open,
663         .release        = pipe_write_release,
664         .fasync         = pipe_write_fasync,
665 };
666
667 static struct file_operations rdwr_pipe_fops = {
668         .llseek         = no_llseek,
669         .read           = pipe_read,
670         .readv          = pipe_readv,
671         .write          = pipe_write,
672         .writev         = pipe_writev,
673         .poll           = pipe_poll,
674         .ioctl          = pipe_ioctl,
675         .open           = pipe_rdwr_open,
676         .release        = pipe_rdwr_release,
677         .fasync         = pipe_rdwr_fasync,
678 };
679
680 struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
681 {
682         struct pipe_inode_info *info;
683
684         info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
685         if (info) {
686                 init_waitqueue_head(&info->wait);
687                 info->r_counter = info->w_counter = 1;
688                 info->inode = inode;
689         }
690
691         return info;
692 }
693
694 void __free_pipe_info(struct pipe_inode_info *info)
695 {
696         int i;
697
698         for (i = 0; i < PIPE_BUFFERS; i++) {
699                 struct pipe_buffer *buf = info->bufs + i;
700                 if (buf->ops)
701                         buf->ops->release(info, buf);
702         }
703         if (info->tmp_page)
704                 __free_page(info->tmp_page);
705         kfree(info);
706 }
707
708 void free_pipe_info(struct inode *inode)
709 {
710         __free_pipe_info(inode->i_pipe);
711         inode->i_pipe = NULL;
712 }
713
714 static struct vfsmount *pipe_mnt __read_mostly;
715 static int pipefs_delete_dentry(struct dentry *dentry)
716 {
717         return 1;
718 }
719 static struct dentry_operations pipefs_dentry_operations = {
720         .d_delete       = pipefs_delete_dentry,
721 };
722
723 static struct inode * get_pipe_inode(void)
724 {
725         struct inode *inode = new_inode(pipe_mnt->mnt_sb);
726
727         if (!inode)
728                 goto fail_inode;
729
730         inode->i_pipe = alloc_pipe_info(inode);
731         if (!inode->i_pipe)
732                 goto fail_iput;
733
734         PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
735         inode->i_fop = &rdwr_pipe_fops;
736
737         /*
738          * Mark the inode dirty from the very beginning,
739          * that way it will never be moved to the dirty
740          * list because "mark_inode_dirty()" will think
741          * that it already _is_ on the dirty list.
742          */
743         inode->i_state = I_DIRTY;
744         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
745         inode->i_uid = current->fsuid;
746         inode->i_gid = current->fsgid;
747         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
748         inode->i_blksize = PAGE_SIZE;
749         return inode;
750
751 fail_iput:
752         iput(inode);
753 fail_inode:
754         return NULL;
755 }
756
757 int do_pipe(int *fd)
758 {
759         struct qstr this;
760         char name[32];
761         struct dentry *dentry;
762         struct inode * inode;
763         struct file *f1, *f2;
764         int error;
765         int i,j;
766
767         error = -ENFILE;
768         f1 = get_empty_filp();
769         if (!f1)
770                 goto no_files;
771
772         f2 = get_empty_filp();
773         if (!f2)
774                 goto close_f1;
775
776         inode = get_pipe_inode();
777         if (!inode)
778                 goto close_f12;
779
780         error = get_unused_fd();
781         if (error < 0)
782                 goto close_f12_inode;
783         i = error;
784
785         error = get_unused_fd();
786         if (error < 0)
787                 goto close_f12_inode_i;
788         j = error;
789
790         error = -ENOMEM;
791         sprintf(name, "[%lu]", inode->i_ino);
792         this.name = name;
793         this.len = strlen(name);
794         this.hash = inode->i_ino; /* will go */
795         dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
796         if (!dentry)
797                 goto close_f12_inode_i_j;
798         dentry->d_op = &pipefs_dentry_operations;
799         d_add(dentry, inode);
800         f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
801         f1->f_dentry = f2->f_dentry = dget(dentry);
802         f1->f_mapping = f2->f_mapping = inode->i_mapping;
803
804         /* read file */
805         f1->f_pos = f2->f_pos = 0;
806         f1->f_flags = O_RDONLY;
807         f1->f_op = &read_pipe_fops;
808         f1->f_mode = FMODE_READ;
809         f1->f_version = 0;
810
811         /* write file */
812         f2->f_flags = O_WRONLY;
813         f2->f_op = &write_pipe_fops;
814         f2->f_mode = FMODE_WRITE;
815         f2->f_version = 0;
816
817         fd_install(i, f1);
818         fd_install(j, f2);
819         fd[0] = i;
820         fd[1] = j;
821         return 0;
822
823 close_f12_inode_i_j:
824         put_unused_fd(j);
825 close_f12_inode_i:
826         put_unused_fd(i);
827 close_f12_inode:
828         free_pipe_info(inode);
829         iput(inode);
830 close_f12:
831         put_filp(f2);
832 close_f1:
833         put_filp(f1);
834 no_files:
835         return error;   
836 }
837
838 /*
839  * pipefs should _never_ be mounted by userland - too much of security hassle,
840  * no real gain from having the whole whorehouse mounted. So we don't need
841  * any operations on the root directory. However, we need a non-trivial
842  * d_name - pipe: will go nicely and kill the special-casing in procfs.
843  */
844
845 static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
846         int flags, const char *dev_name, void *data)
847 {
848         return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
849 }
850
851 static struct file_system_type pipe_fs_type = {
852         .name           = "pipefs",
853         .get_sb         = pipefs_get_sb,
854         .kill_sb        = kill_anon_super,
855 };
856
857 static int __init init_pipe_fs(void)
858 {
859         int err = register_filesystem(&pipe_fs_type);
860         if (!err) {
861                 pipe_mnt = kern_mount(&pipe_fs_type);
862                 if (IS_ERR(pipe_mnt)) {
863                         err = PTR_ERR(pipe_mnt);
864                         unregister_filesystem(&pipe_fs_type);
865                 }
866         }
867         return err;
868 }
869
870 static void __exit exit_pipe_fs(void)
871 {
872         unregister_filesystem(&pipe_fs_type);
873         mntput(pipe_mnt);
874 }
875
876 fs_initcall(init_pipe_fs);
877 module_exit(exit_pipe_fs);