/*
 * [PATCH] pipe: introduce ->pin() buffer operation
 * fs/pipe.c
 */
1 /*
2  *  linux/fs/pipe.c
3  *
4  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
5  */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <linux/uio.h>
17 #include <linux/highmem.h>
18 #include <linux/pagemap.h>
19
20 #include <asm/uaccess.h>
21 #include <asm/ioctls.h>
22
23 /*
24  * We use a start+len construction, which provides full use of the 
25  * allocated memory.
26  * -- Florian Coosmann (FGC)
27  * 
28  * Reads with count = 0 should always return 0.
29  * -- Julian Bradfield 1999-06-07.
30  *
31  * FIFOs and Pipes now generate SIGIO for both readers and writers.
32  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
33  *
34  * pipe_read & write cleanup
35  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
36  */
37
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
	DEFINE_WAIT(wait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(&pipe->wait, &wait,
			TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
	/*
	 * We queued ourselves on the waitqueue above, so a wakeup issued
	 * between the unlock below and schedule() is not lost.
	 * pipe->inode can be NULL (pipe not backed by an inode here).
	 */
	if (pipe->inode)
		mutex_unlock(&pipe->inode->i_mutex);
	schedule();
	finish_wait(&pipe->wait, &wait);
	/* Re-take the mutex so the caller's locking state is unchanged. */
	if (pipe->inode)
		mutex_lock(&pipe->inode->i_mutex);
}
56
57 static int
58 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
59 {
60         unsigned long copy;
61
62         while (len > 0) {
63                 while (!iov->iov_len)
64                         iov++;
65                 copy = min_t(unsigned long, len, iov->iov_len);
66
67                 if (copy_from_user(to, iov->iov_base, copy))
68                         return -EFAULT;
69                 to += copy;
70                 len -= copy;
71                 iov->iov_base += copy;
72                 iov->iov_len -= copy;
73         }
74         return 0;
75 }
76
77 static int
78 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
79 {
80         unsigned long copy;
81
82         while (len > 0) {
83                 while (!iov->iov_len)
84                         iov++;
85                 copy = min_t(unsigned long, len, iov->iov_len);
86
87                 if (copy_to_user(iov->iov_base, from, copy))
88                         return -EFAULT;
89                 from += copy;
90                 len -= copy;
91                 iov->iov_base += copy;
92                 iov->iov_len -= copy;
93         }
94         return 0;
95 }
96
97 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
98                                   struct pipe_buffer *buf)
99 {
100         struct page *page = buf->page;
101
102         /*
103          * If nobody else uses this page, and we don't already have a
104          * temporary page, let's keep track of it as a one-deep
105          * allocation cache. (Otherwise just release our reference to it)
106          */
107         if (page_count(page) == 1 && !pipe->tmp_page)
108                 pipe->tmp_page = page;
109         else
110                 page_cache_release(page);
111 }
112
/*
 * Default ->map() implementation: map the buffer's page into kernel
 * address space with kmap() (which may sleep) and return the address.
 */
void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
			   struct pipe_buffer *buf)
{
	return kmap(buf->page);
}
118
/*
 * Default ->unmap() implementation: undo a generic_pipe_buf_map()
 * mapping of the buffer's page.
 */
void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
			    struct pipe_buffer *buf)
{
	kunmap(buf->page);
}
124
125 static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
126                                struct pipe_buffer *buf)
127 {
128         struct page *page = buf->page;
129
130         if (page_count(page) == 1) {
131                 lock_page(page);
132                 return 0;
133         }
134
135         return 1;
136 }
137
/*
 * Default ->get() implementation: take an extra reference on the
 * buffer's page on behalf of an additional user of the buffer.
 */
void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	page_cache_get(buf->page);
}
142
/*
 * Default ->pin() implementation: nothing to do before the page
 * contents can be accessed, so always report success.
 */
int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	return 0;
}
147
/*
 * Buffer operations for plain anonymous pipe pages.  can_merge = 1
 * lets pipe_writev() append small writes to the last queued buffer.
 */
static struct pipe_buf_operations anon_pipe_buf_ops = {
	.can_merge = 1,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.pin = generic_pipe_buf_pin,
	.release = anon_pipe_buf_release,
	.steal = anon_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};
157
158 static ssize_t
159 pipe_readv(struct file *filp, const struct iovec *_iov,
160            unsigned long nr_segs, loff_t *ppos)
161 {
162         struct inode *inode = filp->f_dentry->d_inode;
163         struct pipe_inode_info *pipe;
164         int do_wakeup;
165         ssize_t ret;
166         struct iovec *iov = (struct iovec *)_iov;
167         size_t total_len;
168
169         total_len = iov_length(iov, nr_segs);
170         /* Null read succeeds. */
171         if (unlikely(total_len == 0))
172                 return 0;
173
174         do_wakeup = 0;
175         ret = 0;
176         mutex_lock(&inode->i_mutex);
177         pipe = inode->i_pipe;
178         for (;;) {
179                 int bufs = pipe->nrbufs;
180                 if (bufs) {
181                         int curbuf = pipe->curbuf;
182                         struct pipe_buffer *buf = pipe->bufs + curbuf;
183                         struct pipe_buf_operations *ops = buf->ops;
184                         void *addr;
185                         size_t chars = buf->len;
186                         int error;
187
188                         if (chars > total_len)
189                                 chars = total_len;
190
191                         error = ops->pin(pipe, buf);
192                         if (error) {
193                                 if (!ret)
194                                         error = ret;
195                                 break;
196                         }
197
198                         addr = ops->map(pipe, buf);
199                         error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
200                         ops->unmap(pipe, buf);
201                         if (unlikely(error)) {
202                                 if (!ret)
203                                         ret = -EFAULT;
204                                 break;
205                         }
206                         ret += chars;
207                         buf->offset += chars;
208                         buf->len -= chars;
209                         if (!buf->len) {
210                                 buf->ops = NULL;
211                                 ops->release(pipe, buf);
212                                 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
213                                 pipe->curbuf = curbuf;
214                                 pipe->nrbufs = --bufs;
215                                 do_wakeup = 1;
216                         }
217                         total_len -= chars;
218                         if (!total_len)
219                                 break;  /* common path: read succeeded */
220                 }
221                 if (bufs)       /* More to do? */
222                         continue;
223                 if (!pipe->writers)
224                         break;
225                 if (!pipe->waiting_writers) {
226                         /* syscall merging: Usually we must not sleep
227                          * if O_NONBLOCK is set, or if we got some data.
228                          * But if a writer sleeps in kernel space, then
229                          * we can wait for that data without violating POSIX.
230                          */
231                         if (ret)
232                                 break;
233                         if (filp->f_flags & O_NONBLOCK) {
234                                 ret = -EAGAIN;
235                                 break;
236                         }
237                 }
238                 if (signal_pending(current)) {
239                         if (!ret)
240                                 ret = -ERESTARTSYS;
241                         break;
242                 }
243                 if (do_wakeup) {
244                         wake_up_interruptible_sync(&pipe->wait);
245                         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
246                 }
247                 pipe_wait(pipe);
248         }
249         mutex_unlock(&inode->i_mutex);
250
251         /* Signal writers asynchronously that there is more room. */
252         if (do_wakeup) {
253                 wake_up_interruptible(&pipe->wait);
254                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
255         }
256         if (ret > 0)
257                 file_accessed(filp);
258         return ret;
259 }
260
261 static ssize_t
262 pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
263 {
264         struct iovec iov = { .iov_base = buf, .iov_len = count };
265
266         return pipe_readv(filp, &iov, 1, ppos);
267 }
268
269 static ssize_t
270 pipe_writev(struct file *filp, const struct iovec *_iov,
271             unsigned long nr_segs, loff_t *ppos)
272 {
273         struct inode *inode = filp->f_dentry->d_inode;
274         struct pipe_inode_info *pipe;
275         ssize_t ret;
276         int do_wakeup;
277         struct iovec *iov = (struct iovec *)_iov;
278         size_t total_len;
279         ssize_t chars;
280
281         total_len = iov_length(iov, nr_segs);
282         /* Null write succeeds. */
283         if (unlikely(total_len == 0))
284                 return 0;
285
286         do_wakeup = 0;
287         ret = 0;
288         mutex_lock(&inode->i_mutex);
289         pipe = inode->i_pipe;
290
291         if (!pipe->readers) {
292                 send_sig(SIGPIPE, current, 0);
293                 ret = -EPIPE;
294                 goto out;
295         }
296
297         /* We try to merge small writes */
298         chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
299         if (pipe->nrbufs && chars != 0) {
300                 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
301                                                         (PIPE_BUFFERS-1);
302                 struct pipe_buffer *buf = pipe->bufs + lastbuf;
303                 struct pipe_buf_operations *ops = buf->ops;
304                 int offset = buf->offset + buf->len;
305
306                 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
307                         void *addr;
308                         int error;
309
310                         error = ops->pin(pipe, buf);
311                         if (error)
312                                 goto out;
313
314                         addr = ops->map(pipe, buf);
315                         error = pipe_iov_copy_from_user(offset + addr, iov,
316                                                         chars);
317                         ops->unmap(pipe, buf);
318                         ret = error;
319                         do_wakeup = 1;
320                         if (error)
321                                 goto out;
322                         buf->len += chars;
323                         total_len -= chars;
324                         ret = chars;
325                         if (!total_len)
326                                 goto out;
327                 }
328         }
329
330         for (;;) {
331                 int bufs;
332
333                 if (!pipe->readers) {
334                         send_sig(SIGPIPE, current, 0);
335                         if (!ret)
336                                 ret = -EPIPE;
337                         break;
338                 }
339                 bufs = pipe->nrbufs;
340                 if (bufs < PIPE_BUFFERS) {
341                         int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
342                         struct pipe_buffer *buf = pipe->bufs + newbuf;
343                         struct page *page = pipe->tmp_page;
344                         int error;
345
346                         if (!page) {
347                                 page = alloc_page(GFP_HIGHUSER);
348                                 if (unlikely(!page)) {
349                                         ret = ret ? : -ENOMEM;
350                                         break;
351                                 }
352                                 pipe->tmp_page = page;
353                         }
354                         /* Always wake up, even if the copy fails. Otherwise
355                          * we lock up (O_NONBLOCK-)readers that sleep due to
356                          * syscall merging.
357                          * FIXME! Is this really true?
358                          */
359                         do_wakeup = 1;
360                         chars = PAGE_SIZE;
361                         if (chars > total_len)
362                                 chars = total_len;
363
364                         error = pipe_iov_copy_from_user(kmap(page), iov, chars);
365                         kunmap(page);
366                         if (unlikely(error)) {
367                                 if (!ret)
368                                         ret = -EFAULT;
369                                 break;
370                         }
371                         ret += chars;
372
373                         /* Insert it into the buffer array */
374                         buf->page = page;
375                         buf->ops = &anon_pipe_buf_ops;
376                         buf->offset = 0;
377                         buf->len = chars;
378                         pipe->nrbufs = ++bufs;
379                         pipe->tmp_page = NULL;
380
381                         total_len -= chars;
382                         if (!total_len)
383                                 break;
384                 }
385                 if (bufs < PIPE_BUFFERS)
386                         continue;
387                 if (filp->f_flags & O_NONBLOCK) {
388                         if (!ret)
389                                 ret = -EAGAIN;
390                         break;
391                 }
392                 if (signal_pending(current)) {
393                         if (!ret)
394                                 ret = -ERESTARTSYS;
395                         break;
396                 }
397                 if (do_wakeup) {
398                         wake_up_interruptible_sync(&pipe->wait);
399                         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
400                         do_wakeup = 0;
401                 }
402                 pipe->waiting_writers++;
403                 pipe_wait(pipe);
404                 pipe->waiting_writers--;
405         }
406 out:
407         mutex_unlock(&inode->i_mutex);
408         if (do_wakeup) {
409                 wake_up_interruptible(&pipe->wait);
410                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
411         }
412         if (ret > 0)
413                 file_update_time(filp);
414         return ret;
415 }
416
417 static ssize_t
418 pipe_write(struct file *filp, const char __user *buf,
419            size_t count, loff_t *ppos)
420 {
421         struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
422
423         return pipe_writev(filp, &iov, 1, ppos);
424 }
425
/*
 * Stub ->read() used on a pipe end that was opened write-only.
 */
static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}
431
/*
 * Stub ->write() used on a pipe end that was opened read-only.
 */
static ssize_t
bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
	   loff_t *ppos)
{
	return -EBADF;
}
438
439 static int
440 pipe_ioctl(struct inode *pino, struct file *filp,
441            unsigned int cmd, unsigned long arg)
442 {
443         struct inode *inode = filp->f_dentry->d_inode;
444         struct pipe_inode_info *pipe;
445         int count, buf, nrbufs;
446
447         switch (cmd) {
448                 case FIONREAD:
449                         mutex_lock(&inode->i_mutex);
450                         pipe = inode->i_pipe;
451                         count = 0;
452                         buf = pipe->curbuf;
453                         nrbufs = pipe->nrbufs;
454                         while (--nrbufs >= 0) {
455                                 count += pipe->bufs[buf].len;
456                                 buf = (buf+1) & (PIPE_BUFFERS-1);
457                         }
458                         mutex_unlock(&inode->i_mutex);
459
460                         return put_user(count, (int __user *)arg);
461                 default:
462                         return -EINVAL;
463         }
464 }
465
466 /* No kernel lock held - fine */
467 static unsigned int
468 pipe_poll(struct file *filp, poll_table *wait)
469 {
470         unsigned int mask;
471         struct inode *inode = filp->f_dentry->d_inode;
472         struct pipe_inode_info *pipe = inode->i_pipe;
473         int nrbufs;
474
475         poll_wait(filp, &pipe->wait, wait);
476
477         /* Reading only -- no need for acquiring the semaphore.  */
478         nrbufs = pipe->nrbufs;
479         mask = 0;
480         if (filp->f_mode & FMODE_READ) {
481                 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
482                 if (!pipe->writers && filp->f_version != pipe->w_counter)
483                         mask |= POLLHUP;
484         }
485
486         if (filp->f_mode & FMODE_WRITE) {
487                 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
488                 /*
489                  * Most Unices do not set POLLERR for FIFOs but on Linux they
490                  * behave exactly like pipes for poll().
491                  */
492                 if (!pipe->readers)
493                         mask |= POLLERR;
494         }
495
496         return mask;
497 }
498
499 static int
500 pipe_release(struct inode *inode, int decr, int decw)
501 {
502         struct pipe_inode_info *pipe;
503
504         mutex_lock(&inode->i_mutex);
505         pipe = inode->i_pipe;
506         pipe->readers -= decr;
507         pipe->writers -= decw;
508
509         if (!pipe->readers && !pipe->writers) {
510                 free_pipe_info(inode);
511         } else {
512                 wake_up_interruptible(&pipe->wait);
513                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
514                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
515         }
516         mutex_unlock(&inode->i_mutex);
517
518         return 0;
519 }
520
521 static int
522 pipe_read_fasync(int fd, struct file *filp, int on)
523 {
524         struct inode *inode = filp->f_dentry->d_inode;
525         int retval;
526
527         mutex_lock(&inode->i_mutex);
528         retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
529         mutex_unlock(&inode->i_mutex);
530
531         if (retval < 0)
532                 return retval;
533
534         return 0;
535 }
536
537
538 static int
539 pipe_write_fasync(int fd, struct file *filp, int on)
540 {
541         struct inode *inode = filp->f_dentry->d_inode;
542         int retval;
543
544         mutex_lock(&inode->i_mutex);
545         retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
546         mutex_unlock(&inode->i_mutex);
547
548         if (retval < 0)
549                 return retval;
550
551         return 0;
552 }
553
554
555 static int
556 pipe_rdwr_fasync(int fd, struct file *filp, int on)
557 {
558         struct inode *inode = filp->f_dentry->d_inode;
559         struct pipe_inode_info *pipe = inode->i_pipe;
560         int retval;
561
562         mutex_lock(&inode->i_mutex);
563
564         retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
565
566         if (retval >= 0)
567                 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
568
569         mutex_unlock(&inode->i_mutex);
570
571         if (retval < 0)
572                 return retval;
573
574         return 0;
575 }
576
577
/* ->release() for the read-only end: drop the fasync entry, then one reader. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	pipe_read_fasync(-1, filp, 0);
	return pipe_release(inode, 1, 0);
}
584
/* ->release() for the write-only end: drop the fasync entry, then one writer. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	pipe_write_fasync(-1, filp, 0);
	return pipe_release(inode, 0, 1);
}
591
592 static int
593 pipe_rdwr_release(struct inode *inode, struct file *filp)
594 {
595         int decr, decw;
596
597         pipe_rdwr_fasync(-1, filp, 0);
598         decr = (filp->f_mode & FMODE_READ) != 0;
599         decw = (filp->f_mode & FMODE_WRITE) != 0;
600         return pipe_release(inode, decr, decw);
601 }
602
/* ->open() for the read-only end: account one more reader. */
static int
pipe_read_open(struct inode *inode, struct file *filp)
{
	/* We could have perhaps used atomic_t, but this and friends
	   below are the only places.  So it doesn't seem worthwhile.  */
	mutex_lock(&inode->i_mutex);
	inode->i_pipe->readers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}
614
/* ->open() for the write-only end: account one more writer. */
static int
pipe_write_open(struct inode *inode, struct file *filp)
{
	mutex_lock(&inode->i_mutex);
	inode->i_pipe->writers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}
624
625 static int
626 pipe_rdwr_open(struct inode *inode, struct file *filp)
627 {
628         mutex_lock(&inode->i_mutex);
629         if (filp->f_mode & FMODE_READ)
630                 inode->i_pipe->readers++;
631         if (filp->f_mode & FMODE_WRITE)
632                 inode->i_pipe->writers++;
633         mutex_unlock(&inode->i_mutex);
634
635         return 0;
636 }
637
638 /*
639  * The file_operations structs are not static because they
640  * are also used in linux/fs/fifo.c to do operations on FIFOs.
641  */
/* FIFO opened read-only: writes are rejected with -EBADF. */
const struct file_operations read_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};
653
/* FIFO opened write-only: reads are rejected with -EBADF. */
const struct file_operations write_fifo_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};
665
/* FIFO opened read/write: both directions are live. */
const struct file_operations rdwr_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};
678
/* Anonymous pipe, read end (installed on fd[0] by do_pipe()). */
static struct file_operations read_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};
690
/* Anonymous pipe, write end (installed on fd[1] by do_pipe()). */
static struct file_operations write_pipe_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};
702
/* Anonymous pipe inode default ops (see get_pipe_inode()). */
static struct file_operations rdwr_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};
715
716 struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
717 {
718         struct pipe_inode_info *pipe;
719
720         pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
721         if (pipe) {
722                 init_waitqueue_head(&pipe->wait);
723                 pipe->r_counter = pipe->w_counter = 1;
724                 pipe->inode = inode;
725         }
726
727         return pipe;
728 }
729
730 void __free_pipe_info(struct pipe_inode_info *pipe)
731 {
732         int i;
733
734         for (i = 0; i < PIPE_BUFFERS; i++) {
735                 struct pipe_buffer *buf = pipe->bufs + i;
736                 if (buf->ops)
737                         buf->ops->release(pipe, buf);
738         }
739         if (pipe->tmp_page)
740                 __free_page(pipe->tmp_page);
741         kfree(pipe);
742 }
743
/*
 * Free the pipe attached to "inode" and clear the inode's pointer so
 * the pipe cannot be reached (or freed) a second time.
 */
void free_pipe_info(struct inode *inode)
{
	__free_pipe_info(inode->i_pipe);
	inode->i_pipe = NULL;
}
749
/* Kernel-internal mount of pipefs; all anonymous pipe inodes live here. */
static struct vfsmount *pipe_mnt __read_mostly;

/*
 * ->d_delete(): always return 1 so pipefs dentries are pruned from the
 * dcache as soon as the last reference is dropped.
 */
static int pipefs_delete_dentry(struct dentry *dentry)
{
	return 1;
}
755
/* Dentry ops for pipefs; only ->d_delete is overridden. */
static struct dentry_operations pipefs_dentry_operations = {
	.d_delete	= pipefs_delete_dentry,
};
759
/*
 * Allocate a pipefs inode with an attached pipe_inode_info, set up for
 * one reader and one writer.  Returns NULL on failure (inode or pipe
 * allocation); the inode is released via iput() if the pipe allocation
 * fails.
 */
static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode(pipe_mnt->mnt_sb);
	struct pipe_inode_info *pipe;

	if (!inode)
		goto fail_inode;

	pipe = alloc_pipe_info(inode);
	if (!pipe)
		goto fail_iput;
	inode->i_pipe = pipe;

	/* do_pipe() creates both ends at once. */
	pipe->readers = pipe->writers = 1;
	inode->i_fop = &rdwr_pipe_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	inode->i_blksize = PAGE_SIZE;

	return inode;

fail_iput:
	iput(inode);

fail_inode:
	return NULL;
}
797
/*
 * Implement the pipe(2) system call body: create a pipefs inode, two
 * struct files (read end in fd[0], write end in fd[1]) and install them
 * in the caller's fd table.  Returns 0 on success or a negative errno;
 * on failure every resource acquired so far is unwound in reverse order
 * via the goto chain.
 */
int do_pipe(int *fd)
{
	struct qstr this;
	char name[32];
	struct dentry *dentry;
	struct inode * inode;
	struct file *f1, *f2;
	int error;
	int i, j;

	error = -ENFILE;
	f1 = get_empty_filp();
	if (!f1)
		goto no_files;

	f2 = get_empty_filp();
	if (!f2)
		goto close_f1;

	inode = get_pipe_inode();
	if (!inode)
		goto close_f12;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode;
	i = error;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode_i;
	j = error;

	error = -ENOMEM;
	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len = strlen(name);
	this.hash = inode->i_ino; /* will go */
	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
	if (!dentry)
		goto close_f12_inode_i_j;

	dentry->d_op = &pipefs_dentry_operations;
	d_add(dentry, inode);
	/*
	 * The nested mntget() is deliberate: it takes two mount
	 * references in one expression, one for each struct file.
	 */
	f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
	/* d_alloc() took one dentry reference; dget() takes the second. */
	f1->f_dentry = f2->f_dentry = dget(dentry);
	f1->f_mapping = f2->f_mapping = inode->i_mapping;

	/* read file */
	f1->f_pos = f2->f_pos = 0;
	f1->f_flags = O_RDONLY;
	f1->f_op = &read_pipe_fops;
	f1->f_mode = FMODE_READ;
	f1->f_version = 0;

	/* write file */
	f2->f_flags = O_WRONLY;
	f2->f_op = &write_pipe_fops;
	f2->f_mode = FMODE_WRITE;
	f2->f_version = 0;

	fd_install(i, f1);
	fd_install(j, f2);
	fd[0] = i;
	fd[1] = j;

	return 0;

close_f12_inode_i_j:
	put_unused_fd(j);
close_f12_inode_i:
	put_unused_fd(i);
close_f12_inode:
	free_pipe_info(inode);
	iput(inode);
close_f12:
	put_filp(f2);
close_f1:
	put_filp(f1);
no_files:
	return error;
}
880
881 /*
882  * pipefs should _never_ be mounted by userland - too much of security hassle,
883  * no real gain from having the whole whorehouse mounted. So we don't need
884  * any operations on the root directory. However, we need a non-trivial
885  * d_name - pipe: will go nicely and kill the special-casing in procfs.
886  */
887
/*
 * ->get_sb() for pipefs: a pseudo filesystem with the "pipe:" prefix
 * (shows up nicely in /proc/<pid>/fd symlinks).
 */
static struct super_block *
pipefs_get_sb(struct file_system_type *fs_type, int flags,
	      const char *dev_name, void *data)
{
	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
}
894
/* The pipefs filesystem type; kernel-mounted only, never by userland. */
static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.get_sb		= pipefs_get_sb,
	.kill_sb	= kill_anon_super,
};
900
901 static int __init init_pipe_fs(void)
902 {
903         int err = register_filesystem(&pipe_fs_type);
904
905         if (!err) {
906                 pipe_mnt = kern_mount(&pipe_fs_type);
907                 if (IS_ERR(pipe_mnt)) {
908                         err = PTR_ERR(pipe_mnt);
909                         unregister_filesystem(&pipe_fs_type);
910                 }
911         }
912         return err;
913 }
914
/* Tear down pipefs: unregister the filesystem and drop the kernel mount. */
static void __exit exit_pipe_fs(void)
{
	unregister_filesystem(&pipe_fs_type);
	mntput(pipe_mnt);
}
920
921 fs_initcall(init_pipe_fs);
922 module_exit(exit_pipe_fs);