[PATCH] splice: fix page stealing LRU handling.
[safe/jmp/linux-2.6] / fs / pipe.c
1 /*
2  *  linux/fs/pipe.c
3  *
4  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
5  */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <linux/uio.h>
17 #include <linux/highmem.h>
18 #include <linux/pagemap.h>
19
20 #include <asm/uaccess.h>
21 #include <asm/ioctls.h>
22
23 /*
24  * We use a start+len construction, which provides full use of the 
25  * allocated memory.
26  * -- Florian Coosmann (FGC)
27  * 
28  * Reads with count = 0 should always return 0.
29  * -- Julian Bradfield 1999-06-07.
30  *
31  * FIFOs and Pipes now generate SIGIO for both readers and writers.
32  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
33  *
34  * pipe_read & write cleanup
35  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
36  */
37
38 /* Drop the inode semaphore and wait for a pipe event, atomically */
39 void pipe_wait(struct inode * inode)
40 {
41         DEFINE_WAIT(wait);
42
43         /*
44          * Pipes are system-local resources, so sleeping on them
45          * is considered a noninteractive wait:
46          */
47         prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
48         mutex_unlock(PIPE_MUTEX(*inode));
49         schedule();
50         finish_wait(PIPE_WAIT(*inode), &wait);
51         mutex_lock(PIPE_MUTEX(*inode));
52 }
53
54 static int
55 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
56 {
57         unsigned long copy;
58
59         while (len > 0) {
60                 while (!iov->iov_len)
61                         iov++;
62                 copy = min_t(unsigned long, len, iov->iov_len);
63
64                 if (copy_from_user(to, iov->iov_base, copy))
65                         return -EFAULT;
66                 to += copy;
67                 len -= copy;
68                 iov->iov_base += copy;
69                 iov->iov_len -= copy;
70         }
71         return 0;
72 }
73
74 static int
75 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
76 {
77         unsigned long copy;
78
79         while (len > 0) {
80                 while (!iov->iov_len)
81                         iov++;
82                 copy = min_t(unsigned long, len, iov->iov_len);
83
84                 if (copy_to_user(iov->iov_base, from, copy))
85                         return -EFAULT;
86                 from += copy;
87                 len -= copy;
88                 iov->iov_base += copy;
89                 iov->iov_len -= copy;
90         }
91         return 0;
92 }
93
94 static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
95 {
96         struct page *page = buf->page;
97
98         buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
99
100         /*
101          * If nobody else uses this page, and we don't already have a
102          * temporary page, let's keep track of it as a one-deep
103          * allocation cache
104          */
105         if (page_count(page) == 1 && !info->tmp_page) {
106                 info->tmp_page = page;
107                 return;
108         }
109
110         /*
111          * Otherwise just release our reference to it
112          */
113         page_cache_release(page);
114 }
115
116 static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
117 {
118         return kmap(buf->page);
119 }
120
121 static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
122 {
123         kunmap(buf->page);
124 }
125
126 static int anon_pipe_buf_steal(struct pipe_inode_info *info,
127                                struct pipe_buffer *buf)
128 {
129         buf->flags |= PIPE_BUF_FLAG_STOLEN;
130         return 0;
131 }
132
133 static struct pipe_buf_operations anon_pipe_buf_ops = {
134         .can_merge = 1,
135         .map = anon_pipe_buf_map,
136         .unmap = anon_pipe_buf_unmap,
137         .release = anon_pipe_buf_release,
138         .steal = anon_pipe_buf_steal,
139 };
140
141 static ssize_t
142 pipe_readv(struct file *filp, const struct iovec *_iov,
143            unsigned long nr_segs, loff_t *ppos)
144 {
145         struct inode *inode = filp->f_dentry->d_inode;
146         struct pipe_inode_info *info;
147         int do_wakeup;
148         ssize_t ret;
149         struct iovec *iov = (struct iovec *)_iov;
150         size_t total_len;
151
152         total_len = iov_length(iov, nr_segs);
153         /* Null read succeeds. */
154         if (unlikely(total_len == 0))
155                 return 0;
156
157         do_wakeup = 0;
158         ret = 0;
159         mutex_lock(PIPE_MUTEX(*inode));
160         info = inode->i_pipe;
161         for (;;) {
162                 int bufs = info->nrbufs;
163                 if (bufs) {
164                         int curbuf = info->curbuf;
165                         struct pipe_buffer *buf = info->bufs + curbuf;
166                         struct pipe_buf_operations *ops = buf->ops;
167                         void *addr;
168                         size_t chars = buf->len;
169                         int error;
170
171                         if (chars > total_len)
172                                 chars = total_len;
173
174                         addr = ops->map(filp, info, buf);
175                         if (IS_ERR(addr)) {
176                                 if (!ret)
177                                         ret = PTR_ERR(addr);
178                                 break;
179                         }
180                         error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
181                         ops->unmap(info, buf);
182                         if (unlikely(error)) {
183                                 if (!ret) ret = -EFAULT;
184                                 break;
185                         }
186                         ret += chars;
187                         buf->offset += chars;
188                         buf->len -= chars;
189                         if (!buf->len) {
190                                 buf->ops = NULL;
191                                 ops->release(info, buf);
192                                 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
193                                 info->curbuf = curbuf;
194                                 info->nrbufs = --bufs;
195                                 do_wakeup = 1;
196                         }
197                         total_len -= chars;
198                         if (!total_len)
199                                 break;  /* common path: read succeeded */
200                 }
201                 if (bufs)       /* More to do? */
202                         continue;
203                 if (!PIPE_WRITERS(*inode))
204                         break;
205                 if (!PIPE_WAITING_WRITERS(*inode)) {
206                         /* syscall merging: Usually we must not sleep
207                          * if O_NONBLOCK is set, or if we got some data.
208                          * But if a writer sleeps in kernel space, then
209                          * we can wait for that data without violating POSIX.
210                          */
211                         if (ret)
212                                 break;
213                         if (filp->f_flags & O_NONBLOCK) {
214                                 ret = -EAGAIN;
215                                 break;
216                         }
217                 }
218                 if (signal_pending(current)) {
219                         if (!ret) ret = -ERESTARTSYS;
220                         break;
221                 }
222                 if (do_wakeup) {
223                         wake_up_interruptible_sync(PIPE_WAIT(*inode));
224                         kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
225                 }
226                 pipe_wait(inode);
227         }
228         mutex_unlock(PIPE_MUTEX(*inode));
229         /* Signal writers asynchronously that there is more room.  */
230         if (do_wakeup) {
231                 wake_up_interruptible(PIPE_WAIT(*inode));
232                 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
233         }
234         if (ret > 0)
235                 file_accessed(filp);
236         return ret;
237 }
238
239 static ssize_t
240 pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
241 {
242         struct iovec iov = { .iov_base = buf, .iov_len = count };
243         return pipe_readv(filp, &iov, 1, ppos);
244 }
245
246 static ssize_t
247 pipe_writev(struct file *filp, const struct iovec *_iov,
248             unsigned long nr_segs, loff_t *ppos)
249 {
250         struct inode *inode = filp->f_dentry->d_inode;
251         struct pipe_inode_info *info;
252         ssize_t ret;
253         int do_wakeup;
254         struct iovec *iov = (struct iovec *)_iov;
255         size_t total_len;
256         ssize_t chars;
257
258         total_len = iov_length(iov, nr_segs);
259         /* Null write succeeds. */
260         if (unlikely(total_len == 0))
261                 return 0;
262
263         do_wakeup = 0;
264         ret = 0;
265         mutex_lock(PIPE_MUTEX(*inode));
266         info = inode->i_pipe;
267
268         if (!PIPE_READERS(*inode)) {
269                 send_sig(SIGPIPE, current, 0);
270                 ret = -EPIPE;
271                 goto out;
272         }
273
274         /* We try to merge small writes */
275         chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
276         if (info->nrbufs && chars != 0) {
277                 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
278                 struct pipe_buffer *buf = info->bufs + lastbuf;
279                 struct pipe_buf_operations *ops = buf->ops;
280                 int offset = buf->offset + buf->len;
281                 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
282                         void *addr;
283                         int error;
284
285                         addr = ops->map(filp, info, buf);
286                         if (IS_ERR(addr)) {
287                                 error = PTR_ERR(addr);
288                                 goto out;
289                         }
290                         error = pipe_iov_copy_from_user(offset + addr, iov,
291                                                         chars);
292                         ops->unmap(info, buf);
293                         ret = error;
294                         do_wakeup = 1;
295                         if (error)
296                                 goto out;
297                         buf->len += chars;
298                         total_len -= chars;
299                         ret = chars;
300                         if (!total_len)
301                                 goto out;
302                 }
303         }
304
305         for (;;) {
306                 int bufs;
307                 if (!PIPE_READERS(*inode)) {
308                         send_sig(SIGPIPE, current, 0);
309                         if (!ret) ret = -EPIPE;
310                         break;
311                 }
312                 bufs = info->nrbufs;
313                 if (bufs < PIPE_BUFFERS) {
314                         int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
315                         struct pipe_buffer *buf = info->bufs + newbuf;
316                         struct page *page = info->tmp_page;
317                         int error;
318
319                         if (!page) {
320                                 page = alloc_page(GFP_HIGHUSER);
321                                 if (unlikely(!page)) {
322                                         ret = ret ? : -ENOMEM;
323                                         break;
324                                 }
325                                 info->tmp_page = page;
326                         }
327                         /* Always wakeup, even if the copy fails. Otherwise
328                          * we lock up (O_NONBLOCK-)readers that sleep due to
329                          * syscall merging.
330                          * FIXME! Is this really true?
331                          */
332                         do_wakeup = 1;
333                         chars = PAGE_SIZE;
334                         if (chars > total_len)
335                                 chars = total_len;
336
337                         error = pipe_iov_copy_from_user(kmap(page), iov, chars);
338                         kunmap(page);
339                         if (unlikely(error)) {
340                                 if (!ret) ret = -EFAULT;
341                                 break;
342                         }
343                         ret += chars;
344
345                         /* Insert it into the buffer array */
346                         buf->page = page;
347                         buf->ops = &anon_pipe_buf_ops;
348                         buf->offset = 0;
349                         buf->len = chars;
350                         info->nrbufs = ++bufs;
351                         info->tmp_page = NULL;
352
353                         total_len -= chars;
354                         if (!total_len)
355                                 break;
356                 }
357                 if (bufs < PIPE_BUFFERS)
358                         continue;
359                 if (filp->f_flags & O_NONBLOCK) {
360                         if (!ret) ret = -EAGAIN;
361                         break;
362                 }
363                 if (signal_pending(current)) {
364                         if (!ret) ret = -ERESTARTSYS;
365                         break;
366                 }
367                 if (do_wakeup) {
368                         wake_up_interruptible_sync(PIPE_WAIT(*inode));
369                         kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
370                         do_wakeup = 0;
371                 }
372                 PIPE_WAITING_WRITERS(*inode)++;
373                 pipe_wait(inode);
374                 PIPE_WAITING_WRITERS(*inode)--;
375         }
376 out:
377         mutex_unlock(PIPE_MUTEX(*inode));
378         if (do_wakeup) {
379                 wake_up_interruptible(PIPE_WAIT(*inode));
380                 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
381         }
382         if (ret > 0)
383                 file_update_time(filp);
384         return ret;
385 }
386
387 static ssize_t
388 pipe_write(struct file *filp, const char __user *buf,
389            size_t count, loff_t *ppos)
390 {
391         struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
392         return pipe_writev(filp, &iov, 1, ppos);
393 }
394
395 static ssize_t
396 bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
397 {
398         return -EBADF;
399 }
400
401 static ssize_t
402 bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
403 {
404         return -EBADF;
405 }
406
407 static int
408 pipe_ioctl(struct inode *pino, struct file *filp,
409            unsigned int cmd, unsigned long arg)
410 {
411         struct inode *inode = filp->f_dentry->d_inode;
412         struct pipe_inode_info *info;
413         int count, buf, nrbufs;
414
415         switch (cmd) {
416                 case FIONREAD:
417                         mutex_lock(PIPE_MUTEX(*inode));
418                         info =  inode->i_pipe;
419                         count = 0;
420                         buf = info->curbuf;
421                         nrbufs = info->nrbufs;
422                         while (--nrbufs >= 0) {
423                                 count += info->bufs[buf].len;
424                                 buf = (buf+1) & (PIPE_BUFFERS-1);
425                         }
426                         mutex_unlock(PIPE_MUTEX(*inode));
427                         return put_user(count, (int __user *)arg);
428                 default:
429                         return -EINVAL;
430         }
431 }
432
433 /* No kernel lock held - fine */
434 static unsigned int
435 pipe_poll(struct file *filp, poll_table *wait)
436 {
437         unsigned int mask;
438         struct inode *inode = filp->f_dentry->d_inode;
439         struct pipe_inode_info *info = inode->i_pipe;
440         int nrbufs;
441
442         poll_wait(filp, PIPE_WAIT(*inode), wait);
443
444         /* Reading only -- no need for acquiring the semaphore.  */
445         nrbufs = info->nrbufs;
446         mask = 0;
447         if (filp->f_mode & FMODE_READ) {
448                 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
449                 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
450                         mask |= POLLHUP;
451         }
452
453         if (filp->f_mode & FMODE_WRITE) {
454                 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
455                 /*
456                  * Most Unices do not set POLLERR for FIFOs but on Linux they
457                  * behave exactly like pipes for poll().
458                  */
459                 if (!PIPE_READERS(*inode))
460                         mask |= POLLERR;
461         }
462
463         return mask;
464 }
465
466 static int
467 pipe_release(struct inode *inode, int decr, int decw)
468 {
469         mutex_lock(PIPE_MUTEX(*inode));
470         PIPE_READERS(*inode) -= decr;
471         PIPE_WRITERS(*inode) -= decw;
472         if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
473                 free_pipe_info(inode);
474         } else {
475                 wake_up_interruptible(PIPE_WAIT(*inode));
476                 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
477                 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
478         }
479         mutex_unlock(PIPE_MUTEX(*inode));
480
481         return 0;
482 }
483
484 static int
485 pipe_read_fasync(int fd, struct file *filp, int on)
486 {
487         struct inode *inode = filp->f_dentry->d_inode;
488         int retval;
489
490         mutex_lock(PIPE_MUTEX(*inode));
491         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
492         mutex_unlock(PIPE_MUTEX(*inode));
493
494         if (retval < 0)
495                 return retval;
496
497         return 0;
498 }
499
500
501 static int
502 pipe_write_fasync(int fd, struct file *filp, int on)
503 {
504         struct inode *inode = filp->f_dentry->d_inode;
505         int retval;
506
507         mutex_lock(PIPE_MUTEX(*inode));
508         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
509         mutex_unlock(PIPE_MUTEX(*inode));
510
511         if (retval < 0)
512                 return retval;
513
514         return 0;
515 }
516
517
518 static int
519 pipe_rdwr_fasync(int fd, struct file *filp, int on)
520 {
521         struct inode *inode = filp->f_dentry->d_inode;
522         int retval;
523
524         mutex_lock(PIPE_MUTEX(*inode));
525
526         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
527
528         if (retval >= 0)
529                 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
530
531         mutex_unlock(PIPE_MUTEX(*inode));
532
533         if (retval < 0)
534                 return retval;
535
536         return 0;
537 }
538
539
/* Release a read end: drop its fasync entry, then give up one reader. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_read_fasync(-1, filp, 0);
	rc = pipe_release(inode, 1, 0);

	return rc;
}
546
/* Release a write end: drop its fasync entry, then give up one writer. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_write_fasync(-1, filp, 0);
	rc = pipe_release(inode, 0, 1);

	return rc;
}
553
554 static int
555 pipe_rdwr_release(struct inode *inode, struct file *filp)
556 {
557         int decr, decw;
558
559         pipe_rdwr_fasync(-1, filp, 0);
560         decr = (filp->f_mode & FMODE_READ) != 0;
561         decw = (filp->f_mode & FMODE_WRITE) != 0;
562         return pipe_release(inode, decr, decw);
563 }
564
565 static int
566 pipe_read_open(struct inode *inode, struct file *filp)
567 {
568         /* We could have perhaps used atomic_t, but this and friends
569            below are the only places.  So it doesn't seem worthwhile.  */
570         mutex_lock(PIPE_MUTEX(*inode));
571         PIPE_READERS(*inode)++;
572         mutex_unlock(PIPE_MUTEX(*inode));
573
574         return 0;
575 }
576
577 static int
578 pipe_write_open(struct inode *inode, struct file *filp)
579 {
580         mutex_lock(PIPE_MUTEX(*inode));
581         PIPE_WRITERS(*inode)++;
582         mutex_unlock(PIPE_MUTEX(*inode));
583
584         return 0;
585 }
586
587 static int
588 pipe_rdwr_open(struct inode *inode, struct file *filp)
589 {
590         mutex_lock(PIPE_MUTEX(*inode));
591         if (filp->f_mode & FMODE_READ)
592                 PIPE_READERS(*inode)++;
593         if (filp->f_mode & FMODE_WRITE)
594                 PIPE_WRITERS(*inode)++;
595         mutex_unlock(PIPE_MUTEX(*inode));
596
597         return 0;
598 }
599
600 /*
601  * The file_operations structs are not static because they
602  * are also used in linux/fs/fifo.c to do operations on FIFOs.
603  */
604 const struct file_operations read_fifo_fops = {
605         .llseek         = no_llseek,
606         .read           = pipe_read,
607         .readv          = pipe_readv,
608         .write          = bad_pipe_w,
609         .poll           = pipe_poll,
610         .ioctl          = pipe_ioctl,
611         .open           = pipe_read_open,
612         .release        = pipe_read_release,
613         .fasync         = pipe_read_fasync,
614 };
615
616 const struct file_operations write_fifo_fops = {
617         .llseek         = no_llseek,
618         .read           = bad_pipe_r,
619         .write          = pipe_write,
620         .writev         = pipe_writev,
621         .poll           = pipe_poll,
622         .ioctl          = pipe_ioctl,
623         .open           = pipe_write_open,
624         .release        = pipe_write_release,
625         .fasync         = pipe_write_fasync,
626 };
627
628 const struct file_operations rdwr_fifo_fops = {
629         .llseek         = no_llseek,
630         .read           = pipe_read,
631         .readv          = pipe_readv,
632         .write          = pipe_write,
633         .writev         = pipe_writev,
634         .poll           = pipe_poll,
635         .ioctl          = pipe_ioctl,
636         .open           = pipe_rdwr_open,
637         .release        = pipe_rdwr_release,
638         .fasync         = pipe_rdwr_fasync,
639 };
640
641 static struct file_operations read_pipe_fops = {
642         .llseek         = no_llseek,
643         .read           = pipe_read,
644         .readv          = pipe_readv,
645         .write          = bad_pipe_w,
646         .poll           = pipe_poll,
647         .ioctl          = pipe_ioctl,
648         .open           = pipe_read_open,
649         .release        = pipe_read_release,
650         .fasync         = pipe_read_fasync,
651 };
652
653 static struct file_operations write_pipe_fops = {
654         .llseek         = no_llseek,
655         .read           = bad_pipe_r,
656         .write          = pipe_write,
657         .writev         = pipe_writev,
658         .poll           = pipe_poll,
659         .ioctl          = pipe_ioctl,
660         .open           = pipe_write_open,
661         .release        = pipe_write_release,
662         .fasync         = pipe_write_fasync,
663 };
664
665 static struct file_operations rdwr_pipe_fops = {
666         .llseek         = no_llseek,
667         .read           = pipe_read,
668         .readv          = pipe_readv,
669         .write          = pipe_write,
670         .writev         = pipe_writev,
671         .poll           = pipe_poll,
672         .ioctl          = pipe_ioctl,
673         .open           = pipe_rdwr_open,
674         .release        = pipe_rdwr_release,
675         .fasync         = pipe_rdwr_fasync,
676 };
677
678 void free_pipe_info(struct inode *inode)
679 {
680         int i;
681         struct pipe_inode_info *info = inode->i_pipe;
682
683         inode->i_pipe = NULL;
684         for (i = 0; i < PIPE_BUFFERS; i++) {
685                 struct pipe_buffer *buf = info->bufs + i;
686                 if (buf->ops)
687                         buf->ops->release(info, buf);
688         }
689         if (info->tmp_page)
690                 __free_page(info->tmp_page);
691         kfree(info);
692 }
693
694 struct inode* pipe_new(struct inode* inode)
695 {
696         struct pipe_inode_info *info;
697
698         info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
699         if (!info)
700                 goto fail_page;
701         inode->i_pipe = info;
702
703         init_waitqueue_head(PIPE_WAIT(*inode));
704         PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
705
706         return inode;
707 fail_page:
708         return NULL;
709 }
710
/* Internal kernel mount of pipefs; set once by init_pipe_fs() via kern_mount(). */
static struct vfsmount *pipe_mnt __read_mostly;
/* d_delete hook for pipefs dentries: unconditionally returns 1. */
static int pipefs_delete_dentry(struct dentry *dentry)
{
	const int always_delete = 1;

	return always_delete;
}
716 static struct dentry_operations pipefs_dentry_operations = {
717         .d_delete       = pipefs_delete_dentry,
718 };
719
720 static struct inode * get_pipe_inode(void)
721 {
722         struct inode *inode = new_inode(pipe_mnt->mnt_sb);
723
724         if (!inode)
725                 goto fail_inode;
726
727         if(!pipe_new(inode))
728                 goto fail_iput;
729         PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
730         inode->i_fop = &rdwr_pipe_fops;
731
732         /*
733          * Mark the inode dirty from the very beginning,
734          * that way it will never be moved to the dirty
735          * list because "mark_inode_dirty()" will think
736          * that it already _is_ on the dirty list.
737          */
738         inode->i_state = I_DIRTY;
739         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
740         inode->i_uid = current->fsuid;
741         inode->i_gid = current->fsgid;
742         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
743         inode->i_blksize = PAGE_SIZE;
744         return inode;
745
746 fail_iput:
747         iput(inode);
748 fail_inode:
749         return NULL;
750 }
751
752 int do_pipe(int *fd)
753 {
754         struct qstr this;
755         char name[32];
756         struct dentry *dentry;
757         struct inode * inode;
758         struct file *f1, *f2;
759         int error;
760         int i,j;
761
762         error = -ENFILE;
763         f1 = get_empty_filp();
764         if (!f1)
765                 goto no_files;
766
767         f2 = get_empty_filp();
768         if (!f2)
769                 goto close_f1;
770
771         inode = get_pipe_inode();
772         if (!inode)
773                 goto close_f12;
774
775         error = get_unused_fd();
776         if (error < 0)
777                 goto close_f12_inode;
778         i = error;
779
780         error = get_unused_fd();
781         if (error < 0)
782                 goto close_f12_inode_i;
783         j = error;
784
785         error = -ENOMEM;
786         sprintf(name, "[%lu]", inode->i_ino);
787         this.name = name;
788         this.len = strlen(name);
789         this.hash = inode->i_ino; /* will go */
790         dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
791         if (!dentry)
792                 goto close_f12_inode_i_j;
793         dentry->d_op = &pipefs_dentry_operations;
794         d_add(dentry, inode);
795         f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
796         f1->f_dentry = f2->f_dentry = dget(dentry);
797         f1->f_mapping = f2->f_mapping = inode->i_mapping;
798
799         /* read file */
800         f1->f_pos = f2->f_pos = 0;
801         f1->f_flags = O_RDONLY;
802         f1->f_op = &read_pipe_fops;
803         f1->f_mode = FMODE_READ;
804         f1->f_version = 0;
805
806         /* write file */
807         f2->f_flags = O_WRONLY;
808         f2->f_op = &write_pipe_fops;
809         f2->f_mode = FMODE_WRITE;
810         f2->f_version = 0;
811
812         fd_install(i, f1);
813         fd_install(j, f2);
814         fd[0] = i;
815         fd[1] = j;
816         return 0;
817
818 close_f12_inode_i_j:
819         put_unused_fd(j);
820 close_f12_inode_i:
821         put_unused_fd(i);
822 close_f12_inode:
823         free_pipe_info(inode);
824         iput(inode);
825 close_f12:
826         put_filp(f2);
827 close_f1:
828         put_filp(f1);
829 no_files:
830         return error;   
831 }
832
833 /*
834  * pipefs should _never_ be mounted by userland - too much of security hassle,
835  * no real gain from having the whole whorehouse mounted. So we don't need
836  * any operations on the root directory. However, we need a non-trivial
837  * d_name - pipe: will go nicely and kill the special-casing in procfs.
838  */
839
840 static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
841         int flags, const char *dev_name, void *data)
842 {
843         return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
844 }
845
846 static struct file_system_type pipe_fs_type = {
847         .name           = "pipefs",
848         .get_sb         = pipefs_get_sb,
849         .kill_sb        = kill_anon_super,
850 };
851
852 static int __init init_pipe_fs(void)
853 {
854         int err = register_filesystem(&pipe_fs_type);
855         if (!err) {
856                 pipe_mnt = kern_mount(&pipe_fs_type);
857                 if (IS_ERR(pipe_mnt)) {
858                         err = PTR_ERR(pipe_mnt);
859                         unregister_filesystem(&pipe_fs_type);
860                 }
861         }
862         return err;
863 }
864
865 static void __exit exit_pipe_fs(void)
866 {
867         unregister_filesystem(&pipe_fs_type);
868         mntput(pipe_mnt);
869 }
870
/* Hook pipefs setup into the fs initcall stage and register its exit handler. */
fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);