NFSD: Remove NFSD_TCP kernel build option
[safe/jmp/linux-2.6] / fs / ext3 / inode.c
index 0775354..c683609 100644 (file)
@@ -392,7 +392,7 @@ no_block:
  *     @inode: owner
  *     @ind: descriptor of indirect block.
  *
- *     This function returns the prefered place for block allocation.
+ *     This function returns the preferred place for block allocation.
  *     It is used when heuristic for sequential allocation fails.
  *     Rules are:
  *       + if there is a block to the left of our position - allocate near it.
@@ -436,19 +436,17 @@ static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind)
 }
 
 /**
- *     ext3_find_goal - find a prefered place for allocation.
+ *     ext3_find_goal - find a preferred place for allocation.
  *     @inode: owner
  *     @block:  block we want
- *     @chain:  chain of indirect blocks
  *     @partial: pointer to the last triple within a chain
- *     @goal:  place to store the result.
  *
- *     Normally this function find the prefered place for block allocation,
- *     stores it in *@goal and returns zero.
+ *     Normally this function find the preferred place for block allocation,
+ *     returns it.
  */
 
 static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
-               Indirect chain[4], Indirect *partial)
+                                  Indirect *partial)
 {
        struct ext3_block_alloc_info *block_i;
 
@@ -884,7 +882,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
        if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
                ext3_init_block_alloc_info(inode);
 
-       goal = ext3_find_goal(inode, iblock, chain, partial);
+       goal = ext3_find_goal(inode, iblock, partial);
 
        /* the number of blocks need to allocate for [d,t]indirect blocks */
        indirect_blks = (chain + depth) - partial - 1;
@@ -941,55 +939,45 @@ out:
        return err;
 }
 
-#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32)
+/* Maximum number of blocks we map for direct IO at once. */
+#define DIO_MAX_BLOCKS 4096
+/*
+ * Number of credits we need for writing DIO_MAX_BLOCKS:
+ * We need sb + group descriptor + bitmap + inode -> 4
+ * For B blocks with A block pointers per block we need:
+ * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
+ * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
+ */
+#define DIO_CREDITS 25
 
 static int ext3_get_block(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh_result, int create)
 {
        handle_t *handle = ext3_journal_current_handle();
-       int ret = 0;
+       int ret = 0, started = 0;
        unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
 
-       if (!create)
-               goto get_block;         /* A read */
-
-       if (max_blocks == 1)
-               goto get_block;         /* A single block get */
-
-       if (handle->h_transaction->t_state == T_LOCKED) {
-               /*
-                * Huge direct-io writes can hold off commits for long
-                * periods of time.  Let this commit run.
-                */
-               ext3_journal_stop(handle);
-               handle = ext3_journal_start(inode, DIO_CREDITS);
-               if (IS_ERR(handle))
+       if (create && !handle) {        /* Direct IO write... */
+               if (max_blocks > DIO_MAX_BLOCKS)
+                       max_blocks = DIO_MAX_BLOCKS;
+               handle = ext3_journal_start(inode, DIO_CREDITS +
+                               2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb));
+               if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
-               goto get_block;
-       }
-
-       if (handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) {
-               /*
-                * Getting low on buffer credits...
-                */
-               ret = ext3_journal_extend(handle, DIO_CREDITS);
-               if (ret > 0) {
-                       /*
-                        * Couldn't extend the transaction.  Start a new one.
-                        */
-                       ret = ext3_journal_restart(handle, DIO_CREDITS);
+                       goto out;
                }
+               started = 1;
        }
 
-get_block:
-       if (ret == 0) {
-               ret = ext3_get_blocks_handle(handle, inode, iblock,
+       ret = ext3_get_blocks_handle(handle, inode, iblock,
                                        max_blocks, bh_result, create, 0);
-               if (ret > 0) {
-                       bh_result->b_size = (ret << inode->i_blkbits);
-                       ret = 0;
-               }
+       if (ret > 0) {
+               bh_result->b_size = (ret << inode->i_blkbits);
+               ret = 0;
        }
+       if (started)
+               ext3_journal_stop(handle);
+out:
        return ret;
 }
 
@@ -1680,7 +1668,8 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
  * if the machine crashes during the write.
  *
  * If the O_DIRECT write is intantiating holes inside i_size and the machine
- * crashes then stale disk data _may_ be exposed inside the file.
+ * crashes then stale disk data _may_ be exposed inside the file. But current
+ * VFS code falls back into buffered path in that case so we are safe.
  */
 static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
                        const struct iovec *iov, loff_t offset,
@@ -1689,7 +1678,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        struct ext3_inode_info *ei = EXT3_I(inode);
-       handle_t *handle = NULL;
+       handle_t *handle;
        ssize_t ret;
        int orphan = 0;
        size_t count = iov_length(iov, nr_segs);
@@ -1697,17 +1686,21 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
        if (rw == WRITE) {
                loff_t final_size = offset + count;
 
-               handle = ext3_journal_start(inode, DIO_CREDITS);
-               if (IS_ERR(handle)) {
-                       ret = PTR_ERR(handle);
-                       goto out;
-               }
                if (final_size > inode->i_size) {
+                       /* Credits for sb + inode write */
+                       handle = ext3_journal_start(inode, 2);
+                       if (IS_ERR(handle)) {
+                               ret = PTR_ERR(handle);
+                               goto out;
+                       }
                        ret = ext3_orphan_add(handle, inode);
-                       if (ret)
-                               goto out_stop;
+                       if (ret) {
+                               ext3_journal_stop(handle);
+                               goto out;
+                       }
                        orphan = 1;
                        ei->i_disksize = inode->i_size;
+                       ext3_journal_stop(handle);
                }
        }
 
@@ -1715,18 +1708,21 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
                                 offset, nr_segs,
                                 ext3_get_block, NULL);
 
-       /*
-        * Reacquire the handle: ext3_get_block() can restart the transaction
-        */
-       handle = ext3_journal_current_handle();
-
-out_stop:
-       if (handle) {
+       if (orphan) {
                int err;
 
-               if (orphan && inode->i_nlink)
+               /* Credits for sb + inode write */
+               handle = ext3_journal_start(inode, 2);
+               if (IS_ERR(handle)) {
+                       /* This is really bad luck. We've written the data
+                        * but cannot extend i_size. Bail out and pretend
+                        * the write failed... */
+                       ret = PTR_ERR(handle);
+                       goto out;
+               }
+               if (inode->i_nlink)
                        ext3_orphan_del(handle, inode);
-               if (orphan && ret > 0) {
+               if (ret > 0) {
                        loff_t end = offset + ret;
                        if (end > inode->i_size) {
                                ei->i_disksize = end;
@@ -2658,21 +2654,31 @@ void ext3_get_inode_flags(struct ext3_inode_info *ei)
                ei->i_flags |= EXT3_DIRSYNC_FL;
 }
 
-void ext3_read_inode(struct inode * inode)
+struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 {
        struct ext3_iloc iloc;
        struct ext3_inode *raw_inode;
-       struct ext3_inode_info *ei = EXT3_I(inode);
+       struct ext3_inode_info *ei;
        struct buffer_head *bh;
+       struct inode *inode;
+       long ret;
        int block;
 
+       inode = iget_locked(sb, ino);
+       if (!inode)
+               return ERR_PTR(-ENOMEM);
+       if (!(inode->i_state & I_NEW))
+               return inode;
+
+       ei = EXT3_I(inode);
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
        ei->i_acl = EXT3_ACL_NOT_CACHED;
        ei->i_default_acl = EXT3_ACL_NOT_CACHED;
 #endif
        ei->i_block_alloc_info = NULL;
 
-       if (__ext3_get_inode_loc(inode, &iloc, 0))
+       ret = __ext3_get_inode_loc(inode, &iloc, 0);
+       if (ret < 0)
                goto bad_inode;
        bh = iloc.bh;
        raw_inode = ext3_raw_inode(&iloc);
@@ -2703,6 +2709,7 @@ void ext3_read_inode(struct inode * inode)
                    !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
                        /* this inode is deleted */
                        brelse (bh);
+                       ret = -ESTALE;
                        goto bad_inode;
                }
                /* The only unlinked inodes we let through here have
@@ -2746,6 +2753,7 @@ void ext3_read_inode(struct inode * inode)
                if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
                    EXT3_INODE_SIZE(inode->i_sb)) {
                        brelse (bh);
+                       ret = -EIO;
                        goto bad_inode;
                }
                if (ei->i_extra_isize == 0) {
@@ -2787,11 +2795,12 @@ void ext3_read_inode(struct inode * inode)
        }
        brelse (iloc.bh);
        ext3_set_inode_flags(inode);
-       return;
+       unlock_new_inode(inode);
+       return inode;
 
 bad_inode:
-       make_bad_inode(inode);
-       return;
+       iget_failed(inode);
+       return ERR_PTR(ret);
 }
 
 /*