Btrfs: Use CONFIG_BTRFS_POSIX_ACL to enable ACL code
[safe/jmp/linux-2.6] / fs / ext4 / namei.c
index 92db9e9..de04013 100644 (file)
@@ -37,7 +37,6 @@
 #include "ext4.h"
 #include "ext4_jbd2.h"
 
-#include "namei.h"
 #include "xattr.h"
 #include "acl.h"
 
@@ -74,10 +73,6 @@ static struct buffer_head *ext4_append(handle_t *handle,
 #define assert(test) J_ASSERT(test)
 #endif
 
-#ifndef swap
-#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
-#endif
-
 #ifdef DX_DEBUG
 #define dxtrace(command) command
 #else
@@ -165,12 +160,12 @@ static struct dx_frame *dx_probe(const struct qstr *d_name,
                                 struct dx_frame *frame,
                                 int *err);
 static void dx_release(struct dx_frame *frames);
-static int dx_make_map(struct ext4_dir_entry_2 *de, int size,
+static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
                       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
 static void dx_sort_map(struct dx_map_entry *map, unsigned count);
 static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
-               struct dx_map_entry *offsets, int count);
-static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size);
+               struct dx_map_entry *offsets, int count, unsigned blocksize);
+static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
 static void dx_insert_block(struct dx_frame *frame,
                                        u32 hash, ext4_lblk_t block);
 static int ext4_htree_next_block(struct inode *dir, __u32 hash,
@@ -184,14 +179,38 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                             struct inode *inode);
 
+unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
+{
+       unsigned raw = le16_to_cpu(dlen);
+
+       if (raw != 0 && raw != EXT4_MAX_REC_LEN)
+               return ((raw & 3) << 16) | (raw & 65532);
+       return blocksize;       /* 0 and EXT4_MAX_REC_LEN both decode to blocksize */
+}
+  
+__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
+{
+       /* Caller bug: misaligned or oversized length, or block > 256KiB. */
+       if ((len & 3) || blocksize > (1 << 18) || len > blocksize)
+               BUG();
+       /* Below 64KiB the length is stored as-is. */
+       if (len < 65536)
+               return cpu_to_le16(len);
+       /* Bits 16-17 of a partial-block length move into bits 0-1. */
+       if (len != blocksize)
+               return cpu_to_le16(((len >> 16) & 3) | (len & 65532));
+       /* Whole-block length: one of two reserved encodings. */
+       return cpu_to_le16(blocksize == 65536 ? EXT4_MAX_REC_LEN : 0);
+}
+
 /*
  * p is at least 6 bytes before the end of page
  */
 static inline struct ext4_dir_entry_2 *
-ext4_next_entry(struct ext4_dir_entry_2 *p)
+ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
 {
        return (struct ext4_dir_entry_2 *)((char *)p +
-               ext4_rec_len_from_disk(p->rec_len));
+               ext4_rec_len_from_disk(p->rec_len, blocksize));
 }
 
 /*
@@ -298,7 +317,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
                        space += EXT4_DIR_REC_LEN(de->name_len);
                        names++;
                }
-               de = ext4_next_entry(de);
+               de = ext4_next_entry(de, size);
        }
        printk("(%i)\n", names);
        return (struct stats) { names, space, 1 };
@@ -372,6 +391,8 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
                goto fail;
        }
        hinfo->hash_version = root->info.hash_version;
+       if (hinfo->hash_version <= DX_HASH_TEA)
+               hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
        hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
        if (d_name)
                ext4fs_dirhash(d_name->name, d_name->len, hinfo);
@@ -587,7 +608,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
        top = (struct ext4_dir_entry_2 *) ((char *) de +
                                           dir->i_sb->s_blocksize -
                                           EXT4_DIR_REC_LEN(0));
-       for (; de < top; de = ext4_next_entry(de)) {
+       for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
                if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
                                        (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
                                                +((char *)de - bh->b_data))) {
@@ -641,6 +662,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
        dir = dir_file->f_path.dentry->d_inode;
        if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
                hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
+               if (hinfo.hash_version <= DX_HASH_TEA)
+                       hinfo.hash_version +=
+                               EXT4_SB(dir->i_sb)->s_hash_unsigned;
                hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
                count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
                                               start_hash, start_minor_hash);
@@ -662,7 +686,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
        }
        if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
                de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
-               de = ext4_next_entry(de);
+               de = ext4_next_entry(de, dir->i_sb->s_blocksize);
                if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
                        goto errout;
                count++;
@@ -712,26 +736,26 @@ errout:
  * Create map of hash values, offsets, and sizes, stored at end of block.
  * Returns number of entries mapped.
  */
-static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
-                       struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
+static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
+                      struct dx_hash_info *hinfo,
+                      struct dx_map_entry *map_tail)
 {
        int count = 0;
        char *base = (char *) de;
        struct dx_hash_info h = *hinfo;
 
-       while ((char *) de < base + size)
-       {
+       while ((char *) de < base + blocksize) {
                if (de->name_len && de->inode) {
                        ext4fs_dirhash(de->name, de->name_len, &h);
                        map_tail--;
                        map_tail->hash = h.hash;
-                       map_tail->offs = (u16) ((char *) de - base);
+                       map_tail->offs = ((char *) de - base)>>2;
                        map_tail->size = le16_to_cpu(de->rec_len);
                        count++;
                        cond_resched();
                }
                /* XXX: do we need to check rec_len == 0 case? -Chris */
-               de = ext4_next_entry(de);
+               de = ext4_next_entry(de, blocksize);
        }
        return count;
 }
@@ -806,7 +830,7 @@ static inline int ext4_match (int len, const char * const name,
 static inline int search_dirblock(struct buffer_head *bh,
                                  struct inode *dir,
                                  const struct qstr *d_name,
-                                 unsigned long offset,
+                                 unsigned int offset,
                                  struct ext4_dir_entry_2 ** res_dir)
 {
        struct ext4_dir_entry_2 * de;
@@ -831,7 +855,8 @@ static inline int search_dirblock(struct buffer_head *bh,
                        return 1;
                }
                /* prevent looping on a bad block */
-               de_len = ext4_rec_len_from_disk(de->rec_len);
+               de_len = ext4_rec_len_from_disk(de->rec_len,
+                                               dir->i_sb->s_blocksize);
                if (de_len <= 0)
                        return -1;
                offset += de_len;
@@ -995,7 +1020,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
                de = (struct ext4_dir_entry_2 *) bh->b_data;
                top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
                                       EXT4_DIR_REC_LEN(0));
-               for (; de < top; de = ext4_next_entry(de)) {
+               for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
                        int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
                                  + ((char *) de - bh->b_data);
 
@@ -1043,16 +1068,24 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
        bh = ext4_find_entry(dir, &dentry->d_name, &de);
        inode = NULL;
        if (bh) {
-               unsigned long ino = le32_to_cpu(de->inode);
+               __u32 ino = le32_to_cpu(de->inode);
                brelse(bh);
                if (!ext4_valid_inum(dir->i_sb, ino)) {
                        ext4_error(dir->i_sb, "ext4_lookup",
-                                  "bad inode number: %lu", ino);
+                                  "bad inode number: %u", ino);
                        return ERR_PTR(-EIO);
                }
                inode = ext4_iget(dir->i_sb, ino);
-               if (IS_ERR(inode))
-                       return ERR_CAST(inode);
+               if (unlikely(IS_ERR(inode))) {
+                       if (PTR_ERR(inode) == -ESTALE) {
+                               ext4_error(dir->i_sb, __func__,
+                                               "deleted inode referenced: %u",
+                                               ino);
+                               return ERR_PTR(-EIO);
+                       } else {
+                               return ERR_CAST(inode);
+                       }
+               }
        }
        return d_splice_alias(inode, dentry);
 }
@@ -1060,8 +1093,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
 
 struct dentry *ext4_get_parent(struct dentry *child)
 {
-       unsigned long ino;
-       struct dentry *parent;
+       __u32 ino;
        struct inode *inode;
        static const struct qstr dotdot = {
                .name = "..",
@@ -1079,20 +1111,11 @@ struct dentry *ext4_get_parent(struct dentry *child)
 
        if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
                ext4_error(child->d_inode->i_sb, "ext4_get_parent",
-                          "bad inode number: %lu", ino);
+                          "bad inode number: %u", ino);
                return ERR_PTR(-EIO);
        }
 
-       inode = ext4_iget(child->d_inode->i_sb, ino);
-       if (IS_ERR(inode))
-               return ERR_CAST(inode);
-
-       parent = d_alloc_anon(inode);
-       if (!parent) {
-               iput(inode);
-               parent = ERR_PTR(-ENOMEM);
-       }
-       return parent;
+       return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
 }
 
 #define S_SHIFT 12
@@ -1118,16 +1141,18 @@ static inline void ext4_set_de_type(struct super_block *sb,
  * Returns pointer to last entry moved.
  */
 static struct ext4_dir_entry_2 *
-dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
+dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
+               unsigned blocksize)
 {
        unsigned rec_len = 0;
 
        while (count--) {
-               struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs);
+               struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) 
+                                               (from + (map->offs<<2));
                rec_len = EXT4_DIR_REC_LEN(de->name_len);
                memcpy (to, de, rec_len);
                ((struct ext4_dir_entry_2 *) to)->rec_len =
-                               ext4_rec_len_to_disk(rec_len);
+                               ext4_rec_len_to_disk(rec_len, blocksize);
                de->inode = 0;
                map++;
                to += rec_len;
@@ -1139,19 +1164,19 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
  * Compact each dir entry in the range to the minimal rec_len.
  * Returns pointer to last entry in range.
  */
-static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
+static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
 {
        struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
        unsigned rec_len = 0;
 
        prev = to = de;
-       while ((char*)de < base + size) {
-               next = ext4_next_entry(de);
+       while ((char*)de < base + blocksize) {
+               next = ext4_next_entry(de, blocksize);
                if (de->inode && de->name_len) {
                        rec_len = EXT4_DIR_REC_LEN(de->name_len);
                        if (de > to)
                                memmove(to, de, rec_len);
-                       to->rec_len = ext4_rec_len_to_disk(rec_len);
+                       to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
                        prev = to;
                        to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
                }
@@ -1176,9 +1201,9 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
        u32 hash2;
        struct dx_map_entry *map;
        char *data1 = (*bh)->b_data, *data2;
-       unsigned split, move, size, i;
+       unsigned split, move, size;
        struct ext4_dir_entry_2 *de = NULL, *de2;
-       int     err = 0;
+       int     err = 0, i;
 
        bh2 = ext4_append (handle, dir, &newblock, &err);
        if (!(bh2)) {
@@ -1224,10 +1249,12 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
                                        hash2, split, count-split));
 
        /* Fancy dance to stay within two buffers */
-       de2 = dx_move_dirents(data1, data2, map + split, count - split);
+       de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize);
        de = dx_pack_dirents(data1, blocksize);
-       de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
-       de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
+       de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
+                                          blocksize);
+       de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2,
+                                           blocksize);
        dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
        dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
 
@@ -1238,10 +1265,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
                de = de2;
        }
        dx_insert_block(frame, hash2 + continued, newblock);
-       err = ext4_journal_dirty_metadata(handle, bh2);
+       err = ext4_handle_dirty_metadata(handle, dir, bh2);
        if (err)
                goto journal_error;
-       err = ext4_journal_dirty_metadata(handle, frame->bh);
+       err = ext4_handle_dirty_metadata(handle, dir, frame->bh);
        if (err)
                goto journal_error;
        brelse(bh2);
@@ -1276,7 +1303,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
        struct inode    *dir = dentry->d_parent->d_inode;
        const char      *name = dentry->d_name.name;
        int             namelen = dentry->d_name.len;
-       unsigned long   offset = 0;
+       unsigned int    offset = 0;
+       unsigned int    blocksize = dir->i_sb->s_blocksize;
        unsigned short  reclen;
        int             nlen, rlen, err;
        char            *top;
@@ -1284,7 +1312,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
        reclen = EXT4_DIR_REC_LEN(namelen);
        if (!de) {
                de = (struct ext4_dir_entry_2 *)bh->b_data;
-               top = bh->b_data + dir->i_sb->s_blocksize - reclen;
+               top = bh->b_data + blocksize - reclen;
                while ((char *) de <= top) {
                        if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
                                                  bh, offset)) {
@@ -1296,7 +1324,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
                                return -EEXIST;
                        }
                        nlen = EXT4_DIR_REC_LEN(de->name_len);
-                       rlen = ext4_rec_len_from_disk(de->rec_len);
+                       rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
                        if ((de->inode? rlen - nlen: rlen) >= reclen)
                                break;
                        de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
@@ -1315,11 +1343,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 
        /* By now the buffer is marked for journaling */
        nlen = EXT4_DIR_REC_LEN(de->name_len);
-       rlen = ext4_rec_len_from_disk(de->rec_len);
+       rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
        if (de->inode) {
                struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
-               de1->rec_len = ext4_rec_len_to_disk(rlen - nlen);
-               de->rec_len = ext4_rec_len_to_disk(nlen);
+               de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, blocksize);
+               de->rec_len = ext4_rec_len_to_disk(nlen, blocksize);
                de = de1;
        }
        de->file_type = EXT4_FT_UNKNOWN;
@@ -1345,8 +1373,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
        ext4_update_dx_flag(dir);
        dir->i_version++;
        ext4_mark_inode_dirty(handle, dir);
-       BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-       err = ext4_journal_dirty_metadata(handle, bh);
+       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+       err = ext4_handle_dirty_metadata(handle, dir, bh);
        if (err)
                ext4_std_error(dir->i_sb, err);
        brelse(bh);
@@ -1377,7 +1405,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        struct fake_dirent *fde;
 
        blocksize =  dir->i_sb->s_blocksize;
-       dxtrace(printk(KERN_DEBUG "Creating index\n"));
+       dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
        retval = ext4_journal_get_write_access(handle, bh);
        if (retval) {
                ext4_std_error(dir->i_sb, retval);
@@ -1386,6 +1414,20 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        }
        root = (struct dx_root *) bh->b_data;
 
+       /* The 0th block becomes the root, move the dirents out */
+       fde = &root->dotdot;
+       de = (struct ext4_dir_entry_2 *)((char *)fde +
+               ext4_rec_len_from_disk(fde->rec_len, blocksize));
+       if ((char *) de >= (((char *) root) + blocksize)) {
+               ext4_error(dir->i_sb, __func__,
+                          "invalid rec_len for '..' in inode %lu",
+                          dir->i_ino);
+               brelse(bh);
+               return -EIO;
+       }
+       len = ((char *) root) + blocksize - (char *) de;
+
+       /* Allocate new block for the 0th block's dirents */
        bh2 = ext4_append(handle, dir, &block, &retval);
        if (!(bh2)) {
                brelse(bh);
@@ -1394,20 +1436,17 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        EXT4_I(dir)->i_flags |= EXT4_INDEX_FL;
        data1 = bh2->b_data;
 
-       /* The 0th block becomes the root, move the dirents out */
-       fde = &root->dotdot;
-       de = (struct ext4_dir_entry_2 *)((char *)fde +
-               ext4_rec_len_from_disk(fde->rec_len));
-       len = ((char *) root) + blocksize - (char *) de;
        memcpy (data1, de, len);
        de = (struct ext4_dir_entry_2 *) data1;
        top = data1 + len;
-       while ((char *)(de2 = ext4_next_entry(de)) < top)
+       while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
                de = de2;
-       de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
+       de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
+                                          blocksize);
        /* Initialize the root; the dot dirents already exist */
        de = (struct ext4_dir_entry_2 *) (&root->dotdot);
-       de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2));
+       de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
+                                          blocksize);
        memset (&root->info, 0, sizeof(root->info));
        root->info.info_length = sizeof(root->info);
        root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
@@ -1418,6 +1457,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 
        /* Initialize as for dx_probe */
        hinfo.hash_version = root->info.hash_version;
+       if (hinfo.hash_version <= DX_HASH_TEA)
+               hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
        hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
        ext4fs_dirhash(name, namelen, &hinfo);
        frame = frames;
@@ -1447,7 +1488,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                          struct inode *inode)
 {
        struct inode *dir = dentry->d_parent->d_inode;
-       unsigned long offset;
        struct buffer_head *bh;
        struct ext4_dir_entry_2 *de;
        struct super_block *sb;
@@ -1469,7 +1509,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                ext4_mark_inode_dirty(handle, dir);
        }
        blocks = dir->i_size >> sb->s_blocksize_bits;
-       for (block = 0, offset = 0; block < blocks; block++) {
+       for (block = 0; block < blocks; block++) {
                bh = ext4_bread(handle, dir, block, 0, &retval);
                if(!bh)
                        return retval;
@@ -1487,7 +1527,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                return retval;
        de = (struct ext4_dir_entry_2 *) bh->b_data;
        de->inode = 0;
-       de->rec_len = ext4_rec_len_to_disk(blocksize);
+       de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
        return add_dirent_to_buf(handle, dentry, inode, de, bh);
 }
 
@@ -1550,7 +1590,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                        goto cleanup;
                node2 = (struct dx_node *)(bh2->b_data);
                entries2 = node2->entries;
-               node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize);
+               node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
+                                                          sb->s_blocksize);
                node2->fake.inode = 0;
                BUFFER_TRACE(frame->bh, "get_write_access");
                err = ext4_journal_get_write_access(handle, frame->bh);
@@ -1584,7 +1625,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                        dxtrace(dx_show_index("node", frames[1].entries));
                        dxtrace(dx_show_index("node",
                               ((struct dx_node *) bh2->b_data)->entries));
-                       err = ext4_journal_dirty_metadata(handle, bh2);
+                       err = ext4_handle_dirty_metadata(handle, inode, bh2);
                        if (err)
                                goto journal_error;
                        brelse (bh2);
@@ -1610,7 +1651,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                        if (err)
                                goto journal_error;
                }
-               ext4_journal_dirty_metadata(handle, frames[0].bh);
+               ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
        }
        de = do_split(handle, dir, &bh, frame, &hinfo, &err);
        if (!de)
@@ -1638,6 +1679,7 @@ static int ext4_delete_entry(handle_t *handle,
                             struct buffer_head *bh)
 {
        struct ext4_dir_entry_2 *de, *pde;
+       unsigned int blocksize = dir->i_sb->s_blocksize;
        int i;
 
        i = 0;
@@ -1651,18 +1693,21 @@ static int ext4_delete_entry(handle_t *handle,
                        ext4_journal_get_write_access(handle, bh);
                        if (pde)
                                pde->rec_len = ext4_rec_len_to_disk(
-                                       ext4_rec_len_from_disk(pde->rec_len) +
-                                       ext4_rec_len_from_disk(de->rec_len));
+                                       ext4_rec_len_from_disk(pde->rec_len,
+                                                              blocksize) +
+                                       ext4_rec_len_from_disk(de->rec_len,
+                                                              blocksize),
+                                       blocksize);
                        else
                                de->inode = 0;
                        dir->i_version++;
-                       BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-                       ext4_journal_dirty_metadata(handle, bh);
+                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+                       ext4_handle_dirty_metadata(handle, dir, bh);
                        return 0;
                }
-               i += ext4_rec_len_from_disk(de->rec_len);
+               i += ext4_rec_len_from_disk(de->rec_len, blocksize);
                pde = de;
-               de = ext4_next_entry(de);
+               de = ext4_next_entry(de, blocksize);
        }
        return -ENOENT;
 }
@@ -1703,9 +1748,11 @@ static int ext4_add_nondir(handle_t *handle,
        if (!err) {
                ext4_mark_inode_dirty(handle, inode);
                d_instantiate(dentry, inode);
+               unlock_new_inode(inode);
                return 0;
        }
        drop_nlink(inode);
+       unlock_new_inode(inode);
        iput(inode);
        return err;
 }
@@ -1733,9 +1780,9 @@ retry:
                return PTR_ERR(handle);
 
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
 
-       inode = ext4_new_inode (handle, dir, mode);
+       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                inode->i_op = &ext4_file_inode_operations;
@@ -1767,9 +1814,9 @@ retry:
                return PTR_ERR(handle);
 
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
 
-       inode = ext4_new_inode(handle, dir, mode);
+       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                init_special_inode(inode, inode->i_mode, rdev);
@@ -1790,6 +1837,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        struct inode *inode;
        struct buffer_head *dir_block;
        struct ext4_dir_entry_2 *de;
+       unsigned int blocksize = dir->i_sb->s_blocksize;
        int err, retries = 0;
 
        if (EXT4_DIR_LINK_MAX(dir))
@@ -1803,9 +1851,10 @@ retry:
                return PTR_ERR(handle);
 
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
 
-       inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
+       inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
+                              &dentry->d_name, 0);
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_stop;
@@ -1821,25 +1870,27 @@ retry:
        de = (struct ext4_dir_entry_2 *) dir_block->b_data;
        de->inode = cpu_to_le32(inode->i_ino);
        de->name_len = 1;
-       de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
+       de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
+                                          blocksize);
        strcpy(de->name, ".");
        ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-       de = ext4_next_entry(de);
+       de = ext4_next_entry(de, blocksize);
        de->inode = cpu_to_le32(dir->i_ino);
-       de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize -
-                                               EXT4_DIR_REC_LEN(1));
+       de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1),
+                                          blocksize);
        de->name_len = 2;
        strcpy(de->name, "..");
        ext4_set_de_type(dir->i_sb, de, S_IFDIR);
        inode->i_nlink = 2;
-       BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
-       ext4_journal_dirty_metadata(handle, dir_block);
+       BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
+       ext4_handle_dirty_metadata(handle, dir, dir_block);
        brelse(dir_block);
        ext4_mark_inode_dirty(handle, inode);
        err = ext4_add_entry(handle, dentry, inode);
        if (err) {
 out_clear_inode:
                clear_nlink(inode);
+               unlock_new_inode(inode);
                ext4_mark_inode_dirty(handle, inode);
                iput(inode);
                goto out_stop;
@@ -1848,6 +1899,7 @@ out_clear_inode:
        ext4_update_dx_flag(dir);
        ext4_mark_inode_dirty(handle, dir);
        d_instantiate(dentry, inode);
+       unlock_new_inode(inode);
 out_stop:
        ext4_journal_stop(handle);
        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
@@ -1860,7 +1912,7 @@ out_stop:
  */
 static int empty_dir(struct inode *inode)
 {
-       unsigned long offset;
+       unsigned int offset;
        struct buffer_head *bh;
        struct ext4_dir_entry_2 *de, *de1;
        struct super_block *sb;
@@ -1880,7 +1932,7 @@ static int empty_dir(struct inode *inode)
                return 1;
        }
        de = (struct ext4_dir_entry_2 *) bh->b_data;
-       de1 = ext4_next_entry(de);
+       de1 = ext4_next_entry(de, sb->s_blocksize);
        if (le32_to_cpu(de->inode) != inode->i_ino ||
                        !le32_to_cpu(de1->inode) ||
                        strcmp(".", de->name) ||
@@ -1891,9 +1943,9 @@ static int empty_dir(struct inode *inode)
                brelse(bh);
                return 1;
        }
-       offset = ext4_rec_len_from_disk(de->rec_len) +
-                ext4_rec_len_from_disk(de1->rec_len);
-       de = ext4_next_entry(de1);
+       offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) +
+                ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
+       de = ext4_next_entry(de1, sb->s_blocksize);
        while (offset < inode->i_size) {
                if (!bh ||
                        (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
@@ -1905,7 +1957,7 @@ static int empty_dir(struct inode *inode)
                                if (err)
                                        ext4_error(sb, __func__,
                                                   "error %d reading directory"
-                                                  " #%lu offset %lu",
+                                                  " #%lu offset %u",
                                                   err, inode->i_ino, offset);
                                offset += sb->s_blocksize;
                                continue;
@@ -1922,8 +1974,8 @@ static int empty_dir(struct inode *inode)
                        brelse(bh);
                        return 0;
                }
-               offset += ext4_rec_len_from_disk(de->rec_len);
-               de = ext4_next_entry(de);
+               offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
+               de = ext4_next_entry(de, sb->s_blocksize);
        }
        brelse(bh);
        return 1;
@@ -1943,7 +1995,10 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
        struct ext4_iloc iloc;
        int err = 0, rc;
 
-       lock_super(sb);
+       if (!ext4_handle_valid(handle))
+               return 0;
+
+       mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
        if (!list_empty(&EXT4_I(inode)->i_orphan))
                goto out_unlock;
 
@@ -1952,9 +2007,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 
        /* @@@ FIXME: Observation from aviro:
         * I think I can trigger J_ASSERT in ext4_orphan_add().  We block
-        * here (on lock_super()), so race with ext4_link() which might bump
+        * here (on s_orphan_lock), so race with ext4_link() which might bump
         * ->i_nlink. For, say it, character device. Not a regular file,
         * not a directory, not a symlink and ->i_nlink > 0.
+        *
+        * tytso, 4/25/2009: I'm not sure how that could happen;
+        * shouldn't the fs core protect us from these sorts of
+        * unlink()/link() races?
         */
        J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
                  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -1971,7 +2030,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
        /* Insert this inode at the head of the on-disk orphan list... */
        NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
        EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
-       err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+       err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh);
        rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
        if (!err)
                err = rc;
@@ -1991,7 +2050,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
        jbd_debug(4, "orphan inode %lu will point to %d\n",
                        inode->i_ino, NEXT_ORPHAN(inode));
 out_unlock:
-       unlock_super(sb);
+       mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
        ext4_std_error(inode->i_sb, err);
        return err;
 }
@@ -2005,15 +2064,16 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
        struct list_head *prev;
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct ext4_sb_info *sbi;
-       unsigned long ino_next;
+       __u32 ino_next;
        struct ext4_iloc iloc;
        int err = 0;
 
-       lock_super(inode->i_sb);
-       if (list_empty(&ei->i_orphan)) {
-               unlock_super(inode->i_sb);
+       if (!ext4_handle_valid(handle))
                return 0;
-       }
+
+       mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
+       if (list_empty(&ei->i_orphan))
+               goto out;
 
        ino_next = NEXT_ORPHAN(inode);
        prev = ei->i_orphan.prev;
@@ -2027,7 +2087,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
         * transaction handle with which to update the orphan list on
         * disk, but we still need to remove the inode from the linked
         * list in memory. */
-       if (!handle)
+       if (sbi->s_journal && !handle)
                goto out;
 
        err = ext4_reserve_inode_write(handle, inode, &iloc);
@@ -2035,19 +2095,19 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
                goto out_err;
 
        if (prev == &sbi->s_orphan) {
-               jbd_debug(4, "superblock will point to %lu\n", ino_next);
+               jbd_debug(4, "superblock will point to %u\n", ino_next);
                BUFFER_TRACE(sbi->s_sbh, "get_write_access");
                err = ext4_journal_get_write_access(handle, sbi->s_sbh);
                if (err)
                        goto out_brelse;
                sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
-               err = ext4_journal_dirty_metadata(handle, sbi->s_sbh);
+               err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh);
        } else {
                struct ext4_iloc iloc2;
                struct inode *i_prev =
                        &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
 
-               jbd_debug(4, "orphan inode %lu will point to %lu\n",
+               jbd_debug(4, "orphan inode %lu will point to %u\n",
                          i_prev->i_ino, ino_next);
                err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
                if (err)
@@ -2063,7 +2123,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 out_err:
        ext4_std_error(inode->i_sb, err);
 out:
-       unlock_super(inode->i_sb);
+       mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
        return err;
 
 out_brelse:
@@ -2081,7 +2141,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
 
        /* Initialize quotas before so that eventual writes go in
         * separate transaction */
-       DQUOT_INIT(dentry->d_inode);
+       vfs_dq_init(dentry->d_inode);
        handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
@@ -2092,7 +2152,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
                goto end_rmdir;
 
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
 
        inode = dentry->d_inode;
 
@@ -2140,13 +2200,13 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 
        /* Initialize quotas before so that eventual writes go
         * in separate transaction */
-       DQUOT_INIT(dentry->d_inode);
+       vfs_dq_init(dentry->d_inode);
        handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
 
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
 
        retval = -ENOENT;
        bh = ext4_find_entry(dir, &dentry->d_name, &de);
@@ -2203,9 +2263,10 @@ retry:
                return PTR_ERR(handle);
 
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
 
-       inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
+       inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
+                              &dentry->d_name, 0);
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_stop;
@@ -2218,10 +2279,10 @@ retry:
                 * We have a transaction open.  All is sweetness.  It also sets
                 * i_size in generic_commit_write().
                 */
-               err = __page_symlink(inode, symname, l,
-                               mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+               err = __page_symlink(inode, symname, l, 1);
                if (err) {
                        clear_nlink(inode);
+                       unlock_new_inode(inode);
                        ext4_mark_inode_dirty(handle, inode);
                        iput(inode);
                        goto out_stop;
@@ -2266,21 +2327,28 @@ retry:
                return PTR_ERR(handle);
 
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
 
        inode->i_ctime = ext4_current_time(inode);
        ext4_inc_count(handle, inode);
        atomic_inc(&inode->i_count);
 
-       err = ext4_add_nondir(handle, dentry, inode);
+       err = ext4_add_entry(handle, dentry, inode);
+       if (!err) {
+               ext4_mark_inode_dirty(handle, inode);
+               d_instantiate(dentry, inode);
+       } else {
+               drop_nlink(inode);
+               iput(inode);
+       }
        ext4_journal_stop(handle);
        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
                goto retry;
        return err;
 }
 
-#define PARENT_INO(buffer) \
-       (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer))->inode)
+#define PARENT_INO(buffer, size) \
+       (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer), size)->inode)
 
 /*
  * Anybody can rename anything with this: the permission checks are left to the
@@ -2293,14 +2361,14 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct inode *old_inode, *new_inode;
        struct buffer_head *old_bh, *new_bh, *dir_bh;
        struct ext4_dir_entry_2 *old_de, *new_de;
-       int retval;
+       int retval, force_da_alloc = 0;
 
        old_bh = new_bh = dir_bh = NULL;
 
        /* Initialize quotas before so that eventual writes go
         * in separate transaction */
        if (new_dentry->d_inode)
-               DQUOT_INIT(new_dentry->d_inode);
+               vfs_dq_init(new_dentry->d_inode);
        handle = ext4_journal_start(old_dir, 2 *
                                        EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
@@ -2308,7 +2376,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                return PTR_ERR(handle);
 
        if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
 
        old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
        /*
@@ -2340,7 +2408,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
                if (!dir_bh)
                        goto end_rename;
-               if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
+               if (le32_to_cpu(PARENT_INO(dir_bh->b_data,
+                               old_dir->i_sb->s_blocksize)) != old_dir->i_ino)
                        goto end_rename;
                retval = -EMLINK;
                if (!new_inode && new_dir != old_dir &&
@@ -2362,8 +2431,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                new_dir->i_ctime = new_dir->i_mtime =
                                        ext4_current_time(new_dir);
                ext4_mark_inode_dirty(handle, new_dir);
-               BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
-               ext4_journal_dirty_metadata(handle, new_bh);
+               BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
+               ext4_handle_dirty_metadata(handle, new_dir, new_bh);
                brelse(new_bh);
                new_bh = NULL;
        }
@@ -2412,9 +2481,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (dir_bh) {
                BUFFER_TRACE(dir_bh, "get_write_access");
                ext4_journal_get_write_access(handle, dir_bh);
-               PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
-               BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata");
-               ext4_journal_dirty_metadata(handle, dir_bh);
+               PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
+                                               cpu_to_le32(new_dir->i_ino);
+               BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
+               ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
                ext4_dec_count(handle, old_dir);
                if (new_inode) {
                        /* checked empty_dir above, can't have another parent,
@@ -2431,6 +2501,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                ext4_mark_inode_dirty(handle, new_inode);
                if (!new_inode->i_nlink)
                        ext4_orphan_add(handle, new_inode);
+               if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
+                       force_da_alloc = 1;
        }
        retval = 0;
 
@@ -2439,6 +2511,8 @@ end_rename:
        brelse(old_bh);
        brelse(new_bh);
        ext4_journal_stop(handle);
+       if (retval == 0 && force_da_alloc)
+               ext4_alloc_da_blocks(old_inode);
        return retval;
 }
 
@@ -2463,6 +2537,7 @@ const struct inode_operations ext4_dir_inode_operations = {
        .removexattr    = generic_removexattr,
 #endif
        .permission     = ext4_permission,
+       .fiemap         = ext4_fiemap,
 };
 
 const struct inode_operations ext4_special_inode_operations = {