sysfs: Move all of inode initialization into sysfs_init_inode
[safe/jmp/linux-2.6] / fs / ext3 / namei.c
index b45c88b..c1fa190 100644 (file)
@@ -36,7 +36,6 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 #include <linux/bio.h>
-#include <linux/smp_lock.h>
 
 #include "namei.h"
 #include "xattr.h"
@@ -141,7 +140,8 @@ struct dx_frame
 struct dx_map_entry
 {
        u32 hash;       /* hash of the entry's name (h.hash in dx_make_map) */
-       u32 offs;
+       u16 offs;       /* (char *)de - base, narrowed to 16 bits */
+       u16 size;       /* the entry's rec_len, converted from little-endian */
 };
 
 #ifdef CONFIG_EXT3_INDEX
@@ -380,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 
        entries = (struct dx_entry *) (((char *)&root->info) +
                                       root->info.info_length);
-       assert(dx_get_limit(entries) == dx_root_limit(dir,
-                                                     root->info.info_length));
+
+       if (dx_get_limit(entries) != dx_root_limit(dir,
+                                                  root->info.info_length)) {
+               ext3_warning(dir->i_sb, __FUNCTION__,
+                            "dx entry: limit != root limit");
+               brelse(bh);
+               *err = ERR_BAD_DX_DIR;
+               goto fail;
+       }
+
        dxtrace (printk("Look up %x", hash));
        while (1)
        {
                count = dx_get_count(entries);
-               assert (count && count <= dx_get_limit(entries));
+               if (!count || count > dx_get_limit(entries)) {
+                       ext3_warning(dir->i_sb, __FUNCTION__,
+                                    "dx entry: no count or count > limit");
+                       brelse(bh);
+                       *err = ERR_BAD_DX_DIR;
+                       goto fail2;
+               }
+
                p = entries + 1;
                q = entries + count - 1;
                while (p <= q)
@@ -424,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
                if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
                        goto fail2;
                at = entries = ((struct dx_node *) bh->b_data)->entries;
-               assert (dx_get_limit(entries) == dx_node_limit (dir));
+               if (dx_get_limit(entries) != dx_node_limit (dir)) {
+                       ext3_warning(dir->i_sb, __FUNCTION__,
+                                    "dx entry: limit != node limit");
+                       brelse(bh);
+                       *err = ERR_BAD_DX_DIR;
+                       goto fail2;
+               }
                frame++;
+               frame->bh = NULL;
        }
 fail2:
        while (frame >= frame_in) {
@@ -433,6 +455,10 @@ fail2:
                frame--;
        }
 fail:
+       if (*err == ERR_BAD_DX_DIR)
+               ext3_warning(dir->i_sb, __FUNCTION__,
+                            "Corrupt dir inode %ld, running e2fsck is "
+                            "recommended.", dir->i_ino);
        return NULL;
 }
 
@@ -552,6 +578,15 @@ static int htree_dirblock_to_tree(struct file *dir_file,
                                           dir->i_sb->s_blocksize -
                                           EXT3_DIR_REC_LEN(0));
        for (; de < top; de = ext3_next_entry(de)) {
+               if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
+                                       (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
+                                               +((char *)de - bh->b_data))) {
+                       /* On error, skip the f_pos to the next block. */
+                       dir_file->f_pos = (dir_file->f_pos |
+                                       (dir->i_sb->s_blocksize - 1)) + 1;
+                       brelse (bh);
+                       return count;
+               }
                ext3fs_dirhash(de->name, de->name_len, hinfo);
                if ((hinfo->hash < start_hash) ||
                    ((hinfo->hash == start_hash) &&
@@ -593,7 +628,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 
        dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
                       start_minor_hash));
-       dir = dir_file->f_dentry->d_inode;
+       dir = dir_file->f_path.dentry->d_inode;
        if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
                hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
                hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
@@ -604,7 +639,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
        }
        hinfo.hash = start_hash;
        hinfo.minor_hash = 0;
-       frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
+       frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err);
        if (!frame)
                return err;
 
@@ -663,6 +698,10 @@ errout:
  * Directory block splitting, compacting
  */
 
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
 static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
                        struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
 {
@@ -676,7 +715,8 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
                        ext3fs_dirhash(de->name, de->name_len, &h);
                        map_tail--;
                        map_tail->hash = h.hash;
-                       map_tail->offs = (u32) ((char *) de - base);
+                       map_tail->offs = (u16) ((char *) de - base);
+                       map_tail->size = le16_to_cpu(de->rec_len);
                        count++;
                        cond_resched();
                }
@@ -686,6 +726,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
        return count;
 }
 
+/* Sort map by hash value */
 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
 {
         struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -960,6 +1001,7 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
                                  (block<<EXT3_BLOCK_SIZE_BITS(sb))
                                          +((char *)de - bh->b_data))) {
                                brelse (bh);
+                               *err = ERR_BAD_DX_DIR;
                                goto errout;
                        }
                        *res_dir = de;
@@ -1010,6 +1052,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
 
                if (!inode)
                        return ERR_PTR(-EACCES);
+
+               if (is_bad_inode(inode)) {
+                       iput(inode);
+                       return ERR_PTR(-ENOENT);
+               }
        }
        return d_splice_alias(inode, dentry);
 }
@@ -1045,6 +1092,11 @@ struct dentry *ext3_get_parent(struct dentry *child)
        if (!inode)
                return ERR_PTR(-EACCES);
 
+       if (is_bad_inode(inode)) {
+               iput(inode);
+               return ERR_PTR(-ENOENT);
+       }
+
        parent = d_alloc_anon(inode);
        if (!parent) {
                iput(inode);
@@ -1072,6 +1124,10 @@ static inline void ext3_set_de_type(struct super_block *sb,
 }
 
 #ifdef CONFIG_EXT3_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
 static struct ext3_dir_entry_2 *
 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 {
@@ -1090,6 +1146,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
        return (struct ext3_dir_entry_2 *) (to - rec_len);
 }
 
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
 static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
 {
        struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
@@ -1112,6 +1172,11 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
        return prev;
 }
 
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
 static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
                        struct buffer_head **bh,struct dx_frame *frame,
                        struct dx_hash_info *hinfo, int *error)
@@ -1123,11 +1188,11 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
        u32 hash2;
        struct dx_map_entry *map;
        char *data1 = (*bh)->b_data, *data2;
-       unsigned split;
+       unsigned split, move, size, i;
        struct ext3_dir_entry_2 *de = NULL, *de2;
-       int     err;
+       int     err = 0;
 
-       bh2 = ext3_append (handle, dir, &newblock, error);
+       bh2 = ext3_append (handle, dir, &newblock, &err);
        if (!(bh2)) {
                brelse(*bh);
                *bh = NULL;
@@ -1136,14 +1201,9 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 
        BUFFER_TRACE(*bh, "get_write_access");
        err = ext3_journal_get_write_access(handle, *bh);
-       if (err) {
-       journal_error:
-               brelse(*bh);
-               brelse(bh2);
-               *bh = NULL;
-               ext3_std_error(dir->i_sb, err);
-               goto errout;
-       }
+       if (err)
+               goto journal_error;
+
        BUFFER_TRACE(frame->bh, "get_write_access");
        err = ext3_journal_get_write_access(handle, frame->bh);
        if (err)
@@ -1156,8 +1216,19 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
        count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
                             blocksize, hinfo, map);
        map -= count;
-       split = count/2; // need to adjust to actual middle
        dx_sort_map (map, count);
+       /* Split the existing block in the middle, size-wise */
+       size = move = 0;
+       /* i is unsigned: test before decrement, "i >= 0" is always true */
+       for (i = count; i-- > 0; ) {
+               /* is more than half of this entry in 2nd half of the block? */
+               if (size + map[i].size/2 > blocksize/2)
+                       break;
+               size += map[i].size;
+               move++;
+       }
+       /* map index at which we will split */
+       split = count - move;
        hash2 = map[split].hash;
        continued = hash2 == map[split - 1].hash;
        dxtrace(printk("Split block %i at %x, %i/%i\n",
@@ -1186,8 +1257,16 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
                goto journal_error;
        brelse (bh2);
        dxtrace(dx_show_index ("frame", frame->entries));
-errout:
        return de;
+
+journal_error:
+       brelse(*bh);
+       brelse(bh2);
+       *bh = NULL;
+       ext3_std_error(dir->i_sb, err);
+errout:
+       *error = err;
+       return NULL;
 }
 #endif
 
@@ -1609,21 +1688,6 @@ static int ext3_delete_entry (handle_t *handle,
        return -ENOENT;
 }
 
-/*
- * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we
- * do not perform it in these functions.  We perform it at the call site,
- * if it is needed.
- */
-static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
-{
-       inc_nlink(inode);
-}
-
-static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
-{
-       drop_nlink(inode);
-}
-
 static int ext3_add_nondir(handle_t *handle,
                struct dentry *dentry, struct inode *inode)
 {
@@ -1633,7 +1697,7 @@ static int ext3_add_nondir(handle_t *handle,
                d_instantiate(dentry, inode);
                return 0;
        }
-       ext3_dec_count(handle, inode);
+       drop_nlink(inode);
        iput(inode);
        return err;
 }
@@ -2045,7 +2109,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
                              "empty directory has nlink!=2 (%d)",
                              inode->i_nlink);
        inode->i_version++;
-       inode->i_nlink = 0;
+       clear_nlink(inode);
        /* There's no need to set i_disksize: the fact that i_nlink is
         * zero will ensure that the right thing happens during any
         * recovery. */
@@ -2154,7 +2218,7 @@ retry:
                err = __page_symlink(inode, symname, l,
                                mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
                if (err) {
-                       ext3_dec_count(handle, inode);
+                       drop_nlink(inode);
                        ext3_mark_inode_dirty(handle, inode);
                        iput (inode);
                        goto out_stop;
@@ -2182,6 +2246,12 @@ static int ext3_link (struct dentry * old_dentry,
 
        if (inode->i_nlink >= EXT3_LINK_MAX)
                return -EMLINK;
+       /*
+        * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
+        * otherwise has the potential to corrupt the orphan inode list.
+        */
+       if (inode->i_nlink == 0)
+               return -ENOENT;
 
 retry:
        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -2193,7 +2263,7 @@ retry:
                handle->h_sync = 1;
 
        inode->i_ctime = CURRENT_TIME_SEC;
-       ext3_inc_count(handle, inode);
+       inc_nlink(inode);
        atomic_inc(&inode->i_count);
 
        err = ext3_add_nondir(handle, dentry, inode);
@@ -2365,7 +2435,7 @@ end_rename:
 /*
  * directories can handle most operations...
  */
-struct inode_operations ext3_dir_inode_operations = {
+const struct inode_operations ext3_dir_inode_operations = {
        .create         = ext3_create,
        .lookup         = ext3_lookup,
        .link           = ext3_link,
@@ -2385,7 +2455,7 @@ struct inode_operations ext3_dir_inode_operations = {
        .permission     = ext3_permission,
 };
 
-struct inode_operations ext3_special_inode_operations = {
+const struct inode_operations ext3_special_inode_operations = {
        .setattr        = ext3_setattr,
 #ifdef CONFIG_EXT3_FS_XATTR
        .setxattr       = generic_setxattr,