[XFS] add keys_inorder and recs_inorder btree methods
[safe/jmp/linux-2.6] / fs / xfs / xfs_inode.c
index 419cfc2..cc0474d 100644 (file)
@@ -41,6 +41,7 @@
 #include "xfs_buf_item.h"
 #include "xfs_inode_item.h"
 #include "xfs_btree.h"
+#include "xfs_btree_trace.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_bmap.h"
@@ -788,6 +789,70 @@ xfs_dic2xflags(
 }
 
 /*
+ * Allocate and initialise an xfs_inode.
+ */
+struct xfs_inode *
+xfs_inode_alloc(
+       struct xfs_mount        *mp,
+       xfs_ino_t               ino)
+{
+       struct xfs_inode        *ip;
+
+       /*
+        * If this allocation never happened inside a transaction, we
+        * could use KM_MAYFAIL and return NULL here on ENOMEM. Handle
+        * a NULL return anyway so the code is ready for that change.
+        */
+       ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
+       if (!ip)
+               return NULL;
+
+       ASSERT(atomic_read(&ip->i_iocount) == 0);
+       ASSERT(atomic_read(&ip->i_pincount) == 0);
+       ASSERT(!spin_is_locked(&ip->i_flags_lock));
+       ASSERT(list_empty(&ip->i_reclaim));
+
+       ip->i_ino = ino;
+       ip->i_mount = mp;
+       ip->i_blkno = 0;
+       ip->i_len = 0;
+       ip->i_boffset =0;
+       ip->i_afp = NULL;
+       memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
+       ip->i_flags = 0;
+       ip->i_update_core = 0;
+       ip->i_update_size = 0;
+       ip->i_delayed_blks = 0;
+       memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
+       ip->i_size = 0;
+       ip->i_new_size = 0;
+
+       /*
+        * Initialize inode's trace buffers.
+        */
+#ifdef XFS_INODE_TRACE
+       ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_BMAP_TRACE
+       ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_BTREE_TRACE
+       ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_RW_TRACE
+       ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_ILOCK_TRACE
+       ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_DIR2_TRACE
+       ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
+#endif
+
+       return ip;
+}
+
+/*
  * Given a mount structure and an inode number, return a pointer
  * to a newly allocated in-core inode corresponding to the given
  * inode number.
@@ -809,13 +874,9 @@ xfs_iread(
        xfs_inode_t     *ip;
        int             error;
 
-       ASSERT(xfs_inode_zone != NULL);
-
-       ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
-       ip->i_ino = ino;
-       ip->i_mount = mp;
-       atomic_set(&ip->i_iocount, 0);
-       spin_lock_init(&ip->i_flags_lock);
+       ip = xfs_inode_alloc(mp, ino);
+       if (!ip)
+               return ENOMEM;
 
        /*
         * Get pointer's to the on-disk inode and the buffer containing it.
@@ -826,39 +887,16 @@ xfs_iread(
         */
        error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK);
        if (error) {
-               kmem_zone_free(xfs_inode_zone, ip);
+               xfs_idestroy(ip);
                return error;
        }
 
        /*
-        * Initialize inode's trace buffers.
-        * Do this before xfs_iformat in case it adds entries.
-        */
-#ifdef XFS_INODE_TRACE
-       ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
-#endif
-#ifdef XFS_BMAP_TRACE
-       ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
-#endif
-#ifdef XFS_BMBT_TRACE
-       ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
-#endif
-#ifdef XFS_RW_TRACE
-       ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
-#endif
-#ifdef XFS_ILOCK_TRACE
-       ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
-#endif
-#ifdef XFS_DIR2_TRACE
-       ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
-#endif
-
-       /*
         * If we got something that isn't an inode it means someone
         * (nfs or dmi) has a stale handle.
         */
        if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) {
-               kmem_zone_free(xfs_inode_zone, ip);
+               xfs_idestroy(ip);
                xfs_trans_brelse(tp, bp);
 #ifdef DEBUG
                xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
@@ -881,7 +919,7 @@ xfs_iread(
                xfs_dinode_from_disk(&ip->i_d, &dip->di_core);
                error = xfs_iformat(ip, dip);
                if (error)  {
-                       kmem_zone_free(xfs_inode_zone, ip);
+                       xfs_idestroy(ip);
                        xfs_trans_brelse(tp, bp);
 #ifdef DEBUG
                        xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
@@ -911,8 +949,6 @@ xfs_iread(
                        XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
        }
 
-       INIT_LIST_HEAD(&ip->i_reclaim);
-
        /*
         * The inode format changed when we moved the link count and
         * made it 32 bits long.  If this is an old format inode,
@@ -2631,8 +2667,6 @@ xfs_idestroy(
        }
        if (ip->i_afp)
                xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-       mrfree(&ip->i_lock);
-       mrfree(&ip->i_iolock);
 
 #ifdef XFS_INODE_TRACE
        ktrace_free(ip->i_trace);
@@ -2640,7 +2674,7 @@ xfs_idestroy(
 #ifdef XFS_BMAP_TRACE
        ktrace_free(ip->i_xtrace);
 #endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
        ktrace_free(ip->i_btrace);
 #endif
 #ifdef XFS_RW_TRACE
@@ -2671,7 +2705,13 @@ xfs_idestroy(
                                spin_unlock(&mp->m_ail_lock);
                }
                xfs_inode_item_destroy(ip);
+               ip->i_itemp = NULL;
        }
+       /* verify the inode is back in the state xfs_inode_alloc() asserts */
+       ASSERT(atomic_read(&ip->i_iocount) == 0);
+       ASSERT(atomic_read(&ip->i_pincount) == 0);
+       ASSERT(!spin_is_locked(&ip->i_flags_lock));
+       ASSERT(list_empty(&ip->i_reclaim));
        kmem_zone_free(xfs_inode_zone, ip);
 }
 
@@ -4118,7 +4158,7 @@ xfs_iext_indirect_to_direct(
        ASSERT(nextents <= XFS_LINEAR_EXTS);
        size = nextents * sizeof(xfs_bmbt_rec_t);
 
-       xfs_iext_irec_compact_full(ifp);
+       xfs_iext_irec_compact_pages(ifp);
        ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
 
        ep = ifp->if_u1.if_ext_irec->er_extbuf;
@@ -4449,8 +4489,7 @@ xfs_iext_irec_remove(
  * compaction policy is as follows:
  *
  *    Full Compaction: Extents fit into a single page (or inline buffer)
- *    Full Compaction: Extents occupy less than 10% of allocated space
- * Partial Compaction: Extents occupy > 10% and < 50% of allocated space
+ * Partial Compaction: Extents occupy less than 50% of allocated space
  *      No Compaction: Extents occupy at least 50% of allocated space
  */
 void
@@ -4471,8 +4510,6 @@ xfs_iext_irec_compact(
                xfs_iext_direct_to_inline(ifp, nextents);
        } else if (nextents <= XFS_LINEAR_EXTS) {
                xfs_iext_indirect_to_direct(ifp);
-       } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) {
-               xfs_iext_irec_compact_full(ifp);
        } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
                xfs_iext_irec_compact_pages(ifp);
        }
@@ -4496,7 +4533,7 @@ xfs_iext_irec_compact_pages(
                erp_next = erp + 1;
                if (erp_next->er_extcount <=
                    (XFS_LINEAR_EXTS - erp->er_extcount)) {
-                       memmove(&erp->er_extbuf[erp->er_extcount],
+                       memcpy(&erp->er_extbuf[erp->er_extcount],
                                erp_next->er_extbuf, erp_next->er_extcount *
                                sizeof(xfs_bmbt_rec_t));
                        erp->er_extcount += erp_next->er_extcount;
@@ -4516,92 +4553,6 @@ xfs_iext_irec_compact_pages(
 }
 
 /*
- * Fully compact the extent records managed by the indirection array.
- */
-void
-xfs_iext_irec_compact_full(
-       xfs_ifork_t     *ifp)                   /* inode fork pointer */
-{
-       xfs_bmbt_rec_host_t *ep, *ep_next;      /* extent record pointers */
-       xfs_ext_irec_t  *erp, *erp_next;        /* extent irec pointers */
-       int             erp_idx = 0;            /* extent irec index */
-       int             ext_avail;              /* empty entries in ex list */
-       int             ext_diff;               /* number of exts to add */
-       int             nlists;                 /* number of irec's (ex lists) */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-
-       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-       erp = ifp->if_u1.if_ext_irec;
-       ep = &erp->er_extbuf[erp->er_extcount];
-       erp_next = erp + 1;
-       ep_next = erp_next->er_extbuf;
-
-       while (erp_idx < nlists - 1) {
-               /*
-                * Check how many extent records are available in this irec.
-                * If there is none skip the whole exercise.
-                */
-               ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
-               if (ext_avail) {
-
-                       /*
-                        * Copy over as many as possible extent records into
-                        * the previous page.
-                        */
-                       ext_diff = MIN(ext_avail, erp_next->er_extcount);
-                       memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
-                       erp->er_extcount += ext_diff;
-                       erp_next->er_extcount -= ext_diff;
-
-                       /*
-                        * If the next irec is empty now we can simply
-                        * remove it.
-                        */
-                       if (erp_next->er_extcount == 0) {
-                               /*
-                                * Free page before removing extent record
-                                * so er_extoffs don't get modified in
-                                * xfs_iext_irec_remove.
-                                */
-                               kmem_free(erp_next->er_extbuf);
-                               erp_next->er_extbuf = NULL;
-                               xfs_iext_irec_remove(ifp, erp_idx + 1);
-                               erp = &ifp->if_u1.if_ext_irec[erp_idx];
-                               nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-
-                       /*
-                        * If the next irec is not empty move up the content
-                        * that has not been copied to the previous page to
-                        * the beggining of this one.
-                        */
-                       } else {
-                               memmove(erp_next->er_extbuf, &ep_next[ext_diff],
-                                       erp_next->er_extcount *
-                                       sizeof(xfs_bmbt_rec_t));
-                               ep_next = erp_next->er_extbuf;
-                               memset(&ep_next[erp_next->er_extcount], 0,
-                                       (XFS_LINEAR_EXTS -
-                                               erp_next->er_extcount) *
-                                       sizeof(xfs_bmbt_rec_t));
-                               erp_next->er_extoff += ext_diff;
-                       }
-               }
-
-               if (erp->er_extcount == XFS_LINEAR_EXTS) {
-                       erp_idx++;
-                       if (erp_idx < nlists)
-                               erp = &ifp->if_u1.if_ext_irec[erp_idx];
-                       else
-                               break;
-               }
-               ep = &erp->er_extbuf[erp->er_extcount];
-               erp_next = erp + 1;
-               ep_next = erp_next->er_extbuf;
-       }
-}
-
-/*
  * This is called to update the er_extoff field in the indirection
  * array when extents have been added or removed from one of the
  * extent lists. erp_idx contains the irec index to begin updating