[XFS] add keys_inorder and recs_inorder btree methods

[safe/jmp/linux-2.6] / fs / xfs / xfs_inode.c
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 19e7a7b..cc0474d 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -41,6 +41,7 @@
  #include "xfs_buf_item.h"
  #include "xfs_inode_item.h"
  #include "xfs_btree.h"
+#include "xfs_btree_trace.h"
  #include "xfs_alloc.h"
  #include "xfs_ialloc.h"
  #include "xfs_bmap.h"
@@ -580,8 +581,8 @@ xfs_iformat_extents(
                 xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
                 for (i = 0; i < nex; i++, dp++) {
                         xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
-                       ep->l0 = be64_to_cpu(get_unaligned(&dp->l0));
-                       ep->l1 = be64_to_cpu(get_unaligned(&dp->l1));
+                       ep->l0 = get_unaligned_be64(&dp->l0);
+                       ep->l1 = get_unaligned_be64(&dp->l1);
                 }
                 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
                 if (whichfork != XFS_DATA_FORK ||
@@ -788,6 +789,70 @@ xfs_dic2xflags(
  }
  
  /*
+ * Allocate an in-core xfs_inode for @ino on @mp and initialise it; NULL on failure.
+ */
+struct xfs_inode *
+xfs_inode_alloc(
+       struct xfs_mount        *mp,
+       xfs_ino_t               ino)
+{
+       struct xfs_inode        *ip;
+
+       /*
+        * If this did not occur inside transactions, we could use KM_MAYFAIL
+        * and return NULL here on ENOMEM.  The NULL check below is kept so
+        * the code is already set up to do this.
+        */
+       ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
+       if (!ip)
+               return NULL;
+
+       ASSERT(atomic_read(&ip->i_iocount) == 0);
+       ASSERT(atomic_read(&ip->i_pincount) == 0);
+       ASSERT(!spin_is_locked(&ip->i_flags_lock));
+       ASSERT(list_empty(&ip->i_reclaim));
+
+       ip->i_ino = ino;
+       ip->i_mount = mp;
+       ip->i_blkno = 0;
+       ip->i_len = 0;
+       ip->i_boffset =0;
+       ip->i_afp = NULL;
+       memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
+       ip->i_flags = 0;
+       ip->i_update_core = 0;
+       ip->i_update_size = 0;
+       ip->i_delayed_blks = 0;
+       memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
+       ip->i_size = 0;
+       ip->i_new_size = 0;
+
+       /*
+        * Allocate whichever trace buffers are compiled in (KM_NOFS).
+        */
+#ifdef XFS_INODE_TRACE
+       ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_BMAP_TRACE
+       ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_BTREE_TRACE
+       ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_RW_TRACE
+       ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_ILOCK_TRACE
+       ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_DIR2_TRACE
+       ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
+#endif
+
+       return ip;
+}
+
+/*
   * Given a mount structure and an inode number, return a pointer
   * to a newly allocated in-core inode corresponding to the given
   * inode number.
@@ -809,13 +874,9 @@ xfs_iread(
         xfs_inode_t     *ip;
         int             error;
  
-       ASSERT(xfs_inode_zone != NULL);
-
-       ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
-       ip->i_ino = ino;
-       ip->i_mount = mp;
-       atomic_set(&ip->i_iocount, 0);
-       spin_lock_init(&ip->i_flags_lock);
+       ip = xfs_inode_alloc(mp, ino);
+       if (!ip)
+               return ENOMEM;
  
         /*
          * Get pointer's to the on-disk inode and the buffer containing it.
@@ -826,39 +887,16 @@ xfs_iread(
          */
         error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK);
         if (error) {
-               kmem_zone_free(xfs_inode_zone, ip);
+               xfs_idestroy(ip);
                 return error;
         }
  
         /*
-        * Initialize inode's trace buffers.
-        * Do this before xfs_iformat in case it adds entries.
-        */
-#ifdef XFS_INODE_TRACE
-       ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_BMAP_TRACE
-       ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_BMBT_TRACE
-       ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_RW_TRACE
-       ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_ILOCK_TRACE
-       ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_DIR2_TRACE
-       ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP);
-#endif
-
-       /*
          * If we got something that isn't an inode it means someone
          * (nfs or dmi) has a stale handle.
          */
         if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) {
-               kmem_zone_free(xfs_inode_zone, ip);
+               xfs_idestroy(ip);
                 xfs_trans_brelse(tp, bp);
  #ifdef DEBUG
                 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
@@ -881,7 +919,7 @@ xfs_iread(
                 xfs_dinode_from_disk(&ip->i_d, &dip->di_core);
                 error = xfs_iformat(ip, dip);
                 if (error)  {
-                       kmem_zone_free(xfs_inode_zone, ip);
+                       xfs_idestroy(ip);
                         xfs_trans_brelse(tp, bp);
  #ifdef DEBUG
                         xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
@@ -911,8 +949,6 @@ xfs_iread(
                         XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
         }
  
-       INIT_LIST_HEAD(&ip->i_reclaim);
-
         /*
          * The inode format changed when we moved the link count and
          * made it 32 bits long.  If this is an old format inode,
@@ -1046,9 +1082,9 @@ xfs_ialloc(
  {
         xfs_ino_t       ino;
         xfs_inode_t     *ip;
-       struct inode    *vp;
         uint            flags;
         int             error;
+       timespec_t      tv;
  
         /*
          * Call the space management code to pick
@@ -1077,13 +1113,12 @@ xfs_ialloc(
         }
         ASSERT(ip != NULL);
  
-       vp = VFS_I(ip);
         ip->i_d.di_mode = (__uint16_t)mode;
         ip->i_d.di_onlink = 0;
         ip->i_d.di_nlink = nlink;
         ASSERT(ip->i_d.di_nlink == nlink);
-       ip->i_d.di_uid = current_fsuid(cr);
-       ip->i_d.di_gid = current_fsgid(cr);
+       ip->i_d.di_uid = current_fsuid();
+       ip->i_d.di_gid = current_fsgid();
         ip->i_d.di_projid = prid;
         memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
  
@@ -1130,7 +1165,13 @@ xfs_ialloc(
         ip->i_size = 0;
         ip->i_d.di_nextents = 0;
         ASSERT(ip->i_d.di_nblocks == 0);
-       xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD);
+
+       nanotime(&tv);
+       ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
+       ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
+       ip->i_d.di_atime = ip->i_d.di_mtime;
+       ip->i_d.di_ctime = ip->i_d.di_mtime;
+
         /*
          * di_gen will have been taken care of in xfs_iread.
          */
@@ -1220,7 +1261,7 @@ xfs_ialloc(
         xfs_trans_log_inode(tp, ip, flags);
  
         /* now that we have an i_mode we can setup inode ops and unlock */
-       xfs_initialize_vnode(tp->t_mountp, vp, ip);
+       xfs_setup_inode(ip);
  
         *ipp = ip;
         return 0;
@@ -2626,9 +2667,6 @@ xfs_idestroy(
         }
         if (ip->i_afp)
                 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-       mrfree(&ip->i_lock);
-       mrfree(&ip->i_iolock);
-       freesema(&ip->i_flock);
  
  #ifdef XFS_INODE_TRACE
         ktrace_free(ip->i_trace);
@@ -2636,7 +2674,7 @@ xfs_idestroy(
  #ifdef XFS_BMAP_TRACE
         ktrace_free(ip->i_xtrace);
  #endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
         ktrace_free(ip->i_btrace);
  #endif
  #ifdef XFS_RW_TRACE
@@ -2667,7 +2705,13 @@ xfs_idestroy(
                                 spin_unlock(&mp->m_ail_lock);
                 }
                 xfs_inode_item_destroy(ip);
+               ip->i_itemp = NULL;
         }
+       /* verify the inode is unused (no I/O count, no pins, not on a reclaim list) before freeing it */
+       ASSERT(atomic_read(&ip->i_iocount) == 0);
+       ASSERT(atomic_read(&ip->i_pincount) == 0);
+       ASSERT(!spin_is_locked(&ip->i_flags_lock));
+       ASSERT(list_empty(&ip->i_reclaim));
         kmem_zone_free(xfs_inode_zone, ip);
  }
  
@@ -3046,10 +3090,10 @@ cluster_corrupt_out:
  /*
   * xfs_iflush() will write a modified inode's changes out to the
   * inode's on disk home.  The caller must have the inode lock held
- * in at least shared mode and the inode flush semaphore must be
- * held as well.  The inode lock will still be held upon return from
+ * in at least shared mode and the inode flush completion must be
+ * active as well.  The inode lock will still be held upon return from
   * the call and the caller is free to unlock it.
- * The inode flush lock will be unlocked when the inode reaches the disk.
+ * The inode flush will be completed when the inode reaches the disk.
   * The flags indicate how the inode's buffer should be written out.
   */
  int
@@ -3068,7 +3112,7 @@ xfs_iflush(
         XFS_STATS_INC(xs_iflush_count);
  
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-       ASSERT(issemalocked(&(ip->i_flock)));
+       ASSERT(!completion_done(&ip->i_flush));
         ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
                ip->i_d.di_nextents > ip->i_df.if_ext_max);
  
@@ -3231,7 +3275,7 @@ xfs_iflush_int(
  #endif
  
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-       ASSERT(issemalocked(&(ip->i_flock)));
+       ASSERT(!completion_done(&ip->i_flush));
         ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
                ip->i_d.di_nextents > ip->i_df.if_ext_max);
  
@@ -4114,7 +4158,7 @@ xfs_iext_indirect_to_direct(
         ASSERT(nextents <= XFS_LINEAR_EXTS);
         size = nextents * sizeof(xfs_bmbt_rec_t);
  
-       xfs_iext_irec_compact_full(ifp);
+       xfs_iext_irec_compact_pages(ifp);
         ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
  
         ep = ifp->if_u1.if_ext_irec->er_extbuf;
@@ -4445,8 +4489,7 @@ xfs_iext_irec_remove(
   * compaction policy is as follows:
   *
   *    Full Compaction: Extents fit into a single page (or inline buffer)
- *    Full Compaction: Extents occupy less than 10% of allocated space
- * Partial Compaction: Extents occupy > 10% and < 50% of allocated space
+ * Partial Compaction: Extents occupy less than 50% of allocated space
   *      No Compaction: Extents occupy at least 50% of allocated space
   */
  void
@@ -4467,8 +4510,6 @@ xfs_iext_irec_compact(
                 xfs_iext_direct_to_inline(ifp, nextents);
         } else if (nextents <= XFS_LINEAR_EXTS) {
                 xfs_iext_indirect_to_direct(ifp);
-       } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) {
-               xfs_iext_irec_compact_full(ifp);
         } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
                 xfs_iext_irec_compact_pages(ifp);
         }
@@ -4492,7 +4533,7 @@ xfs_iext_irec_compact_pages(
                 erp_next = erp + 1;
                 if (erp_next->er_extcount <=
                     (XFS_LINEAR_EXTS - erp->er_extcount)) {
-                       memmove(&erp->er_extbuf[erp->er_extcount],
+                       memcpy(&erp->er_extbuf[erp->er_extcount],
                                 erp_next->er_extbuf, erp_next->er_extcount *
                                 sizeof(xfs_bmbt_rec_t));
                         erp->er_extcount += erp_next->er_extcount;
@@ -4512,91 +4553,6 @@ xfs_iext_irec_compact_pages(
  }
  
  /*
- * Fully compact the extent records managed by the indirection array.
- */
-void
-xfs_iext_irec_compact_full(
-       xfs_ifork_t     *ifp)                   /* inode fork pointer */
-{
-       xfs_bmbt_rec_host_t *ep, *ep_next;      /* extent record pointers */
-       xfs_ext_irec_t  *erp, *erp_next;        /* extent irec pointers */
-       int             erp_idx = 0;            /* extent irec index */
-       int             ext_avail;              /* empty entries in ex list */
-       int             ext_diff;               /* number of exts to add */
-       int             nlists;                 /* number of irec's (ex lists) */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-
-       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-       erp = ifp->if_u1.if_ext_irec;
-       ep = &erp->er_extbuf[erp->er_extcount];
-       erp_next = erp + 1;
-       ep_next = erp_next->er_extbuf;
-
-       while (erp_idx < nlists - 1) {
-               /*
-                * Check how many extent records are available in this irec.
-                * If there is none skip the whole exercise.
-                */
-               ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
-               if (ext_avail) {
-
-                       /*
-                        * Copy over as many as possible extent records into
-                        * the previous page.
-                        */
-                       ext_diff = MIN(ext_avail, erp_next->er_extcount);
-                       memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
-                       erp->er_extcount += ext_diff;
-                       erp_next->er_extcount -= ext_diff;
-
-                       /*
-                        * If the next irec is empty now we can simply
-                        * remove it.
-                        */
-                       if (erp_next->er_extcount == 0) {
-                               /*
-                                * Free page before removing extent record
-                                * so er_extoffs don't get modified in
-                                * xfs_iext_irec_remove.
-                                */
-                               kmem_free(erp_next->er_extbuf);
-                               erp_next->er_extbuf = NULL;
-                               xfs_iext_irec_remove(ifp, erp_idx + 1);
-                               erp = &ifp->if_u1.if_ext_irec[erp_idx];
-                               nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-
-                       /*
-                        * If the next irec is not empty move up the content
-                        * that has not been copied to the previous page to
-                        * the beggining of this one.
-                        */
-                       } else {
-                               memmove(erp_next->er_extbuf, &ep_next[ext_diff],
-                                       erp_next->er_extcount *
-                                       sizeof(xfs_bmbt_rec_t));
-                               ep_next = erp_next->er_extbuf;
-                               memset(&ep_next[erp_next->er_extcount], 0,
-                                       (XFS_LINEAR_EXTS -
-                                               erp_next->er_extcount) *
-                                       sizeof(xfs_bmbt_rec_t));
-                       }
-               }
-
-               if (erp->er_extcount == XFS_LINEAR_EXTS) {
-                       erp_idx++;
-                       if (erp_idx < nlists)
-                               erp = &ifp->if_u1.if_ext_irec[erp_idx];
-                       else
-                               break;
-               }
-               ep = &erp->er_extbuf[erp->er_extcount];
-               erp_next = erp + 1;
-               ep_next = erp_next->er_extbuf;
-       }
-}
-
-/*
   * This is called to update the er_extoff field in the indirection
   * array when extents have been added or removed from one of the
   * extent lists. erp_idx contains the irec index to begin updating