X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fxfs%2Fxfs_iget.c;h=6845db90818f2223cf9fdbc735ed8a423b9338bc;hb=0e3c9a2284f5417f196e327c254d0b84c9ee8929;hp=73e1c0d767ace264cb0ecbe93bd85439da566159;hpb=7d095257e321214e4cf359abd131ba1f09c60cba;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 73e1c0d..6845db9 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -18,6 +18,7 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" +#include "xfs_acl.h" #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" @@ -42,7 +43,7 @@ #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_btree_trace.h" -#include "xfs_dir2_trace.h" +#include "xfs_trace.h" /* @@ -63,12 +64,18 @@ xfs_inode_alloc( ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); if (!ip) return NULL; + if (inode_init_always(mp->m_super, VFS_I(ip))) { + kmem_zone_free(xfs_inode_zone, ip); + return NULL; + } ASSERT(atomic_read(&ip->i_iocount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); + mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); + /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; @@ -77,51 +84,63 @@ xfs_inode_alloc( memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); ip->i_flags = 0; ip->i_update_core = 0; - ip->i_update_size = 0; ip->i_delayed_blks = 0; memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); ip->i_size = 0; ip->i_new_size = 0; - /* - * Initialize inode's trace buffers. - */ -#ifdef XFS_INODE_TRACE - ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_BMAP_TRACE - ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_BTREE_TRACE - ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_RW_TRACE - ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_ILOCK_TRACE - ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_DIR2_TRACE - ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); -#endif - /* - * Now initialise the VFS inode. We do this after the xfs_inode - * initialisation as internal failures will result in ->destroy_inode - * being called and that will pass down through the reclaim path and - * free the XFS inode. This path requires the XFS inode to already be - * initialised. Hence if this call fails, the xfs_inode has already - * been freed and we should not reference it at all in the error - * handling. - */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) - return NULL; - /* prevent anyone from using this yet */ - VFS_I(ip)->i_state = I_NEW|I_LOCK; + VFS_I(ip)->i_state = I_NEW; return ip; } +STATIC void +xfs_inode_free( + struct xfs_inode *ip) +{ + switch (ip->i_d.di_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + xfs_idestroy_fork(ip, XFS_DATA_FORK); + break; + } + + if (ip->i_afp) + xfs_idestroy_fork(ip, XFS_ATTR_FORK); + + if (ip->i_itemp) { + /* + * Only if we are shutting down the fs will we see an + * inode still in the AIL. If it is there, we should remove + * it to prevent a use-after-free from occurring. + */ + xfs_log_item_t *lip = &ip->i_itemp->ili_item; + struct xfs_ail *ailp = lip->li_ailp; + + ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || + XFS_FORCED_SHUTDOWN(ip->i_mount)); + if (lip->li_flags & XFS_LI_IN_AIL) { + spin_lock(&ailp->xa_lock); + if (lip->li_flags & XFS_LI_IN_AIL) + xfs_trans_ail_delete(ailp, lip); + else + spin_unlock(&ailp->xa_lock); + } + xfs_inode_item_destroy(ip); + ip->i_itemp = NULL; + } + + /* asserts to verify all state is correct here */ + ASSERT(atomic_read(&ip->i_iocount) == 0); + ASSERT(atomic_read(&ip->i_pincount) == 0); + ASSERT(!spin_is_locked(&ip->i_flags_lock)); + ASSERT(completion_done(&ip->i_flush)); + + kmem_zone_free(xfs_inode_zone, ip); +} + /* * Check the validity of the inode we just found it the cache */ @@ -132,89 +151,102 @@ xfs_iget_cache_hit( int flags, int lock_flags) __releases(pag->pag_ici_lock) { + struct inode *inode = VFS_I(ip); struct xfs_mount *mp = ip->i_mount; - int error = EAGAIN; + int error; + + spin_lock(&ip->i_flags_lock); /* - * If INEW is set this inode is being set up - * If IRECLAIM is set this inode is being torn down - * Pause and try again. + * If we are racing with another cache hit that is currently + * instantiating this inode or currently recycling it out of + * reclaimabe state, wait for the initialisation to complete + * before continuing. + * + * XXX(hch): eventually we should do something equivalent to + * wait_on_inode to wait for these flags to be cleared + * instead of polling for it. */ - if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { + if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { + trace_xfs_iget_skip(ip); XFS_STATS_INC(xs_ig_frecycle); + error = EAGAIN; goto out_error; } - /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ - if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { + /* + * If lookup is racing with unlink return an error immediately. + */ + if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { + error = ENOENT; + goto out_error; + } + + /* + * If IRECLAIMABLE is set, we've torn down the VFS inode already. + * Need to carefully get it back into useable state. + */ + if (ip->i_flags & XFS_IRECLAIMABLE) { + trace_xfs_iget_reclaim(ip); /* - * If lookup is racing with unlink, then we should return an - * error immediately so we don't remove it from the reclaim - * list and potentially leak the inode. + * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode + * from stomping over us while we recycle the inode. We can't + * clear the radix tree reclaimable tag yet as it requires + * pag_ici_lock to be held exclusive. */ - if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { - error = ENOENT; - goto out_error; - } + ip->i_flags |= XFS_IRECLAIM; - xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); + spin_unlock(&ip->i_flags_lock); + read_unlock(&pag->pag_ici_lock); - /* - * We need to re-initialise the VFS inode as it has been - * 'freed' by the VFS. Do this here so we can deal with - * errors cleanly, then tag it so it can be set up correctly - * later. - */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) { - error = ENOMEM; + error = -inode_init_always(mp->m_super, inode); + if (error) { + /* + * Re-initializing the inode failed, and we are in deep + * trouble. Try to re-add it to the reclaim list. + */ + read_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + + ip->i_flags &= ~XFS_INEW; + ip->i_flags |= XFS_IRECLAIMABLE; + __xfs_inode_set_reclaim_tag(pag, ip); + trace_xfs_iget_reclaim(ip); goto out_error; } - /* - * We must set the XFS_INEW flag before clearing the - * XFS_IRECLAIMABLE flag so that if a racing lookup does - * not find the XFS_IRECLAIMABLE above but has the igrab() - * below succeed we can safely check XFS_INEW to detect - * that this inode is still being initialised. - */ - xfs_iflags_set(ip, XFS_INEW); - xfs_iflags_clear(ip, XFS_IRECLAIMABLE); - - /* clear the radix tree reclaim flag as well. */ + write_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); + ip->i_flags |= XFS_INEW; __xfs_inode_clear_reclaim_tag(mp, pag, ip); - } else if (!igrab(VFS_I(ip))) { + inode->i_state = I_NEW; + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + } else { /* If the VFS inode is being torn down, pause and try again. */ - XFS_STATS_INC(xs_ig_frecycle); - goto out_error; - } else if (xfs_iflags_test(ip, XFS_INEW)) { - /* - * We are racing with another cache hit that is - * currently recycling this inode out of the XFS_IRECLAIMABLE - * state. Wait for the initialisation to complete before - * continuing. - */ - wait_on_inode(VFS_I(ip)); - } + if (!igrab(inode)) { + error = EAGAIN; + goto out_error; + } - if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { - error = ENOENT; - iput(VFS_I(ip)); - goto out_error; + /* We've got a live one. */ + spin_unlock(&ip->i_flags_lock); + read_unlock(&pag->pag_ici_lock); } - /* We've got a live one. */ - read_unlock(&pag->pag_ici_lock); - if (lock_flags != 0) xfs_ilock(ip, lock_flags); xfs_iflags_clear(ip, XFS_ISTALE); - xfs_itrace_exit_tag(ip, "xfs_iget.found"); XFS_STATS_INC(xs_ig_found); + + trace_xfs_iget_found(ip); return 0; out_error: + spin_unlock(&ip->i_flags_lock); read_unlock(&pag->pag_ici_lock); return error; } @@ -229,7 +261,7 @@ xfs_iget_cache_miss( struct xfs_inode **ipp, xfs_daddr_t bno, int flags, - int lock_flags) __releases(pag->pag_ici_lock) + int lock_flags) { struct xfs_inode *ip; int error; @@ -244,7 +276,7 @@ xfs_iget_cache_miss( if (error) goto out_destroy; - xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); + xfs_itrace_entry(ip); if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { error = ENOENT; @@ -289,6 +321,8 @@ xfs_iget_cache_miss( write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); + + trace_xfs_iget_alloc(ip); *ipp = ip; return 0; @@ -298,7 +332,8 @@ out_preload_end: if (lock_flags) xfs_iunlock(ip, lock_flags); out_destroy: - xfs_destroy_inode(ip); + __destroy_inode(VFS_I(ip)); + xfs_inode_free(ip); return error; } @@ -346,7 +381,7 @@ xfs_iget( return EINVAL; /* get the perag structure and ensure that it's inode capable */ - pag = xfs_get_perag(mp, ino); + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); if (!pag->pagi_inodeok) return EINVAL; ASSERT(pag->pag_ici_init); @@ -370,7 +405,7 @@ again: if (error) goto out_error_or_again; } - xfs_put_perag(mp, pag); + xfs_perag_put(pag); *ipp = ip; @@ -389,36 +424,10 @@ out_error_or_again: delay(1); goto again; } - xfs_put_perag(mp, pag); + xfs_perag_put(pag); return error; } - -/* - * Look for the inode corresponding to the given ino in the hash table. - * If it is there and its i_transp pointer matches tp, return it. - * Otherwise, return NULL. - */ -xfs_inode_t * -xfs_inode_incore(xfs_mount_t *mp, - xfs_ino_t ino, - xfs_trans_t *tp) -{ - xfs_inode_t *ip; - xfs_perag_t *pag; - - pag = xfs_get_perag(mp, ino); - read_lock(&pag->pag_ici_lock); - ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino)); - read_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); - - /* the returned inode must match the transaction */ - if (ip && (ip->i_transp != tp)) - return NULL; - return ip; -} - /* * Decrement reference count of an inode structure and unlock it. * @@ -475,19 +484,23 @@ xfs_ireclaim( { struct xfs_mount *mp = ip->i_mount; struct xfs_perag *pag; + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); XFS_STATS_INC(xs_ig_reclaims); /* - * Remove the inode from the per-AG radix tree. It doesn't matter - * if it was never added to it because radix_tree_delete can deal - * with that case just fine. + * Remove the inode from the per-AG radix tree. + * + * Because radix_tree_delete won't complain even if the item was never + * added to the tree assert that it's been there before to catch + * problems with the inode life time early on. */ - pag = xfs_get_perag(mp, ip->i_ino); + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); write_lock(&pag->pag_ici_lock); - radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); + if (!radix_tree_delete(&pag->pag_ici_root, agino)) + ASSERT(0); write_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); + xfs_perag_put(pag); /* * Here we do an (almost) spurious inode lock in order to coordinate @@ -503,62 +516,7 @@ xfs_ireclaim( xfs_qm_dqdetach(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - switch (ip->i_d.di_mode & S_IFMT) { - case S_IFREG: - case S_IFDIR: - case S_IFLNK: - xfs_idestroy_fork(ip, XFS_DATA_FORK); - break; - } - - if (ip->i_afp) - xfs_idestroy_fork(ip, XFS_ATTR_FORK); - -#ifdef XFS_INODE_TRACE - ktrace_free(ip->i_trace); -#endif -#ifdef XFS_BMAP_TRACE - ktrace_free(ip->i_xtrace); -#endif -#ifdef XFS_BTREE_TRACE - ktrace_free(ip->i_btrace); -#endif -#ifdef XFS_RW_TRACE - ktrace_free(ip->i_rwtrace); -#endif -#ifdef XFS_ILOCK_TRACE - ktrace_free(ip->i_lock_trace); -#endif -#ifdef XFS_DIR2_TRACE - ktrace_free(ip->i_dir_trace); -#endif - if (ip->i_itemp) { - /* - * Only if we are shutting down the fs will we see an - * inode still in the AIL. If it is there, we should remove - * it to prevent a use-after-free from occurring. - */ - xfs_log_item_t *lip = &ip->i_itemp->ili_item; - struct xfs_ail *ailp = lip->li_ailp; - - ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || - XFS_FORCED_SHUTDOWN(ip->i_mount)); - if (lip->li_flags & XFS_LI_IN_AIL) { - spin_lock(&ailp->xa_lock); - if (lip->li_flags & XFS_LI_IN_AIL) - xfs_trans_ail_delete(ailp, lip); - else - spin_unlock(&ailp->xa_lock); - } - xfs_inode_item_destroy(ip); - ip->i_itemp = NULL; - } - /* asserts to verify all state is correct here */ - ASSERT(atomic_read(&ip->i_iocount) == 0); - ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(completion_done(&ip->i_flush)); - kmem_zone_free(xfs_inode_zone, ip); + xfs_inode_free(ip); } /* @@ -655,7 +613,7 @@ xfs_ilock( else if (lock_flags & XFS_ILOCK_SHARED) mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); - xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); + trace_xfs_ilock(ip, lock_flags, _RET_IP_); } /* @@ -700,7 +658,7 @@ xfs_ilock_nowait( if (!mrtryaccess(&ip->i_lock)) goto out_undo_iolock; } - xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); + trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_); return 1; out_undo_iolock: @@ -762,7 +720,7 @@ xfs_iunlock( xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp, (xfs_log_item_t*)(ip->i_itemp)); } - xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); + trace_xfs_iunlock(ip, lock_flags, _RET_IP_); } /* @@ -781,6 +739,8 @@ xfs_ilock_demote( mrdemote(&ip->i_lock); if (lock_flags & XFS_IOLOCK_EXCL) mrdemote(&ip->i_iolock); + + trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); } #ifdef DEBUG @@ -811,52 +771,3 @@ xfs_isilocked( return 1; } #endif - -#ifdef XFS_INODE_TRACE - -#define KTRACE_ENTER(ip, vk, s, line, ra) \ - ktrace_enter((ip)->i_trace, \ -/* 0 */ (void *)(__psint_t)(vk), \ -/* 1 */ (void *)(s), \ -/* 2 */ (void *)(__psint_t) line, \ -/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \ -/* 4 */ (void *)(ra), \ -/* 5 */ NULL, \ -/* 6 */ (void *)(__psint_t)current_cpu(), \ -/* 7 */ (void *)(__psint_t)current_pid(), \ -/* 8 */ (void *)__return_address, \ -/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL) - -/* - * Vnode tracing code. - */ -void -_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra); -} - -void -_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra); -} - -void -xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra); -} - -void -_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra); -} - -void -xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra); -} -#endif /* XFS_INODE_TRACE */