SAFE public projects git trees. - safe/jmp/linux-2.6/blob - fs/xfs/xfs_iget.c

   1 /*
   2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   3  * All Rights Reserved.
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License as
   7  * published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it would be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write the Free Software Foundation,
  16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18 #include "xfs.h"
  19 #include "xfs_fs.h"
  20 #include "xfs_types.h"
  21 #include "xfs_bit.h"
  22 #include "xfs_log.h"
  23 #include "xfs_inum.h"
  24 #include "xfs_trans.h"
  25 #include "xfs_sb.h"
  26 #include "xfs_ag.h"
  27 #include "xfs_dir2.h"
  28 #include "xfs_dmapi.h"
  29 #include "xfs_mount.h"
  30 #include "xfs_bmap_btree.h"
  31 #include "xfs_alloc_btree.h"
  32 #include "xfs_ialloc_btree.h"
  33 #include "xfs_dir2_sf.h"
  34 #include "xfs_attr_sf.h"
  35 #include "xfs_dinode.h"
  36 #include "xfs_inode.h"
  37 #include "xfs_btree.h"
  38 #include "xfs_ialloc.h"
  39 #include "xfs_quota.h"
  40 #include "xfs_utils.h"
  41
  42 /*
  43  * Look up an inode by number in the given file system.
  44  * The inode is looked up in the cache held in each AG.
  45  * If the inode is found in the cache, attach it to the provided
  46  * vnode.
  47  *
  48  * If it is not in core, read it in from the file system's device,
  49  * add it to the cache and attach the provided vnode.
  50  *
  51  * The inode is locked according to the value of the lock_flags parameter.
  52  * This flag parameter indicates how and if the inode's IO lock and inode lock
  53  * should be taken.
  54  *
  55  * mp -- the mount point structure for the current file system.  It points
  56  *       to the inode hash table.
  57  * tp -- a pointer to the current transaction if there is one.  This is
  58  *       simply passed through to the xfs_iread() call.
  59  * ino -- the number of the inode desired.  This is the unique identifier
  60  *        within the file system for the inode being requested.
  61  * lock_flags -- flags indicating how to lock the inode.  See the comment
  62  *               for xfs_ilock() for a list of valid values.
  63  * bno -- the block number starting the buffer containing the inode,
  64  *        if known (as by bulkstat), else 0.
  65  */
  66 STATIC int
  67 xfs_iget_core(
  68         bhv_vnode_t     *vp,
  69         xfs_mount_t     *mp,
  70         xfs_trans_t     *tp,
  71         xfs_ino_t       ino,
  72         uint            flags,
  73         uint            lock_flags,
  74         xfs_inode_t     **ipp,
  75         xfs_daddr_t     bno)
  76 {
  77         xfs_inode_t     *ip;
  78         xfs_inode_t     *iq;
  79         bhv_vnode_t     *inode_vp;
  80         int             error;
  81         xfs_icluster_t  *icl, *new_icl = NULL;
  82         unsigned long   first_index, mask;
  83         xfs_perag_t     *pag;
  84         xfs_agino_t     agino;
  85
  86         /* the radix tree exists only in inode capable AGs */
  87         if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
  88                 return EINVAL;
  89
  90         /* get the perag structure and ensure that it's inode capable */
  91         pag = xfs_get_perag(mp, ino);
  92         if (!pag->pagi_inodeok)
  93                 return EINVAL;
  94         ASSERT(pag->pag_ici_init);
  95         agino = XFS_INO_TO_AGINO(mp, ino);
  96
  97 again:
  98         read_lock(&pag->pag_ici_lock);
  99         ip = radix_tree_lookup(&pag->pag_ici_root, agino);
 100
 101         if (ip != NULL) {
 102                 /*
 103                  * If INEW is set this inode is being set up
 104                  * we need to pause and try again.
 105                  */
 106                 if (xfs_iflags_test(ip, XFS_INEW)) {
 107                         read_unlock(&pag->pag_ici_lock);
 108                         delay(1);
 109                         XFS_STATS_INC(xs_ig_frecycle);
 110
 111                         goto again;
 112                 }
 113
 114                 inode_vp = XFS_ITOV_NULL(ip);
 115                 if (inode_vp == NULL) {
 116                         /*
 117                          * If IRECLAIM is set this inode is
 118                          * on its way out of the system,
 119                          * we need to pause and try again.
 120                          */
 121                         if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
 122                                 read_unlock(&pag->pag_ici_lock);
 123                                 delay(1);
 124                                 XFS_STATS_INC(xs_ig_frecycle);
 125
 126                                 goto again;
 127                         }
 128                         ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
 129
 130                         /*
 131                          * If lookup is racing with unlink, then we
 132                          * should return an error immediately so we
 133                          * don't remove it from the reclaim list and
 134                          * potentially leak the inode.
 135                          */
 136                         if ((ip->i_d.di_mode == 0) &&
 137                             !(flags & XFS_IGET_CREATE)) {
 138                                 read_unlock(&pag->pag_ici_lock);
 139                                 xfs_put_perag(mp, pag);
 140                                 return ENOENT;
 141                         }
 142
 143                         /*
 144                          * There may be transactions sitting in the
 145                          * incore log buffers or being flushed to disk
 146                          * at this time.  We can't clear the
 147                          * XFS_IRECLAIMABLE flag until these
 148                          * transactions have hit the disk, otherwise we
 149                          * will void the guarantee the flag provides
 150                          * xfs_iunpin()
 151                          */
 152                         if (xfs_ipincount(ip)) {
 153                                 read_unlock(&pag->pag_ici_lock);
 154                                 xfs_log_force(mp, 0,
 155                                         XFS_LOG_FORCE|XFS_LOG_SYNC);
 156                                 XFS_STATS_INC(xs_ig_frecycle);
 157                                 goto again;
 158                         }
 159
 160                         xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
 161
 162                         XFS_STATS_INC(xs_ig_found);
 163
 164                         xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
 165                         read_unlock(&pag->pag_ici_lock);
 166
 167                         XFS_MOUNT_ILOCK(mp);
 168                         list_del_init(&ip->i_reclaim);
 169                         XFS_MOUNT_IUNLOCK(mp);
 170
 171                         goto finish_inode;
 172
 173                 } else if (vp != inode_vp) {
 174                         struct inode *inode = vn_to_inode(inode_vp);
 175
 176                         /* The inode is being torn down, pause and
 177                          * try again.
 178                          */
 179                         if (inode->i_state & (I_FREEING | I_CLEAR)) {
 180                                 read_unlock(&pag->pag_ici_lock);
 181                                 delay(1);
 182                                 XFS_STATS_INC(xs_ig_frecycle);
 183
 184                                 goto again;
 185                         }
 186 /* Chances are the other vnode (the one in the inode) is being torn
 187 * down right now, and we landed on top of it. Question is, what do
 188 * we do? Unhook the old inode and hook up the new one?
 189 */
 190                         cmn_err(CE_PANIC,
 191                 "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
 192                                         inode_vp, vp);
 193                 }
 194
 195                 /*
 196                  * Inode cache hit
 197                  */
 198                 read_unlock(&pag->pag_ici_lock);
 199                 XFS_STATS_INC(xs_ig_found);
 200
 201 finish_inode:
 202                 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
 203                         xfs_put_perag(mp, pag);
 204                         return ENOENT;
 205                 }
 206
 207                 if (lock_flags != 0)
 208                         xfs_ilock(ip, lock_flags);
 209
 210                 xfs_iflags_clear(ip, XFS_ISTALE);
 211                 xfs_itrace_exit_tag(ip, "xfs_iget.found");
 212                 goto return_ip;
 213         }
 214
 215         /*
 216          * Inode cache miss
 217          */
 218         read_unlock(&pag->pag_ici_lock);
 219         XFS_STATS_INC(xs_ig_missed);
 220
 221         /*
 222          * Read the disk inode attributes into a new inode structure and get
 223          * a new vnode for it. This should also initialize i_ino and i_mount.
 224          */
 225         error = xfs_iread(mp, tp, ino, &ip, bno,
 226                           (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0);
 227         if (error) {
 228                 xfs_put_perag(mp, pag);
 229                 return error;
 230         }
 231
 232         xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
 233
 234         xfs_inode_lock_init(ip, vp);
 235         if (lock_flags)
 236                 xfs_ilock(ip, lock_flags);
 237
 238         if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
 239                 xfs_idestroy(ip);
 240                 xfs_put_perag(mp, pag);
 241                 return ENOENT;
 242         }
 243
 244         /*
 245          * This is a bit messy - we preallocate everything we _might_
 246          * need before we pick up the ici lock. That way we don't have to
 247          * juggle locks and go all the way back to the start.
 248          */
 249         new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP);
 250         if (radix_tree_preload(GFP_KERNEL)) {
 251                 delay(1);
 252                 goto again;
 253         }
 254         mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
 255         first_index = agino & mask;
 256         write_lock(&pag->pag_ici_lock);
 257
 258         /*
 259          * Find the cluster if it exists
 260          */
 261         icl = NULL;
 262         if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq,
 263                                                         first_index, 1)) {
 264                 if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index)
 265                         icl = iq->i_cluster;
 266         }
 267
 268         /*
 269          * insert the new inode
 270          */
 271         error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
 272         if (unlikely(error)) {
 273                 BUG_ON(error != -EEXIST);
 274                 write_unlock(&pag->pag_ici_lock);
 275                 radix_tree_preload_end();
 276                 xfs_idestroy(ip);
 277                 XFS_STATS_INC(xs_ig_dup);
 278                 goto again;
 279         }
 280
 281         /*
 282          * These values _must_ be set before releasing ihlock!
 283          */
 284         ip->i_udquot = ip->i_gdquot = NULL;
 285         xfs_iflags_set(ip, XFS_INEW);
 286
 287         ASSERT(ip->i_cluster == NULL);
 288
 289         if (!icl) {
 290                 spin_lock_init(&new_icl->icl_lock);
 291                 INIT_HLIST_HEAD(&new_icl->icl_inodes);
 292                 icl = new_icl;
 293                 new_icl = NULL;
 294         } else {
 295                 ASSERT(!hlist_empty(&icl->icl_inodes));
 296         }
 297         spin_lock(&icl->icl_lock);
 298         hlist_add_head(&ip->i_cnode, &icl->icl_inodes);
 299         ip->i_cluster = icl;
 300         spin_unlock(&icl->icl_lock);
 301
 302         write_unlock(&pag->pag_ici_lock);
 303         radix_tree_preload_end();
 304         if (new_icl)
 305                 kmem_zone_free(xfs_icluster_zone, new_icl);
 306
 307         /*
 308          * Link ip to its mount and thread it on the mount's inode list.
 309          */
 310         XFS_MOUNT_ILOCK(mp);
 311         if ((iq = mp->m_inodes)) {
 312                 ASSERT(iq->i_mprev->i_mnext == iq);
 313                 ip->i_mprev = iq->i_mprev;
 314                 iq->i_mprev->i_mnext = ip;
 315                 iq->i_mprev = ip;
 316                 ip->i_mnext = iq;
 317         } else {
 318                 ip->i_mnext = ip;
 319                 ip->i_mprev = ip;
 320         }
 321         mp->m_inodes = ip;
 322
 323         XFS_MOUNT_IUNLOCK(mp);
 324         xfs_put_perag(mp, pag);
 325
 326  return_ip:
 327         ASSERT(ip->i_df.if_ext_max ==
 328                XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
 329
 330         xfs_iflags_set(ip, XFS_IMODIFIED);
 331         *ipp = ip;
 332
 333         /*
 334          * If we have a real type for an on-disk inode, we can set ops(&unlock)
 335          * now.  If it's a new inode being created, xfs_ialloc will handle it.
 336          */
 337         xfs_initialize_vnode(mp, vp, ip);
 338         return 0;
 339 }
 340
 341
 342 /*
 343  * The 'normal' internal xfs_iget, if needed it will
 344  * 'allocate', or 'get', the vnode.
 345  */
 346 int
 347 xfs_iget(
 348         xfs_mount_t     *mp,
 349         xfs_trans_t     *tp,
 350         xfs_ino_t       ino,
 351         uint            flags,
 352         uint            lock_flags,
 353         xfs_inode_t     **ipp,
 354         xfs_daddr_t     bno)
 355 {
 356         struct inode    *inode;
 357         bhv_vnode_t     *vp = NULL;
 358         int             error;
 359
 360         XFS_STATS_INC(xs_ig_attempts);
 361
 362 retry:
 363         inode = iget_locked(mp->m_super, ino);
 364         if (inode) {
 365                 xfs_inode_t     *ip;
 366
 367                 vp = vn_from_inode(inode);
 368                 if (inode->i_state & I_NEW) {
 369                         vn_initialize(inode);
 370                         error = xfs_iget_core(vp, mp, tp, ino, flags,
 371                                         lock_flags, ipp, bno);
 372                         if (error) {
 373                                 vn_mark_bad(vp);
 374                                 if (inode->i_state & I_NEW)
 375                                         unlock_new_inode(inode);
 376                                 iput(inode);
 377                         }
 378                 } else {
 379                         /*
 380                          * If the inode is not fully constructed due to
 381                          * filehandle mismatches wait for the inode to go
 382                          * away and try again.
 383                          *
 384                          * iget_locked will call __wait_on_freeing_inode
 385                          * to wait for the inode to go away.
 386                          */
 387                         if (is_bad_inode(inode) ||
 388                             ((ip = xfs_vtoi(vp)) == NULL)) {
 389                                 iput(inode);
 390                                 delay(1);
 391                                 goto retry;
 392                         }
 393
 394                         if (lock_flags != 0)
 395                                 xfs_ilock(ip, lock_flags);
 396                         XFS_STATS_INC(xs_ig_found);
 397                         *ipp = ip;
 398                         error = 0;
 399                 }
 400         } else
 401                 error = ENOMEM; /* If we got no inode we are out of memory */
 402
 403         return error;
 404 }
 405
 406 /*
 407  * Do the setup for the various locks within the incore inode.
 408  */
 409 void
 410 xfs_inode_lock_init(
 411         xfs_inode_t     *ip,
 412         bhv_vnode_t     *vp)
 413 {
 414         mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
 415                      "xfsino", ip->i_ino);
 416         mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
 417         init_waitqueue_head(&ip->i_ipin_wait);
 418         atomic_set(&ip->i_pincount, 0);
 419         initnsema(&ip->i_flock, 1, "xfsfino");
 420 }
 421
 422 /*
 423  * Look for the inode corresponding to the given ino in the hash table.
 424  * If it is there and its i_transp pointer matches tp, return it.
 425  * Otherwise, return NULL.
 426  */
 427 xfs_inode_t *
 428 xfs_inode_incore(xfs_mount_t    *mp,
 429                  xfs_ino_t      ino,
 430                  xfs_trans_t    *tp)
 431 {
 432         xfs_inode_t     *ip;
 433         xfs_perag_t     *pag;
 434
 435         pag = xfs_get_perag(mp, ino);
 436         read_lock(&pag->pag_ici_lock);
 437         ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino));
 438         read_unlock(&pag->pag_ici_lock);
 439         xfs_put_perag(mp, pag);
 440
 441         /* the returned inode must match the transaction */
 442         if (ip && (ip->i_transp != tp))
 443                 return NULL;
 444         return ip;
 445 }
 446
 447 /*
 448  * Decrement reference count of an inode structure and unlock it.
 449  *
 450  * ip -- the inode being released
 451  * lock_flags -- this parameter indicates the inode's locks to be
 452  *       to be released.  See the comment on xfs_iunlock() for a list
 453  *       of valid values.
 454  */
 455 void
 456 xfs_iput(xfs_inode_t    *ip,
 457          uint           lock_flags)
 458 {
 459         bhv_vnode_t     *vp = XFS_ITOV(ip);
 460
 461         xfs_itrace_entry(ip);
 462         xfs_iunlock(ip, lock_flags);
 463         VN_RELE(vp);
 464 }
 465
 466 /*
 467  * Special iput for brand-new inodes that are still locked
 468  */
 469 void
 470 xfs_iput_new(xfs_inode_t        *ip,
 471              uint               lock_flags)
 472 {
 473         bhv_vnode_t     *vp = XFS_ITOV(ip);
 474         struct inode    *inode = vn_to_inode(vp);
 475
 476         xfs_itrace_entry(ip);
 477
 478         if ((ip->i_d.di_mode == 0)) {
 479                 ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
 480                 vn_mark_bad(vp);
 481         }
 482         if (inode->i_state & I_NEW)
 483                 unlock_new_inode(inode);
 484         if (lock_flags)
 485                 xfs_iunlock(ip, lock_flags);
 486         VN_RELE(vp);
 487 }
 488
 489
 490 /*
 491  * This routine embodies the part of the reclaim code that pulls
 492  * the inode from the inode hash table and the mount structure's
 493  * inode list.
 494  * This should only be called from xfs_reclaim().
 495  */
 496 void
 497 xfs_ireclaim(xfs_inode_t *ip)
 498 {
 499         bhv_vnode_t     *vp;
 500
 501         /*
 502          * Remove from old hash list and mount list.
 503          */
 504         XFS_STATS_INC(xs_ig_reclaims);
 505
 506         xfs_iextract(ip);
 507
 508         /*
 509          * Here we do a spurious inode lock in order to coordinate with
 510          * xfs_sync().  This is because xfs_sync() references the inodes
 511          * in the mount list without taking references on the corresponding
 512          * vnodes.  We make that OK here by ensuring that we wait until
 513          * the inode is unlocked in xfs_sync() before we go ahead and
 514          * free it.  We get both the regular lock and the io lock because
 515          * the xfs_sync() code may need to drop the regular one but will
 516          * still hold the io lock.
 517          */
 518         xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 519
 520         /*
 521          * Release dquots (and their references) if any. An inode may escape
 522          * xfs_inactive and get here via vn_alloc->vn_reclaim path.
 523          */
 524         XFS_QM_DQDETACH(ip->i_mount, ip);
 525
 526         /*
 527          * Pull our behavior descriptor from the vnode chain.
 528          */
 529         vp = XFS_ITOV_NULL(ip);
 530         if (vp) {
 531                 vn_to_inode(vp)->i_private = NULL;
 532                 ip->i_vnode = NULL;
 533         }
 534
 535         /*
 536          * Free all memory associated with the inode.
 537          */
 538         xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 539         xfs_idestroy(ip);
 540 }
 541
 542 /*
 543  * This routine removes an about-to-be-destroyed inode from
 544  * all of the lists in which it is located with the exception
 545  * of the behavior chain.
 546  */
 547 void
 548 xfs_iextract(
 549         xfs_inode_t     *ip)
 550 {
 551         xfs_mount_t     *mp = ip->i_mount;
 552         xfs_perag_t     *pag = xfs_get_perag(mp, ip->i_ino);
 553         xfs_inode_t     *iq;
 554
 555         write_lock(&pag->pag_ici_lock);
 556         radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
 557         write_unlock(&pag->pag_ici_lock);
 558         xfs_put_perag(mp, pag);
 559
 560         /*
 561          * Remove from cluster list
 562          */
 563         mp = ip->i_mount;
 564         spin_lock(&ip->i_cluster->icl_lock);
 565         hlist_del(&ip->i_cnode);
 566         spin_unlock(&ip->i_cluster->icl_lock);
 567
 568         /* was last inode in cluster? */
 569         if (hlist_empty(&ip->i_cluster->icl_inodes))
 570                 kmem_zone_free(xfs_icluster_zone, ip->i_cluster);
 571
 572         /*
 573          * Remove from mount's inode list.
 574          */
 575         XFS_MOUNT_ILOCK(mp);
 576         ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
 577         iq = ip->i_mnext;
 578         iq->i_mprev = ip->i_mprev;
 579         ip->i_mprev->i_mnext = iq;
 580
 581         /*
 582          * Fix up the head pointer if it points to the inode being deleted.
 583          */
 584         if (mp->m_inodes == ip) {
 585                 if (ip == iq) {
 586                         mp->m_inodes = NULL;
 587                 } else {
 588                         mp->m_inodes = iq;
 589                 }
 590         }
 591
 592         /* Deal with the deleted inodes list */
 593         list_del_init(&ip->i_reclaim);
 594
 595         mp->m_ireclaims++;
 596         XFS_MOUNT_IUNLOCK(mp);
 597 }
 598
 599 /*
 600  * This is a wrapper routine around the xfs_ilock() routine
 601  * used to centralize some grungy code.  It is used in places
 602  * that wish to lock the inode solely for reading the extents.
 603  * The reason these places can't just call xfs_ilock(SHARED)
 604  * is that the inode lock also guards to bringing in of the
 605  * extents from disk for a file in b-tree format.  If the inode
 606  * is in b-tree format, then we need to lock the inode exclusively
 607  * until the extents are read in.  Locking it exclusively all
 608  * the time would limit our parallelism unnecessarily, though.
 609  * What we do instead is check to see if the extents have been
 610  * read in yet, and only lock the inode exclusively if they
 611  * have not.
 612  *
 613  * The function returns a value which should be given to the
 614  * corresponding xfs_iunlock_map_shared().  This value is
 615  * the mode in which the lock was actually taken.
 616  */
 617 uint
 618 xfs_ilock_map_shared(
 619         xfs_inode_t     *ip)
 620 {
 621         uint    lock_mode;
 622
 623         if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
 624             ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
 625                 lock_mode = XFS_ILOCK_EXCL;
 626         } else {
 627                 lock_mode = XFS_ILOCK_SHARED;
 628         }
 629
 630         xfs_ilock(ip, lock_mode);
 631
 632         return lock_mode;
 633 }
 634
 635 /*
 636  * This is simply the unlock routine to go with xfs_ilock_map_shared().
 637  * All it does is call xfs_iunlock() with the given lock_mode.
 638  */
 639 void
 640 xfs_iunlock_map_shared(
 641         xfs_inode_t     *ip,
 642         unsigned int    lock_mode)
 643 {
 644         xfs_iunlock(ip, lock_mode);
 645 }
 646
 647 /*
 648  * The xfs inode contains 2 locks: a multi-reader lock called the
 649  * i_iolock and a multi-reader lock called the i_lock.  This routine
 650  * allows either or both of the locks to be obtained.
 651  *
 652  * The 2 locks should always be ordered so that the IO lock is
 653  * obtained first in order to prevent deadlock.
 654  *
 655  * ip -- the inode being locked
 656  * lock_flags -- this parameter indicates the inode's locks
 657  *       to be locked.  It can be:
 658  *              XFS_IOLOCK_SHARED,
 659  *              XFS_IOLOCK_EXCL,
 660  *              XFS_ILOCK_SHARED,
 661  *              XFS_ILOCK_EXCL,
 662  *              XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
 663  *              XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
 664  *              XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
 665  *              XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
 666  */
 667 void
 668 xfs_ilock(xfs_inode_t   *ip,
 669           uint          lock_flags)
 670 {
 671         /*
 672          * You can't set both SHARED and EXCL for the same lock,
 673          * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
 674          * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
 675          */
 676         ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
 677                (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
 678         ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 679                (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
 680         ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
 681
 682         if (lock_flags & XFS_IOLOCK_EXCL) {
 683                 mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
 684         } else if (lock_flags & XFS_IOLOCK_SHARED) {
 685                 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
 686         }
 687         if (lock_flags & XFS_ILOCK_EXCL) {
 688                 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
 689         } else if (lock_flags & XFS_ILOCK_SHARED) {
 690                 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
 691         }
 692         xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
 693 }
 694
 695 /*
 696  * This is just like xfs_ilock(), except that the caller
 697  * is guaranteed not to sleep.  It returns 1 if it gets
 698  * the requested locks and 0 otherwise.  If the IO lock is
 699  * obtained but the inode lock cannot be, then the IO lock
 700  * is dropped before returning.
 701  *
 702  * ip -- the inode being locked
 703  * lock_flags -- this parameter indicates the inode's locks to be
 704  *       to be locked.  See the comment for xfs_ilock() for a list
 705  *       of valid values.
 706  *
 707  */
 708 int
 709 xfs_ilock_nowait(xfs_inode_t    *ip,
 710                  uint           lock_flags)
 711 {
 712         int     iolocked;
 713         int     ilocked;
 714
 715         /*
 716          * You can't set both SHARED and EXCL for the same lock,
 717          * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
 718          * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
 719          */
 720         ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
 721                (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
 722         ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 723                (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
 724         ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
 725
 726         iolocked = 0;
 727         if (lock_flags & XFS_IOLOCK_EXCL) {
 728                 iolocked = mrtryupdate(&ip->i_iolock);
 729                 if (!iolocked) {
 730                         return 0;
 731                 }
 732         } else if (lock_flags & XFS_IOLOCK_SHARED) {
 733                 iolocked = mrtryaccess(&ip->i_iolock);
 734                 if (!iolocked) {
 735                         return 0;
 736                 }
 737         }
 738         if (lock_flags & XFS_ILOCK_EXCL) {
 739                 ilocked = mrtryupdate(&ip->i_lock);
 740                 if (!ilocked) {
 741                         if (iolocked) {
 742                                 mrunlock(&ip->i_iolock);
 743                         }
 744                         return 0;
 745                 }
 746         } else if (lock_flags & XFS_ILOCK_SHARED) {
 747                 ilocked = mrtryaccess(&ip->i_lock);
 748                 if (!ilocked) {
 749                         if (iolocked) {
 750                                 mrunlock(&ip->i_iolock);
 751                         }
 752                         return 0;
 753                 }
 754         }
 755         xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address);
 756         return 1;
 757 }
 758
 759 /*
 760  * xfs_iunlock() is used to drop the inode locks acquired with
 761  * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
 762  * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
 763  * that we know which locks to drop.
 764  *
 765  * ip -- the inode being unlocked
 766  * lock_flags -- this parameter indicates the inode's locks to be
 767  *       to be unlocked.  See the comment for xfs_ilock() for a list
 768  *       of valid values for this parameter.
 769  *
 770  */
 771 void
 772 xfs_iunlock(xfs_inode_t *ip,
 773             uint        lock_flags)
 774 {
 775         /*
 776          * You can't set both SHARED and EXCL for the same lock,
 777          * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
 778          * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
 779          */
 780         ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
 781                (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
 782         ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 783                (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
 784         ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY |
 785                         XFS_LOCK_DEP_MASK)) == 0);
 786         ASSERT(lock_flags != 0);
 787
 788         if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) {
 789                 ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) ||
 790                        (ismrlocked(&ip->i_iolock, MR_ACCESS)));
 791                 ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) ||
 792                        (ismrlocked(&ip->i_iolock, MR_UPDATE)));
 793                 mrunlock(&ip->i_iolock);
 794         }
 795
 796         if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) {
 797                 ASSERT(!(lock_flags & XFS_ILOCK_SHARED) ||
 798                        (ismrlocked(&ip->i_lock, MR_ACCESS)));
 799                 ASSERT(!(lock_flags & XFS_ILOCK_EXCL) ||
 800                        (ismrlocked(&ip->i_lock, MR_UPDATE)));
 801                 mrunlock(&ip->i_lock);
 802
 803                 /*
 804                  * Let the AIL know that this item has been unlocked in case
 805                  * it is in the AIL and anyone is waiting on it.  Don't do
 806                  * this if the caller has asked us not to.
 807                  */
 808                 if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) &&
 809                      ip->i_itemp != NULL) {
 810                         xfs_trans_unlocked_item(ip->i_mount,
 811                                                 (xfs_log_item_t*)(ip->i_itemp));
 812                 }
 813         }
 814         xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
 815 }
 816
 817 /*
 818  * give up write locks.  the i/o lock cannot be held nested
 819  * if it is being demoted.
 820  */
 821 void
 822 xfs_ilock_demote(xfs_inode_t    *ip,
 823                  uint           lock_flags)
 824 {
 825         ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
 826         ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
 827
 828         if (lock_flags & XFS_ILOCK_EXCL) {
 829                 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
 830                 mrdemote(&ip->i_lock);
 831         }
 832         if (lock_flags & XFS_IOLOCK_EXCL) {
 833                 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
 834                 mrdemote(&ip->i_iolock);
 835         }
 836 }
 837
 838 /*
 839  * The following three routines simply manage the i_flock
 840  * semaphore embedded in the inode.  This semaphore synchronizes
 841  * processes attempting to flush the in-core inode back to disk.
 842  */
 843 void
 844 xfs_iflock(xfs_inode_t *ip)
 845 {
 846         psema(&(ip->i_flock), PINOD|PLTWAIT);
 847 }
 848
 849 int
 850 xfs_iflock_nowait(xfs_inode_t *ip)
 851 {
 852         return (cpsema(&(ip->i_flock)));
 853 }
 854
 855 void
 856 xfs_ifunlock(xfs_inode_t *ip)
 857 {
 858         ASSERT(issemalocked(&(ip->i_flock)));
 859         vsema(&(ip->i_flock));
 860 }