#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
+#include "xfs_trace.h"
#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
#define XFSA_FIXUP_BNO_OK 1
#define XFSA_FIXUP_CNT_OK 2
-STATIC void
-xfs_alloc_search_busy(xfs_trans_t *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len);
-
-#if defined(XFS_ALLOC_TRACE)
-ktrace_t *xfs_alloc_trace_buf;
-
-#define TRACE_ALLOC(s,a) \
- xfs_alloc_trace_alloc(__func__, s, a, __LINE__)
-#define TRACE_FREE(s,a,b,x,f) \
- xfs_alloc_trace_free(__func__, s, mp, a, b, x, f, __LINE__)
-#define TRACE_MODAGF(s,a,f) \
- xfs_alloc_trace_modagf(__func__, s, mp, a, f, __LINE__)
-#define TRACE_BUSY(__func__,s,ag,agb,l,sl,tp) \
- xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
-#define TRACE_UNBUSY(__func__,s,ag,sl,tp) \
- xfs_alloc_trace_busy(__func__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
-#define TRACE_BUSYSEARCH(__func__,s,ag,agb,l,tp) \
- xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, 0, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
-#else
-#define TRACE_ALLOC(s,a)
-#define TRACE_FREE(s,a,b,x,f)
-#define TRACE_MODAGF(s,a,f)
-#define TRACE_BUSY(s,a,ag,agb,l,sl,tp)
-#define TRACE_UNBUSY(fname,s,ag,sl,tp)
-#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,tp)
-#endif /* XFS_ALLOC_TRACE */
+static int
+xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agblock_t bno, xfs_extlen_t len);
/*
* Prototypes for per-ag allocation routines
*/
/*
+ * Lookup the record equal to [bno, len] in the btree given by cur.
+ */
+STATIC int /* error */
+xfs_alloc_lookup_eq(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+STATIC int /* error */
+xfs_alloc_lookup_ge(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+STATIC int /* error */
+xfs_alloc_lookup_le(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [bno, len].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int /* error */
+xfs_alloc_update(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len) /* length of extent */
+{
+ union xfs_btree_rec rec;
+
+ rec.alloc.ar_startblock = cpu_to_be32(bno);
+ rec.alloc.ar_blockcount = cpu_to_be32(len);
+ return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+STATIC int /* error */
+xfs_alloc_get_rec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t *bno, /* output: starting block of extent */
+ xfs_extlen_t *len, /* output: length of extent */
+ int *stat) /* output: success/failure */
+{
+ union xfs_btree_rec *rec;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (!error && *stat == 1) {
+ *bno = be32_to_cpu(rec->alloc.ar_startblock);
+ *len = be32_to_cpu(rec->alloc.ar_blockcount);
+ }
+ return error;
+}
+
+/*
* Compute aligned version of the found extent.
* Takes alignment and min length into account.
*/
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
+
#ifdef DEBUG
- {
- xfs_alloc_block_t *bnoblock;
- xfs_alloc_block_t *cntblock;
-
- if (bno_cur->bc_nlevels == 1 &&
- cnt_cur->bc_nlevels == 1) {
- bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]);
- cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]);
- XFS_WANT_CORRUPTED_RETURN(
- be16_to_cpu(bnoblock->bb_numrecs) ==
- be16_to_cpu(cntblock->bb_numrecs));
- }
+ if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
+ struct xfs_btree_block *bnoblock;
+ struct xfs_btree_block *cntblock;
+
+ bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
+ cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
+
+ XFS_WANT_CORRUPTED_RETURN(
+ bnoblock->bb_numrecs == cntblock->bb_numrecs);
}
#endif
+
/*
* Deal with all four cases: the allocated record is contained
* within the freespace record, so we can have new freespace
/*
* Delete the entry from the by-size btree.
*/
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
/*
if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 0);
- if ((error = xfs_alloc_insert(cnt_cur, &i)))
+ if ((error = xfs_btree_insert(cnt_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 0);
- if ((error = xfs_alloc_insert(cnt_cur, &i)))
+ if ((error = xfs_btree_insert(cnt_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
/*
* No remaining freespace, just delete the by-block tree entry.
*/
- if ((error = xfs_alloc_delete(bno_cur, &i)))
+ if ((error = xfs_btree_delete(bno_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
} else {
if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 0);
- if ((error = xfs_alloc_insert(bno_cur, &i)))
+ if ((error = xfs_btree_insert(bno_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
return 0;
}
-#if defined(XFS_ALLOC_TRACE)
-/*
- * Add an allocation trace entry for an alloc call.
- */
-STATIC void
-xfs_alloc_trace_alloc(
- const char *name, /* function tag string */
- char *str, /* additional string */
- xfs_alloc_arg_t *args, /* allocation argument structure */
- int line) /* source line number */
-{
- ktrace_enter(xfs_alloc_trace_buf,
- (void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)),
- (void *)name,
- (void *)str,
- (void *)args->mp,
- (void *)(__psunsigned_t)args->agno,
- (void *)(__psunsigned_t)args->agbno,
- (void *)(__psunsigned_t)args->minlen,
- (void *)(__psunsigned_t)args->maxlen,
- (void *)(__psunsigned_t)args->mod,
- (void *)(__psunsigned_t)args->prod,
- (void *)(__psunsigned_t)args->minleft,
- (void *)(__psunsigned_t)args->total,
- (void *)(__psunsigned_t)args->alignment,
- (void *)(__psunsigned_t)args->len,
- (void *)((((__psint_t)args->type) << 16) |
- (__psint_t)args->otype),
- (void *)(__psint_t)((args->wasdel << 3) |
- (args->wasfromfl << 2) |
- (args->isfl << 1) |
- (args->userdata << 0)));
-}
-
-/*
- * Add an allocation trace entry for a free call.
- */
-STATIC void
-xfs_alloc_trace_free(
- const char *name, /* function tag string */
- char *str, /* additional string */
- xfs_mount_t *mp, /* file system mount point */
- xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno, /* a.g. relative block number */
- xfs_extlen_t len, /* length of extent */
- int isfl, /* set if is freelist allocation/free */
- int line) /* source line number */
-{
- ktrace_enter(xfs_alloc_trace_buf,
- (void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)),
- (void *)name,
- (void *)str,
- (void *)mp,
- (void *)(__psunsigned_t)agno,
- (void *)(__psunsigned_t)agbno,
- (void *)(__psunsigned_t)len,
- (void *)(__psint_t)isfl,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
-}
-
-/*
- * Add an allocation trace entry for modifying an agf.
- */
-STATIC void
-xfs_alloc_trace_modagf(
- const char *name, /* function tag string */
- char *str, /* additional string */
- xfs_mount_t *mp, /* file system mount point */
- xfs_agf_t *agf, /* new agf value */
- int flags, /* logging flags for agf */
- int line) /* source line number */
-{
- ktrace_enter(xfs_alloc_trace_buf,
- (void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)),
- (void *)name,
- (void *)str,
- (void *)mp,
- (void *)(__psint_t)flags,
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_seqno),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_length),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flfirst),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_fllast),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flcount),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_freeblks),
- (void *)(__psunsigned_t)be32_to_cpu(agf->agf_longest));
-}
-
-STATIC void
-xfs_alloc_trace_busy(
- const char *name, /* function tag string */
- char *str, /* additional string */
- xfs_mount_t *mp, /* file system mount point */
- xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno, /* a.g. relative block number */
- xfs_extlen_t len, /* length of extent */
- int slot, /* perag Busy slot */
- xfs_trans_t *tp,
- int trtype, /* type: add, delete, search */
- int line) /* source line number */
-{
- ktrace_enter(xfs_alloc_trace_buf,
- (void *)(__psint_t)(trtype | (line << 16)),
- (void *)name,
- (void *)str,
- (void *)mp,
- (void *)(__psunsigned_t)agno,
- (void *)(__psunsigned_t)agbno,
- (void *)(__psunsigned_t)len,
- (void *)(__psint_t)slot,
- (void *)tp,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL);
-}
-#endif /* XFS_ALLOC_TRACE */
-
/*
* Allocation group level functions.
*/
*/
if (args->agbno != NULLAGBLOCK) {
xfs_agf_t *agf; /* allocation group freelist header */
-#ifdef XFS_ALLOC_TRACE
- xfs_mount_t *mp = args->mp;
-#endif
long slen = (long)args->len;
ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
args->pag->pagf_freeblks -= args->len;
ASSERT(be32_to_cpu(agf->agf_freeblks) <=
be32_to_cpu(agf->agf_length));
- TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
xfs_alloc_log_agf(args->tp, args->agbp,
XFS_AGF_FREEBLKS);
- /* search the busylist for these blocks */
- xfs_alloc_search_busy(args->tp, args->agno,
- args->agbno, args->len);
+ /*
+ * Search the busylist for these blocks and mark the
+ * transaction as synchronous if blocks are found. This
+ * avoids the need to block due to a synchronous log
+ * force to ensure correct ordering as the synchronous
+ * transaction will guarantee that for us.
+ */
+ if (xfs_alloc_busy_search(args->mp, args->agno,
+ args->agbno, args->len))
+ xfs_trans_set_sync(args->tp);
}
if (!args->isfl)
xfs_trans_mod_sb(args->tp,
}
xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- TRACE_ALLOC("normal", args);
+
+ trace_xfs_alloc_exact_done(args);
args->wasfromfl = 0;
return 0;
error0:
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
- TRACE_ALLOC("error", args);
+ trace_xfs_alloc_exact_error(args);
return error;
}
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
if (ltlen >= args->minlen)
break;
- if ((error = xfs_alloc_increment(cnt_cur, 0, &i)))
+ if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
goto error0;
} while (i);
ASSERT(ltlen >= args->minlen);
i = cnt_cur->bc_ptrs[0];
for (j = 1, blen = 0, bdiff = 0;
!error && j && (blen < args->maxlen || bdiff > 0);
- error = xfs_alloc_increment(cnt_cur, 0, &j)) {
+ error = xfs_btree_increment(cnt_cur, 0, &j)) {
/*
* For each entry, decide if it's better than
* the previous best entry.
args->len = blen;
if (!xfs_alloc_fix_minleft(args)) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- TRACE_ALLOC("nominleft", args);
+ trace_xfs_alloc_near_nominleft(args);
return 0;
}
blen = args->len;
goto error0;
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
- TRACE_ALLOC("first", args);
+
+ trace_xfs_alloc_near_first(args);
return 0;
}
/*
* Increment the cursor, so we will point at the entry just right
* of the leftward entry if any, or to the leftmost entry.
*/
- if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
+ if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
goto error0;
if (!i) {
/*
args->minlen, <bnoa, <lena);
if (ltlena >= args->minlen)
break;
- if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i)))
+ if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
goto error0;
if (!i) {
xfs_btree_del_cursor(bno_cur_lt,
args->minlen, >bnoa, >lena);
if (gtlena >= args->minlen)
break;
- if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
+ if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
goto error0;
if (!i) {
xfs_btree_del_cursor(bno_cur_gt,
/*
* Fell off the right end.
*/
- if ((error = xfs_alloc_increment(
+ if ((error = xfs_btree_increment(
bno_cur_gt, 0, &i)))
goto error0;
if (!i) {
/*
* Fell off the left end.
*/
- if ((error = xfs_alloc_decrement(
+ if ((error = xfs_btree_decrement(
bno_cur_lt, 0, &i)))
goto error0;
if (!i) {
* If we couldn't get anything, give up.
*/
if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
- TRACE_ALLOC("neither", args);
+ trace_xfs_alloc_size_neither(args);
args->agbno = NULLAGBLOCK;
return 0;
}
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args);
if (!xfs_alloc_fix_minleft(args)) {
- TRACE_ALLOC("nominleft", args);
+ trace_xfs_alloc_near_nominleft(args);
xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
return 0;
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
ltnew, rlen, XFSA_FIXUP_BNO_OK)))
goto error0;
- TRACE_ALLOC(j ? "gt" : "lt", args);
+
+ if (j)
+ trace_xfs_alloc_near_greater(args);
+ else
+ trace_xfs_alloc_near_lesser(args);
+
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
return 0;
error0:
- TRACE_ALLOC("error", args);
+ trace_xfs_alloc_near_error(args);
if (cnt_cur != NULL)
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
if (bno_cur_lt != NULL)
goto error0;
if (i == 0 || flen == 0) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- TRACE_ALLOC("noentry", args);
+ trace_xfs_alloc_size_noentry(args);
return 0;
}
ASSERT(i == 1);
bestflen = flen;
bestfbno = fbno;
for (;;) {
- if ((error = xfs_alloc_decrement(cnt_cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
goto error0;
if (i == 0)
break;
xfs_alloc_fix_len(args);
if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- TRACE_ALLOC("nominleft", args);
+ trace_xfs_alloc_size_nominleft(args);
args->agbno = NULLAGBLOCK;
return 0;
}
args->agbno + args->len <=
be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
error0);
- TRACE_ALLOC("normal", args);
+ trace_xfs_alloc_size_done(args);
return 0;
error0:
- TRACE_ALLOC("error", args);
+ trace_xfs_alloc_size_error(args);
if (cnt_cur)
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
if (bno_cur)
xfs_extlen_t flen;
int i;
- if ((error = xfs_alloc_decrement(ccur, 0, &i)))
+ if ((error = xfs_btree_decrement(ccur, 0, &i)))
goto error0;
if (i) {
if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
error0);
args->wasfromfl = 1;
- TRACE_ALLOC("freelist", args);
+ trace_xfs_alloc_small_freelist(args);
*stat = 0;
return 0;
}
*/
if (flen < args->minlen) {
args->agbno = NULLAGBLOCK;
- TRACE_ALLOC("notenough", args);
+ trace_xfs_alloc_small_notenough(args);
flen = 0;
}
*fbnop = fbno;
*flenp = flen;
*stat = 1;
- TRACE_ALLOC("normal", args);
+ trace_xfs_alloc_small_done(args);
return 0;
error0:
- TRACE_ALLOC("error", args);
+ trace_xfs_alloc_small_error(args);
return error;
}
* Look for a neighboring block on the right (higher block numbers)
* that is contiguous with this space.
*/
- if ((error = xfs_alloc_increment(bno_cur, 0, &haveright)))
+ if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
goto error0;
if (haveright) {
/*
if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
* Delete the old by-block entry for the right block.
*/
- if ((error = xfs_alloc_delete(bno_cur, &i)))
+ if ((error = xfs_btree_delete(bno_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
* Move the by-block cursor back to the left neighbor.
*/
- if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
+ if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
#ifdef DEBUG
if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
* Back up the by-block cursor to the left neighbor, and
* update its length.
*/
- if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
+ if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
nbno = ltbno;
if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
else {
nbno = bno;
nlen = len;
- if ((error = xfs_alloc_insert(bno_cur, &i)))
+ if ((error = xfs_btree_insert(bno_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
}
if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
- if ((error = xfs_alloc_insert(cnt_cur, &i)))
+ if ((error = xfs_btree_insert(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
xfs_agf_t *agf;
xfs_perag_t *pag; /* per allocation group data */
+ pag = xfs_perag_get(mp, agno);
+ pag->pagf_freeblks += len;
+ xfs_perag_put(pag);
+
agf = XFS_BUF_TO_AGF(agbp);
- pag = &mp->m_perag[agno];
be32_add_cpu(&agf->agf_freeblks, len);
xfs_trans_agblocks_delta(tp, len);
- pag->pagf_freeblks += len;
XFS_WANT_CORRUPTED_GOTO(
be32_to_cpu(agf->agf_freeblks) <=
be32_to_cpu(agf->agf_length),
error0);
- TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
if (!isfl)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
XFS_STATS_INC(xs_freex);
XFS_STATS_ADD(xs_freeb, len);
}
- TRACE_FREE(haveleft ?
- (haveright ? "both" : "left") :
- (haveright ? "right" : "none"),
- agno, bno, len, isfl);
+
+ trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
/*
* Since blocks move to the free list without the coordination
* when the iclog commits to disk. If a busy block is allocated,
* the iclog is pushed up to the LSN that freed the block.
*/
- xfs_alloc_mark_busy(tp, agno, bno, len);
+ xfs_alloc_busy_insert(tp, agno, bno, len);
return 0;
error0:
- TRACE_FREE("error", agno, bno, len, isfl);
+ trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
if (bno_cur)
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
if (cnt_cur)
}
/*
+ * Find the length of the longest extent in an AG.
+ */
+xfs_extlen_t
+xfs_alloc_longest_free_extent(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag)
+{
+ xfs_extlen_t need, delta = 0;
+
+ need = XFS_MIN_FREELIST_PAG(pag, mp);
+ if (need > pag->pagf_flcount)
+ delta = need - pag->pagf_flcount;
+
+ if (pag->pagf_longest > delta)
+ return pag->pagf_longest - delta;
+ return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
+}
+
+/*
* Decide whether to use this allocation group for this allocation.
* If so, fix up the btree freelist's size.
*/
}
if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
- need = XFS_MIN_FREELIST_PAG(pag, mp);
- delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
/*
* If it looks like there isn't a long enough extent, or enough
* total blocks, reject it.
*/
- longest = (pag->pagf_longest > delta) ?
- (pag->pagf_longest - delta) :
- (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
+ need = XFS_MIN_FREELIST_PAG(pag, mp);
+ longest = xfs_alloc_longest_free_extent(mp, pag);
if ((args->minlen + args->alignment + args->minalignslop - 1) >
longest ||
((int)(pag->pagf_freeblks + pag->pagf_flcount -
xfs_trans_brelse(tp, agflbp);
if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
agf->agf_flfirst = 0;
- pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
+
+ pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
be32_add_cpu(&agf->agf_flcount, -1);
xfs_trans_agflist_delta(tp, -1);
pag->pagf_flcount--;
+ xfs_perag_put(pag);
logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
if (btreeblk) {
logflags |= XFS_AGF_BTREEBLKS;
}
- TRACE_MODAGF(NULL, agf, logflags);
xfs_alloc_log_agf(tp, agbp, logflags);
*bnop = bno;
/*
- * As blocks are freed, they are added to the per-ag busy list
- * and remain there until the freeing transaction is committed to
- * disk. Now that we have allocated blocks, this list must be
- * searched to see if a block is being reused. If one is, then
- * the freeing transaction must be pushed to disk NOW by forcing
- * to disk all iclogs up that transaction's LSN.
+ * As blocks are freed, they are added to the per-ag busy list and
+ * remain there until the freeing transaction is committed to disk.
+ * Now that we have allocated blocks, this list must be searched to see
+ * if a block is being reused. If one is, then the freeing transaction
+ * must be pushed to disk before this transaction.
+ *
+ * We do this by setting the current transaction to a sync transaction
+ * which guarantees that the freeing transaction is on disk before this
+ * transaction. This is done instead of a synchronous log force here so
+ * that we don't sit and wait with the AGF locked in the transaction
+ * during the log force.
*/
- xfs_alloc_search_busy(tp, be32_to_cpu(agf->agf_seqno), bno, 1);
+ if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
+ xfs_trans_set_sync(tp);
return 0;
}
sizeof(xfs_agf_t)
};
+ trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
+
xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}
be32_add_cpu(&agf->agf_fllast, 1);
if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
agf->agf_fllast = 0;
- pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
+
+ pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
be32_add_cpu(&agf->agf_flcount, 1);
xfs_trans_agflist_delta(tp, 1);
pag->pagf_flcount++;
pag->pagf_btreeblks--;
logflags |= XFS_AGF_BTREEBLKS;
}
+ xfs_perag_put(pag);
- TRACE_MODAGF(NULL, agf, logflags);
xfs_alloc_log_agf(tp, agbp, logflags);
ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
*blockp = cpu_to_be32(bno);
- TRACE_MODAGF(NULL, agf, logflags);
xfs_alloc_log_agf(tp, agbp, logflags);
xfs_trans_log_buf(tp, agflbp,
(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
* Read in the allocation group header (free/alloc section).
*/
int /* error */
-xfs_alloc_read_agf(
- xfs_mount_t *mp, /* mount point structure */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_agnumber_t agno, /* allocation group number */
- int flags, /* XFS_ALLOC_FLAG_... */
- xfs_buf_t **bpp) /* buffer for the ag freelist header */
+xfs_read_agf(
+ struct xfs_mount *mp, /* mount point structure */
+ struct xfs_trans *tp, /* transaction pointer */
+ xfs_agnumber_t agno, /* allocation group number */
+ int flags, /* XFS_BUF_ */
+ struct xfs_buf **bpp) /* buffer for the ag freelist header */
{
- xfs_agf_t *agf; /* ag freelist header */
+ struct xfs_agf *agf; /* ag freelist header */
int agf_ok; /* set if agf is consistent */
- xfs_buf_t *bp; /* return value */
- xfs_perag_t *pag; /* per allocation group data */
int error;
ASSERT(agno != NULLAGNUMBER);
error = xfs_trans_read_buf(
mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1),
- (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U,
- &bp);
+ XFS_FSS_TO_BB(mp, 1), flags, bpp);
if (error)
return error;
- ASSERT(!bp || !XFS_BUF_GETERROR(bp));
- if (!bp) {
- *bpp = NULL;
+ if (!*bpp)
return 0;
- }
+
+ ASSERT(!XFS_BUF_GETERROR(*bpp));
+ agf = XFS_BUF_TO_AGF(*bpp);
+
/*
* Validate the magic number of the agf block.
*/
- agf = XFS_BUF_TO_AGF(bp);
agf_ok =
be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
- be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
+ be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) &&
+ be32_to_cpu(agf->agf_seqno) == agno;
+ if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+ agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
+ be32_to_cpu(agf->agf_length);
if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
XFS_RANDOM_ALLOC_READ_AGF))) {
XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
XFS_ERRLEVEL_LOW, mp, agf);
- xfs_trans_brelse(tp, bp);
+ xfs_trans_brelse(tp, *bpp);
return XFS_ERROR(EFSCORRUPTED);
}
- pag = &mp->m_perag[agno];
+ XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
+ return 0;
+}
+
+/*
+ * Read in the allocation group header (free/alloc section).
+ */
+int /* error */
+xfs_alloc_read_agf(
+ struct xfs_mount *mp, /* mount point structure */
+ struct xfs_trans *tp, /* transaction pointer */
+ xfs_agnumber_t agno, /* allocation group number */
+ int flags, /* XFS_ALLOC_FLAG_... */
+ struct xfs_buf **bpp) /* buffer for the ag freelist header */
+{
+ struct xfs_agf *agf; /* ag freelist header */
+ struct xfs_perag *pag; /* per allocation group data */
+ int error;
+
+ ASSERT(agno != NULLAGNUMBER);
+
+ error = xfs_read_agf(mp, tp, agno,
+ (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
+ bpp);
+ if (error)
+ return error;
+ if (!*bpp)
+ return 0;
+ ASSERT(!XFS_BUF_GETERROR(*bpp));
+
+ agf = XFS_BUF_TO_AGF(*bpp);
+ pag = xfs_perag_get(mp, agno);
if (!pag->pagf_init) {
pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
pag->pagf_levels[XFS_BTNUM_CNTi] =
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
spin_lock_init(&pag->pagb_lock);
- pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS *
- sizeof(xfs_perag_busy_t), KM_SLEEP);
+ pag->pagb_count = 0;
+ pag->pagb_tree = RB_ROOT;
pag->pagf_init = 1;
}
#ifdef DEBUG
else if (!XFS_FORCED_SHUTDOWN(mp)) {
ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
+ ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
}
#endif
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
- *bpp = bp;
+ xfs_perag_put(pag);
return 0;
}
args->minlen > args->maxlen || args->minlen > agsize ||
args->mod >= args->prod) {
args->fsbno = NULLFSBLOCK;
- TRACE_ALLOC("badargs", args);
+ trace_xfs_alloc_vextent_badargs(args);
return 0;
}
minleft = args->minleft;
* These three force us into a single a.g.
*/
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
- down_read(&mp->m_peraglock);
- args->pag = &mp->m_perag[args->agno];
+ args->pag = xfs_perag_get(mp, args->agno);
args->minleft = 0;
error = xfs_alloc_fix_freelist(args, 0);
args->minleft = minleft;
if (error) {
- TRACE_ALLOC("nofix", args);
+ trace_xfs_alloc_vextent_nofix(args);
goto error0;
}
if (!args->agbp) {
- up_read(&mp->m_peraglock);
- TRACE_ALLOC("noagbp", args);
+ trace_xfs_alloc_vextent_noagbp(args);
break;
}
args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
if ((error = xfs_alloc_ag_vextent(args)))
goto error0;
- up_read(&mp->m_peraglock);
break;
case XFS_ALLOCTYPE_START_BNO:
/*
* Loop over allocation groups twice; first time with
* trylock set, second time without.
*/
- down_read(&mp->m_peraglock);
for (;;) {
- args->pag = &mp->m_perag[args->agno];
+ args->pag = xfs_perag_get(mp, args->agno);
if (no_min) args->minleft = 0;
error = xfs_alloc_fix_freelist(args, flags);
args->minleft = minleft;
if (error) {
- TRACE_ALLOC("nofix", args);
+ trace_xfs_alloc_vextent_nofix(args);
goto error0;
}
/*
goto error0;
break;
}
- TRACE_ALLOC("loopfailed", args);
+
+ trace_xfs_alloc_vextent_loopfailed(args);
+
/*
* Didn't work, figure out the next iteration.
*/
if (args->agno == sagno) {
if (no_min == 1) {
args->agbno = NULLAGBLOCK;
- TRACE_ALLOC("allfailed", args);
+ trace_xfs_alloc_vextent_allfailed(args);
break;
}
if (flags == 0) {
}
}
}
+ xfs_perag_put(args->pag);
}
- up_read(&mp->m_peraglock);
if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
if (args->agno == sagno)
mp->m_agfrotor = (mp->m_agfrotor + 1) %
args->len);
#endif
}
+ xfs_perag_put(args->pag);
return 0;
error0:
- up_read(&mp->m_peraglock);
+ xfs_perag_put(args->pag);
return error;
}
args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
ASSERT(args.agno < args.mp->m_sb.sb_agcount);
args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
- down_read(&args.mp->m_peraglock);
- args.pag = &args.mp->m_perag[args.agno];
+ args.pag = xfs_perag_get(args.mp, args.agno);
if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
goto error0;
#ifdef DEBUG
#endif
error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
error0:
- up_read(&args.mp->m_peraglock);
+ xfs_perag_put(args.pag);
return error;
}
* list is reused, the transaction that freed it must be forced to disk
* before continuing to use the block.
*
- * xfs_alloc_mark_busy - add to the per-ag busy list
- * xfs_alloc_clear_busy - remove an item from the per-ag busy list
+ * xfs_alloc_busy_insert - add to the per-ag busy list
+ * xfs_alloc_busy_clear - remove an item from the per-ag busy list
+ * xfs_alloc_busy_search - search for a busy extent
+ */
+
+/*
+ * Insert a new extent into the busy tree.
+ *
+ * The busy extent tree is indexed by the start block of the busy extent.
+ * there can be multiple overlapping ranges in the busy extent tree but only
+ * ever one entry at a given start block. The reason for this is that
+ * multi-block extents can be freed, then smaller chunks of that extent
+ * allocated and freed again before the first transaction commit is on disk.
+ * If the exact same start block is freed a second time, we have to wait for
+ * that busy extent to pass out of the tree before the new extent is inserted.
+ * There are two main cases we have to handle here.
+ *
+ * The first case is a transaction that triggers a "free - allocate - free"
+ * cycle. This can occur during btree manipulations as a btree block is freed
+ * to the freelist, then allocated from the free list, then freed again. In
+ * this case, the second extxpnet free is what triggers the duplicate and as
+ * such the transaction IDs should match. Because the extent was allocated in
+ * this transaction, the transaction must be marked as synchronous. This is
+ * true for all cases where the free/alloc/free occurs in the one transaction,
+ * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
+ * This serves to catch violations of the second case quite effectively.
+ *
+ * The second case is where the free/alloc/free occur in different
+ * transactions. In this case, the thread freeing the extent the second time
+ * can't mark the extent busy immediately because it is already tracked in a
+ * transaction that may be committing. When the log commit for the existing
+ * busy extent completes, the busy extent will be removed from the tree. If we
+ * allow the second busy insert to continue using that busy extent structure,
+ * it can be freed before this transaction is safely in the log. Hence our
+ * only option in this case is to force the log to remove the existing busy
+ * extent from the list before we insert the new one with the current
+ * transaction ID.
+ *
+ * The problem we are trying to avoid in the free-alloc-free in separate
+ * transactions is most easily described with a timeline:
+ *
+ * Thread 1 Thread 2 Thread 3 xfslogd
+ * xact alloc
+ * free X
+ * mark busy
+ * commit xact
+ * free xact
+ * xact alloc
+ * alloc X
+ * busy search
+ * mark xact sync
+ * commit xact
+ * free xact
+ * force log
+ * checkpoint starts
+ * ....
+ * xact alloc
+ * free X
+ * mark busy
+ * finds match
+ * *** KABOOM! ***
+ * ....
+ * log IO completes
+ * unbusy X
+ * checkpoint completes
+ *
+ * By issuing a log force in thread 3 @ "KABOOM", the thread will block until
+ * the checkpoint completes, and the busy extent it matched will have been
+ * removed from the tree when it is woken. Hence it can then continue safely.
+ *
+ * However, to ensure this matching process is robust, we need to use the
+ * transaction ID for identifying transaction, as delayed logging results in
+ * the busy extent and transaction lifecycles being different. i.e. the busy
+ * extent is active for a lot longer than the transaction. Hence the
+ * transaction structure can be freed and reallocated, then mark the same
+ * extent busy again in the new transaction. In this case the new transaction
+ * will have a different tid but can have the same address, and hence we need
+ * to check against the tid.
+ *
+ * Future: for delayed logging, we could avoid the log force if the extent was
+ * first freed in the current checkpoint sequence. This, however, requires the
+ * ability to pin the current checkpoint in memory until this transaction
+ * commits to ensure that both the original free and the current one combine
+ * logically into the one checkpoint. If the checkpoint sequences are
+ * different, however, we still need to wait on a log force.
*/
void
-xfs_alloc_mark_busy(xfs_trans_t *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len)
+xfs_alloc_busy_insert(
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ xfs_extlen_t len)
{
- xfs_mount_t *mp;
- xfs_perag_busy_t *bsy;
- int n;
+ struct xfs_busy_extent *new;
+ struct xfs_busy_extent *busyp;
+ struct xfs_perag *pag;
+ struct rb_node **rbp;
+ struct rb_node *parent;
+ int match;
- mp = tp->t_mountp;
- spin_lock(&mp->m_perag[agno].pagb_lock);
- /* search pagb_list for an open slot */
- for (bsy = mp->m_perag[agno].pagb_list, n = 0;
- n < XFS_PAGB_NUM_SLOTS;
- bsy++, n++) {
- if (bsy->busy_tp == NULL) {
- break;
- }
- }
-
- if (n < XFS_PAGB_NUM_SLOTS) {
- bsy = &mp->m_perag[agno].pagb_list[n];
- mp->m_perag[agno].pagb_count++;
- TRACE_BUSY("xfs_alloc_mark_busy", "got", agno, bno, len, n, tp);
- bsy->busy_start = bno;
- bsy->busy_length = len;
- bsy->busy_tp = tp;
- xfs_trans_add_busy(tp, agno, n);
- } else {
- TRACE_BUSY("xfs_alloc_mark_busy", "FULL", agno, bno, len, -1, tp);
+ new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
+ if (!new) {
/*
- * The busy list is full! Since it is now not possible to
- * track the free block, make this a synchronous transaction
- * to insure that the block is not reused before this
- * transaction commits.
+ * No Memory! Since it is now not possible to track the free
+ * block, make this a synchronous transaction to insure that
+ * the block is not reused before this transaction commits.
*/
+ trace_xfs_alloc_busy(tp, agno, bno, len, 1);
xfs_trans_set_sync(tp);
+ return;
}
- spin_unlock(&mp->m_perag[agno].pagb_lock);
-}
-
-void
-xfs_alloc_clear_busy(xfs_trans_t *tp,
- xfs_agnumber_t agno,
- int idx)
-{
- xfs_mount_t *mp;
- xfs_perag_busy_t *list;
-
- mp = tp->t_mountp;
-
- spin_lock(&mp->m_perag[agno].pagb_lock);
- list = mp->m_perag[agno].pagb_list;
-
- ASSERT(idx < XFS_PAGB_NUM_SLOTS);
- if (list[idx].busy_tp == tp) {
- TRACE_UNBUSY("xfs_alloc_clear_busy", "found", agno, idx, tp);
- list[idx].busy_tp = NULL;
- mp->m_perag[agno].pagb_count--;
- } else {
- TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp);
+ new->agno = agno;
+ new->bno = bno;
+ new->length = len;
+ new->tid = xfs_log_get_trans_ident(tp);
+
+ INIT_LIST_HEAD(&new->list);
+
+ /* trace before insert to be able to see failed inserts */
+ trace_xfs_alloc_busy(tp, agno, bno, len, 0);
+
+ pag = xfs_perag_get(tp->t_mountp, new->agno);
+restart:
+ spin_lock(&pag->pagb_lock);
+ rbp = &pag->pagb_tree.rb_node;
+ parent = NULL;
+ busyp = NULL;
+ match = 0;
+ while (*rbp && match >= 0) {
+ parent = *rbp;
+ busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
+
+ if (new->bno < busyp->bno) {
+ /* may overlap, but exact start block is lower */
+ rbp = &(*rbp)->rb_left;
+ if (new->bno + new->length > busyp->bno)
+ match = busyp->tid == new->tid ? 1 : -1;
+ } else if (new->bno > busyp->bno) {
+ /* may overlap, but exact start block is higher */
+ rbp = &(*rbp)->rb_right;
+ if (bno < busyp->bno + busyp->length)
+ match = busyp->tid == new->tid ? 1 : -1;
+ } else {
+ match = busyp->tid == new->tid ? 1 : -1;
+ break;
+ }
}
+ if (match < 0) {
+ /* overlap marked busy in different transaction */
+ spin_unlock(&pag->pagb_lock);
+ xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
+ goto restart;
+ }
+ if (match > 0) {
+ /*
+ * overlap marked busy in same transaction. Update if exact
+ * start block match, otherwise combine the busy extents into
+ * a single range.
+ */
+ if (busyp->bno == new->bno) {
+ busyp->length = max(busyp->length, new->length);
+ spin_unlock(&pag->pagb_lock);
+ ASSERT(tp->t_flags & XFS_TRANS_SYNC);
+ xfs_perag_put(pag);
+ kmem_free(new);
+ return;
+ }
+ rb_erase(&busyp->rb_node, &pag->pagb_tree);
+ new->length = max(busyp->bno + busyp->length,
+ new->bno + new->length) -
+ min(busyp->bno, new->bno);
+ new->bno = min(busyp->bno, new->bno);
+ } else
+ busyp = NULL;
- spin_unlock(&mp->m_perag[agno].pagb_lock);
-}
+ rb_link_node(&new->rb_node, parent, rbp);
+ rb_insert_color(&new->rb_node, &pag->pagb_tree);
+ list_add(&new->list, &tp->t_busy);
+ spin_unlock(&pag->pagb_lock);
+ xfs_perag_put(pag);
+ kmem_free(busyp);
+}
/*
- * If we find the extent in the busy list, force the log out to get the
- * extent out of the busy list so the caller can use it straight away.
+ * Search for a busy extent within the range of the extent we are about to
+ * allocate. You need to be holding the busy extent tree lock when calling
+ * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy
+ * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
+ * match. This is done so that a non-zero return indicates an overlap that
+ * will require a synchronous transaction, but it can still be
+ * used to distinguish between a partial or exact match.
*/
-STATIC void
-xfs_alloc_search_busy(xfs_trans_t *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len)
+static int
+xfs_alloc_busy_search(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ xfs_extlen_t len)
{
- xfs_mount_t *mp;
- xfs_perag_busy_t *bsy;
- xfs_agblock_t uend, bend;
- xfs_lsn_t lsn;
- int cnt;
+ struct xfs_perag *pag;
+ struct rb_node *rbp;
+ struct xfs_busy_extent *busyp;
+ int match = 0;
+
+ pag = xfs_perag_get(mp, agno);
+ spin_lock(&pag->pagb_lock);
+
+ rbp = pag->pagb_tree.rb_node;
+
+ /* find closest start bno overlap */
+ while (rbp) {
+ busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node);
+ if (bno < busyp->bno) {
+ /* may overlap, but exact start block is lower */
+ if (bno + len > busyp->bno)
+ match = -1;
+ rbp = rbp->rb_left;
+ } else if (bno > busyp->bno) {
+ /* may overlap, but exact start block is higher */
+ if (bno < busyp->bno + busyp->length)
+ match = -1;
+ rbp = rbp->rb_right;
+ } else {
+ /* bno matches busyp, length determines exact match */
+ match = (busyp->length == len) ? 1 : -1;
+ break;
+ }
+ }
+ spin_unlock(&pag->pagb_lock);
+ trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
+ xfs_perag_put(pag);
+ return match;
+}
- mp = tp->t_mountp;
+void
+xfs_alloc_busy_clear(
+ struct xfs_mount *mp,
+ struct xfs_busy_extent *busyp)
+{
+ struct xfs_perag *pag;
- spin_lock(&mp->m_perag[agno].pagb_lock);
- cnt = mp->m_perag[agno].pagb_count;
+ trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno,
+ busyp->length);
- uend = bno + len - 1;
+ ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno,
+ busyp->length) == 1);
- /* search pagb_list for this slot, skipping open slots */
- for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) {
+ list_del_init(&busyp->list);
- /*
- * (start1,length1) within (start2, length2)
- */
- if (bsy->busy_tp != NULL) {
- bend = bsy->busy_start + bsy->busy_length - 1;
- if ((bno > bend) || (uend < bsy->busy_start)) {
- cnt--;
- } else {
- TRACE_BUSYSEARCH("xfs_alloc_search_busy",
- "found1", agno, bno, len, tp);
- break;
- }
- }
- }
+ pag = xfs_perag_get(mp, busyp->agno);
+ spin_lock(&pag->pagb_lock);
+ rb_erase(&busyp->rb_node, &pag->pagb_tree);
+ spin_unlock(&pag->pagb_lock);
+ xfs_perag_put(pag);
- /*
- * If a block was found, force the log through the LSN of the
- * transaction that freed the block
- */
- if (cnt) {
- TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, tp);
- lsn = bsy->busy_tp->t_commit_lsn;
- spin_unlock(&mp->m_perag[agno].pagb_lock);
- xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
- } else {
- TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, tp);
- spin_unlock(&mp->m_perag[agno].pagb_lock);
- }
+ kmem_free(busyp);
}