}
zn = copy_znode(c, znode);
- if (unlikely(IS_ERR(zn)))
+ if (IS_ERR(zn))
return zn;
if (zbr->len) {
* This function performs that same function as ubifs_read_node except that
* it does not require that there is actually a node present and instead
* the return code indicates if a node was read.
+ *
+ * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc
+ * is true (it is controlled by corresponding mount option). However, if
+ * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always
+ * checked.
*/
static int try_read_node(const struct ubifs_info *c, void *buf, int type,
int len, int lnum, int offs)
if (node_len != len)
return 0;
+ if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc)
+ return 1;
+
crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
node_crc = le32_to_cpu(ch->crc);
if (crc != node_crc)
if (keys_cmp(c, key, &node_key) != 0)
ret = 0;
}
- if (ret == 0)
+ if (ret == 0 && c->replaying)
dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
return ret;
ubifs_assert(znode == c->zroot.znode);
znode = dirty_cow_znode(c, &c->zroot);
}
- if (unlikely(IS_ERR(znode)) || !p)
+ if (IS_ERR(znode) || !p)
break;
ubifs_assert(path[p - 1] >= 0);
ubifs_assert(path[p - 1] < znode->child_cnt);
* splitting in the middle of the colliding sequence. Also, when
* removing the leftmost key, we would have to correct the key of the
* parent node, which would introduce additional complications. Namely,
- * if we changed the the leftmost key of the parent znode, the garbage
+ * if we changed the leftmost key of the parent znode, the garbage
* collector would be unable to find it (GC is doing this when GC'ing
* indexing LEBs). Although we already have an additional RB-tree where
* we save such changed znodes (see 'ins_clr_old_idx_znode()') until
}
/**
- * ubifs_tnc_lookup - look up a file-system node.
+ * maybe_leb_gced - determine if a LEB may have been garbage collected.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number
+ * @gc_seq1: garbage collection sequence number
+ *
+ * This function determines if @lnum may have been garbage collected since
+ * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise
+ * %0 is returned.
+ */
+static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
+{
+ int gc_seq2, gced_lnum;
+
+ gced_lnum = c->gced_lnum;
+ smp_rmb();
+ gc_seq2 = c->gc_seq;
+ /* Same seq means no GC */
+ if (gc_seq1 == gc_seq2)
+ return 0;
+ /* Different by more than 1 means we don't know */
+ if (gc_seq1 + 1 != gc_seq2)
+ return 1;
+ /*
+ * We have seen the sequence number has increased by 1. Now we need to
+ * be sure we read the right LEB number, so read it again.
+ */
+ smp_rmb();
+ if (gced_lnum != c->gced_lnum)
+ return 1;
+ /* Finally we can check lnum */
+ if (gced_lnum == lnum)
+ return 1;
+ return 0;
+}
+
+/**
+ * ubifs_tnc_locate - look up a file-system node and return it and its location.
* @c: UBIFS file-system description object
* @key: node key to lookup
* @node: the node is returned here
+ * @lnum: LEB number is returned here
+ * @offs: offset is returned here
*
* This function look up and reads node with key @key. The caller has to make
* sure the @node buffer is large enough to fit the node. Returns zero in case
* of success, %-ENOENT if the node was not found, and a negative error code in
- * case of failure.
+ * case of failure. The node location can be returned in @lnum and @offs.
*/
-int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
- void *node)
+int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
+ void *node, int *lnum, int *offs)
{
- int found, n, err;
+ int found, n, err, safely = 0, gc_seq1;
struct ubifs_znode *znode;
struct ubifs_zbranch zbr, *zt;
+again:
mutex_lock(&c->tnc_mutex);
found = ubifs_lookup_level0(c, key, &znode, &n);
if (!found) {
goto out;
}
zt = &znode->zbranch[n];
+ if (lnum) {
+ *lnum = zt->lnum;
+ *offs = zt->offs;
+ }
if (is_hash_key(c, key)) {
/*
* In this case the leaf node cache gets used, so we pass the
err = tnc_read_node_nm(c, zt, node);
goto out;
}
+ if (safely) {
+ err = ubifs_tnc_read_node(c, zt, node);
+ goto out;
+ }
+ /* Drop the TNC mutex prematurely and race with garbage collection */
zbr = znode->zbranch[n];
+ gc_seq1 = c->gc_seq;
mutex_unlock(&c->tnc_mutex);
- err = ubifs_tnc_read_node(c, &zbr, node);
- return err;
+ if (ubifs_get_wbuf(c, zbr.lnum)) {
+ /* We do not GC journal heads */
+ err = ubifs_tnc_read_node(c, &zbr, node);
+ return err;
+ }
+
+ err = fallible_read_node(c, key, &zbr, node);
+ if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
+ /*
+ * The node may have been GC'ed out from under us so try again
+ * while keeping the TNC mutex locked.
+ */
+ safely = 1;
+ goto again;
+ }
+ return 0;
out:
mutex_unlock(&c->tnc_mutex);
}
/**
- * ubifs_tnc_locate - look up a file-system node and return it and its location.
+ * ubifs_tnc_get_bu_keys - lookup keys for bulk-read.
* @c: UBIFS file-system description object
- * @key: node key to lookup
- * @node: the node is returned here
- * @lnum: LEB number is returned here
- * @offs: offset is returned here
+ * @bu: bulk-read parameters and results
*
- * This function is the same as 'ubifs_tnc_lookup()' but it returns the node
- * location also. See 'ubifs_tnc_lookup()'.
+ * Lookup consecutive data node keys for the same inode that reside
+ * consecutively in the same LEB. This function returns zero in case of success
+ * and a negative error code in case of failure.
+ *
+ * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function
+ * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares
+ * maximum possible amount of nodes for bulk-read.
*/
-int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
- void *node, int *lnum, int *offs)
+int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu)
{
- int found, n, err;
+ int n, err = 0, lnum = -1, uninitialized_var(offs);
+ int uninitialized_var(len);
+ unsigned int block = key_block(c, &bu->key);
struct ubifs_znode *znode;
- struct ubifs_zbranch zbr, *zt;
+
+ bu->cnt = 0;
+ bu->blk_cnt = 0;
+ bu->eof = 0;
mutex_lock(&c->tnc_mutex);
- found = ubifs_lookup_level0(c, key, &znode, &n);
- if (!found) {
- err = -ENOENT;
- goto out;
- } else if (found < 0) {
- err = found;
+ /* Find first key */
+ err = ubifs_lookup_level0(c, &bu->key, &znode, &n);
+ if (err < 0)
goto out;
+ if (err) {
+ /* Key found */
+ len = znode->zbranch[n].len;
+ /* The buffer must be big enough for at least 1 node */
+ if (len > bu->buf_len) {
+ err = -EINVAL;
+ goto out;
+ }
+ /* Add this key */
+ bu->zbranch[bu->cnt++] = znode->zbranch[n];
+ bu->blk_cnt += 1;
+ lnum = znode->zbranch[n].lnum;
+ offs = ALIGN(znode->zbranch[n].offs + len, 8);
}
- zt = &znode->zbranch[n];
- if (is_hash_key(c, key)) {
- /*
- * In this case the leaf node cache gets used, so we pass the
- * address of the zbranch and keep the mutex locked
- */
- *lnum = zt->lnum;
- *offs = zt->offs;
- err = tnc_read_node_nm(c, zt, node);
- goto out;
+ while (1) {
+ struct ubifs_zbranch *zbr;
+ union ubifs_key *key;
+ unsigned int next_block;
+
+ /* Find next key */
+ err = tnc_next(c, &znode, &n);
+ if (err)
+ goto out;
+ zbr = &znode->zbranch[n];
+ key = &zbr->key;
+ /* See if there is another data key for this file */
+ if (key_inum(c, key) != key_inum(c, &bu->key) ||
+ key_type(c, key) != UBIFS_DATA_KEY) {
+ err = -ENOENT;
+ goto out;
+ }
+ if (lnum < 0) {
+ /* First key found */
+ lnum = zbr->lnum;
+ offs = ALIGN(zbr->offs + zbr->len, 8);
+ len = zbr->len;
+ if (len > bu->buf_len) {
+ err = -EINVAL;
+ goto out;
+ }
+ } else {
+ /*
+ * The data nodes must be in consecutive positions in
+ * the same LEB.
+ */
+ if (zbr->lnum != lnum || zbr->offs != offs)
+ goto out;
+ offs += ALIGN(zbr->len, 8);
+ len = ALIGN(len, 8) + zbr->len;
+ /* Must not exceed buffer length */
+ if (len > bu->buf_len)
+ goto out;
+ }
+ /* Allow for holes */
+ next_block = key_block(c, key);
+ bu->blk_cnt += (next_block - block - 1);
+ if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
+ goto out;
+ block = next_block;
+ /* Add this key */
+ bu->zbranch[bu->cnt++] = *zbr;
+ bu->blk_cnt += 1;
+ /* See if we have room for more */
+ if (bu->cnt >= UBIFS_MAX_BULK_READ)
+ goto out;
+ if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
+ goto out;
}
- zbr = znode->zbranch[n];
+out:
+ if (err == -ENOENT) {
+ bu->eof = 1;
+ err = 0;
+ }
+ bu->gc_seq = c->gc_seq;
mutex_unlock(&c->tnc_mutex);
+ if (err)
+ return err;
+ /*
+ * An enormous hole could cause bulk-read to encompass too many
+ * page cache pages, so limit the number here.
+ */
+ if (bu->blk_cnt > UBIFS_MAX_BULK_READ)
+ bu->blk_cnt = UBIFS_MAX_BULK_READ;
+ /*
+ * Ensure that bulk-read covers a whole number of page cache
+ * pages.
+ */
+ if (UBIFS_BLOCKS_PER_PAGE == 1 ||
+ !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1)))
+ return 0;
+ if (bu->eof) {
+ /* At the end of file we can round up */
+ bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1;
+ return 0;
+ }
+ /* Exclude data nodes that do not make up a whole page cache page */
+ block = key_block(c, &bu->key) + bu->blk_cnt;
+ block &= ~(UBIFS_BLOCKS_PER_PAGE - 1);
+ while (bu->cnt) {
+ if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block)
+ break;
+ bu->cnt -= 1;
+ }
+ return 0;
+}
- *lnum = zbr.lnum;
- *offs = zbr.offs;
+/**
+ * read_wbuf - bulk-read from a LEB with a wbuf.
+ * @wbuf: wbuf that may overlap the read
+ * @buf: buffer into which to read
+ * @len: read length
+ * @lnum: LEB number from which to read
+ * @offs: offset from which to read
+ *
+ * This functions returns %0 on success or a negative error code on failure.
+ */
+static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
+ int offs)
+{
+ const struct ubifs_info *c = wbuf->c;
+ int rlen, overlap;
- err = ubifs_tnc_read_node(c, &zbr, node);
- return err;
+ dbg_io("LEB %d:%d, length %d", lnum, offs, len);
+ ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+ ubifs_assert(!(offs & 7) && offs < c->leb_size);
+ ubifs_assert(offs + len <= c->leb_size);
+
+ spin_lock(&wbuf->lock);
+ overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
+ if (!overlap) {
+ /* We may safely unlock the write-buffer and read the data */
+ spin_unlock(&wbuf->lock);
+ return ubi_read(c->ubi, lnum, buf, offs, len);
+ }
+
+ /* Don't read under wbuf */
+ rlen = wbuf->offs - offs;
+ if (rlen < 0)
+ rlen = 0;
+
+ /* Copy the rest from the write-buffer */
+ memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
+ spin_unlock(&wbuf->lock);
+
+ if (rlen > 0)
+ /* Read everything that goes before write-buffer */
+ return ubi_read(c->ubi, lnum, buf, offs, rlen);
+
+ return 0;
+}
+
+/**
+ * validate_data_node - validate data nodes for bulk-read.
+ * @c: UBIFS file-system description object
+ * @buf: buffer containing data node to validate
+ * @zbr: zbranch of data node to validate
+ *
+ * This functions returns %0 on success or a negative error code on failure.
+ */
+static int validate_data_node(struct ubifs_info *c, void *buf,
+ struct ubifs_zbranch *zbr)
+{
+ union ubifs_key key1;
+ struct ubifs_ch *ch = buf;
+ int err, len;
+
+ if (ch->node_type != UBIFS_DATA_NODE) {
+ ubifs_err("bad node type (%d but expected %d)",
+ ch->node_type, UBIFS_DATA_NODE);
+ goto out_err;
+ }
+ err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0);
+ if (err) {
+ ubifs_err("expected node type %d", UBIFS_DATA_NODE);
+ goto out;
+ }
+
+ len = le32_to_cpu(ch->len);
+ if (len != zbr->len) {
+ ubifs_err("bad node length %d, expected %d", len, zbr->len);
+ goto out_err;
+ }
+
+ /* Make sure the key of the read node is correct */
+ key_read(c, buf + UBIFS_KEY_OFFSET, &key1);
+ if (!keys_eq(c, &zbr->key, &key1)) {
+ ubifs_err("bad key in node at LEB %d:%d",
+ zbr->lnum, zbr->offs);
+ dbg_tnc("looked for key %s found node's key %s",
+ DBGKEY(&zbr->key), DBGKEY1(&key1));
+ goto out_err;
+ }
+
+ return 0;
+
+out_err:
+ err = -EINVAL;
out:
- mutex_unlock(&c->tnc_mutex);
+ ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs);
+ dbg_dump_node(c, buf);
+ dbg_dump_stack();
return err;
}
/**
+ * ubifs_tnc_bulk_read - read a number of data nodes in one go.
+ * @c: UBIFS file-system description object
+ * @bu: bulk-read parameters and results
+ *
+ * This functions reads and validates the data nodes that were identified by the
+ * 'ubifs_tnc_get_bu_keys()' function. This functions returns %0 on success,
+ * -EAGAIN to indicate a race with GC, or another negative error code on
+ * failure.
+ */
+int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
+{
+ int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i;
+ struct ubifs_wbuf *wbuf;
+ void *buf;
+
+ len = bu->zbranch[bu->cnt - 1].offs;
+ len += bu->zbranch[bu->cnt - 1].len - offs;
+ if (len > bu->buf_len) {
+ ubifs_err("buffer too small %d vs %d", bu->buf_len, len);
+ return -EINVAL;
+ }
+
+ /* Do the read */
+ wbuf = ubifs_get_wbuf(c, lnum);
+ if (wbuf)
+ err = read_wbuf(wbuf, bu->buf, len, lnum, offs);
+ else
+ err = ubi_read(c->ubi, lnum, bu->buf, offs, len);
+
+ /* Check for a race with GC */
+ if (maybe_leb_gced(c, lnum, bu->gc_seq))
+ return -EAGAIN;
+
+ if (err && err != -EBADMSG) {
+ ubifs_err("failed to read from LEB %d:%d, error %d",
+ lnum, offs, err);
+ dbg_dump_stack();
+ dbg_tnc("key %s", DBGKEY(&bu->key));
+ return err;
+ }
+
+ /* Validate the nodes read */
+ buf = bu->buf;
+ for (i = 0; i < bu->cnt; i++) {
+ err = validate_data_node(c, buf, &bu->zbranch[i]);
+ if (err)
+ return err;
+ buf = buf + ALIGN(bu->zbranch[i].len, 8);
+ }
+
+ return 0;
+}
+
+/**
* do_lookup_nm- look up a "hashed" node.
* @c: UBIFS file-system description object
* @key: node key to lookup
{
int found, n, err;
struct ubifs_znode *znode;
- struct ubifs_zbranch zbr;
dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
mutex_lock(&c->tnc_mutex);
goto out_unlock;
}
- zbr = znode->zbranch[n];
- mutex_unlock(&c->tnc_mutex);
-
- err = tnc_read_node_nm(c, &zbr, node);
- return err;
+ err = tnc_read_node_nm(c, &znode->zbranch[n], node);
out_unlock:
mutex_unlock(&c->tnc_mutex);
{
struct ubifs_znode *zn, *zi, *zp;
int i, keep, move, appending = 0;
- union ubifs_key *key = &zbr->key;
+ union ubifs_key *key = &zbr->key, *key1;
ubifs_assert(n >= 0 && n <= c->fanout);
zn->level = znode->level;
/* Decide where to split */
- if (znode->level == 0 && n == c->fanout &&
- key_type(c, key) == UBIFS_DATA_KEY) {
- union ubifs_key *key1;
-
- /*
- * If this is an inode which is being appended - do not split
- * it because no other zbranches can be inserted between
- * zbranches of consecutive data nodes anyway.
- */
- key1 = &znode->zbranch[n - 1].key;
- if (key_inum(c, key1) == key_inum(c, key) &&
- key_type(c, key1) == UBIFS_DATA_KEY &&
- key_block(c, key1) == key_block(c, key) - 1)
- appending = 1;
+ if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) {
+ /* Try not to split consecutive data keys */
+ if (n == c->fanout) {
+ key1 = &znode->zbranch[n - 1].key;
+ if (key_inum(c, key1) == key_inum(c, key) &&
+ key_type(c, key1) == UBIFS_DATA_KEY)
+ appending = 1;
+ } else
+ goto check_split;
+ } else if (appending && n != c->fanout) {
+ /* Try not to split consecutive data keys */
+ appending = 0;
+check_split:
+ if (n >= (c->fanout + 1) / 2) {
+ key1 = &znode->zbranch[0].key;
+ if (key_inum(c, key1) == key_inum(c, key) &&
+ key_type(c, key1) == UBIFS_DATA_KEY) {
+ key1 = &znode->zbranch[n].key;
+ if (key_inum(c, key1) != key_inum(c, key) ||
+ key_type(c, key1) != UBIFS_DATA_KEY) {
+ keep = n;
+ move = c->fanout - keep;
+ zi = znode;
+ goto do_split;
+ }
+ }
+ }
}
if (appending) {
zbr->znode->parent = zn;
}
+do_split:
+
__set_bit(DIRTY_ZNODE, &zn->flags);
atomic_long_inc(&c->dirty_zn_cnt);
/* Insert new znode (produced by spitting) into the parent */
if (zp) {
- i = n;
+ if (n == 0 && zi == znode && znode->iip == 0)
+ correct_parent_keys(c, znode);
+
/* Locate insertion point */
n = znode->iip + 1;
- if (appending && n != c->fanout)
- appending = 0;
-
- if (i == 0 && zi == znode && znode->iip == 0)
- correct_parent_keys(c, znode);
/* Tail recursion */
zbr->key = zn->zbranch[0].key;
if (found) {
/* Ensure the znode is dirtied */
if (znode->cnext || !ubifs_zn_dirty(znode)) {
- znode = dirty_cow_bottom_up(c,
- znode);
- if (IS_ERR(znode)) {
- err = PTR_ERR(znode);
- goto out_unlock;
- }
+ znode = dirty_cow_bottom_up(c, znode);
+ if (IS_ERR(znode)) {
+ err = PTR_ERR(znode);
+ goto out_unlock;
+ }
}
zbr = &znode->zbranch[n];
lnc_free(zbr);
/* Ensure the znode is dirtied */
if (znode->cnext || !ubifs_zn_dirty(znode)) {
- znode = dirty_cow_bottom_up(c, znode);
- if (IS_ERR(znode)) {
- err = PTR_ERR(znode);
- goto out_unlock;
- }
+ znode = dirty_cow_bottom_up(c, znode);
+ if (IS_ERR(znode)) {
+ err = PTR_ERR(znode);
+ goto out_unlock;
+ }
}
if (found == 1) {
/* Ensure the znode is dirtied */
if (znode->cnext || !ubifs_zn_dirty(znode)) {
- znode = dirty_cow_bottom_up(c, znode);
- if (IS_ERR(znode)) {
- err = PTR_ERR(znode);
- goto out_unlock;
- }
+ znode = dirty_cow_bottom_up(c, znode);
+ if (IS_ERR(znode)) {
+ err = PTR_ERR(znode);
+ goto out_unlock;
+ }
}
/* Remove all keys in range except the first */
struct ubifs_dent_node *xent, *pxent = NULL;
struct qstr nm = { .name = NULL };
- dbg_tnc("ino %lu", inum);
+ dbg_tnc("ino %lu", (unsigned long)inum);
/*
* Walk all extended attribute entries and remove them together with
}
xattr_inum = le64_to_cpu(xent->inum);
- dbg_tnc("xent '%s', ino %lu", xent->name, xattr_inum);
+ dbg_tnc("xent '%s', ino %lu", xent->name,
+ (unsigned long)xattr_inum);
nm.name = xent->name;
nm.len = le16_to_cpu(xent->nlen);