.xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf,
};
+static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
+{
+ struct ocfs2_xattr_value_buf vb;
+
+ if (ocfs2_xattr_is_local(loc->xl_entry))
+ return 0;
+
+ ocfs2_xa_fill_value_buf(loc, &vb);
+ return le32_to_cpu(vb.vb_xv->xr_clusters);
+}
+
static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
struct ocfs2_xattr_set_ctxt *ctxt)
{
}
}
+/*
+ * If we have a problem adjusting the size of an external value during
+ * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
+ * in an intermediate state. For example, the value may be partially
+ * truncated.
+ *
+ * If the value tree hasn't changed, the extend/truncate went nowhere.
+ * We have nothing to do. The caller can treat it as a straight error.
+ *
+ * If the value tree got partially truncated, we now have a corrupted
+ * extended attribute. We're going to wipe its entry and leak the
+ * clusters. Better to leak some storage than leave a corrupt entry.
+ *
+ * If the value tree grew, it obviously didn't grow enough for the
+ * new entry. We're not going to try and reclaim those clusters either.
+ * If there was already an external value there (orig_clusters != 0),
+ * the new clusters are attached safely and we can just leave the old
+ * value in place. If there was no external value there, we remove
+ * the entry.
+ *
+ * This way, the xattr block we store in the journal will be consistent.
+ * If the size change broke because of the journal, no changes will hit
+ * disk anyway.
+ */
+static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
+ const char *what,
+ unsigned int orig_clusters)
+{
+ unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
+ char *nameval_buf = ocfs2_xa_offset_pointer(loc,
+ le16_to_cpu(loc->xl_entry->xe_name_offset));
+
+ if (new_clusters < orig_clusters) {
+ mlog(ML_ERROR,
+ "Partial truncate while %s xattr %.*s. Leaking "
+ "%u clusters and removing the entry\n",
+ what, loc->xl_entry->xe_name_len, nameval_buf,
+ orig_clusters - new_clusters);
+ ocfs2_xa_remove_entry(loc);
+ } else if (!orig_clusters) {
+ mlog(ML_ERROR,
+ "Unable to allocate an external value for xattr "
+ "%.*s safely. Leaking %u clusters and removing the "
+ "entry\n",
+ loc->xl_entry->xe_name_len, nameval_buf,
+ new_clusters - orig_clusters);
+ ocfs2_xa_remove_entry(loc);
+ } else if (new_clusters > orig_clusters)
+ mlog(ML_ERROR,
+ "Unable to grow xattr %.*s safely. %u new clusters "
+ "have been added, but the value will not be "
+ "modified\n",
+ loc->xl_entry->xe_name_len, nameval_buf,
+ new_clusters - orig_clusters);
+}
+
static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
struct ocfs2_xattr_set_ctxt *ctxt)
{
int rc = 0;
+ unsigned int orig_clusters;
if (!ocfs2_xattr_is_local(loc->xl_entry)) {
+ orig_clusters = ocfs2_xa_value_clusters(loc);
rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
if (rc) {
mlog_errno(rc);
- goto out;
+ /*
+ * Since this is remove, we can return 0 if
+ * ocfs2_xa_cleanup_value_truncate() is going to
+ * wipe the entry anyway. So we check the
+ * cluster count as well.
+ */
+ if (orig_clusters != ocfs2_xa_value_clusters(loc))
+ rc = 0;
+ ocfs2_xa_cleanup_value_truncate(loc, "removing",
+ orig_clusters);
+ if (rc)
+ goto out;
}
}
{
int rc = 0;
int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
+ unsigned int orig_clusters;
char *nameval_buf;
int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
if (!xi_local)
ocfs2_xa_install_value_root(loc);
} else {
+ orig_clusters = ocfs2_xa_value_clusters(loc);
if (xi_local) {
rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
- if (rc < 0) {
+ if (rc < 0)
mlog_errno(rc);
- goto out;
- }
- memset(nameval_buf + name_size, 0,
- namevalue_size_xe(loc->xl_entry) -
- name_size);
+ else
+ memset(nameval_buf + name_size, 0,
+ namevalue_size_xe(loc->xl_entry) -
+ name_size);
} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
xi->xi_value_len) {
rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
ctxt);
- if (rc < 0) {
+ if (rc < 0)
mlog_errno(rc);
- goto out;
- }
+ }
+
+ if (rc) {
+ ocfs2_xa_cleanup_value_truncate(loc, "reusing",
+ orig_clusters);
+ goto out;
}
}
struct ocfs2_xattr_set_ctxt *ctxt)
{
int rc = 0;
+ unsigned int orig_clusters;
+ __le64 orig_value_size = 0;
rc = ocfs2_xa_check_space(loc, xi);
if (rc)
if (loc->xl_entry) {
if (ocfs2_xa_can_reuse_entry(loc, xi)) {
+ orig_value_size = loc->xl_entry->xe_value_size;
rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
if (rc)
goto out;
}
if (!ocfs2_xattr_is_local(loc->xl_entry)) {
+ orig_clusters = ocfs2_xa_value_clusters(loc);
rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
if (rc) {
mlog_errno(rc);
+ ocfs2_xa_cleanup_value_truncate(loc,
+ "overwriting",
+ orig_clusters);
goto out;
}
}
alloc_value:
if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
+ orig_clusters = ocfs2_xa_value_clusters(loc);
rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
- if (rc < 0)
+ if (rc < 0) {
+ /*
+ * If we tried to grow an existing external value,
+ * ocfs2_xa_cleanuP-value_truncate() is going to
+ * let it stand. We have to restore its original
+ * value size.
+ */
+ loc->xl_entry->xe_value_size = orig_value_size;
+ ocfs2_xa_cleanup_value_truncate(loc, "growing",
+ orig_clusters);
mlog_errno(rc);
+ }
}
out:
goto out;
}
+ /*
+ * From here on out, everything is going to modify the buffer a
+ * little. Errors are going to leave the xattr header in a
+ * sane state. Thus, even with errors we dirty the sucker.
+ */
+
/* Don't worry, we are never called with !xi_value and !xl_entry */
if (!xi->xi_value) {
ret = ocfs2_xa_remove(loc, ctxt);
- goto out;
+ goto out_dirty;
}
ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
if (ret) {
if (ret != -ENOSPC)
mlog_errno(ret);
- goto out;
+ goto out_dirty;
}
ret = ocfs2_xa_store_value(loc, xi, ctxt);
- if (ret) {
+ if (ret)
mlog_errno(ret);
- goto out;
- }
+out_dirty:
ocfs2_xa_journal_dirty(ctxt->handle, loc);
out:
{
struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
+ BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));
+
loc->xl_inode = inode;
loc->xl_ops = &ocfs2_xa_block_loc_ops;
loc->xl_storage = bh;
loc->xl_entry = entry;
-
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
- loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
- else {
- BUG_ON(entry);
- loc->xl_size = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
- }
+ loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
loc->xl_header =
(struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
loc->xl_size);
loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
}
-
-/*
- * ocfs2_xattr_set_entry()
- *
- * Set extended attribute entry into inode or block.
- *
- * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
- * We first insert tree root(ocfs2_xattr_value_root) like a normal value,
- * then set value in B tree with set_value_outside().
- */
-static int ocfs2_xattr_set_entry(struct inode *inode,
- struct ocfs2_xattr_info *xi,
- struct ocfs2_xattr_search *xs,
- struct ocfs2_xattr_set_ctxt *ctxt,
- int flag)
-{
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
- struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
- handle_t *handle = ctxt->handle;
- int ret;
- struct ocfs2_xa_loc loc;
-
- BUG_ON(!(flag & OCFS2_INLINE_XATTR_FL));
- BUG_ON(xs->xattr_bh != xs->inode_bh);
-
- ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
- ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
- xs->not_found ? NULL : xs->here);
- ret = ocfs2_xa_set(&loc, xi, ctxt);
- if (ret) {
- if (ret != -ENOSPC)
- mlog_errno(ret);
- goto out;
- }
- xs->here = loc.xl_entry;
-
- if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- unsigned int xattrsize = osb->s_xattr_inline_size;
-
- /*
- * Adjust extent record count or inline data size
- * to reserve space for extended attribute.
- */
- if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
- struct ocfs2_inline_data *idata = &di->id2.i_data;
- le16_add_cpu(&idata->id_count, -xattrsize);
- } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
- struct ocfs2_extent_list *el = &di->id2.i_list;
- le16_add_cpu(&el->l_count, -(xattrsize /
- sizeof(struct ocfs2_extent_rec)));
- }
- di->i_xattr_inline_size = cpu_to_le16(xattrsize);
- }
- /* Update xattr flag */
- spin_lock(&oi->ip_lock);
- oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL;
- di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
- spin_unlock(&oi->ip_lock);
-
- ret = ocfs2_journal_dirty(handle, xs->inode_bh);
- if (ret < 0)
- mlog_errno(ret);
-
-out:
- return ret;
-}
-
/*
* In xattr remove, if it is stored outside and refcounted, we may have
* the chance to split the refcount tree. So need the allocators.
return 0;
}
+static int ocfs2_xattr_ibody_init(struct inode *inode,
+ struct buffer_head *di_bh,
+ struct ocfs2_xattr_set_ctxt *ctxt)
+{
+ int ret;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ unsigned int xattrsize = osb->s_xattr_inline_size;
+
+ if (!ocfs2_xattr_has_space_inline(inode, di)) {
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /*
+ * Adjust extent record count or inline data size
+ * to reserve space for extended attribute.
+ */
+ if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ struct ocfs2_inline_data *idata = &di->id2.i_data;
+ le16_add_cpu(&idata->id_count, -xattrsize);
+ } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
+ struct ocfs2_extent_list *el = &di->id2.i_list;
+ le16_add_cpu(&el->l_count, -(xattrsize /
+ sizeof(struct ocfs2_extent_rec)));
+ }
+ di->i_xattr_inline_size = cpu_to_le16(xattrsize);
+
+ spin_lock(&oi->ip_lock);
+ oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
+ di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
+ spin_unlock(&oi->ip_lock);
+
+ ret = ocfs2_journal_dirty(ctxt->handle, di_bh);
+ if (ret < 0)
+ mlog_errno(ret);
+
+out:
+ return ret;
+}
+
/*
* ocfs2_xattr_ibody_set()
*
struct ocfs2_xattr_search *xs,
struct ocfs2_xattr_set_ctxt *ctxt)
{
+ int ret;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
- int ret;
+ struct ocfs2_xa_loc loc;
if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
return -ENOSPC;
}
}
- ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
- (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
+ if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
+ ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
+ if (ret) {
+ if (ret != -ENOSPC)
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
+ xs->not_found ? NULL : xs->here);
+ ret = ocfs2_xa_set(&loc, xi, ctxt);
+ if (ret) {
+ if (ret != -ENOSPC)
+ mlog_errno(ret);
+ goto out;
+ }
+ xs->here = loc.xl_entry;
+
out:
up_write(&oi->ip_alloc_sem);