#include "extent_map.h"
#include "aops.h"
#include "xattr.h"
+#include "namei.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/swap.h>
+#include <linux/security.h>
+#include <linux/fsnotify.h>
+#include <linux/quotaops.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
struct ocfs2_cow_context {
struct inode *inode;
static int ocfs2_change_refcount_rec(handle_t *handle,
struct ocfs2_caching_info *ci,
struct buffer_head *ref_leaf_bh,
- int index, int change)
+ int index, int merge, int change)
{
int ret;
struct ocfs2_refcount_block *rb =
}
le16_add_cpu(&rl->rl_used, -1);
- } else
+ } else if (merge)
ocfs2_refcount_rec_merge(rb, index);
ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
struct buffer_head *ref_root_bh,
struct buffer_head *ref_leaf_bh,
struct ocfs2_refcount_rec *rec,
- int index,
+ int index, int merge,
struct ocfs2_alloc_context *meta_ac)
{
int ret;
le16_add_cpu(&rf_list->rl_used, 1);
- ocfs2_refcount_rec_merge(rb, index);
+ if (merge)
+ ocfs2_refcount_rec_merge(rb, index);
ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
if (ret) {
struct buffer_head *ref_root_bh,
struct buffer_head *ref_leaf_bh,
struct ocfs2_refcount_rec *split_rec,
- int index,
+ int index, int merge,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc)
{
le32_to_cpu(split_rec->r_refcount),
(unsigned long long)ref_leaf_bh->b_blocknr, index);
- ocfs2_refcount_rec_merge(rb, index);
+ if (merge)
+ ocfs2_refcount_rec_merge(rb, index);
}
ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
static int __ocfs2_increase_refcount(handle_t *handle,
struct ocfs2_caching_info *ci,
struct buffer_head *ref_root_bh,
- u64 cpos, u32 len,
+ u64 cpos, u32 len, int merge,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc)
{
"count %u\n", (unsigned long long)cpos, set_len,
le32_to_cpu(rec.r_refcount));
ret = ocfs2_change_refcount_rec(handle, ci,
- ref_leaf_bh, index, 1);
+ ref_leaf_bh, index,
+ merge, 1);
if (ret) {
mlog_errno(ret);
goto out;
set_len);
ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh,
ref_leaf_bh,
- &rec, index, meta_ac);
+ &rec, index,
+ merge, meta_ac);
if (ret) {
mlog_errno(ret);
goto out;
set_len, le32_to_cpu(rec.r_refcount));
ret = ocfs2_split_refcount_rec(handle, ci,
ref_root_bh, ref_leaf_bh,
- &rec, index,
+ &rec, index, merge,
meta_ac, dealloc);
if (ret) {
mlog_errno(ret);
return ret;
}
+int ocfs2_increase_refcount(handle_t *handle,
+ struct ocfs2_caching_info *ci,
+ struct buffer_head *ref_root_bh,
+ u64 cpos, u32 len,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+ return __ocfs2_increase_refcount(handle, ci, ref_root_bh,
+ cpos, len, 1,
+ meta_ac, dealloc);
+}
+
static int ocfs2_decrease_refcount_rec(handle_t *handle,
struct ocfs2_caching_info *ci,
struct buffer_head *ref_root_bh,
if (cpos == le64_to_cpu(rec->r_cpos) &&
len == le32_to_cpu(rec->r_clusters))
ret = ocfs2_change_refcount_rec(handle, ci,
- ref_leaf_bh, index, -1);
+ ref_leaf_bh, index, 1, -1);
else {
struct ocfs2_refcount_rec split = *rec;
split.r_cpos = cpu_to_le64(cpos);
le32_to_cpu(rec->r_clusters));
ret = ocfs2_split_refcount_rec(handle, ci,
ref_root_bh, ref_leaf_bh,
- &split, index,
+ &split, index, 1,
meta_ac, dealloc);
}
* we gonna touch and whether we need to create new blocks.
*
* Normally the refcount blocks store these refcount should be
- * continguous also, so that we can get the number easily.
+ * contiguous also, so that we can get the number easily.
* As for meta_ac, we will at most add split 2 refcount record and
* 2 more refcount block, so just check it in a rough way.
*
}
ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
- p_cluster, num_clusters,
+ p_cluster, num_clusters, 0,
meta_ac, dealloc);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
- ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
- p_cluster, num_clusters,
- meta_ac, dealloc);
+ ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
+ p_cluster, num_clusters,
+ meta_ac, dealloc);
if (ret)
mlog_errno(ret);
static int ocfs2_complete_reflink(struct inode *s_inode,
struct buffer_head *s_bh,
struct inode *t_inode,
- struct buffer_head *t_bh)
+ struct buffer_head *t_bh,
+ bool preserve)
{
int ret;
handle_t *handle;
di->i_size = s_di->i_size;
di->i_dyn_features = s_di->i_dyn_features;
di->i_attr = s_di->i_attr;
- di->i_uid = s_di->i_uid;
- di->i_gid = s_di->i_gid;
- di->i_mode = s_di->i_mode;
- /*
- * update time.
- * we want mtime to appear identical to the source and update ctime.
- */
- t_inode->i_ctime = CURRENT_TIME;
+ if (preserve) {
+ di->i_uid = s_di->i_uid;
+ di->i_gid = s_di->i_gid;
+ di->i_mode = s_di->i_mode;
- di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
- di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
+ /*
+ * update time.
+ * we want mtime to appear identical to the source and
+ * update ctime.
+ */
+ t_inode->i_ctime = CURRENT_TIME;
- t_inode->i_mtime = s_inode->i_mtime;
- di->i_mtime = s_di->i_mtime;
- di->i_mtime_nsec = s_di->i_mtime_nsec;
+ di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
+ di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
+
+ t_inode->i_mtime = s_inode->i_mtime;
+ di->i_mtime = s_di->i_mtime;
+ di->i_mtime_nsec = s_di->i_mtime_nsec;
+ }
ocfs2_journal_dirty(handle, t_bh);
static int ocfs2_create_reflink_node(struct inode *s_inode,
struct buffer_head *s_bh,
struct inode *t_inode,
- struct buffer_head *t_bh)
+ struct buffer_head *t_bh,
+ bool preserve)
{
int ret;
struct buffer_head *ref_root_bh = NULL;
goto out_unlock_refcount;
}
- ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh);
+ ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh, preserve);
if (ret)
mlog_errno(ret);
return ret;
}
+
+static int __ocfs2_reflink(struct dentry *old_dentry,
+ struct buffer_head *old_bh,
+ struct inode *new_inode,
+ bool preserve)
+{
+ int ret;
+ struct inode *inode = old_dentry->d_inode;
+ struct buffer_head *new_bh = NULL;
+
+ ret = filemap_fdatawrite(inode->i_mapping);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_attach_refcount_tree(inode, old_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ mutex_lock(&new_inode->i_mutex);
+ ret = ocfs2_inode_lock(new_inode, &new_bh, 1);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+
+ ret = ocfs2_create_reflink_node(inode, old_bh,
+ new_inode, new_bh, preserve);
+ if (ret) {
+ mlog_errno(ret);
+ goto inode_unlock;
+ }
+
+ if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+ ret = ocfs2_reflink_xattrs(inode, old_bh,
+ new_inode, new_bh,
+ preserve);
+ if (ret)
+ mlog_errno(ret);
+ }
+inode_unlock:
+ ocfs2_inode_unlock(new_inode, 1);
+ brelse(new_bh);
+out_unlock:
+ mutex_unlock(&new_inode->i_mutex);
+out:
+ if (!ret) {
+ ret = filemap_fdatawait(inode->i_mapping);
+ if (ret)
+ mlog_errno(ret);
+ }
+ return ret;
+}
+
+static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *new_dentry, bool preserve)
+{
+ int error;
+ struct inode *inode = old_dentry->d_inode;
+ struct buffer_head *old_bh = NULL;
+ struct inode *new_orphan_inode = NULL;
+
+ if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
+ return -EOPNOTSUPP;
+
+ error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
+ &new_orphan_inode);
+ if (error) {
+ mlog_errno(error);
+ goto out;
+ }
+
+ error = ocfs2_inode_lock(inode, &old_bh, 1);
+ if (error) {
+ mlog_errno(error);
+ goto out;
+ }
+
+ down_write(&OCFS2_I(inode)->ip_xattr_sem);
+ down_write(&OCFS2_I(inode)->ip_alloc_sem);
+ error = __ocfs2_reflink(old_dentry, old_bh,
+ new_orphan_inode, preserve);
+ up_write(&OCFS2_I(inode)->ip_alloc_sem);
+ up_write(&OCFS2_I(inode)->ip_xattr_sem);
+
+ ocfs2_inode_unlock(inode, 1);
+ brelse(old_bh);
+
+ if (error) {
+ mlog_errno(error);
+ goto out;
+ }
+
+ /* If the security isn't preserved, we need to re-initialize them. */
+ if (!preserve) {
+ error = ocfs2_init_security_and_acl(dir, new_orphan_inode);
+ if (error)
+ mlog_errno(error);
+ }
+out:
+ if (!error) {
+ error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
+ new_dentry);
+ if (error)
+ mlog_errno(error);
+ }
+
+ if (new_orphan_inode) {
+ /*
+ * We need to open_unlock the inode no matter whether we
+ * succeed or not, so that other nodes can delete it later.
+ */
+ ocfs2_open_unlock(new_orphan_inode);
+ if (error)
+ iput(new_orphan_inode);
+ }
+
+ return error;
+}
+
+/*
+ * Below here are the bits used by OCFS2_IOC_REFLINK() to fake
+ * sys_reflink(). This will go away when vfs_reflink() exists in
+ * fs/namei.c.
+ */
+
+/* copied from may_create in VFS. */
+static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
+{
+ if (child->d_inode)
+ return -EEXIST;
+ if (IS_DEADDIR(dir))
+ return -ENOENT;
+ return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+}
+
+/* copied from user_path_parent. */
+static int ocfs2_user_path_parent(const char __user *path,
+ struct nameidata *nd, char **name)
+{
+ char *s = getname(path);
+ int error;
+
+ if (IS_ERR(s))
+ return PTR_ERR(s);
+
+ error = path_lookup(s, LOOKUP_PARENT, nd);
+ if (error)
+ putname(s);
+ else
+ *name = s;
+
+ return error;
+}
+
+/**
+ * ocfs2_vfs_reflink - Create a reference-counted link
+ *
+ * @old_dentry: source dentry + inode
+ * @dir: directory to create the target
+ * @new_dentry: target dentry
+ * @preserve: if true, preserve all file attributes
+ */
+int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *new_dentry, bool preserve)
+{
+ struct inode *inode = old_dentry->d_inode;
+ int error;
+
+ if (!inode)
+ return -ENOENT;
+
+ error = ocfs2_may_create(dir, new_dentry);
+ if (error)
+ return error;
+
+ if (dir->i_sb != inode->i_sb)
+ return -EXDEV;
+
+ /*
+ * A reflink to an append-only or immutable file cannot be created.
+ */
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ return -EPERM;
+
+ /* Only regular files can be reflinked. */
+ if (!S_ISREG(inode->i_mode))
+ return -EPERM;
+
+ /*
+ * If the caller wants to preserve ownership, they require the
+ * rights to do so.
+ */
+ if (preserve) {
+ if ((current_fsuid() != inode->i_uid) && !capable(CAP_CHOWN))
+ return -EPERM;
+ if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN))
+ return -EPERM;
+ }
+
+ /*
+ * If the caller is modifying any aspect of the attributes, they
+ * are not creating a snapshot. They need read permission on the
+ * file.
+ */
+ if (!preserve) {
+ error = inode_permission(inode, MAY_READ);
+ if (error)
+ return error;
+ }
+
+ mutex_lock(&inode->i_mutex);
+ vfs_dq_init(dir);
+ error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
+ mutex_unlock(&inode->i_mutex);
+ if (!error)
+ fsnotify_create(dir, new_dentry);
+ return error;
+}
+/*
+ * Most codes are copied from sys_linkat.
+ */
+int ocfs2_reflink_ioctl(struct inode *inode,
+ const char __user *oldname,
+ const char __user *newname,
+ bool preserve)
+{
+ struct dentry *new_dentry;
+ struct nameidata nd;
+ struct path old_path;
+ int error;
+ char *to = NULL;
+
+ if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
+ return -EOPNOTSUPP;
+
+ error = user_path_at(AT_FDCWD, oldname, 0, &old_path);
+ if (error) {
+ mlog_errno(error);
+ return error;
+ }
+
+ error = ocfs2_user_path_parent(newname, &nd, &to);
+ if (error) {
+ mlog_errno(error);
+ goto out;
+ }
+
+ error = -EXDEV;
+ if (old_path.mnt != nd.path.mnt)
+ goto out_release;
+ new_dentry = lookup_create(&nd, 0);
+ error = PTR_ERR(new_dentry);
+ if (IS_ERR(new_dentry)) {
+ mlog_errno(error);
+ goto out_unlock;
+ }
+
+ error = mnt_want_write(nd.path.mnt);
+ if (error) {
+ mlog_errno(error);
+ goto out_dput;
+ }
+
+ error = ocfs2_vfs_reflink(old_path.dentry,
+ nd.path.dentry->d_inode,
+ new_dentry, preserve);
+ mnt_drop_write(nd.path.mnt);
+out_dput:
+ dput(new_dentry);
+out_unlock:
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+out_release:
+ path_put(&nd.path);
+ putname(to);
+out:
+ path_put(&old_path);
+
+ return error;
+}