ocfs2: Add ioctl for reflink.
author Tao Ma <tao.ma@oracle.com>
Mon, 21 Sep 2009 03:25:14 +0000 (11:25 +0800)
committer Joel Becker <joel.becker@oracle.com>
Wed, 23 Sep 2009 03:09:51 +0000 (20:09 -0700)
The ioctl takes three parameters (old_path, new_path and preserve)
and calls vfs_reflink. It is useful when backporting the reflink
feature to old kernels.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
fs/ocfs2/ioctl.c
fs/ocfs2/ocfs2_fs.h
fs/ocfs2/refcounttree.c
fs/ocfs2/refcounttree.h

index 9fcd36d..a68d0e4 100644 (file)
@@ -22,6 +22,7 @@
 #include "ocfs2_fs.h"
 #include "ioctl.h"
 #include "resize.h"
+#include "refcounttree.h"
 
 #include <linux/ext2_fs.h>
 
@@ -116,6 +117,9 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        int status;
        struct ocfs2_space_resv sr;
        struct ocfs2_new_group_input input;
+       struct reflink_arguments args;
+       const char *old_path, *new_path;
+       bool preserve;
 
        switch (cmd) {
        case OCFS2_IOC_GETFLAGS:
@@ -161,6 +165,15 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        return -EFAULT;
 
                return ocfs2_group_add(inode, &input);
+       case OCFS2_IOC_REFLINK:
+               if (copy_from_user(&args, (struct reflink_arguments *)arg,
+                                  sizeof(args)))
+                       return -EFAULT;
+               old_path = (const char *)(unsigned long)args.old_path;
+               new_path = (const char *)(unsigned long)args.new_path;
+               preserve = (args.preserve != 0);
+
+               return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve);
        default:
                return -ENOTTY;
        }
@@ -183,6 +196,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
        case OCFS2_IOC_GROUP_EXTEND:
        case OCFS2_IOC_GROUP_ADD:
        case OCFS2_IOC_GROUP_ADD64:
+       case OCFS2_IOC_REFLINK:
                break;
        default:
                return -ENOIOCTLCMD;
index 4a4565b..e9431e4 100644 (file)
@@ -301,6 +301,15 @@ struct ocfs2_new_group_input {
 #define OCFS2_IOC_GROUP_ADD    _IOW('o', 2,struct ocfs2_new_group_input)
 #define OCFS2_IOC_GROUP_ADD64  _IOW('o', 3,struct ocfs2_new_group_input)
 
+/*
+ * Used to pass 2 file names to reflink.
+ *
+ * This is the argument block for OCFS2_IOC_REFLINK.  The ioctl handler
+ * copies it in from user space, casts old_path and new_path (via
+ * unsigned long) to path-string pointers and reduces preserve to a
+ * boolean before calling ocfs2_reflink_ioctl().
+ *
+ * NOTE(review): every member is a __u64, presumably so the layout is
+ * the same for 32-bit and 64-bit callers -- confirm against the compat
+ * ioctl path.
+ */
+struct reflink_arguments {
+       __u64 old_path; /* user-space address of the source path string */
+       __u64 new_path; /* user-space address of the target path string */
+       __u64 preserve; /* non-zero: preserve attributes; zero: do not */
+};
+#define OCFS2_IOC_REFLINK      _IOW('o', 4, struct reflink_arguments)
+
+
 /*
  * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
  */
index 7a8a384..60287fc 100644 (file)
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
 #include <linux/swap.h>
+#include <linux/security.h>
+#include <linux/fsnotify.h>
+#include <linux/quotaops.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
 
 struct ocfs2_cow_context {
        struct inode *inode;
@@ -4145,3 +4150,164 @@ out:
 
        return error;
 }
+
+/*
+ * Below here are the bits used by OCFS2_IOC_REFLINK() to fake
+ * sys_reflink().  This will go away when vfs_reflink() exists in
+ * fs/namei.c.
+ */
+
+/*
+ * Local copy of the VFS may_create() check.
+ *
+ * Returns -EEXIST when @child already has an inode, -ENOENT when @dir
+ * is flagged IS_DEADDIR, and otherwise whatever inode_permission()
+ * reports for write + exec access on @dir.
+ */
+static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
+{
+       int ret;
+
+       if (child->d_inode)
+               ret = -EEXIST;
+       else if (IS_DEADDIR(dir))
+               ret = -ENOENT;
+       else
+               ret = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+
+       return ret;
+}
+
+/*
+ * Local copy of user_path_parent().
+ *
+ * Copies the user-supplied @path into the kernel with getname() and
+ * looks it up with LOOKUP_PARENT, filling @nd.  On success the kernel
+ * copy of the name is handed back through @name and the caller must
+ * putname() it; on failure the name is released here and the error
+ * from getname() or path_lookup() is returned.
+ */
+static int ocfs2_user_path_parent(const char __user *path,
+                                 struct nameidata *nd, char **name)
+{
+       char *kname;
+       int ret;
+
+       kname = getname(path);
+       if (IS_ERR(kname))
+               return PTR_ERR(kname);
+
+       ret = path_lookup(kname, LOOKUP_PARENT, nd);
+       if (ret) {
+               /* Lookup failed: the name is not handed to the caller. */
+               putname(kname);
+               return ret;
+       }
+
+       *name = kname;
+       return 0;
+}
+
+/**
+ * ocfs2_vfs_reflink - Create a reference-counted link
+ *
+ * @old_dentry: source dentry; its inode is the file being reflinked
+ * @dir:        inode of the directory that receives the new entry
+ * @new_dentry: dentry for the target name; must not have an inode yet
+ * @preserve:   if true, preserve all file attributes
+ *
+ * Runs the permission and sanity checks below, then calls
+ * ocfs2_reflink() with the source inode's i_mutex held.  Returns the
+ * first failing check's error, or the result of ocfs2_reflink().
+ */
+int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
+                     struct dentry *new_dentry, bool preserve)
+{
+       struct inode *src = old_dentry->d_inode;
+       int ret;
+
+       if (!src)
+               return -ENOENT;
+
+       ret = ocfs2_may_create(dir, new_dentry);
+       if (ret)
+               return ret;
+
+       /* Source file and target directory must share a superblock. */
+       if (src->i_sb != dir->i_sb)
+               return -EXDEV;
+
+       /* Append-only and immutable files cannot be reflinked. */
+       if (IS_APPEND(src) || IS_IMMUTABLE(src))
+               return -EPERM;
+
+       /* Nor can anything other than a regular file. */
+       if (!S_ISREG(src->i_mode))
+               return -EPERM;
+
+       if (preserve) {
+               /*
+                * Preserving ownership requires the right to do so:
+                * the caller must match the file's owner and be in its
+                * group, or else hold CAP_CHOWN.
+                */
+               if (current_fsuid() != src->i_uid) {
+                       if (!capable(CAP_CHOWN))
+                               return -EPERM;
+               }
+               if (!in_group_p(src->i_gid)) {
+                       if (!capable(CAP_CHOWN))
+                               return -EPERM;
+               }
+       } else {
+               /*
+                * Without preserve the result is not a pure snapshot,
+                * so the caller needs read permission on the source.
+                */
+               ret = inode_permission(src, MAY_READ);
+               if (ret)
+                       return ret;
+       }
+
+       mutex_lock(&src->i_mutex);
+       vfs_dq_init(dir);
+       ret = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
+       mutex_unlock(&src->i_mutex);
+
+       if (ret)
+               return ret;
+
+       /* Only announce the new entry once the reflink has succeeded. */
+       fsnotify_create(dir, new_dentry);
+       return 0;
+}
+/*
+ * Path-based entry point for the reflink ioctl; most of the sequence
+ * is copied from sys_linkat.
+ *
+ * @inode is only used to reach the superblock and refuse the request
+ * with -EOPNOTSUPP when the volume has no refcount tree support.
+ * @oldname and @newname are user-space path strings, and @preserve is
+ * passed straight through to ocfs2_vfs_reflink().
+ *
+ * Resources are released in reverse order of acquisition through the
+ * label ladder at the bottom.
+ */
+int ocfs2_reflink_ioctl(struct inode *inode,
+                       const char __user *oldname,
+                       const char __user *newname,
+                       bool preserve)
+{
+       struct path src_path;
+       struct nameidata parent_nd;
+       struct dentry *target;
+       char *target_name = NULL;
+       int ret;
+
+       if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
+               return -EOPNOTSUPP;
+
+       ret = user_path_at(AT_FDCWD, oldname, 0, &src_path);
+       if (ret) {
+               mlog_errno(ret);
+               return ret;
+       }
+
+       ret = ocfs2_user_path_parent(newname, &parent_nd, &target_name);
+       if (ret) {
+               mlog_errno(ret);
+               goto put_source;
+       }
+
+       /* Source and target parent must be on the same mount. */
+       if (src_path.mnt != parent_nd.path.mnt) {
+               ret = -EXDEV;
+               goto release_parent;
+       }
+
+       /*
+        * NOTE(review): lookup_create() is expected to return with the
+        * parent directory's i_mutex held even on failure -- the mutex
+        * is never taken here but is dropped at unlock_parent in both
+        * cases.
+        */
+       target = lookup_create(&parent_nd, 0);
+       if (IS_ERR(target)) {
+               ret = PTR_ERR(target);
+               mlog_errno(ret);
+               goto unlock_parent;
+       }
+
+       ret = mnt_want_write(parent_nd.path.mnt);
+       if (ret) {
+               mlog_errno(ret);
+               goto put_target;
+       }
+
+       ret = ocfs2_vfs_reflink(src_path.dentry,
+                               parent_nd.path.dentry->d_inode,
+                               target, preserve);
+       mnt_drop_write(parent_nd.path.mnt);
+put_target:
+       dput(target);
+unlock_parent:
+       mutex_unlock(&parent_nd.path.dentry->d_inode->i_mutex);
+release_parent:
+       path_put(&parent_nd.path);
+       putname(target_name);
+put_source:
+       path_put(&src_path);
+
+       return ret;
+}
index 2c238e6..c1d19b1 100644 (file)
@@ -99,4 +99,8 @@ int ocfs2_increase_refcount(handle_t *handle,
                            u64 cpos, u32 len,
                            struct ocfs2_alloc_context *meta_ac,
                            struct ocfs2_cached_dealloc_ctxt *dealloc);
+int ocfs2_reflink_ioctl(struct inode *inode,
+                       const char __user *oldname,
+                       const char __user *newname,
+                       bool preserve);
 #endif /* OCFS2_REFCOUNTTREE_H */