tunnels: fix netns vs proto registration ordering
[safe/jmp/linux-2.6] / fs / ioctl.c
index d152856..6c75110 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/uaccess.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>
+#include <linux/falloc.h>
 
 #include <asm/ioctls.h>
 
@@ -70,9 +71,7 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
        res = get_user(block, p);
        if (res)
                return res;
-       lock_kernel();
        res = mapping->a_ops->bmap(mapping, block);
-       unlock_kernel();
        return put_user(res, p);
 }
 
@@ -163,20 +162,21 @@ EXPORT_SYMBOL(fiemap_check_flags);
 static int fiemap_check_ranges(struct super_block *sb,
                               u64 start, u64 len, u64 *new_len)
 {
+       u64 maxbytes = (u64) sb->s_maxbytes;
+
        *new_len = len;
 
        if (len == 0)
                return -EINVAL;
 
-       if (start > sb->s_maxbytes)
+       if (start > maxbytes)
                return -EFBIG;
 
        /*
         * Shrink request scope to what the fs can actually handle.
         */
-       if ((len > sb->s_maxbytes) ||
-           (sb->s_maxbytes - len) < start)
-               *new_len = sb->s_maxbytes - start;
+       if (len > maxbytes || (maxbytes - len) < start)
+               *new_len = maxbytes - start;
 
        return 0;
 }
@@ -231,7 +231,8 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 #define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits)
 #define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits);
 
-/*
+/**
+ * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
  * @inode - the inode to map
  * @arg - the pointer to userspace where we copy everything to
  * @get_block - the fs's get_block function
@@ -242,28 +243,32 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
  *
  * If it is possible to have data blocks beyond a hole past @inode->i_size, then
  * please do not use this function, it will stop at the first unmapped block
- * beyond i_size
+ * beyond i_size.
+ *
+ * If you use this function directly, you need to do your own locking. Use
+ * generic_block_fiemap if you want the locking done for you.
  */
-int generic_block_fiemap(struct inode *inode,
-                        struct fiemap_extent_info *fieinfo, u64 start,
-                        u64 len, get_block_t *get_block)
+
+int __generic_block_fiemap(struct inode *inode,
+                          struct fiemap_extent_info *fieinfo, u64 start,
+                          u64 len, get_block_t *get_block)
 {
        struct buffer_head tmp;
-       unsigned int start_blk;
+       unsigned long long start_blk;
        long long length = 0, map_len = 0;
        u64 logical = 0, phys = 0, size = 0;
        u32 flags = FIEMAP_EXTENT_MERGED;
-       int ret = 0;
+       int ret = 0, past_eof = 0, whole_file = 0;
 
        if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)))
                return ret;
 
        start_blk = logical_to_blk(inode, start);
 
-       /* guard against change */
-       mutex_lock(&inode->i_mutex);
-
        length = (long long)min_t(u64, len, i_size_read(inode));
+       if (length < len)
+               whole_file = 1;
+
        map_len = length;
 
        do {
@@ -280,11 +285,26 @@ int generic_block_fiemap(struct inode *inode,
 
                /* HOLE */
                if (!buffer_mapped(&tmp)) {
+                       length -= blk_to_logical(inode, 1);
+                       start_blk++;
+
+                       /*
+                        * we want to handle the case where there is an
+                        * allocated block at the front of the file, and then
+                        * nothing but holes up to the end of the file properly,
+                        * to make sure that extent at the front gets properly
+                        * marked with FIEMAP_EXTENT_LAST
+                        */
+                       if (!past_eof &&
+                           blk_to_logical(inode, start_blk) >=
+                           blk_to_logical(inode, 0)+i_size_read(inode))
+                               past_eof = 1;
+
                        /*
                         * first hole after going past the EOF, this is our
                         * last extent
                         */
-                       if (length <= 0) {
+                       if (past_eof && size) {
                                flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST;
                                ret = fiemap_fill_next_extent(fieinfo, logical,
                                                              phys, size,
@@ -292,15 +312,37 @@ int generic_block_fiemap(struct inode *inode,
                                break;
                        }
 
-                       length -= blk_to_logical(inode, 1);
-
                        /* if we have holes up to/past EOF then we're done */
-                       if (length <= 0)
+                       if (length <= 0 || past_eof)
                                break;
-
-                       start_blk++;
                } else {
-                       if (length <= 0 && size) {
+                       /*
+                        * we have gone over the length of what we wanted to
+                        * map, and it wasn't the entire file, so add the extent
+                        * we got last time and exit.
+                        *
+                        * This is for the case where say we want to map all the
+                        * way up to the second to the last block in a file, but
+                        * the last block is a hole, making the second to last
+                        * block FIEMAP_EXTENT_LAST.  In this case we want to
+                        * see if there is a hole after the second to last block
+                        * so we can mark it properly.  If we found data after
+                        * we exceeded the length we were requesting, then we
+                        * are good to go, just add the extent to the fieinfo
+                        * and break
+                        */
+                       if (length <= 0 && !whole_file) {
+                               ret = fiemap_fill_next_extent(fieinfo, logical,
+                                                             phys, size,
+                                                             flags);
+                               break;
+                       }
+
+                       /*
+                        * if size != 0 then we know we already have an extent
+                        * to add, so add it.
+                        */
+                       if (size) {
                                ret = fiemap_fill_next_extent(fieinfo, logical,
                                                              phys, size,
                                                              flags);
@@ -317,35 +359,83 @@ int generic_block_fiemap(struct inode *inode,
                        start_blk += logical_to_blk(inode, size);
 
                        /*
-                        * if we are past the EOF we need to loop again to see
-                        * if there is a hole so we can mark this extent as the
-                        * last one, and if not keep mapping things until we
-                        * find a hole, or we run out of slots in the extent
-                        * array
+                        * If we are past the EOF, then we need to make sure as
+                        * soon as we find a hole that the last extent we found
+                        * is marked with FIEMAP_EXTENT_LAST
                         */
-                       if (length <= 0)
-                               continue;
-
-                       ret = fiemap_fill_next_extent(fieinfo, logical, phys,
-                                                     size, flags);
-                       if (ret)
-                               break;
+                       if (!past_eof &&
+                           logical+size >=
+                           blk_to_logical(inode, 0)+i_size_read(inode))
+                               past_eof = 1;
                }
                cond_resched();
        } while (1);
 
-       mutex_unlock(&inode->i_mutex);
-
        /* if ret is 1 then we just hit the end of the extent array */
        if (ret == 1)
                ret = 0;
 
        return ret;
 }
+EXPORT_SYMBOL(__generic_block_fiemap);
+
+/**
+ * generic_block_fiemap - FIEMAP for block based inodes
+ * @inode: The inode to map
+ * @fieinfo: The mapping information
+ * @start: The logical byte offset at which to start mapping
+ * @len: The length of the extent to attempt to map
+ * @get_block: The block mapping function for the fs
+ *
+ * Calls __generic_block_fiemap to map the inode, after taking
+ * the inode's mutex lock.
+ */
+
+int generic_block_fiemap(struct inode *inode,
+                        struct fiemap_extent_info *fieinfo, u64 start,
+                        u64 len, get_block_t *get_block)
+{
+       int ret;
+       mutex_lock(&inode->i_mutex);
+       ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
+       mutex_unlock(&inode->i_mutex);
+       return ret;
+}
 EXPORT_SYMBOL(generic_block_fiemap);
 
 #endif  /*  CONFIG_BLOCK  */
 
+/*
+ * This provides compatibility with legacy XFS pre-allocation ioctls
+ * which predate the fallocate syscall.
+ *
+ * Only the l_start, l_len and l_whence fields of the 'struct space_resv'
+ * are used here; the rest are ignored.
+ */
+int ioctl_preallocate(struct file *filp, void __user *argp)
+{
+       struct inode *inode = filp->f_path.dentry->d_inode;
+       struct space_resv sr;
+
+       if (copy_from_user(&sr, argp, sizeof(sr)))
+               return -EFAULT;
+
+       switch (sr.l_whence) {
+       case SEEK_SET:
+               break;
+       case SEEK_CUR:
+               sr.l_start += filp->f_pos;
+               break;
+       case SEEK_END:
+               sr.l_start += i_size_read(inode);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return do_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len);
+}
+
 static int file_ioctl(struct file *filp, unsigned int cmd,
                unsigned long arg)
 {
@@ -355,12 +445,11 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
        switch (cmd) {
        case FIBMAP:
                return ioctl_fibmap(filp, p);
-       case FS_IOC_FIEMAP:
-               return ioctl_fiemap(filp, arg);
-       case FIGETBSZ:
-               return put_user(inode->i_sb->s_blocksize, p);
        case FIONREAD:
                return put_user(i_size_read(inode) - filp->f_pos, p);
+       case FS_IOC_RESVSP:
+       case FS_IOC_RESVSP64:
+               return ioctl_preallocate(filp, p);
        }
 
        return vfs_ioctl(filp, cmd, arg);
@@ -380,10 +469,12 @@ static int ioctl_fionbio(struct file *filp, int __user *argp)
        if (O_NONBLOCK != O_NDELAY)
                flag |= O_NDELAY;
 #endif
+       spin_lock(&filp->f_lock);
        if (on)
                filp->f_flags |= flag;
        else
                filp->f_flags &= ~flag;
+       spin_unlock(&filp->f_lock);
        return error;
 }
 
@@ -400,21 +491,50 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
 
        /* Did FASYNC state change ? */
        if ((flag ^ filp->f_flags) & FASYNC) {
-               if (filp->f_op && filp->f_op->fasync) {
-                       lock_kernel();
+               if (filp->f_op && filp->f_op->fasync)
+                       /* fasync() adjusts filp->f_flags */
                        error = filp->f_op->fasync(fd, filp, on);
-                       unlock_kernel();
-               } else
+               else
                        error = -ENOTTY;
        }
-       if (error)
-               return error;
+       return error < 0 ? error : 0;
+}
 
-       if (on)
-               filp->f_flags |= FASYNC;
-       else
-               filp->f_flags &= ~FASYNC;
-       return error;
+static int ioctl_fsfreeze(struct file *filp)
+{
+       struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       /* If filesystem doesn't support freeze feature, return. */
+       if (sb->s_op->freeze_fs == NULL)
+               return -EOPNOTSUPP;
+
+       /* If a blockdevice-backed filesystem isn't specified, return. */
+       if (sb->s_bdev == NULL)
+               return -EINVAL;
+
+       /* Freeze */
+       sb = freeze_bdev(sb->s_bdev);
+       if (IS_ERR(sb))
+               return PTR_ERR(sb);
+       return 0;
+}
+
+static int ioctl_fsthaw(struct file *filp)
+{
+       struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
+       if (sb->s_bdev == NULL)
+               return -EINVAL;
+
+       /* Thaw */
+       return thaw_bdev(sb->s_bdev, sb);
 }
 
 /*
@@ -458,6 +578,25 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
                } else
                        error = -ENOTTY;
                break;
+
+       case FIFREEZE:
+               error = ioctl_fsfreeze(filp);
+               break;
+
+       case FITHAW:
+               error = ioctl_fsthaw(filp);
+               break;
+
+       case FS_IOC_FIEMAP:
+               return ioctl_fiemap(filp, arg);
+
+       case FIGETBSZ:
+       {
+               struct inode *inode = filp->f_path.dentry->d_inode;
+               int __user *p = (int __user *)arg;
+               return put_user(inode->i_sb->s_blocksize, p);
+       }
+
        default:
                if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
                        error = file_ioctl(filp, cmd, arg);
@@ -468,7 +607,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
        return error;
 }
 
-asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
 {
        struct file *filp;
        int error = -EBADF;