mm: task dirty accounting fix
[safe/jmp/linux-2.6] / fs / gfs2 / ops_file.c
index bb11fd6..93fe41b 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/uio.h>
 #include <linux/blkdev.h>
 #include <linux/mm.h>
+#include <linux/mount.h>
 #include <linux/fs.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/ext2_fs.h>
 #include "glock.h"
 #include "glops.h"
 #include "inode.h"
-#include "lm.h"
 #include "log.h"
 #include "meta_io.h"
-#include "ops_file.h"
-#include "ops_vm.h"
 #include "quota.h"
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
 #include "eaops.h"
-
-/*
- * Most fields left uninitialised to catch anybody who tries to
- * use them. f_flags set to prevent file_accessed() from touching
- * any other part of this. Its use is purely as a flag so that we
- * know (in readpage()) whether or not do to locking.
- */
-struct file gfs2_internal_file_sentinel = {
-       .f_flags = O_NOATIME|O_RDONLY,
-};
-
-static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
-                          unsigned long offset, unsigned long size)
-{
-       char *kaddr;
-       unsigned long count = desc->count;
-
-       if (size > count)
-               size = count;
-
-       kaddr = kmap(page);
-       memcpy(desc->arg.data, kaddr + offset, size);
-       kunmap(page);
-
-       desc->count = count - size;
-       desc->written += size;
-       desc->arg.buf += size;
-       return size;
-}
-
-int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
-                      char *buf, loff_t *pos, unsigned size)
-{
-       struct inode *inode = &ip->i_inode;
-       read_descriptor_t desc;
-       desc.written = 0;
-       desc.arg.data = buf;
-       desc.count = size;
-       desc.error = 0;
-       do_generic_mapping_read(inode->i_mapping, ra_state,
-                               &gfs2_internal_file_sentinel, pos, &desc,
-                               gfs2_read_actor);
-       return desc.written ? desc.written : desc.error;
-}
+#include "ops_address.h"
 
 /**
  * gfs2_llseek - seek to a location in a file
@@ -107,11 +62,11 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
                                           &i_gh);
                if (!error) {
-                       error = remote_llseek(file, offset, origin);
+                       error = generic_file_llseek_unlocked(file, offset, origin);
                        gfs2_glock_dq_uninit(&i_gh);
                }
        } else
-               error = remote_llseek(file, offset, origin);
+               error = generic_file_llseek_unlocked(file, offset, origin);
 
        return error;
 }
@@ -133,8 +88,8 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
        u64 offset = file->f_pos;
        int error;
 
-       gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
-       error = gfs2_glock_nq_atime(&d_gh);
+       gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+       error = gfs2_glock_nq(&d_gh);
        if (error) {
                gfs2_holder_uninit(&d_gh);
                return error;
@@ -178,7 +133,6 @@ static const u32 fsflags_to_gfs2[32] = {
        [7] = GFS2_DIF_NOATIME,
        [12] = GFS2_DIF_EXHASH,
        [14] = GFS2_DIF_INHERIT_JDATA,
-       [20] = GFS2_DIF_INHERIT_DIRECTIO,
 };
 
 static const u32 gfs2_to_fsflags[32] = {
@@ -187,7 +141,6 @@ static const u32 gfs2_to_fsflags[32] = {
        [gfs2fl_AppendOnly] = FS_APPEND_FL,
        [gfs2fl_NoAtime] = FS_NOATIME_FL,
        [gfs2fl_ExHash] = FS_INDEX_FL,
-       [gfs2fl_InheritDirectio] = FS_DIRECTIO_FL,
        [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
 };
 
@@ -199,22 +152,18 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
        int error;
        u32 fsflags;
 
-       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
-       error = gfs2_glock_nq_atime(&gh);
+       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+       error = gfs2_glock_nq(&gh);
        if (error)
                return error;
 
-       fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags);
-       if (!S_ISDIR(inode->i_mode)) {
-               if (ip->i_di.di_flags & GFS2_DIF_JDATA)
-                       fsflags |= FS_JOURNAL_DATA_FL;
-               if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
-                       fsflags |= FS_DIRECTIO_FL;
-       }
+       fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_diskflags);
+       if (!S_ISDIR(inode->i_mode) && ip->i_diskflags & GFS2_DIF_JDATA)
+               fsflags |= FS_JOURNAL_DATA_FL;
        if (put_user(fsflags, ptr))
                error = -EFAULT;
 
-       gfs2_glock_dq_m(1, &gh);
+       gfs2_glock_dq(&gh);
        gfs2_holder_uninit(&gh);
        return error;
 }
@@ -222,30 +171,27 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
 void gfs2_set_inode_flags(struct inode *inode)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
-       struct gfs2_dinode_host *di = &ip->i_di;
        unsigned int flags = inode->i_flags;
 
        flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
-       if (di->di_flags & GFS2_DIF_IMMUTABLE)
+       if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
                flags |= S_IMMUTABLE;
-       if (di->di_flags & GFS2_DIF_APPENDONLY)
+       if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
                flags |= S_APPEND;
-       if (di->di_flags & GFS2_DIF_NOATIME)
+       if (ip->i_diskflags & GFS2_DIF_NOATIME)
                flags |= S_NOATIME;
-       if (di->di_flags & GFS2_DIF_SYNC)
+       if (ip->i_diskflags & GFS2_DIF_SYNC)
                flags |= S_SYNC;
        inode->i_flags = flags;
 }
 
 /* Flags that can be set by user space */
 #define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA|                   \
-                            GFS2_DIF_DIRECTIO|                 \
                             GFS2_DIF_IMMUTABLE|                \
                             GFS2_DIF_APPENDONLY|               \
                             GFS2_DIF_NOATIME|                  \
                             GFS2_DIF_SYNC|                     \
                             GFS2_DIF_SYSTEM|                   \
-                            GFS2_DIF_INHERIT_DIRECTIO|         \
                             GFS2_DIF_INHERIT_JDATA)
 
 /**
@@ -265,11 +211,15 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
        int error;
        u32 new_flags, flags;
 
-       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+       error = mnt_want_write(filp->f_path.mnt);
        if (error)
                return error;
 
-       flags = ip->i_di.di_flags;
+       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+       if (error)
+               goto out_drop_write;
+
+       flags = ip->i_diskflags;
        new_flags = (flags & ~mask) | (reqflags & mask);
        if ((new_flags ^ flags) == 0)
                goto out;
@@ -287,11 +237,20 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
            !capable(CAP_LINUX_IMMUTABLE))
                goto out;
        if (!IS_IMMUTABLE(inode)) {
-               error = permission(inode, MAY_WRITE, NULL);
+               error = gfs2_permission(inode, MAY_WRITE);
+               if (error)
+                       goto out;
+       }
+       if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
+               if (flags & GFS2_DIF_JDATA)
+                       gfs2_log_flush(sdp, ip->i_gl);
+               error = filemap_fdatawrite(inode->i_mapping);
+               if (error)
+                       goto out;
+               error = filemap_fdatawait(inode->i_mapping);
                if (error)
                        goto out;
        }
-
        error = gfs2_trans_begin(sdp, RES_DINODE, 0);
        if (error)
                goto out;
@@ -299,14 +258,17 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
        if (error)
                goto out_trans_end;
        gfs2_trans_add_bh(ip->i_gl, bh, 1);
-       ip->i_di.di_flags = new_flags;
+       ip->i_diskflags = new_flags;
        gfs2_dinode_out(ip, bh->b_data);
        brelse(bh);
        gfs2_set_inode_flags(inode);
+       gfs2_set_aops(inode);
 out_trans_end:
        gfs2_trans_end(sdp);
 out:
        gfs2_glock_dq_uninit(&gh);
+out_drop_write:
+       mnt_drop_write(filp->f_path.mnt);
        return error;
 }
 
@@ -320,8 +282,6 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
        if (!S_ISDIR(inode->i_mode)) {
                if (gfsflags & GFS2_DIF_INHERIT_JDATA)
                        gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
-               if (gfsflags & GFS2_DIF_INHERIT_DIRECTIO)
-                       gfsflags ^= (GFS2_DIF_DIRECTIO | GFS2_DIF_INHERIT_DIRECTIO);
                return do_gfs2_set_flags(filp, gfsflags, ~0);
        }
        return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
@@ -338,6 +298,125 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        return -ENOTTY;
 }
 
+/**
+ * gfs2_allocate_page_backing - Use bmap to allocate blocks
+ * @page: The (locked) page to allocate backing for
+ *
+ * Map every block behind the page via gfs2_block_map() in create mode,
+ * retrying with the remainder until the page is covered; blocks already
+ * allocated are fine. page->index is cast to u64 before shifting so the
+ * block number cannot wrap on 32-bit. Returns 0, or -EIO if unmapped.
+ */
+
+static int gfs2_allocate_page_backing(struct page *page)
+{
+       struct inode *inode = page->mapping->host;
+       struct buffer_head bh;
+       unsigned long size = PAGE_CACHE_SIZE;
+       u64 lblock = (u64)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+       do {
+               bh.b_state = 0;
+               bh.b_size = size;       /* ask for all that is still missing */
+               gfs2_block_map(inode, lblock, &bh, 1);
+               if (!buffer_mapped(&bh))
+                       return -EIO;
+               size -= bh.b_size;      /* b_size now holds what was mapped */
+               lblock += (bh.b_size >> inode->i_blkbits);
+       } while (size > 0);
+       return 0;
+}
+
+/**
+ * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable
+ * @vma: The virtual memory area
+ * @page: The page which is about to become writable
+ *
+ * Ensures disk blocks back the page before it becomes writable.
+ * Returns: 0 on success (or nothing to do), otherwise an error code
+ */
+
+static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+       struct gfs2_inode *ip = GFS2_I(inode);
+       struct gfs2_sbd *sdp = GFS2_SB(inode);
+       unsigned long last_index;
+       u64 pos = (u64)page->index << PAGE_CACHE_SHIFT; /* widen, then shift */
+       unsigned int data_blocks, ind_blocks, rblocks;
+       int alloc_required = 0;
+       struct gfs2_holder gh;
+       struct gfs2_alloc *al;
+       int ret;
+
+       gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+       ret = gfs2_glock_nq(&gh);
+       if (ret)
+               goto out;
+
+       set_bit(GIF_SW_PAGED, &ip->i_flags);
+       ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
+       if (ret || !alloc_required)     /* error, or already fully backed */
+               goto out_unlock;
+       ret = -ENOMEM;
+       al = gfs2_alloc_get(ip);
+       if (al == NULL)
+               goto out_unlock;
+
+       ret = gfs2_quota_lock_check(ip);
+       if (ret)
+               goto out_alloc_put;
+       gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
+       al->al_requested = data_blocks + ind_blocks;
+       ret = gfs2_inplace_reserve(ip);
+       if (ret)
+               goto out_quota_unlock;
+
+       rblocks = RES_DINODE + ind_blocks;
+       if (gfs2_is_jdata(ip))
+               rblocks += data_blocks ? data_blocks : 1;
+       if (ind_blocks || data_blocks)
+               rblocks += RES_STATFS + RES_QUOTA;
+       ret = gfs2_trans_begin(sdp, rblocks, 0);
+       if (ret)
+               goto out_trans_fail;
+
+       lock_page(page);
+       ret = -EINVAL;  /* page lies past the last page of the file */
+       last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT;
+       if (page->index > last_index)
+               goto out_unlock_page;
+       ret = 0;        /* not uptodate / no longer in this mapping: no error */
+       if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping)
+               goto out_unlock_page;
+       if (gfs2_is_stuffed(ip)) {
+               ret = gfs2_unstuff_dinode(ip, page);
+               if (ret)
+                       goto out_unlock_page;
+       }
+       ret = gfs2_allocate_page_backing(page);
+
+out_unlock_page:
+       unlock_page(page);
+       gfs2_trans_end(sdp);
+out_trans_fail:
+       gfs2_inplace_release(ip);
+out_quota_unlock:
+       gfs2_quota_unlock(ip);
+out_alloc_put:
+       gfs2_alloc_put(ip);
+out_unlock:
+       gfs2_glock_dq(&gh);
+out:
+       gfs2_holder_uninit(&gh);
+       return ret;
+}
+
+static struct vm_operations_struct gfs2_vm_ops = {
+       .page_mkwrite   = gfs2_page_mkwrite,    /* back the page with blocks */
+       .fault          = filemap_fault,
+};
+
 
 /**
  * gfs2_mmap -
@@ -353,21 +432,14 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
        struct gfs2_holder i_gh;
        int error;
 
-       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
-       error = gfs2_glock_nq_atime(&i_gh);
+       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+       error = gfs2_glock_nq(&i_gh);
        if (error) {
                gfs2_holder_uninit(&i_gh);
                return error;
        }
 
-       /* This is VM_MAYWRITE instead of VM_WRITE because a call
-          to mprotect() can turn on VM_WRITE later. */
-
-       if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
-           (VM_MAYSHARE | VM_MAYWRITE))
-               vma->vm_ops = &gfs2_vm_ops_sharewrite;
-       else
-               vma->vm_ops = &gfs2_vm_ops_private;
+       vma->vm_ops = &gfs2_vm_ops;
 
        gfs2_glock_dq_uninit(&i_gh);
 
@@ -405,16 +477,11 @@ static int gfs2_open(struct inode *inode, struct file *file)
                        goto fail;
 
                if (!(file->f_flags & O_LARGEFILE) &&
-                   ip->i_di.di_size > MAX_NON_LFS) {
+                   ip->i_disksize > MAX_NON_LFS) {
                        error = -EOVERFLOW;
                        goto fail_gunlock;
                }
 
-               /* Listen to the Direct I/O flag */
-
-               if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
-                       file->f_flags |= O_DIRECT;
-
                gfs2_glock_dq_uninit(&i_gh);
        }
 
@@ -516,6 +583,36 @@ static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl)
        return generic_setlease(file, arg, fl);
 }
 
+/* Call the lock module's lm_plock_get op; -EIO if SDF_SHUTDOWN is set. */
+static int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
+                             struct file *file, struct file_lock *fl)
+{
+       if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+               return -EIO;
+       return sdp->sd_lockstruct.ls_ops->lm_plock_get(
+                       sdp->sd_lockstruct.ls_lockspace, name, file, fl);
+}
+
+/* Call the lock module's lm_plock op; -EIO if SDF_SHUTDOWN is already set. */
+static int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
+                         struct file *file, int cmd, struct file_lock *fl)
+{
+       if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+               return -EIO;
+       return sdp->sd_lockstruct.ls_ops->lm_plock(
+                       sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl);
+}
+
+/* Call the lock module's lm_punlock op; -EIO if SDF_SHUTDOWN is set. */
+static int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
+                           struct file *file, struct file_lock *fl)
+{
+       if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+               return -EIO;
+       return sdp->sd_lockstruct.ls_ops->lm_punlock(
+                       sdp->sd_lockstruct.ls_lockspace, name, file, fl);
+}
+
 /**
  * gfs2_lock - acquire/release a posix lock on a file
  * @file: the file pointer
@@ -538,15 +635,6 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
        if (__mandatory_lock(&ip->i_inode))
                return -ENOLCK;
 
-       if (sdp->sd_args.ar_localflocks) {
-               if (IS_GETLK(cmd)) {
-                       posix_test_lock(file, fl);
-                       return 0;
-               } else {
-                       return posix_lock_file_wait(file, fl);
-               }
-       }
-
        if (cmd == F_CANCELLK) {
                /* Hack: */
                cmd = F_SETLK;
@@ -571,8 +659,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
        int error = 0;
 
        state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
-       flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE 
-               | GL_FLOCK;
+       flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
 
        mutex_lock(&fp->f_fl_mutex);
 
@@ -585,9 +672,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
                gfs2_glock_dq_wait(fl_gh);
                gfs2_holder_reinit(state, flags, fl_gh);
        } else {
-               error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
-                                     ip->i_no_addr, &gfs2_flock_glops,
-                                     CREATE, &gl);
+               error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
+                                      &gfs2_flock_glops, CREATE, &gl);
                if (error)
                        goto out;
                gfs2_holder_init(gl, state, flags, fl_gh);
@@ -632,16 +718,12 @@ static void do_unflock(struct file *file, struct file_lock *fl)
 static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
 {
        struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
-       struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
 
        if (!(fl->fl_flags & FL_FLOCK))
                return -ENOLCK;
        if (__mandatory_lock(&ip->i_inode))
                return -ENOLCK;
 
-       if (sdp->sd_args.ar_localflocks)
-               return flock_lock_file_wait(file, fl);
-
        if (fl->fl_type == F_UNLCK) {
                do_unflock(file, fl);
                return 0;
@@ -678,3 +760,27 @@ const struct file_operations gfs2_dir_fops = {
        .flock          = gfs2_flock,
 };
 
+const struct file_operations gfs2_file_fops_nolock = { /* no .lock/.flock */
+       .open           = gfs2_open,
+       .release        = gfs2_close,
+       .llseek         = gfs2_llseek,
+       .read           = do_sync_read,
+       .write          = do_sync_write,
+       .aio_read       = generic_file_aio_read,
+       .aio_write      = generic_file_aio_write,
+       .splice_read    = generic_file_splice_read,
+       .splice_write   = generic_file_splice_write,
+       .mmap           = gfs2_mmap,
+       .fsync          = gfs2_fsync,
+       .unlocked_ioctl = gfs2_ioctl,
+       .setlease       = gfs2_setlease,
+};
+
+const struct file_operations gfs2_dir_fops_nolock = { /* no .lock/.flock */
+       .open           = gfs2_open,
+       .release        = gfs2_close,
+       .readdir        = gfs2_readdir,
+       .fsync          = gfs2_fsync,
+       .unlocked_ioctl = gfs2_ioctl,
+};
+