Merge branch 'hwpoison' of git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux...
[safe/jmp/linux-2.6] / fs / ocfs2 / aops.c
index e511df1..deb2b13 100644 (file)
@@ -44,6 +44,7 @@
 #include "suballoc.h"
 #include "super.h"
 #include "symlink.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -126,8 +127,8 @@ bail:
        return err;
 }
 
-static int ocfs2_get_block(struct inode *inode, sector_t iblock,
-                          struct buffer_head *bh_result, int create)
+int ocfs2_get_block(struct inode *inode, sector_t iblock,
+                   struct buffer_head *bh_result, int create)
 {
        int err = 0;
        unsigned int ext_flags;
@@ -590,6 +591,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
                goto bail;
        }
 
+       /* We should already CoW the refcounted extent. */
+       BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
        /*
         * get_more_blocks() expects us to describe a hole by clearing
         * the mapped bit on bh_result().
@@ -687,6 +690,10 @@ static ssize_t ocfs2_direct_IO(int rw,
        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
                return 0;
 
+       /* Fallback to buffered I/O if we are appending. */
+       if (i_size_read(inode) <= offset)
+               return 0;
+
        ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
                                            inode->i_sb->s_bdev, iov, offset,
                                            nr_segs, 
@@ -895,18 +902,17 @@ struct ocfs2_write_cluster_desc {
         */
        unsigned        c_new;
        unsigned        c_unwritten;
+       unsigned        c_needs_zero;
 };
 
-static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
-{
-       return d->c_new || d->c_unwritten;
-}
-
 struct ocfs2_write_ctxt {
        /* Logical cluster position / len of write */
        u32                             w_cpos;
        u32                             w_clen;
 
+       /* First cluster allocated in a nonsparse extend */
+       u32                             w_first_new_cpos;
+
        struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
 
        /*
@@ -984,6 +990,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
                return -ENOMEM;
 
        wc->w_cpos = pos >> osb->s_clustersize_bits;
+       wc->w_first_new_cpos = UINT_MAX;
        cend = (pos + len - 1) >> osb->s_clustersize_bits;
        wc->w_clen = cend - wc->w_cpos + 1;
        get_bh(di_bh);
@@ -1218,20 +1225,18 @@ out:
  */
 static int ocfs2_write_cluster(struct address_space *mapping,
                               u32 phys, unsigned int unwritten,
+                              unsigned int should_zero,
                               struct ocfs2_alloc_context *data_ac,
                               struct ocfs2_alloc_context *meta_ac,
                               struct ocfs2_write_ctxt *wc, u32 cpos,
                               loff_t user_pos, unsigned user_len)
 {
-       int ret, i, new, should_zero = 0;
+       int ret, i, new;
        u64 v_blkno, p_blkno;
        struct inode *inode = mapping->host;
        struct ocfs2_extent_tree et;
 
        new = phys == 0 ? 1 : 0;
-       if (new || unwritten)
-               should_zero = 1;
-
        if (new) {
                u32 tmp_pos;
 
@@ -1261,7 +1266,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
                        goto out;
                }
        } else if (unwritten) {
-               ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
+               ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
+                                             wc->w_di_bh);
                ret = ocfs2_mark_extent_written(inode, &et,
                                                wc->w_handle, cpos, 1, phys,
                                                meta_ac, &wc->w_dealloc);
@@ -1342,7 +1348,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
                        local_len = osb->s_clustersize - cluster_off;
 
                ret = ocfs2_write_cluster(mapping, desc->c_phys,
-                                         desc->c_unwritten, data_ac, meta_ac,
+                                         desc->c_unwritten,
+                                         desc->c_needs_zero,
+                                         data_ac, meta_ac,
                                          wc, desc->c_cpos, pos, local_len);
                if (ret) {
                        mlog_errno(ret);
@@ -1392,14 +1400,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
                 * newly allocated cluster.
                 */
                desc = &wc->w_desc[0];
-               if (ocfs2_should_zero_cluster(desc))
+               if (desc->c_needs_zero)
                        ocfs2_figure_cluster_boundaries(osb,
                                                        desc->c_cpos,
                                                        &wc->w_target_from,
                                                        NULL);
 
                desc = &wc->w_desc[wc->w_clen - 1];
-               if (ocfs2_should_zero_cluster(desc))
+               if (desc->c_needs_zero)
                        ocfs2_figure_cluster_boundaries(osb,
                                                        desc->c_cpos,
                                                        NULL,
@@ -1448,6 +1456,9 @@ static int ocfs2_populate_write_desc(struct inode *inode,
                                goto out;
                        }
 
+                       /* We should already CoW the refcountd extent. */
+                       BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
+
                        /*
                         * Assume worst case - that we're writing in
                         * the middle of the extent.
@@ -1467,13 +1478,28 @@ static int ocfs2_populate_write_desc(struct inode *inode,
                        phys++;
                }
 
+               /*
+                * If w_first_new_cpos is < UINT_MAX, we have a non-sparse
+                * file that got extended.  w_first_new_cpos tells us
+                * where the newly allocated clusters are so we can
+                * zero them.
+                */
+               if (desc->c_cpos >= wc->w_first_new_cpos) {
+                       BUG_ON(phys == 0);
+                       desc->c_needs_zero = 1;
+               }
+
                desc->c_phys = phys;
                if (phys == 0) {
                        desc->c_new = 1;
+                       desc->c_needs_zero = 1;
                        *clusters_to_alloc = *clusters_to_alloc + 1;
                }
-               if (ext_flags & OCFS2_EXT_UNWRITTEN)
+
+               if (ext_flags & OCFS2_EXT_UNWRITTEN) {
                        desc->c_unwritten = 1;
+                       desc->c_needs_zero = 1;
+               }
 
                num_clusters--;
        }
@@ -1513,7 +1539,7 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
                goto out;
        }
 
-       ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
+       ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
                                      OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                ocfs2_commit_trans(osb, handle);
@@ -1633,10 +1659,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
        if (newsize <= i_size_read(inode))
                return 0;
 
-       ret = ocfs2_extend_no_holes(inode, newsize, newsize - len);
+       ret = ocfs2_extend_no_holes(inode, newsize, pos);
        if (ret)
                mlog_errno(ret);
 
+       wc->w_first_new_cpos =
+               ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
+
        return ret;
 }
 
@@ -1645,7 +1674,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
                             struct page **pagep, void **fsdata,
                             struct buffer_head *di_bh, struct page *mmap_page)
 {
-       int ret, credits = OCFS2_INODE_UPDATE_CREDITS;
+       int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
        unsigned int clusters_to_alloc, extents_to_split;
        struct ocfs2_write_ctxt *wc;
        struct inode *inode = mapping->host;
@@ -1681,6 +1710,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
                goto out;
        }
 
+       ret = ocfs2_check_range_for_refcount(inode, pos, len);
+       if (ret < 0) {
+               mlog_errno(ret);
+               goto out;
+       } else if (ret == 1) {
+               ret = ocfs2_refcount_cow(inode, di_bh,
+                                        wc->w_cpos, wc->w_clen, UINT_MAX);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
+       }
+
        ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
                                        &extents_to_split);
        if (ret) {
@@ -1708,7 +1750,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
                     (long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
                     clusters_to_alloc, extents_to_split);
 
-               ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
+               ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
+                                             wc->w_di_bh);
                ret = ocfs2_lock_allocators(inode, &et,
                                            clusters_to_alloc, extents_to_split,
                                            &data_ac, &meta_ac);
@@ -1723,8 +1766,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 
        }
 
-       ocfs2_set_target_boundaries(osb, wc, pos, len,
-                                   clusters_to_alloc + extents_to_split);
+       /*
+        * We have to zero sparse allocated clusters, unwritten extent clusters,
+        * and non-sparse clusters we just extended.  For non-sparse writes,
+        * we know zeros will only be needed in the first and/or last cluster.
+        */
+       if (clusters_to_alloc || extents_to_split ||
+           (wc->w_clen && (wc->w_desc[0].c_needs_zero ||
+                           wc->w_desc[wc->w_clen - 1].c_needs_zero)))
+               cluster_of_pages = 1;
+       else
+               cluster_of_pages = 0;
+
+       ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages);
 
        handle = ocfs2_start_trans(osb, credits);
        if (IS_ERR(handle)) {
@@ -1744,7 +1798,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
         * We don't want this to fail in ocfs2_write_end(), so do it
         * here.
         */
-       ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
+       ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
                                      OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
@@ -1757,8 +1811,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
         * extent.
         */
        ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
-                                        clusters_to_alloc + extents_to_split,
-                                        mmap_page);
+                                        cluster_of_pages, mmap_page);
        if (ret) {
                mlog_errno(ret);
                goto out_quota;
@@ -1969,4 +2022,5 @@ const struct address_space_operations ocfs2_aops = {
        .releasepage            = ocfs2_releasepage,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };