AFS: write back dirty data on unmount
[safe/jmp/linux-2.6] / fs / ocfs2 / aops.c
index 605c82a..8e7cafb 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/pagemap.h>
 #include <asm/byteorder.h>
 #include <linux/swap.h>
+#include <linux/pipe_fs_i.h>
 
 #define MLOG_MASK_PREFIX ML_FILE_IO
 #include <cluster/masklog.h>
@@ -77,7 +78,8 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
 
        if (!OCFS2_IS_VALID_DINODE(fe)) {
                mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
-                    (unsigned long long)fe->i_blkno, 7, fe->i_signature);
+                    (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
+                    fe->i_signature);
                goto bail;
        }
 
@@ -136,6 +138,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
                           struct buffer_head *bh_result, int create)
 {
        int err = 0;
+       unsigned int ext_flags;
        u64 p_blkno, past_eof;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
@@ -152,7 +155,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
                goto bail;
        }
 
-       err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL);
+       err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL,
+                                         &ext_flags);
        if (err) {
                mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
                     "%llu, NULL)\n", err, inode, (unsigned long long)iblock,
@@ -170,7 +174,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
                        "ino %lu, iblock %llu\n", inode->i_ino,
                        (unsigned long long)iblock);
 
-       if (p_blkno)
+       /* Treat the unwritten extent as a hole for zeroing purposes. */
+       if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
                map_bh(bh_result, inode->i_sb, p_blkno);
 
        if (!ocfs2_sparse_alloc(osb)) {
@@ -395,7 +400,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
                down_read(&OCFS2_I(inode)->ip_alloc_sem);
        }
 
-       err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL);
+       err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, NULL);
 
        if (!INODE_JOURNAL(inode)) {
                up_read(&OCFS2_I(inode)->ip_alloc_sem);
@@ -435,8 +440,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
                                     struct buffer_head *bh_result, int create)
 {
        int ret;
-       u64 p_blkno, inode_blocks;
-       int contig_blocks;
+       u64 p_blkno, inode_blocks, contig_blocks;
+       unsigned int ext_flags;
        unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
        unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
 
@@ -457,7 +462,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
        /* This figures out the size of the next contiguous block, and
         * our logical offset */
        ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
-                                         &contig_blocks);
+                                         &contig_blocks, &ext_flags);
        if (ret) {
                mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
                     (unsigned long long)iblock);
@@ -477,8 +482,10 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
        /*
         * get_more_blocks() expects us to describe a hole by clearing
         * the mapped bit on bh_result().
+        *
+        * Consider an unwritten extent as a hole.
         */
-       if (p_blkno)
+       if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
                map_bh(bh_result, inode->i_sb, p_blkno);
        else {
                /*
@@ -516,12 +523,17 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
                             void *private)
 {
        struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+       int level;
 
        /* this io's submitter should not have unlocked this before we could */
        BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
+
        ocfs2_iocb_clear_rw_locked(iocb);
-       up_read(&inode->i_alloc_sem);
-       ocfs2_rw_unlock(inode, 0);
+
+       level = ocfs2_iocb_rw_locked_level(iocb);
+       if (!level)
+               up_read(&inode->i_alloc_sem);
+       ocfs2_rw_unlock(inode, level);
 }
 
 /*
@@ -749,6 +761,74 @@ next_bh:
 }
 
 /*
+ * This will copy user data from the buffer page in the splice
+ * context.
+ *
+ * For now, we ignore SPLICE_F_MOVE as that would require some extra
+ * communication out all the way to ocfs2_write().
+ */
+int ocfs2_map_and_write_splice_data(struct inode *inode,
+                                 struct ocfs2_write_ctxt *wc, u64 *p_blkno,
+                                 unsigned int *ret_from, unsigned int *ret_to)
+{
+       int ret;
+       unsigned int to, from, cluster_start, cluster_end;
+       char *src, *dst;
+       struct ocfs2_splice_write_priv *sp = wc->w_private;
+       struct pipe_buffer *buf = sp->s_buf;
+       unsigned long bytes, src_from;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+       ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start,
+                                       &cluster_end);
+
+       from = sp->s_offset;
+       src_from = sp->s_buf_offset;
+       bytes = wc->w_count;
+
+       if (wc->w_large_pages) {
+               /*
+                * For cluster size < page size, we have to
+                * calculate pos within the cluster and obey
+                * the rightmost boundary.
+                */
+               bytes = min(bytes, (unsigned long)(osb->s_clustersize
+                                  - (wc->w_pos & (osb->s_clustersize - 1))));
+       }
+       to = from + bytes;
+
+       if (wc->w_this_page_new)
+               ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+                                           cluster_start, cluster_end, 1);
+       else
+               ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+                                           from, to, 0);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+
+       BUG_ON(from > PAGE_CACHE_SIZE);
+       BUG_ON(to > PAGE_CACHE_SIZE);
+       BUG_ON(from > osb->s_clustersize);
+       BUG_ON(to > osb->s_clustersize);
+
+       src = buf->ops->map(sp->s_pipe, buf, 1);
+       dst = kmap_atomic(wc->w_this_page, KM_USER1);
+       memcpy(dst + from, src + src_from, bytes);
+       kunmap_atomic(wc->w_this_page, KM_USER1);
+       buf->ops->unmap(sp->s_pipe, buf, src);
+
+       wc->w_finished_copy = 1;
+
+       *ret_from = from;
+       *ret_to = to;
+out:
+
+       return bytes ? (unsigned int)bytes : ret;
+}
+
+/*
  * This will copy user data from the iovec in the buffered write
  * context.
  */
@@ -860,9 +940,9 @@ out:
  * Returns a negative error code or the number of bytes copied into
  * the page.
  */
-int ocfs2_write_data_page(struct inode *inode, handle_t *handle,
-                         u64 *p_blkno, struct page *page,
-                         struct ocfs2_write_ctxt *wc, int new)
+static int ocfs2_write_data_page(struct inode *inode, handle_t *handle,
+                                u64 *p_blkno, struct page *page,
+                                struct ocfs2_write_ctxt *wc, int new)
 {
        int ret, copied = 0;
        unsigned int from = 0, to = 0;
@@ -1007,7 +1087,7 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
        for(i = 0; i < numpages; i++) {
                index = start + i;
 
-               cpages[i] = grab_cache_page(mapping, index);
+               cpages[i] = find_or_create_page(mapping, index, GFP_NOFS);
                if (!cpages[i]) {
                        ret = -ENOMEM;
                        mlog_errno(ret);
@@ -1042,7 +1122,8 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
                }
        }
 
-       ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL);
+       ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
+                                         NULL);
        if (ret < 0) {
 
                /*
@@ -1146,7 +1227,7 @@ ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
         */
        down_write(&OCFS2_I(inode)->ip_alloc_sem);
 
-       ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL);
+       ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL, NULL);
        if (ret) {
                mlog_errno(ret);
                goto out_meta;
@@ -1196,7 +1277,7 @@ ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
                i_size_write(inode, pos);
                mark_inode_dirty(inode);
        }
-       inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode)));
+       inode->i_blocks = ocfs2_inode_sector_count(inode);
        di->i_size = cpu_to_le64((u64)i_size_read(inode));
        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);