X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Focfs2%2Faops.c;h=f37f25c931f59f7e8b6b5c27ab502529f8621834;hb=ff869de7bf5e76adffebd3a176c1c73bca7eddb7;hp=510bf84c9cf59d70a69009322f063d338fe5cd95;hpb=59a5e416d1ab543a5248a2b34d83202c4d55d132;p=safe%2Fjmp%2Flinux-2.6

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 510bf84..f37f25c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -232,7 +232,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
 	 * might now be discovering a truncate that hit on another node.
 	 * block_read_full_page->get_block freaks out if it is asked to read
 	 * beyond the end of a file, so we check here.  Callers
-	 * (generic_file_read, fault->nopage) are clever enough to check i_size
+	 * (generic_file_read, vm_ops->fault) are clever enough to check i_size
 	 * and notice that the page they just read isn't needed.
 	 *
 	 * XXX sys_readahead() seems to get that wrong?
@@ -740,18 +740,13 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
 	bh = head;
 	block_start = 0;
 	do {
-		void *kaddr;
-
 		block_end = block_start + bsize;
 		if (block_end <= from)
 			goto next_bh;
 		if (block_start >= to)
 			break;
 
-		kaddr = kmap_atomic(page, KM_USER0);
-		memset(kaddr+block_start, 0, bh->b_size);
-		flush_dcache_page(page);
-		kunmap_atomic(kaddr, KM_USER0);
+		zero_user_page(page, block_start, bh->b_size, KM_USER0);
 		set_buffer_uptodate(bh);
 		mark_buffer_dirty(bh);
 
@@ -782,8 +777,14 @@ struct ocfs2_write_cluster_desc {
 	 * filled.
 	 */
 	unsigned	c_new;
+	unsigned	c_unwritten;
 };
 
+static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
+{
+	return d->c_new || d->c_unwritten;
+}
+
 struct ocfs2_write_ctxt {
 	/* Logical cluster position / len of write */
 	u32				w_cpos;
@@ -829,6 +830,8 @@ struct ocfs2_write_ctxt {
 	handle_t			*w_handle;
 
 	struct buffer_head		*w_di_bh;
+
+	struct ocfs2_cached_dealloc_ctxt w_dealloc;
 };
 
 static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
@@ -852,6 +855,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
 				  struct ocfs2_super *osb, loff_t pos,
 				  unsigned len, struct buffer_head *di_bh)
 {
+	u32 cend;
 	struct ocfs2_write_ctxt *wc;
 
 	wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS);
@@ -859,7 +863,8 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
 		return -ENOMEM;
 
 	wc->w_cpos = pos >> osb->s_clustersize_bits;
-	wc->w_clen = ocfs2_clusters_for_bytes(osb->sb, len);
+	cend = (pos + len - 1) >> osb->s_clustersize_bits;
+	wc->w_clen = cend - wc->w_cpos + 1;
 	get_bh(di_bh);
 	wc->w_di_bh = di_bh;
 
@@ -868,6 +873,8 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
 	else
 		wc->w_large_pages = 0;
 
+	ocfs2_init_dealloc_ctxt(&wc->w_dealloc);
+
 	*wcp = wc;
 
 	return 0;
@@ -896,15 +903,11 @@ static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to
 			if (block_end > from && block_start < to) {
 				if (!PageUptodate(page)) {
 					unsigned start, end;
-					void *kaddr;
 
 					start = max(from, block_start);
 					end = min(to, block_end);
 
-					kaddr = kmap_atomic(page, KM_USER0);
-					memset(kaddr+start, 0, end - start);
-					flush_dcache_page(page);
-					kunmap_atomic(kaddr, KM_USER0);
+					zero_user_page(page, start, end - start, KM_USER0);
 					set_buffer_uptodate(bh);
 				}
 
@@ -927,18 +930,11 @@ static void ocfs2_write_failure(struct inode *inode,
 				loff_t user_pos, unsigned user_len)
 {
 	int i;
-	unsigned from, to;
+	unsigned from = user_pos & (PAGE_CACHE_SIZE - 1),
+		to = user_pos + user_len;
 	struct page *tmppage;
 
-	ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len);
-
-	if (wc->w_large_pages) {
-		from = wc->w_target_from;
-		to = wc->w_target_to;
-	} else {
-		from = 0;
-		to = PAGE_CACHE_SIZE;
-	}
+	ocfs2_zero_new_buffers(wc->w_target_page, from, to);
 
 	for(i = 0; i < wc->w_num_pages; i++) {
 		tmppage = wc->w_pages[i];
@@ -988,9 +984,6 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
 			map_from = cluster_start;
 			map_to = cluster_end;
 		}
-
-		wc->w_target_from = map_from;
-		wc->w_target_to = map_to;
 	} else {
 		/*
 		 * If we haven't allocated the new page yet, we
@@ -1103,16 +1096,19 @@ out:
  * Prepare a single cluster for write one cluster into the file.
  */
 static int ocfs2_write_cluster(struct address_space *mapping,
-			       u32 phys, struct ocfs2_alloc_context *data_ac,
+			       u32 phys, unsigned int unwritten,
+			       struct ocfs2_alloc_context *data_ac,
 			       struct ocfs2_alloc_context *meta_ac,
 			       struct ocfs2_write_ctxt *wc, u32 cpos,
 			       loff_t user_pos, unsigned user_len)
 {
-	int ret, i, new;
+	int ret, i, new, should_zero = 0;
 	u64 v_blkno, p_blkno;
 	struct inode *inode = mapping->host;
 
 	new = phys == 0 ? 1 : 0;
+	if (new || unwritten)
+		should_zero = 1;
 
 	if (new) {
 		u32 tmp_pos;
@@ -1123,7 +1119,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
 		 */
 		tmp_pos = cpos;
 		ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode,
-						 &tmp_pos, 1, wc->w_di_bh,
+						 &tmp_pos, 1, 0, wc->w_di_bh,
 						 wc->w_handle, data_ac,
 						 meta_ac, NULL);
 		/*
@@ -1142,11 +1138,20 @@ static int ocfs2_write_cluster(struct address_space *mapping,
 			mlog_errno(ret);
 			goto out;
 		}
+	} else if (unwritten) {
+		ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
+						wc->w_handle, cpos, 1, phys,
+						meta_ac, &wc->w_dealloc);
+		if (ret < 0) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
 
+	if (should_zero)
 		v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);
-	} else {
+	else
 		v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;
-	}
 
 	/*
 	 * The only reason this should fail is due to an inability to
@@ -1169,7 +1174,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
 
 		tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
 						      wc->w_pages[i], cpos,
-						      user_pos, user_len, new);
+						      user_pos, user_len,
+						      should_zero);
 		if (tmpret) {
 			mlog_errno(tmpret);
 			if (ret == 0)
@@ -1188,6 +1194,47 @@ out:
 	return ret;
 }
 
+static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
+				       struct ocfs2_alloc_context *data_ac,
+				       struct ocfs2_alloc_context *meta_ac,
+				       struct ocfs2_write_ctxt *wc,
+				       loff_t pos, unsigned len)
+{
+	int ret, i;
+	loff_t cluster_off;
+	unsigned int local_len = len;
+	struct ocfs2_write_cluster_desc *desc;
+	struct ocfs2_super *osb = OCFS2_SB(mapping->host->i_sb);
+
+	for (i = 0; i < wc->w_clen; i++) {
+		desc = &wc->w_desc[i];
+
+		/*
+		 * We have to make sure that the total write passed in
+		 * doesn't extend past a single cluster.
+		 */
+		local_len = len;
+		cluster_off = pos & (osb->s_clustersize - 1);
+		if ((cluster_off + local_len) > osb->s_clustersize)
+			local_len = osb->s_clustersize - cluster_off;
+
+		ret = ocfs2_write_cluster(mapping, desc->c_phys,
+					  desc->c_unwritten, data_ac, meta_ac,
+					  wc, desc->c_cpos, pos, local_len);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		len -= local_len;
+		pos += local_len;
+	}
+
+	ret = 0;
+out:
+	return ret;
+}
+
 /*
  * ocfs2_write_end() wants to know which parts of the target page it
  * should complete the write on. It's easiest to compute them ahead of
@@ -1217,19 +1264,19 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
 	if (wc->w_large_pages) {
 		/*
 		 * We only care about the 1st and last cluster within
-		 * our range and whether they are holes or not. Either
+		 * our range and whether they should be zero'd or not. Either
 		 * value may be extended out to the start/end of a
 		 * newly allocated cluster.
 		 */
 		desc = &wc->w_desc[0];
-		if (desc->c_new)
+		if (ocfs2_should_zero_cluster(desc))
 			ocfs2_figure_cluster_boundaries(osb,
 							desc->c_cpos,
 							&wc->w_target_from,
 							NULL);
 
 		desc = &wc->w_desc[wc->w_clen - 1];
-		if (desc->c_new)
+		if (ocfs2_should_zero_cluster(desc))
 			ocfs2_figure_cluster_boundaries(osb,
 							desc->c_cpos,
 							NULL,
@@ -1240,42 +1287,55 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
 	}
 }
 
-int ocfs2_write_begin_nolock(struct address_space *mapping,
-			     loff_t pos, unsigned len, unsigned flags,
-			     struct page **pagep, void **fsdata,
-			     struct buffer_head *di_bh, struct page *mmap_page)
+/*
+ * Populate each single-cluster write descriptor in the write context
+ * with information about the i/o to be done.
+ *
+ * Returns the number of clusters that will have to be allocated, as
+ * well as a worst case estimate of the number of extent records that
+ * would have to be created during a write to an unwritten region.
+ */
+static int ocfs2_populate_write_desc(struct inode *inode,
+				     struct ocfs2_write_ctxt *wc,
+				     unsigned int *clusters_to_alloc,
+				     unsigned int *extents_to_split)
 {
-	int ret, i, credits = OCFS2_INODE_UPDATE_CREDITS;
-	unsigned int num_clusters = 0, clusters_to_alloc = 0;
-	u32 phys = 0;
-	struct ocfs2_write_ctxt *wc;
-	struct inode *inode = mapping->host;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_dinode *di;
-	struct ocfs2_alloc_context *data_ac = NULL;
-	struct ocfs2_alloc_context *meta_ac = NULL;
-	handle_t *handle;
+	int ret;
 	struct ocfs2_write_cluster_desc *desc;
+	unsigned int num_clusters = 0;
+	unsigned int ext_flags = 0;
+	u32 phys = 0;
+	int i;
 
-	ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
-	if (ret) {
-		mlog_errno(ret);
-		return ret;
-	}
-
-	di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
+	*clusters_to_alloc = 0;
+	*extents_to_split = 0;
 
 	for (i = 0; i < wc->w_clen; i++) {
 		desc = &wc->w_desc[i];
 		desc->c_cpos = wc->w_cpos + i;
 
 		if (num_clusters == 0) {
+			/*
+			 * Need to look up the next extent record.
+			 */
 			ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys,
-						 &num_clusters, NULL);
+						 &num_clusters, &ext_flags);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
+
+			/*
+			 * Assume worst case - that we're writing in
+			 * the middle of the extent.
+			 *
+			 * We can assume that the write proceeds from
+			 * left to right, in which case the extent
+			 * insert code is smart enough to coalesce the
+			 * next splits into the previous records created.
+			 */
+			if (ext_flags & OCFS2_EXT_UNWRITTEN)
+				*extents_to_split = *extents_to_split + 2;
 		} else if (phys) {
 			/*
 			 * Only increment phys if it doesn't describe
@@ -1287,26 +1347,63 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 		desc->c_phys = phys;
 		if (phys == 0) {
 			desc->c_new = 1;
-			clusters_to_alloc++;
+			*clusters_to_alloc = *clusters_to_alloc + 1;
 		}
+		if (ext_flags & OCFS2_EXT_UNWRITTEN)
+			desc->c_unwritten = 1;
 
 		num_clusters--;
 	}
 
+	ret = 0;
+out:
+	return ret;
+}
+
+int ocfs2_write_begin_nolock(struct address_space *mapping,
+			     loff_t pos, unsigned len, unsigned flags,
+			     struct page **pagep, void **fsdata,
+			     struct buffer_head *di_bh, struct page *mmap_page)
+{
+	int ret, credits = OCFS2_INODE_UPDATE_CREDITS;
+	unsigned int clusters_to_alloc, extents_to_split;
+	struct ocfs2_write_ctxt *wc;
+	struct inode *inode = mapping->host;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_dinode *di;
+	struct ocfs2_alloc_context *data_ac = NULL;
+	struct ocfs2_alloc_context *meta_ac = NULL;
+	handle_t *handle;
+
+	ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
+					&extents_to_split);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
+
 	/*
 	 * We set w_target_from, w_target_to here so that
 	 * ocfs2_write_end() knows which range in the target page to
 	 * write out. An allocation requires that we write the entire
 	 * cluster range.
	 */
-	if (clusters_to_alloc > 0) {
+	if (clusters_to_alloc || extents_to_split) {
 		/*
 		 * XXX: We are stretching the limits of
-		 * ocfs2_lock_allocators(). It greately over-estimates
+		 * ocfs2_lock_allocators(). It greatly over-estimates
 		 * the work to be done.
 		 */
 		ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc,
-					    &data_ac, &meta_ac);
+					    extents_to_split, &data_ac, &meta_ac);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -1317,7 +1414,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 
 	}
 
-	ocfs2_set_target_boundaries(osb, wc, pos, len, clusters_to_alloc);
+	ocfs2_set_target_boundaries(osb, wc, pos, len,
+				    clusters_to_alloc + extents_to_split);
 
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
@@ -1345,21 +1443,18 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 	 * extent.
 	 */
 	ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
-					 clusters_to_alloc, mmap_page);
+					 clusters_to_alloc + extents_to_split,
+					 mmap_page);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
 	}
 
-	for (i = 0; i < wc->w_clen; i++) {
-		desc = &wc->w_desc[i];
-
-		ret = ocfs2_write_cluster(mapping, desc->c_phys, data_ac,
-					  meta_ac, wc, desc->c_cpos, pos, len);
-		if (ret) {
-			mlog_errno(ret);
-			goto out_commit;
-		}
+	ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
+					  len);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
 	}
 
 	if (data_ac)
@@ -1494,11 +1589,12 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
 	di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
-
 	ocfs2_journal_dirty(handle, wc->w_di_bh);
 
 	ocfs2_commit_trans(osb, handle);
 
+	ocfs2_run_deallocs(osb, &wc->w_dealloc);
+
 	ocfs2_free_write_ctxt(wc);
 
 	return copied;
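A note on the w_clen calculation changed in ocfs2_alloc_write_ctxt() above: counting the clusters actually spanned by [pos, pos + len) can give one more cluster than a pure byte count when the range straddles a cluster boundary. The standalone userspace C sketch below illustrates that arithmetic only; the 4 KB cluster size and helper names are made up for the example and are not taken from the patch.

#include <stdio.h>

#define CLUSTERSIZE_BITS 12	/* illustrative 4 KB clusters, not from the patch */

/* Byte-count style: clusters needed for 'len' bytes, ignoring where the
 * write starts (what ocfs2_clusters_for_bytes(osb->sb, len) computed). */
static unsigned int clusters_for_bytes(unsigned long long len)
{
	return (unsigned int)((len + (1ULL << CLUSTERSIZE_BITS) - 1) >> CLUSTERSIZE_BITS);
}

/* Range style: first and last logical cluster touched by [pos, pos + len),
 * mirroring the cend / wc->w_cpos arithmetic added by the patch. */
static unsigned int clusters_spanned(unsigned long long pos, unsigned int len)
{
	unsigned int cstart = (unsigned int)(pos >> CLUSTERSIZE_BITS);
	unsigned int cend = (unsigned int)((pos + len - 1) >> CLUSTERSIZE_BITS);

	return cend - cstart + 1;
}

int main(void)
{
	/* 4096 bytes written 512 bytes into a cluster spill over into a
	 * second cluster, which the byte-count formula misses. */
	unsigned long long pos = 512;
	unsigned int len = 4096;

	printf("clusters_for_bytes(len)    = %u\n", clusters_for_bytes(len));
	printf("clusters_spanned(pos, len) = %u\n", clusters_spanned(pos, len));
	return 0;
}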
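Similarly, here is a standalone sketch of the chunking done by the new ocfs2_write_cluster_by_desc(): each pass clamps the remaining write so the piece handed to the per-cluster helper never crosses a cluster boundary. This is plain userspace C again, with an assumed cluster size and a stub standing in for ocfs2_write_cluster(); neither is taken from the kernel.

#include <stdio.h>

#define CLUSTERSIZE 4096u	/* illustrative cluster size, not from the patch */

/* Stub standing in for ocfs2_write_cluster(): just report the chunk. */
static int write_one_cluster(unsigned long long pos, unsigned int len)
{
	printf("cluster %llu: %u bytes at offset %llu\n",
	       pos / CLUSTERSIZE, len, pos);
	return 0;
}

/* Mirrors the loop in ocfs2_write_cluster_by_desc(): use the in-cluster
 * offset to clamp each chunk so one call never spans two clusters. */
static int write_by_cluster(unsigned long long pos, unsigned int len)
{
	while (len) {
		unsigned int cluster_off = (unsigned int)(pos & (CLUSTERSIZE - 1));
		unsigned int local_len = len;
		int ret;

		if (cluster_off + local_len > CLUSTERSIZE)
			local_len = CLUSTERSIZE - cluster_off;

		ret = write_one_cluster(pos, local_len);
		if (ret)
			return ret;

		len -= local_len;
		pos += local_len;
	}

	return 0;
}

int main(void)
{
	/* 6000 bytes starting mid-cluster get split across three clusters. */
	return write_by_cluster(3000, 6000);
}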