X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Focfs2%2Faops.c;h=f37f25c931f59f7e8b6b5c27ab502529f8621834;hb=ff869de7bf5e76adffebd3a176c1c73bca7eddb7;hp=510bf84c9cf59d70a69009322f063d338fe5cd95;hpb=59a5e416d1ab543a5248a2b34d83202c4d55d132;p=safe%2Fjmp%2Flinux-2.6

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 510bf84..f37f25c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -232,7 +232,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
 	 * might now be discovering a truncate that hit on another node.
 	 * block_read_full_page->get_block freaks out if it is asked to read
 	 * beyond the end of a file, so we check here.  Callers
-	 * (generic_file_read, fault->nopage) are clever enough to check i_size
+	 * (generic_file_read, vm_ops->fault) are clever enough to check i_size
 	 * and notice that the page they just read isn't needed.
 	 *
 	 * XXX sys_readahead() seems to get that wrong?
@@ -740,18 +740,13 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
 	bh = head;
 	block_start = 0;
 	do {
-		void *kaddr;
-
 		block_end = block_start + bsize;
 		if (block_end <= from)
 			goto next_bh;
 		if (block_start >= to)
 			break;
 
-		kaddr = kmap_atomic(page, KM_USER0);
-		memset(kaddr+block_start, 0, bh->b_size);
-		flush_dcache_page(page);
-		kunmap_atomic(kaddr, KM_USER0);
+		zero_user_page(page, block_start, bh->b_size, KM_USER0);
 		set_buffer_uptodate(bh);
 		mark_buffer_dirty(bh);
 
@@ -782,8 +777,14 @@ struct ocfs2_write_cluster_desc {
 	 * filled.
 	 */
 	unsigned	c_new;
+	unsigned	c_unwritten;
 };
 
+static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
+{
+	return d->c_new || d->c_unwritten;
+}
+
 struct ocfs2_write_ctxt {
 	/* Logical cluster position / len of write */
 	u32				w_cpos;
@@ -829,6 +830,8 @@ struct ocfs2_write_ctxt {
 	handle_t			*w_handle;
 
 	struct buffer_head		*w_di_bh;
+
+	struct ocfs2_cached_dealloc_ctxt w_dealloc;
 };
 
 static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
@@ -852,6 +855,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
 				  struct ocfs2_super *osb, loff_t pos,
 				  unsigned len, struct buffer_head *di_bh)
 {
+	u32 cend;
 	struct ocfs2_write_ctxt *wc;
 
 	wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS);
@@ -859,7 +863,8 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
 		return -ENOMEM;
 
 	wc->w_cpos = pos >> osb->s_clustersize_bits;
-	wc->w_clen = ocfs2_clusters_for_bytes(osb->sb, len);
+	cend = (pos + len - 1) >> osb->s_clustersize_bits;
+	wc->w_clen = cend - wc->w_cpos + 1;
 	get_bh(di_bh);
 	wc->w_di_bh = di_bh;
 
@@ -868,6 +873,8 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
 	else
 		wc->w_large_pages = 0;
 
+	ocfs2_init_dealloc_ctxt(&wc->w_dealloc);
+
 	*wcp = wc;
 
 	return 0;
@@ -896,15 +903,11 @@ static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to
 			if (block_end > from && block_start < to) {
 				if (!PageUptodate(page)) {
 					unsigned start, end;
-					void *kaddr;
 
 					start = max(from, block_start);
 					end = min(to, block_end);
 
-					kaddr = kmap_atomic(page, KM_USER0);
-					memset(kaddr+start, 0, end - start);
-					flush_dcache_page(page);
-					kunmap_atomic(kaddr, KM_USER0);
+					zero_user_page(page, start, end - start, KM_USER0);
 					set_buffer_uptodate(bh);
 				}
 
@@ -927,18 +930,11 @@ static void ocfs2_write_failure(struct inode *inode,
 				loff_t user_pos, unsigned user_len)
 {
 	int i;
-	unsigned from, to;
+	unsigned from = user_pos & (PAGE_CACHE_SIZE - 1),
+		to = user_pos + user_len;
 	struct page *tmppage;
 
-	ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len);
-
-	if (wc->w_large_pages) {
-		from = wc->w_target_from;
-		to = wc->w_target_to;
-	} else {
-		from = 0;
-		to = PAGE_CACHE_SIZE;
-	}
+	ocfs2_zero_new_buffers(wc->w_target_page, from, to);
 
 	for(i = 0; i < wc->w_num_pages; i++) {
 		tmppage = wc->w_pages[i];
@@ -988,9 +984,6 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
 			map_from = cluster_start;
 			map_to = cluster_end;
 		}
-
-		wc->w_target_from = map_from;
-		wc->w_target_to = map_to;
 	} else {
 		/*
 		 * If we haven't allocated the new page yet, we
@@ -1103,16 +1096,19 @@ out:
  * Prepare a single cluster for write one cluster into the file.
  */
 static int ocfs2_write_cluster(struct address_space *mapping,
-			       u32 phys, struct ocfs2_alloc_context *data_ac,
+			       u32 phys, unsigned int unwritten,
+			       struct ocfs2_alloc_context *data_ac,
 			       struct ocfs2_alloc_context *meta_ac,
 			       struct ocfs2_write_ctxt *wc, u32 cpos,
 			       loff_t user_pos, unsigned user_len)
 {
-	int ret, i, new;
+	int ret, i, new, should_zero = 0;
 	u64 v_blkno, p_blkno;
 	struct inode *inode = mapping->host;
 
 	new = phys == 0 ? 1 : 0;
+	if (new || unwritten)
+		should_zero = 1;
 
 	if (new) {
 		u32 tmp_pos;
@@ -1123,7 +1119,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
 		 */
 		tmp_pos = cpos;
 		ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode,
-						 &tmp_pos, 1, wc->w_di_bh,
+						 &tmp_pos, 1, 0, wc->w_di_bh,
 						 wc->w_handle, data_ac,
 						 meta_ac, NULL);
 		/*
@@ -1142,11 +1138,20 @@ static int ocfs2_write_cluster(struct address_space *mapping,
 			mlog_errno(ret);
 			goto out;
 		}
+	} else if (unwritten) {
+		ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
+						wc->w_handle, cpos, 1, phys,
+						meta_ac, &wc->w_dealloc);
+		if (ret < 0) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
 
+	if (should_zero)
 		v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);
-	} else {
+	else
 		v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;
-	}
 
 	/*
 	 * The only reason this should fail is due to an inability to
@@ -1169,7 +1174,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
 
 		tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
 						      wc->w_pages[i], cpos,
-						      user_pos, user_len, new);
+						      user_pos, user_len,
+						      should_zero);
 		if (tmpret) {
 			mlog_errno(tmpret);
 			if (ret == 0)
@@ -1188,6 +1194,47 @@ out:
 	return ret;
 }
 
+static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
+				       struct ocfs2_alloc_context *data_ac,
+				       struct ocfs2_alloc_context *meta_ac,
+				       struct ocfs2_write_ctxt *wc,
+				       loff_t pos, unsigned len)
+{
+	int ret, i;
+	loff_t cluster_off;
+	unsigned int local_len = len;
+	struct ocfs2_write_cluster_desc *desc;
+	struct ocfs2_super *osb = OCFS2_SB(mapping->host->i_sb);
+
+	for (i = 0; i < wc->w_clen; i++) {
+		desc = &wc->w_desc[i];
+
+		/*
+		 * We have to make sure that the total write passed in
+		 * doesn't extend past a single cluster.
+		 */
+		local_len = len;
+		cluster_off = pos & (osb->s_clustersize - 1);
+		if ((cluster_off + local_len) > osb->s_clustersize)
+			local_len = osb->s_clustersize - cluster_off;
+
+		ret = ocfs2_write_cluster(mapping, desc->c_phys,
+					  desc->c_unwritten, data_ac, meta_ac,
+					  wc, desc->c_cpos, pos, local_len);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		len -= local_len;
+		pos += local_len;
+	}
+
+	ret = 0;
+out:
+	return ret;
+}
+
 /*
  * ocfs2_write_end() wants to know which parts of the target page it
  * should complete the write on. It's easiest to compute them ahead of
@@ -1217,19 +1264,19 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
 	if (wc->w_large_pages) {
 		/*
 		 * We only care about the 1st and last cluster within
-		 * our range and whether they are holes or not. Either
+		 * our range and whether they should be zero'd or not. Either
 		 * value may be extended out to the start/end of a
 		 * newly allocated cluster.
 		 */
 		desc = &wc->w_desc[0];
-		if (desc->c_new)
+		if (ocfs2_should_zero_cluster(desc))
 			ocfs2_figure_cluster_boundaries(osb,
 							desc->c_cpos,
 							&wc->w_target_from,
 							NULL);
 
 		desc = &wc->w_desc[wc->w_clen - 1];
-		if (desc->c_new)
+		if (ocfs2_should_zero_cluster(desc))
 			ocfs2_figure_cluster_boundaries(osb,
 							desc->c_cpos,
 							NULL,
@@ -1240,42 +1287,55 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
 	}
 }
 
-int ocfs2_write_begin_nolock(struct address_space *mapping,
-			     loff_t pos, unsigned len, unsigned flags,
-			     struct page **pagep, void **fsdata,
-			     struct buffer_head *di_bh, struct page *mmap_page)
+/*
+ * Populate each single-cluster write descriptor in the write context
+ * with information about the i/o to be done.
+ *
+ * Returns the number of clusters that will have to be allocated, as
+ * well as a worst case estimate of the number of extent records that
+ * would have to be created during a write to an unwritten region.
+ */
+static int ocfs2_populate_write_desc(struct inode *inode,
+				     struct ocfs2_write_ctxt *wc,
+				     unsigned int *clusters_to_alloc,
+				     unsigned int *extents_to_split)
 {
-	int ret, i, credits = OCFS2_INODE_UPDATE_CREDITS;
-	unsigned int num_clusters = 0, clusters_to_alloc = 0;
-	u32 phys = 0;
-	struct ocfs2_write_ctxt *wc;
-	struct inode *inode = mapping->host;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_dinode *di;
-	struct ocfs2_alloc_context *data_ac = NULL;
-	struct ocfs2_alloc_context *meta_ac = NULL;
-	handle_t *handle;
+	int ret;
 	struct ocfs2_write_cluster_desc *desc;
+	unsigned int num_clusters = 0;
+	unsigned int ext_flags = 0;
+	u32 phys = 0;
+	int i;
 
-	ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
-	if (ret) {
-		mlog_errno(ret);
-		return ret;
-	}
-
-	di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
+	*clusters_to_alloc = 0;
+	*extents_to_split = 0;
 
 	for (i = 0; i < wc->w_clen; i++) {
 		desc = &wc->w_desc[i];
 		desc->c_cpos = wc->w_cpos + i;
 
 		if (num_clusters == 0) {
+			/*
+			 * Need to look up the next extent record.
+			 */
 			ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys,
-						 &num_clusters, NULL);
+						 &num_clusters, &ext_flags);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
+
+			/*
+			 * Assume worst case - that we're writing in
+			 * the middle of the extent.
+			 *
+			 * We can assume that the write proceeds from
+			 * left to right, in which case the extent
+			 * insert code is smart enough to coalesce the
+			 * next splits into the previous records created.
+			 */
+			if (ext_flags & OCFS2_EXT_UNWRITTEN)
+				*extents_to_split = *extents_to_split + 2;
 		} else if (phys) {
 			/*
 			 * Only increment phys if it doesn't describe
@@ -1287,26 +1347,63 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 		desc->c_phys = phys;
 		if (phys == 0) {
 			desc->c_new = 1;
-			clusters_to_alloc++;
+			*clusters_to_alloc = *clusters_to_alloc + 1;
 		}
+		if (ext_flags & OCFS2_EXT_UNWRITTEN)
+			desc->c_unwritten = 1;
 
 		num_clusters--;
 	}
 
+	ret = 0;
+out:
+	return ret;
+}
+
+int ocfs2_write_begin_nolock(struct address_space *mapping,
+			     loff_t pos, unsigned len, unsigned flags,
+			     struct page **pagep, void **fsdata,
+			     struct buffer_head *di_bh, struct page *mmap_page)
+{
+	int ret, credits = OCFS2_INODE_UPDATE_CREDITS;
+	unsigned int clusters_to_alloc, extents_to_split;
+	struct ocfs2_write_ctxt *wc;
+	struct inode *inode = mapping->host;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_dinode *di;
+	struct ocfs2_alloc_context *data_ac = NULL;
+	struct ocfs2_alloc_context *meta_ac = NULL;
+	handle_t *handle;
+
+	ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
+					&extents_to_split);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
+
 	/*
 	 * We set w_target_from, w_target_to here so that
 	 * ocfs2_write_end() knows which range in the target page to
 	 * write out. An allocation requires that we write the entire
 	 * cluster range.
	 */
-	if (clusters_to_alloc > 0) {
+	if (clusters_to_alloc || extents_to_split) {
 		/*
 		 * XXX: We are stretching the limits of
-		 * ocfs2_lock_allocators(). It greately over-estimates
+		 * ocfs2_lock_allocators(). It greatly over-estimates
 		 * the work to be done.
 		 */
 		ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc,
-					    &data_ac, &meta_ac);
+					    extents_to_split, &data_ac, &meta_ac);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -1317,7 +1414,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 
 	}
 
-	ocfs2_set_target_boundaries(osb, wc, pos, len, clusters_to_alloc);
+	ocfs2_set_target_boundaries(osb, wc, pos, len,
+				    clusters_to_alloc + extents_to_split);
 
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
@@ -1345,21 +1443,18 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 	 * extent.
 	 */
 	ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
-					 clusters_to_alloc, mmap_page);
+					 clusters_to_alloc + extents_to_split,
+					 mmap_page);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
 	}
 
-	for (i = 0; i < wc->w_clen; i++) {
-		desc = &wc->w_desc[i];
-
-		ret = ocfs2_write_cluster(mapping, desc->c_phys, data_ac,
-					  meta_ac, wc, desc->c_cpos, pos, len);
-		if (ret) {
-			mlog_errno(ret);
-			goto out_commit;
-		}
+	ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
+					  len);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
 	}
 
 	if (data_ac)
@@ -1494,11 +1589,12 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
 	di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
-
 	ocfs2_journal_dirty(handle, wc->w_di_bh);
 
 	ocfs2_commit_trans(osb, handle);
 
+	ocfs2_run_deallocs(osb, &wc->w_dealloc);
+
 	ocfs2_free_write_ctxt(wc);
 
 	return copied;
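A note on the w_clen calculation changed in ocfs2_alloc_write_ctxt() above: counting the clusters actually spanned by [pos, pos + len) can give one more cluster than a pure byte count when the range straddles a cluster boundary. The standalone userspace C sketch below illustrates that arithmetic only; the 4 KB cluster size and helper names are made up for the example and are not taken from the patch.

#include <stdio.h>

#define CLUSTERSIZE_BITS 12	/* illustrative 4 KB clusters, not from the patch */

/* Byte-count style: clusters needed for 'len' bytes, ignoring where the
 * write starts (what ocfs2_clusters_for_bytes(osb->sb, len) computed). */
static unsigned int clusters_for_bytes(unsigned long long len)
{
	return (unsigned int)((len + (1ULL << CLUSTERSIZE_BITS) - 1) >> CLUSTERSIZE_BITS);
}

/* Range style: first and last logical cluster touched by [pos, pos + len),
 * mirroring the cend / wc->w_cpos arithmetic added by the patch. */
static unsigned int clusters_spanned(unsigned long long pos, unsigned int len)
{
	unsigned int cstart = (unsigned int)(pos >> CLUSTERSIZE_BITS);
	unsigned int cend = (unsigned int)((pos + len - 1) >> CLUSTERSIZE_BITS);

	return cend - cstart + 1;
}

int main(void)
{
	/* 4096 bytes written 512 bytes into a cluster spill over into a
	 * second cluster, which the byte-count formula misses. */
	unsigned long long pos = 512;
	unsigned int len = 4096;

	printf("clusters_for_bytes(len)    = %u\n", clusters_for_bytes(len));
	printf("clusters_spanned(pos, len) = %u\n", clusters_spanned(pos, len));
	return 0;
}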
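Similarly, here is a standalone sketch of the chunking done by the new ocfs2_write_cluster_by_desc(): each pass clamps the remaining write so the piece handed to the per-cluster helper never crosses a cluster boundary. This is plain userspace C again, with an assumed cluster size and a stub standing in for ocfs2_write_cluster(); neither is taken from the kernel.

#include <stdio.h>

#define CLUSTERSIZE 4096u	/* illustrative cluster size, not from the patch */

/* Stub standing in for ocfs2_write_cluster(): just report the chunk. */
static int write_one_cluster(unsigned long long pos, unsigned int len)
{
	printf("cluster %llu: %u bytes at offset %llu\n",
	       pos / CLUSTERSIZE, len, pos);
	return 0;
}

/* Mirrors the loop in ocfs2_write_cluster_by_desc(): use the in-cluster
 * offset to clamp each chunk so one call never spans two clusters. */
static int write_by_cluster(unsigned long long pos, unsigned int len)
{
	while (len) {
		unsigned int cluster_off = (unsigned int)(pos & (CLUSTERSIZE - 1));
		unsigned int local_len = len;
		int ret;

		if (cluster_off + local_len > CLUSTERSIZE)
			local_len = CLUSTERSIZE - cluster_off;

		ret = write_one_cluster(pos, local_len);
		if (ret)
			return ret;

		len -= local_len;
		pos += local_len;
	}

	return 0;
}

int main(void)
{
	/* 6000 bytes starting mid-cluster get split across three clusters. */
	return write_by_cluster(3000, 6000);
}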