diff --git a/fs/mpage.c b/fs/mpage.c
index c5adcdd..16c3ef3 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -6,7 +6,7 @@
  * Contains functions related to preparing and submitting BIOs which contain
  * multiple pagecache pages.
  *
- * 15May2002	akpm@zip.com.au
+ * 15May2002	Andrew Morton
  *		Initial version
  * 27Jun2002	axboe@suse.de
  *		use bio_add_page() to build bio's just the right size
@@ -39,14 +39,11 @@
  * status of that page is hard. See end_buffer_async_read() for the details.
  * There is no point in duplicating all that complexity.
  */
-static int mpage_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
+static void mpage_end_io_read(struct bio *bio, int err)
 {
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 
-	if (bio->bi_size)
-		return 1;
-
 	do {
 		struct page *page = bvec->bv_page;
 
@@ -62,17 +59,13 @@ static int mpage_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
 		unlock_page(page);
 	} while (bvec >= bio->bi_io_vec);
 	bio_put(bio);
-	return 0;
 }
 
-static int mpage_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
+static void mpage_end_io_write(struct bio *bio, int err)
 {
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 
-	if (bio->bi_size)
-		return 1;
-
 	do {
 		struct page *page = bvec->bv_page;
 
@@ -87,10 +80,9 @@ static int mpage_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
 		end_page_writeback(page);
 	} while (bvec >= bio->bi_io_vec);
 	bio_put(bio);
-	return 0;
 }
 
-static struct bio *mpage_bio_submit(int rw, struct bio *bio)
+struct bio *mpage_bio_submit(int rw, struct bio *bio)
 {
 	bio->bi_end_io = mpage_end_io_read;
 	if (rw == WRITE)
@@ -98,6 +90,7 @@ static struct bio *mpage_bio_submit(int rw, struct bio *bio)
 	submit_bio(rw, bio);
 	return NULL;
 }
+EXPORT_SYMBOL(mpage_bio_submit);
 
 static struct bio *
 mpage_alloc(struct block_device *bdev,
@@ -163,9 +156,19 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
 	} while (page_bh != head);
 }
 
+/*
+ * This is the worker routine which does all the work of mapping the disk
+ * blocks and constructs largest possible bios, submits them for IO if the
+ * blocks are not contiguous on the disk.
+ *
+ * We pass a buffer_head back and forth and use its buffer_mapped() flag to
+ * represent the validity of its disk mapping and to decide when to do the next
+ * get_block() call.
+ */
 static struct bio *
 do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
-		sector_t *last_block_in_bio, get_block_t get_block)
+		sector_t *last_block_in_bio, struct buffer_head *map_bh,
+		unsigned long *first_logical_block, get_block_t get_block)
 {
 	struct inode *inode = page->mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
@@ -173,33 +176,71 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 	const unsigned blocksize = 1 << blkbits;
 	sector_t block_in_file;
 	sector_t last_block;
+	sector_t last_block_in_file;
 	sector_t blocks[MAX_BUF_PER_PAGE];
 	unsigned page_block;
 	unsigned first_hole = blocks_per_page;
 	struct block_device *bdev = NULL;
-	struct buffer_head bh;
 	int length;
 	int fully_mapped = 1;
+	unsigned nblocks;
+	unsigned relative_block;
 
 	if (page_has_buffers(page))
 		goto confused;
 
-	block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
-	last_block = (i_size_read(inode) + blocksize - 1) >> blkbits;
+	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+	last_block = block_in_file + nr_pages * blocks_per_page;
+	last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
+	if (last_block > last_block_in_file)
+		last_block = last_block_in_file;
+	page_block = 0;
+
+	/*
+	 * Map blocks using the result from the previous get_blocks call first.
+	 */
+	nblocks = map_bh->b_size >> blkbits;
+	if (buffer_mapped(map_bh) && block_in_file > *first_logical_block &&
+			block_in_file < (*first_logical_block + nblocks)) {
+		unsigned map_offset = block_in_file - *first_logical_block;
+		unsigned last = nblocks - map_offset;
+
+		for (relative_block = 0; ; relative_block++) {
+			if (relative_block == last) {
+				clear_buffer_mapped(map_bh);
+				break;
+			}
+			if (page_block == blocks_per_page)
+				break;
+			blocks[page_block] = map_bh->b_blocknr + map_offset +
+						relative_block;
+			page_block++;
+			block_in_file++;
+		}
+		bdev = map_bh->b_bdev;
+	}
+
+	/*
+	 * Then do more get_blocks calls until we are done with this page.
+	 */
+	map_bh->b_page = page;
+	while (page_block < blocks_per_page) {
+		map_bh->b_state = 0;
+		map_bh->b_size = 0;
 
-	bh.b_page = page;
-	for (page_block = 0; page_block < blocks_per_page;
-				page_block++, block_in_file++) {
-		bh.b_state = 0;
 		if (block_in_file < last_block) {
-			if (get_block(inode, block_in_file, &bh, 0))
+			map_bh->b_size = (last_block-block_in_file) << blkbits;
+			if (get_block(inode, block_in_file, map_bh, 0))
 				goto confused;
+			*first_logical_block = block_in_file;
 		}
 
-		if (!buffer_mapped(&bh)) {
+		if (!buffer_mapped(map_bh)) {
 			fully_mapped = 0;
 			if (first_hole == blocks_per_page)
 				first_hole = page_block;
+			page_block++;
+			block_in_file++;
 			continue;
 		}
 
@@ -209,8 +250,8 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 		 * we just collected from get_block into the page's buffers
 		 * so readpage doesn't have to repeat the get_block call
 		 */
-		if (buffer_uptodate(&bh)) {
-			map_buffer_to_page(page, &bh, page_block);
+		if (buffer_uptodate(map_bh)) {
+			map_buffer_to_page(page, map_bh, page_block);
 			goto confused;
 		}
 
@@ -218,18 +259,24 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 			goto confused;	/* hole -> non-hole */
 
 		/* Contiguous blocks? */
-		if (page_block && blocks[page_block-1] != bh.b_blocknr-1)
+		if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
 			goto confused;
-		blocks[page_block] = bh.b_blocknr;
-		bdev = bh.b_bdev;
+		nblocks = map_bh->b_size >> blkbits;
+		for (relative_block = 0; ; relative_block++) {
+			if (relative_block == nblocks) {
+				clear_buffer_mapped(map_bh);
+				break;
+			} else if (page_block == blocks_per_page)
+				break;
+			blocks[page_block] = map_bh->b_blocknr+relative_block;
+			page_block++;
+			block_in_file++;
+		}
+		bdev = map_bh->b_bdev;
 	}
 
 	if (first_hole != blocks_per_page) {
-		char *kaddr = kmap_atomic(page, KM_USER0);
-		memset(kaddr + (first_hole << blkbits), 0,
-				PAGE_CACHE_SIZE - (first_hole << blkbits));
-		flush_dcache_page(page);
-		kunmap_atomic(kaddr, KM_USER0);
+		zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE);
 		if (first_hole == 0) {
 			SetPageUptodate(page);
 			unlock_page(page);
@@ -260,7 +307,10 @@ alloc_new:
 		goto alloc_new;
 	}
 
-	if (buffer_boundary(&bh) || (first_hole != blocks_per_page))
+	relative_block = block_in_file - *first_logical_block;
+	nblocks = map_bh->b_size >> blkbits;
+	if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
+	    (first_hole != blocks_per_page))
 		bio = mpage_bio_submit(READ, bio);
 	else
 		*last_block_in_bio = blocks[blocks_per_page - 1];
@@ -278,16 +328,12 @@ confused:
 }
 
 /**
- * mpage_readpages - populate an address space with some pages, and
- *   start reads against them.
- *
+ * mpage_readpages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: The address of a list_head which contains the target pages. These
 *   pages have their ->index populated and are otherwise uninitialised.
- *
 *   The page at @pages->prev has the lowest file offset, and reads should be
 *   issued in @pages->prev to @pages->next order.
- *
 * @nr_pages: The number of pages at *@pages
 * @get_block: The filesystem's block mapper function.
 *
@@ -313,6 +359,7 @@ confused:
 * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
 * submitted in the following order:
 * 	12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
+ *
 * because the indirect block has to be read to get the mappings of blocks
 * 13,14,15,16. Obviously, this impacts performance.
 *
@@ -330,26 +377,25 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	struct bio *bio = NULL;
 	unsigned page_idx;
 	sector_t last_block_in_bio = 0;
-	struct pagevec lru_pvec;
+	struct buffer_head map_bh;
+	unsigned long first_logical_block = 0;
 
-	pagevec_init(&lru_pvec, 0);
+	clear_buffer_mapped(&map_bh);
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, lru);
 
 		prefetchw(&page->flags);
 		list_del(&page->lru);
-		if (!add_to_page_cache(page, mapping,
+		if (!add_to_page_cache_lru(page, mapping,
 					page->index, GFP_KERNEL)) {
 			bio = do_mpage_readpage(bio, page,
 					nr_pages - page_idx,
-					&last_block_in_bio, get_block);
-			if (!pagevec_add(&lru_pvec, page))
-				__pagevec_lru_add(&lru_pvec);
-		} else {
-			page_cache_release(page);
+					&last_block_in_bio, &map_bh,
+					&first_logical_block,
+					get_block);
 		}
+		page_cache_release(page);
 	}
-	pagevec_lru_add(&lru_pvec);
 	BUG_ON(!list_empty(pages));
 	if (bio)
 		mpage_bio_submit(READ, bio);
@@ -364,9 +410,12 @@ int mpage_readpage(struct page *page, get_block_t get_block)
 {
 	struct bio *bio = NULL;
 	sector_t last_block_in_bio = 0;
+	struct buffer_head map_bh;
+	unsigned long first_logical_block = 0;
 
-	bio = do_mpage_readpage(bio, page, 1,
-			&last_block_in_bio, get_block);
+	clear_buffer_mapped(&map_bh);
+	bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
+			&map_bh, &first_logical_block, get_block);
 	if (bio)
 		mpage_bio_submit(READ, bio);
 	return 0;
@@ -389,11 +438,12 @@ EXPORT_SYMBOL(mpage_readpage);
 * written, so it can intelligently allocate a suitably-sized BIO. For now,
 * just allocate full-size (16-page) BIOs.
 */
-static struct bio *
-__mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
-	sector_t *last_block_in_bio, int *ret, struct writeback_control *wbc,
-	writepage_t writepage_fn)
+
+int __mpage_writepage(struct page *page, struct writeback_control *wbc,
+		      void *data)
 {
+	struct mpage_data *mpd = data;
+	struct bio *bio = mpd->bio;
 	struct address_space *mapping = page->mapping;
 	struct inode *inode = page->mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
@@ -411,6 +461,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
 	int length;
 	struct buffer_head map_bh;
 	loff_t i_size = i_size_read(inode);
+	int ret = 0;
 
 	if (page_has_buffers(page)) {
 		struct buffer_head *head = page_buffers(page);
@@ -466,13 +517,14 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
 	 * The page has no buffers: map it to disk
 	 */
 	BUG_ON(!PageUptodate(page));
-	block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
+	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
 	last_block = (i_size - 1) >> blkbits;
 	map_bh.b_page = page;
 	for (page_block = 0; page_block < blocks_per_page; ) {
 		map_bh.b_state = 0;
-		if (get_block(inode, block_in_file, &map_bh, 1))
+		map_bh.b_size = 1 << blkbits;
+		if (mpd->get_block(inode, block_in_file, &map_bh, 1))
 			goto confused;
 		if (buffer_new(&map_bh))
 			unmap_underlying_metadata(map_bh.b_bdev,
@@ -508,20 +560,16 @@ page_is_mapped:
 		 * written out to the file."
 		 */
 		unsigned offset = i_size & (PAGE_CACHE_SIZE - 1);
-		char *kaddr;
 
 		if (page->index > end_index || !offset)
 			goto confused;
-		kaddr = kmap_atomic(page, KM_USER0);
-		memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
-		flush_dcache_page(page);
-		kunmap_atomic(kaddr, KM_USER0);
+		zero_user_segment(page, offset, PAGE_CACHE_SIZE);
 	}
 
 	/*
 	 * This page will go to BIO. Do we need to send this BIO off first?
 	 */
-	if (bio && *last_block_in_bio != blocks[0] - 1)
+	if (bio && mpd->last_block_in_bio != blocks[0] - 1)
 		bio = mpage_bio_submit(WRITE, bio);
 
 alloc_new:
@@ -578,7 +626,7 @@ alloc_new:
 					boundary_block, 1 << blkbits);
 		}
 	} else {
-		*last_block_in_bio = blocks[blocks_per_page - 1];
+		mpd->last_block_in_bio = blocks[blocks_per_page - 1];
 	}
 
 	goto out;
@@ -586,29 +634,24 @@ confused:
 	if (bio)
 		bio = mpage_bio_submit(WRITE, bio);
 
-	if (writepage_fn) {
-		*ret = (*writepage_fn)(page, wbc);
+	if (mpd->use_writepage) {
+		ret = mapping->a_ops->writepage(page, wbc);
 	} else {
-		*ret = -EAGAIN;
+		ret = -EAGAIN;
 		goto out;
 	}
 	/*
 	 * The caller has a ref on the inode, so *mapping is stable
 	 */
-	if (*ret) {
-		if (*ret == -ENOSPC)
-			set_bit(AS_ENOSPC, &mapping->flags);
-		else
-			set_bit(AS_EIO, &mapping->flags);
-	}
+	mapping_set_error(mapping, ret);
 out:
-	return bio;
+	mpd->bio = bio;
+	return ret;
 }
+EXPORT_SYMBOL(__mpage_writepage);
 
 /**
- * mpage_writepages - walk the list of dirty pages of the given
- *   address space and writepage() all of them.
- *
+ * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @get_block: the filesystem's block mapper function.
@@ -630,122 +673,22 @@ int
 mpage_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, get_block_t get_block)
 {
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
-	struct bio *bio = NULL;
-	sector_t last_block_in_bio = 0;
-	int ret = 0;
-	int done = 0;
-	int (*writepage)(struct page *page, struct writeback_control *wbc);
-	struct pagevec pvec;
-	int nr_pages;
-	pgoff_t index;
-	pgoff_t end = -1;		/* Inclusive */
-	int scanned = 0;
-	int is_range = 0;
-
-	if (wbc->nonblocking && bdi_write_congested(bdi)) {
-		wbc->encountered_congestion = 1;
-		return 0;
-	}
-
-	writepage = NULL;
-	if (get_block == NULL)
-		writepage = mapping->a_ops->writepage;
-
-	pagevec_init(&pvec, 0);
-	if (wbc->sync_mode == WB_SYNC_NONE) {
-		index = mapping->writeback_index; /* Start from prev offset */
-	} else {
-		index = 0;			  /* whole-file sweep */
-		scanned = 1;
-	}
-	if (wbc->start || wbc->end) {
-		index = wbc->start >> PAGE_CACHE_SHIFT;
-		end = wbc->end >> PAGE_CACHE_SHIFT;
-		is_range = 1;
-		scanned = 1;
-	}
-retry:
-	while (!done && (index <= end) &&
-			(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			PAGECACHE_TAG_DIRTY,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
-		unsigned i;
-
-		scanned = 1;
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
-
-			/*
-			 * At this point we hold neither mapping->tree_lock nor
-			 * lock on the page itself: the page may be truncated or
-			 * invalidated (changing page->mapping to NULL), or even
-			 * swizzled back from swapper_space to tmpfs file
-			 * mapping
-			 */
-
-			lock_page(page);
-
-			if (unlikely(page->mapping != mapping)) {
-				unlock_page(page);
-				continue;
-			}
-
-			if (unlikely(is_range) && page->index > end) {
-				done = 1;
-				unlock_page(page);
-				continue;
-			}
-
-			if (wbc->sync_mode != WB_SYNC_NONE)
-				wait_on_page_writeback(page);
-
-			if (PageWriteback(page) ||
-					!clear_page_dirty_for_io(page)) {
-				unlock_page(page);
-				continue;
-			}
-
-			if (writepage) {
-				ret = (*writepage)(page, wbc);
-				if (ret) {
-					if (ret == -ENOSPC)
-						set_bit(AS_ENOSPC,
-							&mapping->flags);
-					else
-						set_bit(AS_EIO,
							&mapping->flags);
-				}
-			} else {
-				bio = __mpage_writepage(bio, page, get_block,
-						&last_block_in_bio, &ret, wbc,
-						page->mapping->a_ops->writepage);
-			}
-			if (unlikely(ret == WRITEPAGE_ACTIVATE))
-				unlock_page(page);
-			if (ret || (--(wbc->nr_to_write) <= 0))
-				done = 1;
-			if (wbc->nonblocking && bdi_write_congested(bdi)) {
-				wbc->encountered_congestion = 1;
-				done = 1;
-			}
-		}
-		pagevec_release(&pvec);
-		cond_resched();
+	int ret;
+
+	if (!get_block)
+		ret = generic_writepages(mapping, wbc);
+	else {
+		struct mpage_data mpd = {
+			.bio = NULL,
+			.last_block_in_bio = 0,
+			.get_block = get_block,
+			.use_writepage = 1,
+		};
+
+		ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
+		if (mpd.bio)
+			mpage_bio_submit(WRITE, mpd.bio);
 	}
-	if (!scanned && !done) {
-		/*
-		 * We hit the last page and there is more work to be done: wrap
-		 * back to the start of the file
-		 */
-		scanned = 1;
-		index = 0;
-		goto retry;
-	}
-	if (!is_range)
-		mapping->writeback_index = index;
-	if (bio)
-		mpage_bio_submit(WRITE, bio);
 	return ret;
 }
 EXPORT_SYMBOL(mpage_writepages);
@@ -753,15 +696,15 @@ EXPORT_SYMBOL(mpage_writepages);
 int mpage_writepage(struct page *page, get_block_t get_block,
 	struct writeback_control *wbc)
 {
-	int ret = 0;
-	struct bio *bio;
-	sector_t last_block_in_bio = 0;
-
-	bio = __mpage_writepage(NULL, page, get_block,
-			&last_block_in_bio, &ret, wbc, NULL);
-	if (bio)
-		mpage_bio_submit(WRITE, bio);
-
+	struct mpage_data mpd = {
+		.bio = NULL,
+		.last_block_in_bio = 0,
+		.get_block = get_block,
+		.use_writepage = 0,
+	};
+	int ret = __mpage_writepage(page, wbc, &mpd);
+	if (mpd.bio)
+		mpage_bio_submit(WRITE, mpd.bio);
 	return ret;
 }
 EXPORT_SYMBOL(mpage_writepage);
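
For context, a filesystem consumes the mpage helpers touched by this patch through its address_space_operations. The following is a minimal sketch, not part of the patch: "myfs" and myfs_get_block() are hypothetical names, and the wiring mirrors what ext2-style filesystems do with the get_block-based entry points shown above.

/*
 * Sketch only: glue between a hypothetical filesystem and the mpage
 * helpers. myfs_get_block is an assumed get_block_t callback that maps
 * a logical file block to a disk block for this filesystem.
 */
static int myfs_readpage(struct file *file, struct page *page)
{
	return mpage_readpage(page, myfs_get_block);
}

static int myfs_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, myfs_get_block);
}

static int myfs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	/* A NULL get_block would make mpage_writepages() fall back to
	 * generic_writepages(), per the rewritten function above. */
	return mpage_writepages(mapping, wbc, myfs_get_block);
}

static const struct address_space_operations myfs_aops = {
	.readpage	= myfs_readpage,
	.readpages	= myfs_readpages,
	.writepages	= myfs_writepages,
};

Callers that need per-page control, as the now-exported __mpage_writepage allows, instead fill in a struct mpage_data (.bio, .last_block_in_bio, .get_block, .use_writepage), which is exactly what the rewritten mpage_writepage() does.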