X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fbuffer.c;h=0e5ec371ce727e8fcccd79d33108b9aca1f051ca;hb=25a8b2545bfb2238f6f66da8032d768c6870c863;hp=7ff6e9346faed9f149948b8756934afbf79b9af6;hpb=72ed3d035855841ad611ee48b20909e9619d4a79;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/buffer.c b/fs/buffer.c index 7ff6e93..0e5ec37 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -44,7 +43,6 @@ #include static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); -static void invalidate_bh_lrus(void); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) @@ -333,7 +331,7 @@ out: we think the disk contains more recent information than the buffercache. The update == 1 pass marks the buffers we need to update, the update == 2 pass does the actual I/O. */ -void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) +void invalidate_bdev(struct block_device *bdev) { struct address_space *mapping = bdev->bd_inode->i_mapping; @@ -341,11 +339,6 @@ void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) return; invalidate_bh_lrus(); - /* - * FIXME: what about destroy_dirty_buffers? - * We really want to use invalidate_inode_pages2() for - * that, but not until that's cleaned up. - */ invalidate_mapping_pages(mapping, 0, -1); } @@ -363,7 +356,7 @@ static void free_more_memory(void) for_each_online_pgdat(pgdat) { zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones; if (*zones) - try_to_free_pages(zones, GFP_NOFS); + try_to_free_pages(zones, 0, GFP_NOFS); } } @@ -683,6 +676,39 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) EXPORT_SYMBOL(mark_buffer_dirty_inode); /* + * Mark the page dirty, and set it dirty in the radix tree, and mark the inode + * dirty. + * + * If warn is true, then emit a warning if the page is not uptodate and has + * not been truncated. + */ +static int __set_page_dirty(struct page *page, + struct address_space *mapping, int warn) +{ + if (unlikely(!mapping)) + return !TestSetPageDirty(page); + + if (TestSetPageDirty(page)) + return 0; + + write_lock_irq(&mapping->tree_lock); + if (page->mapping) { /* Race with truncate? */ + WARN_ON_ONCE(warn && !PageUptodate(page)); + + if (mapping_cap_account_dirty(mapping)) { + __inc_zone_page_state(page, NR_FILE_DIRTY); + task_io_account_write(PAGE_CACHE_SIZE); + } + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + } + write_unlock_irq(&mapping->tree_lock); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + + return 1; +} + +/* * Add a page to the dirty page list. * * It is a sad fact of life that this function is called from several places @@ -709,7 +735,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); */ int __set_page_dirty_buffers(struct page *page) { - struct address_space * const mapping = page_mapping(page); + struct address_space *mapping = page_mapping(page); if (unlikely(!mapping)) return !TestSetPageDirty(page); @@ -726,21 +752,7 @@ int __set_page_dirty_buffers(struct page *page) } spin_unlock(&mapping->private_lock); - if (TestSetPageDirty(page)) - return 0; - - write_lock_irq(&mapping->tree_lock); - if (page->mapping) { /* Race with truncate? 
*/ - if (mapping_cap_account_dirty(mapping)) { - __inc_zone_page_state(page, NR_FILE_DIRTY); - task_io_account_write(PAGE_CACHE_SIZE); - } - radix_tree_tag_set(&mapping->page_tree, - page_index(page), PAGECACHE_TAG_DIRTY); - } - write_unlock_irq(&mapping->tree_lock); - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return 1; + return __set_page_dirty(page, mapping, 1); } EXPORT_SYMBOL(__set_page_dirty_buffers); @@ -988,7 +1000,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, struct page *page; struct buffer_head *bh; - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); + page = find_or_create_page(inode->i_mapping, index, + (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); if (!page) return NULL; @@ -1032,11 +1045,6 @@ failed: /* * Create buffers for the specified block device block's page. If * that page was dirty, the buffers are set dirty also. - * - * Except that's a bug. Attaching dirty buffers to a dirty - * blockdev's page can result in filesystem corruption, because - * some of those buffers may be aliases of filesystem data. - * grow_dev_page() will go BUG() if this happens. */ static int grow_buffers(struct block_device *bdev, sector_t block, int size) @@ -1143,8 +1151,9 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) */ void fastcall mark_buffer_dirty(struct buffer_head *bh) { + WARN_ON_ONCE(!buffer_uptodate(bh)); if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) - __set_page_dirty_nobuffers(bh->b_page); + __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0); } /* @@ -1283,11 +1292,11 @@ static void bh_lru_install(struct buffer_head *bh) * Look up the bh in this cpu's LRU. If it's there, move it to the head. */ static struct buffer_head * -lookup_bh_lru(struct block_device *bdev, sector_t block, int size) +lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *ret = NULL; struct bh_lru *lru; - int i; + unsigned int i; check_irqs_on(); bh_lru_lock(); @@ -1319,7 +1328,7 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, int size) * NULL */ struct buffer_head * -__find_get_block(struct block_device *bdev, sector_t block, int size) +__find_get_block(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = lookup_bh_lru(bdev, block, size); @@ -1347,7 +1356,7 @@ EXPORT_SYMBOL(__find_get_block); * attempt is failing. FIXME, perhaps? */ struct buffer_head * -__getblk(struct block_device *bdev, sector_t block, int size) +__getblk(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = __find_get_block(bdev, block, size); @@ -1361,7 +1370,7 @@ EXPORT_SYMBOL(__getblk); /* * Do async read-ahead on a buffer.. */ -void __breadahead(struct block_device *bdev, sector_t block, int size) +void __breadahead(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = __getblk(bdev, block, size); if (likely(bh)) { @@ -1381,7 +1390,7 @@ EXPORT_SYMBOL(__breadahead); * It returns NULL if the block was unreadable. 
*/ struct buffer_head * -__bread(struct block_device *bdev, sector_t block, int size) +__bread(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = __getblk(bdev, block, size); @@ -1408,7 +1417,7 @@ static void invalidate_bh_lru(void *arg) put_cpu_var(bh_lrus); } -static void invalidate_bh_lrus(void) +void invalidate_bh_lrus(void) { on_each_cpu(invalidate_bh_lru, NULL, 1, 1); } @@ -1440,6 +1449,7 @@ static void discard_buffer(struct buffer_head * bh) clear_buffer_req(bh); clear_buffer_new(bh); clear_buffer_delay(bh); + clear_buffer_unwritten(bh); unlock_buffer(bh); } @@ -1699,17 +1709,8 @@ done: * clean. Someone wrote them back by hand with * ll_rw_block/submit_bh. A rare case. */ - int uptodate = 1; - do { - if (!buffer_uptodate(bh)) { - uptodate = 0; - break; - } - bh = bh->b_this_page; - } while (bh != head); - if (uptodate) - SetPageUptodate(page); end_page_writeback(page); + /* * The page and buffer_heads can be released at any time from * here on. @@ -1741,8 +1742,8 @@ recover: } while ((bh = bh->b_this_page) != head); SetPageError(page); BUG_ON(PageWriteback(page)); + mapping_set_error(page->mapping, err); set_page_writeback(page); - unlock_page(page); do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { @@ -1752,6 +1753,7 @@ recover: } bh = next; } while (bh != head); + unlock_page(page); goto done; } @@ -1823,6 +1825,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page, continue; } if (!buffer_uptodate(bh) && !buffer_delay(bh) && + !buffer_unwritten(bh) && (block_start < from || block_end > to)) { ll_rw_block(READ, 1, &bh); *wait_bh++=bh; @@ -1859,13 +1862,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (block_start >= to) break; if (buffer_new(bh)) { - void *kaddr; - clear_buffer_new(bh); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr+block_start, 0, bh->b_size); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, block_start, bh->b_size, KM_USER0); set_buffer_uptodate(bh); mark_buffer_dirty(bh); } @@ -1953,10 +1951,8 @@ int block_read_full_page(struct page *page, get_block_t *get_block) SetPageError(page); } if (!buffer_mapped(bh)) { - void *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + i * blocksize, 0, blocksize); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, i * blocksize, blocksize, + KM_USER0); if (!err) set_buffer_uptodate(bh); continue; @@ -2099,7 +2095,6 @@ int cont_prepare_write(struct page *page, unsigned offset, long status; unsigned zerofrom; unsigned blocksize = 1 << inode->i_blkbits; - void *kaddr; while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) { status = -ENOMEM; @@ -2121,10 +2116,8 @@ int cont_prepare_write(struct page *page, unsigned offset, PAGE_CACHE_SIZE, get_block); if (status) goto out_unmap; - kaddr = kmap_atomic(new_page, KM_USER0); - memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom); - flush_dcache_page(new_page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(new_page, zerofrom, PAGE_CACHE_SIZE - zerofrom, + KM_USER0); generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); unlock_page(new_page); page_cache_release(new_page); @@ -2151,10 +2144,7 @@ int cont_prepare_write(struct page *page, unsigned offset, if (status) goto out1; if (zerofrom < offset) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr+zerofrom, 0, offset-zerofrom); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, zerofrom, 
offset - zerofrom, KM_USER0); __block_commit_write(inode, page, zerofrom, offset); } return 0; @@ -2204,6 +2194,52 @@ int generic_commit_write(struct file *file, struct page *page, return 0; } +/* + * block_page_mkwrite() is not allowed to change the file size as it gets + * called from a page fault handler when a page is first dirtied. Hence we must + * be careful to check for EOF conditions here. We set the page up correctly + * for a written page which means we get ENOSPC checking when writing into + * holes and correct delalloc and unwritten extent mapping on filesystems that + * support these features. + * + * We are not allowed to take the i_mutex here so we have to play games to + * protect against truncate races as the page could now be beyond EOF. Because + * vmtruncate() writes the inode size before removing pages, once we have the + * page lock we can determine safely if the page is beyond EOF. If it is not + * beyond EOF, then the page is guaranteed safe against truncation until we + * unlock the page. + */ +int +block_page_mkwrite(struct vm_area_struct *vma, struct page *page, + get_block_t get_block) +{ + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + unsigned long end; + loff_t size; + int ret = -EINVAL; + + lock_page(page); + size = i_size_read(inode); + if ((page->mapping != inode->i_mapping) || + (page_offset(page) > size)) { + /* page got truncated out from underneath us */ + goto out_unlock; + } + + /* page is wholly or partially inside EOF */ + if (((page->index + 1) << PAGE_CACHE_SHIFT) > size) + end = size & ~PAGE_CACHE_MASK; + else + end = PAGE_CACHE_SIZE; + + ret = block_prepare_write(page, 0, end, get_block); + if (!ret) + ret = block_commit_write(page, 0, end); + +out_unlock: + unlock_page(page); + return ret; +} /* * nobh_prepare_write()'s prereads are special: the buffer_heads are freed @@ -2246,7 +2282,6 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, int i; int ret = 0; int is_mapped_to_disk = 1; - int dirtied_it = 0; if (PageMappedToDisk(page)) return 0; @@ -2283,14 +2318,10 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, continue; if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) { kaddr = kmap_atomic(page, KM_USER0); - if (block_start < from) { + if (block_start < from) memset(kaddr+block_start, 0, from-block_start); - dirtied_it = 1; - } - if (block_end > to) { + if (block_end > to) memset(kaddr + to, 0, block_end - to); - dirtied_it = 1; - } flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); continue; @@ -2345,17 +2376,6 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, if (is_mapped_to_disk) SetPageMappedToDisk(page); - SetPageUptodate(page); - - /* - * Setting the page dirty here isn't necessary for the prepare_write - * function - commit_write will do that. But if/when this function is - * used within the pagefault handler to ensure that all mmapped pages - * have backing space in the filesystem, we will need to dirty the page - * if its contents were altered. - */ - if (dirtied_it) - set_page_dirty(page); return 0; @@ -2369,22 +2389,24 @@ failed: * Error recovery is pretty slack. Clear the page and mark it dirty * so we'll later zero out any blocks which _were_ allocated. 
*/ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr, 0, PAGE_CACHE_SIZE); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); SetPageUptodate(page); set_page_dirty(page); return ret; } EXPORT_SYMBOL(nobh_prepare_write); +/* + * Make sure any changes to nobh_commit_write() are reflected in + * nobh_truncate_page(), since it doesn't call commit_write(). + */ int nobh_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + SetPageUptodate(page); set_page_dirty(page); if (pos > inode->i_size) { i_size_write(inode, pos); @@ -2406,7 +2428,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block, loff_t i_size = i_size_read(inode); const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset; - void *kaddr; int ret; /* Is the page fully inside i_size? */ @@ -2437,10 +2458,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); out: ret = mpage_writepage(page, get_block, wbc); if (ret == -EAGAIN) @@ -2461,7 +2479,6 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from) unsigned to; struct page *page; const struct address_space_operations *a_ops = mapping->a_ops; - char *kaddr; int ret = 0; if ((offset & (blocksize - 1)) == 0) @@ -2475,10 +2492,13 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from) to = (offset + blocksize) & ~(blocksize - 1); ret = a_ops->prepare_write(NULL, page, offset, to); if (ret == 0) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, + KM_USER0); + /* + * It would be more correct to call aops->commit_write() + * here, but this is more efficient. + */ + SetPageUptodate(page); set_page_dirty(page); } unlock_page(page); @@ -2499,7 +2519,6 @@ int block_truncate_page(struct address_space *mapping, struct inode *inode = mapping->host; struct page *page; struct buffer_head *bh; - void *kaddr; int err; blocksize = 1 << inode->i_blkbits; @@ -2544,7 +2563,7 @@ int block_truncate_page(struct address_space *mapping, if (PageUptodate(page)) set_buffer_uptodate(bh); - if (!buffer_uptodate(bh) && !buffer_delay(bh)) { + if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) { err = -EIO; ll_rw_block(READ, 1, &bh); wait_on_buffer(bh); @@ -2553,11 +2572,7 @@ int block_truncate_page(struct address_space *mapping, goto unlock; } - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - + zero_user_page(page, offset, length, KM_USER0); mark_buffer_dirty(bh); err = 0; @@ -2578,7 +2593,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block, loff_t i_size = i_size_read(inode); const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset; - void *kaddr; /* Is the page fully inside i_size? 
*/ if (page->index < end_index) @@ -2604,10 +2618,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); return __block_write_full_page(inode, page, get_block, wbc); } @@ -2949,8 +2960,9 @@ static void recalc_bh_state(void) struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { - struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); + struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); if (ret) { + INIT_LIST_HEAD(&ret->b_assoc_buffers); get_cpu_var(bh_accounting).nr++; recalc_bh_state(); put_cpu_var(bh_accounting); @@ -2969,18 +2981,6 @@ void free_buffer_head(struct buffer_head *bh) } EXPORT_SYMBOL(free_buffer_head); -static void -init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags) -{ - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - struct buffer_head * bh = (struct buffer_head *)data; - - memset(bh, 0, sizeof(*bh)); - INIT_LIST_HEAD(&bh->b_assoc_buffers); - } -} - static void buffer_exit_cpu(int cpu) { int i; @@ -2998,7 +2998,7 @@ static void buffer_exit_cpu(int cpu) static int buffer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - if (action == CPU_DEAD) + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) buffer_exit_cpu((unsigned long)hcpu); return NOTIFY_OK; } @@ -3007,12 +3007,8 @@ void __init buffer_init(void) { int nrpages; - bh_cachep = kmem_cache_create("buffer_head", - sizeof(struct buffer_head), 0, - (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| - SLAB_MEM_SPREAD), - init_buffer_head, - NULL); + bh_cachep = KMEM_CACHE(buffer_head, + SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); /* * Limit the bh occupancy to 10% of ZONE_NORMAL @@ -3027,6 +3023,7 @@ EXPORT_SYMBOL(__brelse); EXPORT_SYMBOL(__wait_on_buffer); EXPORT_SYMBOL(block_commit_write); EXPORT_SYMBOL(block_prepare_write); +EXPORT_SYMBOL(block_page_mkwrite); EXPORT_SYMBOL(block_read_full_page); EXPORT_SYMBOL(block_sync_page); EXPORT_SYMBOL(block_truncate_page);
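
Usage note: the patch above adds block_page_mkwrite() and exports it so that a
filesystem can get ENOSPC checking and correct delalloc/unwritten-extent
handling at the moment a page in a shared writable mapping is first dirtied,
rather than discovering the failure at writeback time.  Below is a minimal
sketch of how a filesystem might wire it up; the names myfs_get_block,
myfs_page_mkwrite and myfs_file_vm_ops are hypothetical and only illustrate
the calling convention, they are not part of this patch.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/buffer_head.h>

/* The filesystem's ordinary get_block_t block-mapping callback (hypothetical). */
static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);

/*
 * ->page_mkwrite() is called when a previously read-only page in a shared
 * writable mapping is about to become writable.  block_page_mkwrite() locks
 * the page, re-checks i_size under the page lock to guard against a racing
 * truncate, and then runs block_prepare_write()/block_commit_write() over
 * the part of the page that lies inside EOF.
 */
static int myfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
	return block_page_mkwrite(vma, page, myfs_get_block);
}

/*
 * Installed on the vma from the filesystem's ->mmap() method; filemap_fault
 * is the generic fault handler on kernels of this vintage.
 */
static struct vm_operations_struct myfs_file_vm_ops = {
	.fault		= filemap_fault,
	.page_mkwrite	= myfs_page_mkwrite,
};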
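
A second recurring change above replaces the open-coded
kmap_atomic()/memset()/flush_dcache_page()/kunmap_atomic() sequences with
zero_user_page() from linux/highmem.h.  As the one-for-one substitutions in
the diff suggest, the helper is expected to be just that sequence wrapped up,
roughly:

/* Zero 'size' bytes of 'page' starting at 'offset', using an atomic kmap. */
static inline void zero_user_page(struct page *page, unsigned offset,
				  unsigned size, enum km_type km)
{
	void *addr = kmap_atomic(page, km);

	memset(addr + offset, 0, size);
	flush_dcache_page(page);
	kunmap_atomic(addr, km);
}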