X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=blobdiff_plain;f=mm%2Fpage-writeback.c;h=bbd396ac9546f5b0625c34facbc0b6d33fe2163a;hp=25e7770309b871e42dbaf7ccedf0132b265eacc2;hb=d87815cb2090e07b0b0b2d73dc9740706e92c80c;hpb=03ba3782e8dcc5b0e1efe440d33084f066e38cae diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 25e7770..bbd396a 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -44,18 +44,21 @@ static long ratelimit_pages = 32; /* * When balance_dirty_pages decides that the caller needs to perform some * non-background writeback, this is how many pages it will attempt to write. - * It should be somewhat larger than RATELIMIT_PAGES to ensure that reasonably + * It should be somewhat larger than dirtied pages to ensure that reasonably * large amounts of I/O are submitted. */ -static inline long sync_writeback_pages(void) +static inline long sync_writeback_pages(unsigned long dirtied) { - return ratelimit_pages + ratelimit_pages / 2; + if (dirtied < ratelimit_pages) + dirtied = ratelimit_pages; + + return dirtied + dirtied / 2; } /* The following parameters are exported via /proc/sys/vm */ /* - * Start background writeback (via pdflush) at this percentage + * Start background writeback (via writeback threads) at this percentage */ int dirty_background_ratio = 10; @@ -155,37 +158,37 @@ static void update_completion_period(void) } int dirty_background_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) dirty_background_bytes = 0; return ret; } int dirty_background_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) dirty_background_ratio = 0; return ret; } int dirty_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; int ret; - ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_ratio != old_ratio) { update_completion_period(); vm_dirty_bytes = 0; @@ -195,13 +198,13 @@ int dirty_ratio_handler(struct ctl_table *table, int write, int dirty_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { unsigned long old_bytes = vm_dirty_bytes; int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_bytes != old_bytes) { update_completion_period(); vm_dirty_ratio = 0; @@ -315,7 +318,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { int ret = 0; - spin_lock(&bdi_lock); + spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; } else { @@ -327,7 +330,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) ret = -EINVAL; } } - spin_unlock(&bdi_lock); + spin_unlock_bh(&bdi_lock); return ret; } @@ -339,14 +342,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) if (max_ratio > 100) return -EINVAL; - spin_lock(&bdi_lock); + spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; } else { bdi->max_ratio = max_ratio; bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; } - spin_unlock(&bdi_lock); + spin_unlock_bh(&bdi_lock); return ret; } @@ -380,7 +383,8 @@ static unsigned long highmem_dirtyable_memory(unsigned long total) struct zone *z = &NODE_DATA(node)->node_zones[ZONE_HIGHMEM]; - x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z); + x += zone_page_state(z, NR_FREE_PAGES) + + zone_reclaimable_pages(z); } /* * Make sure that the number of highmem pages is never larger @@ -404,7 +408,7 @@ unsigned long determine_dirtyable_memory(void) { unsigned long x; - x = global_page_state(NR_FREE_PAGES) + global_lru_pages(); + x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages(); if (!vm_highmem_is_dirtyable) x -= highmem_dirtyable_memory(x); @@ -473,10 +477,11 @@ get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, * balance_dirty_pages() must be called by processes which are generating dirty * data. It looks at the number of dirty pages in the machine and will force * the caller to perform writeback if the system is over `vm_dirty_ratio'. - * If we're over `background_thresh' then pdflush is woken to perform some - * writeout. + * If we're over `background_thresh' then the writeback threads are woken to + * perform some writeout. */ -static void balance_dirty_pages(struct address_space *mapping) +static void balance_dirty_pages(struct address_space *mapping, + unsigned long write_chunk) { long nr_reclaimable, bdi_nr_reclaimable; long nr_writeback, bdi_nr_writeback; @@ -484,7 +489,7 @@ static void balance_dirty_pages(struct address_space *mapping) unsigned long dirty_thresh; unsigned long bdi_thresh; unsigned long pages_written = 0; - unsigned long write_chunk = sync_writeback_pages(); + unsigned long pause = 1; struct backing_dev_info *bdi = mapping->backing_dev_info; @@ -561,7 +566,16 @@ static void balance_dirty_pages(struct address_space *mapping) if (pages_written >= write_chunk) break; /* We've done our duty */ - schedule_timeout(1); + __set_current_state(TASK_INTERRUPTIBLE); + io_schedule_timeout(pause); + + /* + * Increase the delay for each loop, up to our previous + * default of taking a 100ms nap. + */ + pause <<= 1; + if (pause > HZ / 10) + pause = HZ / 10; } if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && @@ -569,7 +583,7 @@ static void balance_dirty_pages(struct address_space *mapping) bdi->dirty_exceeded = 0; if (writeback_in_progress(bdi)) - return; /* pdflush is already working this queue */ + return; /* * In laptop mode, we wait until hitting the higher threshold before @@ -580,18 +594,10 @@ static void balance_dirty_pages(struct address_space *mapping) * background_thresh, to keep the amount of dirty memory low. */ if ((laptop_mode && pages_written) || - (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY) - + global_page_state(NR_UNSTABLE_NFS)) - > background_thresh))) { - struct writeback_control wbc = { - .bdi = bdi, - .sync_mode = WB_SYNC_NONE, - .nr_to_write = nr_writeback, - }; - - - bdi_start_writeback(&wbc); - } + (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS)) + > background_thresh))) + bdi_start_writeback(bdi, NULL, 0); } void set_page_dirty_balance(struct page *page, int page_mkwrite) @@ -604,6 +610,8 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite) } } +static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0; + /** * balance_dirty_pages_ratelimited_nr - balance dirty memory state * @mapping: address_space which was dirtied @@ -621,7 +629,6 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite) void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, unsigned long nr_pages_dirtied) { - static DEFINE_PER_CPU(unsigned long, ratelimits) = 0; unsigned long ratelimit; unsigned long *p; @@ -634,12 +641,13 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, * tasks in balance_dirty_pages(). Period. */ preempt_disable(); - p = &__get_cpu_var(ratelimits); + p = &__get_cpu_var(bdp_ratelimits); *p += nr_pages_dirtied; if (unlikely(*p >= ratelimit)) { + ratelimit = sync_writeback_pages(*p); *p = 0; preempt_enable(); - balance_dirty_pages(mapping); + balance_dirty_pages(mapping, ratelimit); return; } preempt_enable(); @@ -675,35 +683,31 @@ void throttle_vm_writeout(gfp_t gfp_mask) } } -static void laptop_timer_fn(unsigned long unused); - -static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); - /* * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs */ int dirty_writeback_centisecs_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); + bdi_arm_supers_timer(); return 0; } -static void do_laptop_sync(struct work_struct *work) +#ifdef CONFIG_BLOCK +void laptop_mode_timer_fn(unsigned long data) { - wakeup_flusher_threads(0); - kfree(work); -} + struct request_queue *q = (struct request_queue *)data; + int nr_pages = global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS); -static void laptop_timer_fn(unsigned long unused) -{ - struct work_struct *work; + /* + * We want to write everything out, not just down to the dirty + * threshold + */ - work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (work) { - INIT_WORK(work, do_laptop_sync); - schedule_work(work); - } + if (bdi_has_dirty_io(&q->backing_dev_info)) + bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages); } /* @@ -711,9 +715,9 @@ static void laptop_timer_fn(unsigned long unused) * of all dirty data a few seconds from now. If the flush is already scheduled * then push it back - the user is still using the disk. */ -void laptop_io_completion(void) +void laptop_io_completion(struct backing_dev_info *info) { - mod_timer(&laptop_mode_wb_timer, jiffies + laptop_mode); + mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode); } /* @@ -723,8 +727,16 @@ void laptop_io_completion(void) */ void laptop_sync_completion(void) { - del_timer(&laptop_mode_wb_timer); + struct backing_dev_info *bdi; + + rcu_read_lock(); + + list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) + del_timer(&bdi->laptop_mode_wb_timer); + + rcu_read_unlock(); } +#endif /* * If ratelimit_pages is too high then we can get into dirty-data overload @@ -813,7 +825,6 @@ int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data) { - struct backing_dev_info *bdi = mapping->backing_dev_info; int ret = 0; int done = 0; struct pagevec pvec; @@ -824,12 +835,6 @@ int write_cache_pages(struct address_space *mapping, pgoff_t done_index; int cycled; int range_whole = 0; - long nr_to_write = wbc->nr_to_write; - - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - return 0; - } pagevec_init(&pvec, 0); if (wbc->range_cyclic) { @@ -846,7 +851,22 @@ int write_cache_pages(struct address_space *mapping, if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; cycled = 1; /* ignore range_cyclic tests */ + + /* + * If this is a data integrity sync, cap the writeback to the + * current end of file. Any extension to the file that occurs + * after this is a new write and we don't need to write those + * pages out to fulfil our data integrity requirements. If we + * try to write them out, we can get stuck in this scan until + * the concurrent writer stops adding dirty pages and extending + * EOF. + */ + if (wbc->sync_mode == WB_SYNC_ALL && + wbc->range_end == LLONG_MAX) { + end = i_size_read(mapping->host) >> PAGE_CACHE_SHIFT; + } } + retry: done_index = index; while (!done && (index <= end)) { @@ -929,11 +949,10 @@ continue_unlock: done = 1; break; } - } + } - if (nr_to_write > 0) { - nr_to_write--; - if (nr_to_write == 0 && + if (wbc->nr_to_write > 0) { + if (--wbc->nr_to_write == 0 && wbc->sync_mode == WB_SYNC_NONE) { /* * We stop writing back only if we are @@ -949,12 +968,6 @@ continue_unlock: break; } } - - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - done = 1; - break; - } } pagevec_release(&pvec); cond_resched(); @@ -970,11 +983,8 @@ continue_unlock: end = writeback_index - 1; goto retry; } - if (!wbc->no_nrwrite_index_update) { - if (wbc->range_cyclic || (range_whole && nr_to_write > 0)) - mapping->writeback_index = done_index; - wbc->nr_to_write = nr_to_write; - } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = done_index; return ret; } @@ -1019,12 +1029,10 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) if (wbc->nr_to_write <= 0) return 0; - wbc->for_writepages = 1; if (mapping->a_ops->writepages) ret = mapping->a_ops->writepages(mapping, wbc); else ret = generic_writepages(mapping, wbc); - wbc->for_writepages = 0; return ret; } @@ -1148,6 +1156,13 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) EXPORT_SYMBOL(redirty_page_for_writepage); /* + * Dirty a page. + * + * For pages with a mapping this should be done under the page lock + * for the benefit of asynchronous memory errors who prefer a consistent + * dirty state. This rule can be broken in some special cases, + * but should be better not to. + * * If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. */