X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fpage-writeback.c;h=d1ba464410536570c899d6b5893c2fe5685e0afa;hb=87c6a9b253520b66e7f5e8f67a37a701eaa51cee;hp=7b0dcea4935bea292730ce7d1926fc0ea00df062;hpb=dcf975d58565880a134afb13bde511d1b873ce79;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7b0dcea..d1ba464 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -36,15 +36,6 @@ #include /* - * The maximum number of pages to writeout in a single bdflush/kupdate - * operation. We do this so we don't hold I_SYNC against an inode for - * enormous amounts of time, which would block a userspace task which has - * been forced to throttle against that inode. Also, the code reevaluates - * the dirty each time it has written this many pages. - */ -#define MAX_WRITEBACK_PAGES 1024 - -/* * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited * will look to see if it needs to force writeback or throttling. */ @@ -117,8 +108,6 @@ EXPORT_SYMBOL(laptop_mode); /* End of sysctl-exported parameters */ -static void background_writeout(unsigned long _min_pages); - /* * Scale the writeback cache size proportional to the relative writeout speeds. * @@ -320,15 +309,13 @@ static void task_dirty_limit(struct task_struct *tsk, unsigned long *pdirty) /* * */ -static DEFINE_SPINLOCK(bdi_lock); static unsigned int bdi_min_ratio; int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { int ret = 0; - unsigned long flags; - spin_lock_irqsave(&bdi_lock, flags); + spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; } else { @@ -340,27 +327,26 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) ret = -EINVAL; } } - spin_unlock_irqrestore(&bdi_lock, flags); + spin_unlock_bh(&bdi_lock); return ret; } int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) { - unsigned long flags; int ret = 0; if (max_ratio > 100) return -EINVAL; - spin_lock_irqsave(&bdi_lock, flags); + spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; } else { bdi->max_ratio = max_ratio; bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; } - spin_unlock_irqrestore(&bdi_lock, flags); + spin_unlock_bh(&bdi_lock); return ret; } @@ -499,6 +485,7 @@ static void balance_dirty_pages(struct address_space *mapping) unsigned long bdi_thresh; unsigned long pages_written = 0; unsigned long write_chunk = sync_writeback_pages(); + unsigned long pause = 1; struct backing_dev_info *bdi = mapping->backing_dev_info; @@ -541,9 +528,12 @@ static void balance_dirty_pages(struct address_space *mapping) * filesystems (i.e. NFS) in which data may have been * written to the server's write cache, but has not yet * been flushed to permanent storage. + * Only move pages to writeback if this bdi is over its + * threshold otherwise wait until the disk writes catch + * up. */ - if (bdi_nr_reclaimable) { - writeback_inodes(&wbc); + if (bdi_nr_reclaimable > bdi_thresh) { + writeback_inodes_wbc(&wbc); pages_written += write_chunk - wbc.nr_to_write; get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); @@ -572,7 +562,15 @@ static void balance_dirty_pages(struct address_space *mapping) if (pages_written >= write_chunk) break; /* We've done our duty */ - congestion_wait(WRITE, HZ/10); + schedule_timeout_interruptible(pause); + + /* + * Increase the delay for each loop, up to our previous + * default of taking a 100ms nap. + */ + pause <<= 1; + if (pause > HZ / 10) + pause = HZ / 10; } if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && @@ -591,10 +589,10 @@ static void balance_dirty_pages(struct address_space *mapping) * background_thresh, to keep the amount of dirty memory low. */ if ((laptop_mode && pages_written) || - (!laptop_mode && (global_page_state(NR_FILE_DIRTY) - + global_page_state(NR_UNSTABLE_NFS) + (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS)) > background_thresh))) - pdflush_operation(background_writeout, 0); + bdi_start_writeback(bdi, nr_writeback); } void set_page_dirty_balance(struct page *page, int page_mkwrite) @@ -607,6 +605,8 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite) } } +static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0; + /** * balance_dirty_pages_ratelimited_nr - balance dirty memory state * @mapping: address_space which was dirtied @@ -624,7 +624,6 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite) void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, unsigned long nr_pages_dirtied) { - static DEFINE_PER_CPU(unsigned long, ratelimits) = 0; unsigned long ratelimit; unsigned long *p; @@ -637,7 +636,7 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, * tasks in balance_dirty_pages(). Period. */ preempt_disable(); - p = &__get_cpu_var(ratelimits); + p = &__get_cpu_var(bdp_ratelimits); *p += nr_pages_dirtied; if (unlikely(*p >= ratelimit)) { *p = 0; @@ -666,7 +665,7 @@ void throttle_vm_writeout(gfp_t gfp_mask) if (global_page_state(NR_UNSTABLE_NFS) + global_page_state(NR_WRITEBACK) <= dirty_thresh) break; - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); /* * The caller might hold locks which can prevent IO completion @@ -678,153 +677,35 @@ void throttle_vm_writeout(gfp_t gfp_mask) } } -/* - * writeback at least _min_pages, and keep writing until the amount of dirty - * memory is less than the background threshold, or until we're all clean. - */ -static void background_writeout(unsigned long _min_pages) -{ - long min_pages = _min_pages; - struct writeback_control wbc = { - .bdi = NULL, - .sync_mode = WB_SYNC_NONE, - .older_than_this = NULL, - .nr_to_write = 0, - .nonblocking = 1, - .range_cyclic = 1, - }; - - for ( ; ; ) { - unsigned long background_thresh; - unsigned long dirty_thresh; - - get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); - if (global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS) < background_thresh - && min_pages <= 0) - break; - wbc.more_io = 0; - wbc.encountered_congestion = 0; - wbc.nr_to_write = MAX_WRITEBACK_PAGES; - wbc.pages_skipped = 0; - writeback_inodes(&wbc); - min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; - if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { - /* Wrote less than expected */ - if (wbc.encountered_congestion || wbc.more_io) - congestion_wait(WRITE, HZ/10); - else - break; - } - } -} - -/* - * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back - * the whole world. Returns 0 if a pdflush thread was dispatched. Returns - * -1 if all pdflush threads were busy. - */ -int wakeup_pdflush(long nr_pages) -{ - if (nr_pages == 0) - nr_pages = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); - return pdflush_operation(background_writeout, nr_pages); -} - -static void wb_timer_fn(unsigned long unused); static void laptop_timer_fn(unsigned long unused); -static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0); static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); /* - * Periodic writeback of "old" data. - * - * Define "old": the first time one of an inode's pages is dirtied, we mark the - * dirtying-time in the inode's address_space. So this periodic writeback code - * just walks the superblock inode list, writing back any inodes which are - * older than a specific point in time. - * - * Try to run once per dirty_writeback_interval. But if a writeback event - * takes longer than a dirty_writeback_interval interval, then leave a - * one-second gap. - * - * older_than_this takes precedence over nr_to_write. So we'll only write back - * all dirty pages if they are all attached to "old" mappings. - */ -static void wb_kupdate(unsigned long arg) -{ - unsigned long oldest_jif; - unsigned long start_jif; - unsigned long next_jif; - long nr_to_write; - struct writeback_control wbc = { - .bdi = NULL, - .sync_mode = WB_SYNC_NONE, - .older_than_this = &oldest_jif, - .nr_to_write = 0, - .nonblocking = 1, - .for_kupdate = 1, - .range_cyclic = 1, - }; - - sync_supers(); - - oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); - start_jif = jiffies; - next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10); - nr_to_write = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS) + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); - while (nr_to_write > 0) { - wbc.more_io = 0; - wbc.encountered_congestion = 0; - wbc.nr_to_write = MAX_WRITEBACK_PAGES; - writeback_inodes(&wbc); - if (wbc.nr_to_write > 0) { - if (wbc.encountered_congestion || wbc.more_io) - congestion_wait(WRITE, HZ/10); - else - break; /* All the old data is written */ - } - nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; - } - if (time_before(next_jif, jiffies + HZ)) - next_jif = jiffies + HZ; - if (dirty_writeback_interval) - mod_timer(&wb_timer, next_jif); -} - -/* * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs */ int dirty_writeback_centisecs_handler(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { proc_dointvec(table, write, file, buffer, length, ppos); - if (dirty_writeback_interval) - mod_timer(&wb_timer, jiffies + - msecs_to_jiffies(dirty_writeback_interval * 10)); - else - del_timer(&wb_timer); return 0; } -static void wb_timer_fn(unsigned long unused) -{ - if (pdflush_operation(wb_kupdate, 0) < 0) - mod_timer(&wb_timer, jiffies + HZ); /* delay 1 second */ -} - -static void laptop_flush(unsigned long unused) +static void do_laptop_sync(struct work_struct *work) { - sys_sync(); + wakeup_flusher_threads(0); + kfree(work); } static void laptop_timer_fn(unsigned long unused) { - pdflush_operation(laptop_flush, 0); + struct work_struct *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (work) { + INIT_WORK(work, do_laptop_sync); + schedule_work(work); + } } /* @@ -907,8 +788,6 @@ void __init page_writeback_init(void) { int shift; - mod_timer(&wb_timer, - jiffies + msecs_to_jiffies(dirty_writeback_interval * 10)); writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); @@ -1142,12 +1021,10 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) if (wbc->nr_to_write <= 0) return 0; - wbc->for_writepages = 1; if (mapping->a_ops->writepages) ret = mapping->a_ops->writepages(mapping, wbc); else ret = generic_writepages(mapping, wbc); - wbc->for_writepages = 0; return ret; }