vfs: Remove the range_cont writeback mode.
[safe/jmp/linux-2.6] / mm / page-writeback.c
index d8c21e5..e373f14 100644 (file)
@@ -7,7 +7,7 @@
  * Contains functions related to writing back dirty pages at the
  * address_space level.
  *
- * 10Apr2002   akpm@zip.com.au
+ * 10Apr2002   Andrew Morton
  *             Initial version
  */
 
@@ -69,6 +69,12 @@ static inline long sync_writeback_pages(void)
 int dirty_background_ratio = 5;
 
 /*
+ * free highmem will not be subtracted from the total free memory
+ * for calculating free ratios if vm_highmem_is_dirtyable is true
+ */
+int vm_highmem_is_dirtyable;
+
+/*
  * The generator of dirty data starts writeback at this percentage
  */
 int vm_dirty_ratio = 10;
@@ -120,8 +126,6 @@ static void background_writeout(unsigned long _min_pages);
 static struct prop_descriptor vm_completions;
 static struct prop_descriptor vm_dirties;
 
-static unsigned long determine_dirtyable_memory(void);
-
 /*
  * couple the period to the dirty_ratio:
  *
@@ -158,8 +162,19 @@ int dirty_ratio_handler(struct ctl_table *table, int write,
  */
 static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
 {
-       __prop_inc_percpu(&vm_completions, &bdi->completions);
+       __prop_inc_percpu_max(&vm_completions, &bdi->completions,
+                             bdi->max_prop_frac);
+}
+
+void bdi_writeout_inc(struct backing_dev_info *bdi)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       __bdi_writeout_inc(bdi);
+       local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(bdi_writeout_inc);
 
 static inline void task_dirty_inc(struct task_struct *tsk)
 {
@@ -194,7 +209,8 @@ clip_bdi_dirty_limit(struct backing_dev_info *bdi, long dirty, long *pbdi_dirty)
        avail_dirty = dirty -
                (global_page_state(NR_FILE_DIRTY) +
                 global_page_state(NR_WRITEBACK) +
-                global_page_state(NR_UNSTABLE_NFS));
+                global_page_state(NR_UNSTABLE_NFS) +
+                global_page_state(NR_WRITEBACK_TEMP));
 
        if (avail_dirty < 0)
                avail_dirty = 0;
@@ -219,7 +235,7 @@ static inline void task_dirties_fraction(struct task_struct *tsk,
  *
  *   dirty -= (dirty/8) * p_{t}
  */
-void task_dirty_limit(struct task_struct *tsk, long *pdirty)
+static void task_dirty_limit(struct task_struct *tsk, long *pdirty)
 {
        long numerator, denominator;
        long dirty = *pdirty;
@@ -237,6 +253,55 @@ void task_dirty_limit(struct task_struct *tsk, long *pdirty)
 }
 
 /*
+ *
+ */
+static DEFINE_SPINLOCK(bdi_lock);
+static unsigned int bdi_min_ratio;
+
+int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
+{
+       int ret = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&bdi_lock, flags);
+       if (min_ratio > bdi->max_ratio) {
+               ret = -EINVAL;
+       } else {
+               min_ratio -= bdi->min_ratio;
+               if (bdi_min_ratio + min_ratio < 100) {
+                       bdi_min_ratio += min_ratio;
+                       bdi->min_ratio += min_ratio;
+               } else {
+                       ret = -EINVAL;
+               }
+       }
+       spin_unlock_irqrestore(&bdi_lock, flags);
+
+       return ret;
+}
+
+int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
+{
+       unsigned long flags;
+       int ret = 0;
+
+       if (max_ratio > 100)
+               return -EINVAL;
+
+       spin_lock_irqsave(&bdi_lock, flags);
+       if (bdi->min_ratio > max_ratio) {
+               ret = -EINVAL;
+       } else {
+               bdi->max_ratio = max_ratio;
+               bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
+       }
+       spin_unlock_irqrestore(&bdi_lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(bdi_set_max_ratio);
+
+/*
  * Work out the current dirty-memory clamping and background writeout
  * thresholds.
  *
@@ -280,37 +345,38 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 #endif
 }
 
-static unsigned long determine_dirtyable_memory(void)
+/**
+ * determine_dirtyable_memory - amount of memory that may be used
+ *
+ * Returns the numebr of pages that can currently be freed and used
+ * by the kernel for direct mappings.
+ */
+unsigned long determine_dirtyable_memory(void)
 {
        unsigned long x;
 
        x = global_page_state(NR_FREE_PAGES)
                + global_page_state(NR_INACTIVE)
                + global_page_state(NR_ACTIVE);
-       x -= highmem_dirtyable_memory(x);
+
+       if (!vm_highmem_is_dirtyable)
+               x -= highmem_dirtyable_memory(x);
+
        return x + 1;   /* Ensure that we never return 0 */
 }
 
-static void
+void
 get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
                 struct backing_dev_info *bdi)
 {
        int background_ratio;           /* Percentages */
        int dirty_ratio;
-       int unmapped_ratio;
        long background;
        long dirty;
        unsigned long available_memory = determine_dirtyable_memory();
        struct task_struct *tsk;
 
-       unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
-                               global_page_state(NR_ANON_PAGES)) * 100) /
-                                       available_memory;
-
        dirty_ratio = vm_dirty_ratio;
-       if (dirty_ratio > unmapped_ratio / 2)
-               dirty_ratio = unmapped_ratio / 2;
-
        if (dirty_ratio < 5)
                dirty_ratio = 5;
 
@@ -329,7 +395,7 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
        *pdirty = dirty;
 
        if (bdi) {
-               u64 bdi_dirty = dirty;
+               u64 bdi_dirty;
                long numerator, denominator;
 
                /*
@@ -337,8 +403,12 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
                 */
                bdi_writeout_fraction(bdi, &numerator, &denominator);
 
+               bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100;
                bdi_dirty *= numerator;
                do_div(bdi_dirty, denominator);
+               bdi_dirty += (dirty * bdi->min_ratio) / 100;
+               if (bdi_dirty > (dirty * bdi->max_ratio) / 100)
+                       bdi_dirty = dirty * bdi->max_ratio / 100;
 
                *pbdi_dirty = bdi_dirty;
                clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty);
@@ -355,8 +425,8 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
  */
 static void balance_dirty_pages(struct address_space *mapping)
 {
-       long bdi_nr_reclaimable;
-       long bdi_nr_writeback;
+       long nr_reclaimable, bdi_nr_reclaimable;
+       long nr_writeback, bdi_nr_writeback;
        long background_thresh;
        long dirty_thresh;
        long bdi_thresh;
@@ -376,11 +446,26 @@ static void balance_dirty_pages(struct address_space *mapping)
 
                get_dirty_limits(&background_thresh, &dirty_thresh,
                                &bdi_thresh, bdi);
+
+               nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
+                                       global_page_state(NR_UNSTABLE_NFS);
+               nr_writeback = global_page_state(NR_WRITEBACK);
+
                bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
                bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
+
                if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
                        break;
 
+               /*
+                * Throttle it only when the background writeback cannot
+                * catch-up. This avoids (excessively) small writeouts
+                * when the bdi limits are ramping up.
+                */
+               if (nr_reclaimable + nr_writeback <
+                               (background_thresh + dirty_thresh) / 2)
+                       break;
+
                if (!bdi->dirty_exceeded)
                        bdi->dirty_exceeded = 1;
 
@@ -502,16 +587,6 @@ void throttle_vm_writeout(gfp_t gfp_mask)
        long background_thresh;
        long dirty_thresh;
 
-       if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO)) {
-               /*
-                * The caller might hold locks which can prevent IO completion
-                * or progress in the filesystem.  So we cannot just sit here
-                * waiting for IO to complete.
-                */
-               congestion_wait(WRITE, HZ/10);
-               return;
-       }
-
         for ( ; ; ) {
                get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
 
@@ -525,6 +600,14 @@ void throttle_vm_writeout(gfp_t gfp_mask)
                        global_page_state(NR_WRITEBACK) <= dirty_thresh)
                                break;
                 congestion_wait(WRITE, HZ/10);
+
+               /*
+                * The caller might hold locks which can prevent IO completion
+                * or progress in the filesystem.  So we cannot just sit here
+                * waiting for IO to complete.
+                */
+               if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
+                       break;
         }
 }
 
@@ -877,6 +960,7 @@ retry:
        }
        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = index;
+
        return ret;
 }
 EXPORT_SYMBOL(write_cache_pages);
@@ -991,7 +1075,7 @@ int __set_page_dirty_no_writeback(struct page *page)
  * mapping is pinned by the vma's ->vm_file reference.
  *
  * We take care to handle the case where the page was truncated from the
- * mapping by re-checking page_mapping() insode tree_lock.
+ * mapping by re-checking page_mapping() inside tree_lock.
  */
 int __set_page_dirty_nobuffers(struct page *page)
 {
@@ -1002,7 +1086,7 @@ int __set_page_dirty_nobuffers(struct page *page)
                if (!mapping)
                        return 1;
 
-               write_lock_irq(&mapping->tree_lock);
+               spin_lock_irq(&mapping->tree_lock);
                mapping2 = page_mapping(page);
                if (mapping2) { /* Race with truncate? */
                        BUG_ON(mapping2 != mapping);
@@ -1016,7 +1100,7 @@ int __set_page_dirty_nobuffers(struct page *page)
                        radix_tree_tag_set(&mapping->page_tree,
                                page_index(page), PAGECACHE_TAG_DIRTY);
                }
-               write_unlock_irq(&mapping->tree_lock);
+               spin_unlock_irq(&mapping->tree_lock);
                if (mapping->host) {
                        /* !PageAnon && !swapper_space */
                        __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -1062,7 +1146,7 @@ static int __set_page_dirty(struct page *page)
        return 0;
 }
 
-int fastcall set_page_dirty(struct page *page)
+int set_page_dirty(struct page *page)
 {
        int ret = __set_page_dirty(page);
        if (ret)
@@ -1172,18 +1256,18 @@ int test_clear_page_writeback(struct page *page)
                struct backing_dev_info *bdi = mapping->backing_dev_info;
                unsigned long flags;
 
-               write_lock_irqsave(&mapping->tree_lock, flags);
+               spin_lock_irqsave(&mapping->tree_lock, flags);
                ret = TestClearPageWriteback(page);
                if (ret) {
                        radix_tree_tag_clear(&mapping->page_tree,
                                                page_index(page),
                                                PAGECACHE_TAG_WRITEBACK);
-                       if (bdi_cap_writeback_dirty(bdi)) {
+                       if (bdi_cap_account_writeback(bdi)) {
                                __dec_bdi_stat(bdi, BDI_WRITEBACK);
                                __bdi_writeout_inc(bdi);
                        }
                }
-               write_unlock_irqrestore(&mapping->tree_lock, flags);
+               spin_unlock_irqrestore(&mapping->tree_lock, flags);
        } else {
                ret = TestClearPageWriteback(page);
        }
@@ -1201,20 +1285,20 @@ int test_set_page_writeback(struct page *page)
                struct backing_dev_info *bdi = mapping->backing_dev_info;
                unsigned long flags;
 
-               write_lock_irqsave(&mapping->tree_lock, flags);
+               spin_lock_irqsave(&mapping->tree_lock, flags);
                ret = TestSetPageWriteback(page);
                if (!ret) {
                        radix_tree_tag_set(&mapping->page_tree,
                                                page_index(page),
                                                PAGECACHE_TAG_WRITEBACK);
-                       if (bdi_cap_writeback_dirty(bdi))
+                       if (bdi_cap_account_writeback(bdi))
                                __inc_bdi_stat(bdi, BDI_WRITEBACK);
                }
                if (!PageDirty(page))
                        radix_tree_tag_clear(&mapping->page_tree,
                                                page_index(page),
                                                PAGECACHE_TAG_DIRTY);
-               write_unlock_irqrestore(&mapping->tree_lock, flags);
+               spin_unlock_irqrestore(&mapping->tree_lock, flags);
        } else {
                ret = TestSetPageWriteback(page);
        }