+/*
+ * Scale the writeback cache size proportional to the relative writeout speeds.
+ *
+ * We do this by keeping a floating proportion between BDIs, based on page
+ * writeback completions [end_page_writeback()]. Those devices that write out
+ * pages fastest will get the larger share, while the slower will get a smaller
+ * share.
+ *
+ * We use page writeout completions because we are interested in getting rid of
+ * dirty pages. Having them written out is the primary goal.
+ *
+ * We introduce a concept of time, a period over which we measure these events,
+ * because demand can/will vary over time. The length of this period itself is
+ * measured in page writeback completions.
+ *
+ */
+static struct prop_descriptor vm_completions;
+static struct prop_descriptor vm_dirties;
+
+/*
+ * couple the period to the dirty_ratio:
+ *
+ * period/2 ~ roundup_pow_of_two(dirty limit)
+ */
+static int calc_period_shift(void)
+{
+ unsigned long dirty_total;
+
+ if (vm_dirty_bytes)
+ dirty_total = vm_dirty_bytes / PAGE_SIZE;
+ else
+ dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
+ 100;
+ return 2 + ilog2(dirty_total - 1);
+}
+
+/*
+ * update the period when the dirty threshold changes.
+ */
+static void update_completion_period(void)
+{
+ int shift = calc_period_shift();
+ prop_change_shift(&vm_completions, shift);
+ prop_change_shift(&vm_dirties, shift);
+}
+
+int dirty_background_ratio_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (ret == 0 && write)
+ dirty_background_bytes = 0;
+ return ret;
+}
+
+int dirty_background_bytes_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
+ if (ret == 0 && write)
+ dirty_background_ratio = 0;
+ return ret;
+}
+
+int dirty_ratio_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int old_ratio = vm_dirty_ratio;
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
+ update_completion_period();
+ vm_dirty_bytes = 0;
+ }
+ return ret;
+}
+
+
+int dirty_bytes_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ unsigned long old_bytes = vm_dirty_bytes;
+ int ret;
+
+ ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
+ if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
+ update_completion_period();
+ vm_dirty_ratio = 0;
+ }
+ return ret;
+}
+
+/*
+ * Increment the BDI's writeout completion count and the global writeout
+ * completion count. Called from test_clear_page_writeback().
+ */
+static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
+{
+ __prop_inc_percpu_max(&vm_completions, &bdi->completions,
+ bdi->max_prop_frac);
+}
+
+void bdi_writeout_inc(struct backing_dev_info *bdi)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __bdi_writeout_inc(bdi);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(bdi_writeout_inc);
+
+void task_dirty_inc(struct task_struct *tsk)
+{
+ prop_inc_single(&vm_dirties, &tsk->dirties);
+}
+
+/*
+ * Obtain an accurate fraction of the BDI's portion.
+ */
+static void bdi_writeout_fraction(struct backing_dev_info *bdi,
+ long *numerator, long *denominator)
+{
+ if (bdi_cap_writeback_dirty(bdi)) {
+ prop_fraction_percpu(&vm_completions, &bdi->completions,
+ numerator, denominator);
+ } else {
+ *numerator = 0;
+ *denominator = 1;
+ }
+}
+
+/*
+ * Clip the earned share of dirty pages to that which is actually available.
+ * This avoids exceeding the total dirty_limit when the floating averages
+ * fluctuate too quickly.
+ */
+static void clip_bdi_dirty_limit(struct backing_dev_info *bdi,
+ unsigned long dirty, unsigned long *pbdi_dirty)
+{
+ unsigned long avail_dirty;
+
+ avail_dirty = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_WRITEBACK) +
+ global_page_state(NR_UNSTABLE_NFS) +
+ global_page_state(NR_WRITEBACK_TEMP);
+
+ if (avail_dirty < dirty)
+ avail_dirty = dirty - avail_dirty;
+ else
+ avail_dirty = 0;
+
+ avail_dirty += bdi_stat(bdi, BDI_RECLAIMABLE) +
+ bdi_stat(bdi, BDI_WRITEBACK);
+
+ *pbdi_dirty = min(*pbdi_dirty, avail_dirty);
+}
+
+static inline void task_dirties_fraction(struct task_struct *tsk,
+ long *numerator, long *denominator)
+{
+ prop_fraction_single(&vm_dirties, &tsk->dirties,
+ numerator, denominator);
+}
+
+/*
+ * scale the dirty limit
+ *
+ * task specific dirty limit:
+ *
+ * dirty -= (dirty/8) * p_{t}
+ */
+static void task_dirty_limit(struct task_struct *tsk, unsigned long *pdirty)
+{
+ long numerator, denominator;
+ unsigned long dirty = *pdirty;
+ u64 inv = dirty >> 3;
+
+ task_dirties_fraction(tsk, &numerator, &denominator);
+ inv *= numerator;
+ do_div(inv, denominator);
+
+ dirty -= inv;
+ if (dirty < *pdirty/2)
+ dirty = *pdirty/2;
+
+ *pdirty = dirty;
+}
+
+/*
+ *
+ */
+static unsigned int bdi_min_ratio;
+
+int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
+{
+ int ret = 0;
+
+ spin_lock_bh(&bdi_lock);
+ if (min_ratio > bdi->max_ratio) {
+ ret = -EINVAL;
+ } else {
+ min_ratio -= bdi->min_ratio;
+ if (bdi_min_ratio + min_ratio < 100) {
+ bdi_min_ratio += min_ratio;
+ bdi->min_ratio += min_ratio;
+ } else {
+ ret = -EINVAL;
+ }
+ }
+ spin_unlock_bh(&bdi_lock);
+
+ return ret;
+}
+
+int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
+{
+ int ret = 0;
+
+ if (max_ratio > 100)
+ return -EINVAL;
+
+ spin_lock_bh(&bdi_lock);
+ if (bdi->min_ratio > max_ratio) {
+ ret = -EINVAL;
+ } else {
+ bdi->max_ratio = max_ratio;
+ bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
+ }
+ spin_unlock_bh(&bdi_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(bdi_set_max_ratio);