linux-stable/lib/proportions.c
Peter Zijlstra a42dde0415 mm: bdi: allow setting a maximum for the bdi dirty limit
Add "max_ratio" to /sys/class/bdi.  This indicates the maximum percentage of
the global dirty threshold allocated to this bdi.

[mszeredi@suse.cz]

 - fix parsing in max_ratio_store().
 - export bdi_set_max_ratio() to modules
 - limit bdi_dirty with bdi->max_ratio
 - document new sysfs attribute

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-30 08:29:50 -07:00

405 lines
9.3 KiB
C

/*
* Floating proportions
*
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
*
* Description:
*
* The floating proportion is a time derivative with an exponentially decaying
* history:
*
* p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
*
* Where j is an element from {prop_local}, x_{j} is j's number of events,
* and i the time period over which the differential is taken. So d/dt_{-i} is
* the differential over the i-th last period.
*
* The decaying history gives smooth transitions. The time differential carries
* the notion of speed.
*
* The denominator is 2^(1+i) because we want the series to be normalised, ie.
*
* \Sum_{i=0} 1/2^(1+i) = 1
*
* Further more, if we measure time (t) in the same events as x; so that:
*
* t = \Sum_{j} x_{j}
*
* we get that:
*
* \Sum_{j} p_{j} = 1
*
* Writing this in an iterative fashion we get (dropping the 'd's):
*
* if (++x_{j}, ++t > period)
* t /= 2;
* for_each (j)
* x_{j} /= 2;
*
* so that:
*
* p_{j} = x_{j} / t;
*
* We optimize away the '/= 2' for the global time delta by noting that:
*
* if (++t > period) t /= 2:
*
* Can be approximated by:
*
* period/2 + (++t % period/2)
*
* [ Furthermore, when we choose period to be 2^n it can be written in terms of
* binary operations and wraparound artefacts disappear. ]
*
* Also note that this yields a natural counter of the elapsed periods:
*
* c = t / (period/2)
*
* [ Its monotonic increasing property can be applied to mitigate the wrap-
* around issue. ]
*
* This allows us to do away with the loop over all prop_locals on each period
* expiration. By remembering the period count under which it was last accessed
* as c_{j}, we can obtain the number of 'missed' cycles from:
*
* c - c_{j}
*
* We can then lazily catch up to the global period count every time we are
* going to use x_{j}, by doing:
*
* x_{j} /= 2^(c - c_{j}), c_{j} = c
*/
#include <linux/proportions.h>
#include <linux/rcupdate.h>
int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
int err;
if (shift > PROP_MAX_SHIFT)
shift = PROP_MAX_SHIFT;
pd->index = 0;
pd->pg[0].shift = shift;
mutex_init(&pd->mutex);
err = percpu_counter_init_irq(&pd->pg[0].events, 0);
if (err)
goto out;
err = percpu_counter_init_irq(&pd->pg[1].events, 0);
if (err)
percpu_counter_destroy(&pd->pg[0].events);
out:
return err;
}
/*
* We have two copies, and flip between them to make it seem like an atomic
* update. The update is not really atomic wrt the events counter, but
* it is internally consistent with the bit layout depending on shift.
*
* We copy the events count, move the bits around and flip the index.
*/
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
int index;
int offset;
u64 events;
unsigned long flags;
if (shift > PROP_MAX_SHIFT)
shift = PROP_MAX_SHIFT;
mutex_lock(&pd->mutex);
index = pd->index ^ 1;
offset = pd->pg[pd->index].shift - shift;
if (!offset)
goto out;
pd->pg[index].shift = shift;
local_irq_save(flags);
events = percpu_counter_sum(&pd->pg[pd->index].events);
if (offset < 0)
events <<= -offset;
else
events >>= offset;
percpu_counter_set(&pd->pg[index].events, events);
/*
* ensure the new pg is fully written before the switch
*/
smp_wmb();
pd->index = index;
local_irq_restore(flags);
synchronize_rcu();
out:
mutex_unlock(&pd->mutex);
}
/*
* wrap the access to the data in an rcu_read_lock() section;
* this is used to track the active references.
*/
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
{
int index;
rcu_read_lock();
index = pd->index;
/*
* match the wmb from vcd_flip()
*/
smp_rmb();
return &pd->pg[index];
}
static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
{
rcu_read_unlock();
}
static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
int offset = *pl_shift - new_shift;
if (!offset)
return;
if (offset < 0)
*pl_period <<= -offset;
else
*pl_period >>= offset;
*pl_shift = new_shift;
}
/*
* PERCPU
*/
#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
int prop_local_init_percpu(struct prop_local_percpu *pl)
{
spin_lock_init(&pl->lock);
pl->shift = 0;
pl->period = 0;
return percpu_counter_init_irq(&pl->events, 0);
}
void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
percpu_counter_destroy(&pl->events);
}
/*
* Catch up with missed period expirations.
*
* until (c_{j} == c)
* x_{j} -= x_{j}/2;
* c_{j}++;
*/
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
unsigned long period = 1UL << (pg->shift - 1);
unsigned long period_mask = ~(period - 1);
unsigned long global_period;
unsigned long flags;
global_period = percpu_counter_read(&pg->events);
global_period &= period_mask;
/*
* Fast path - check if the local and global period count still match
* outside of the lock.
*/
if (pl->period == global_period)
return;
spin_lock_irqsave(&pl->lock, flags);
prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
/*
* For each missed period, we half the local counter.
* basically:
* pl->events >> (global_period - pl->period);
*/
period = (global_period - pl->period) >> (pg->shift - 1);
if (period < BITS_PER_LONG) {
s64 val = percpu_counter_read(&pl->events);
if (val < (nr_cpu_ids * PROP_BATCH))
val = percpu_counter_sum(&pl->events);
__percpu_counter_add(&pl->events, -val + (val >> period),
PROP_BATCH);
} else
percpu_counter_set(&pl->events, 0);
pl->period = global_period;
spin_unlock_irqrestore(&pl->lock, flags);
}
/*
* ++x_{j}, ++t
*/
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
struct prop_global *pg = prop_get_global(pd);
prop_norm_percpu(pg, pl);
__percpu_counter_add(&pl->events, 1, PROP_BATCH);
percpu_counter_add(&pg->events, 1);
prop_put_global(pd, pg);
}
/*
* identical to __prop_inc_percpu, except that it limits this pl's fraction to
* @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
*/
void __prop_inc_percpu_max(struct prop_descriptor *pd,
struct prop_local_percpu *pl, long frac)
{
struct prop_global *pg = prop_get_global(pd);
prop_norm_percpu(pg, pl);
if (unlikely(frac != PROP_FRAC_BASE)) {
unsigned long period_2 = 1UL << (pg->shift - 1);
unsigned long counter_mask = period_2 - 1;
unsigned long global_count;
long numerator, denominator;
numerator = percpu_counter_read_positive(&pl->events);
global_count = percpu_counter_read(&pg->events);
denominator = period_2 + (global_count & counter_mask);
if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
goto out_put;
}
percpu_counter_add(&pl->events, 1);
percpu_counter_add(&pg->events, 1);
out_put:
prop_put_global(pd, pg);
}
/*
* Obtain a fraction of this proportion
*
* p_{j} = x_{j} / (period/2 + t % period/2)
*/
void prop_fraction_percpu(struct prop_descriptor *pd,
struct prop_local_percpu *pl,
long *numerator, long *denominator)
{
struct prop_global *pg = prop_get_global(pd);
unsigned long period_2 = 1UL << (pg->shift - 1);
unsigned long counter_mask = period_2 - 1;
unsigned long global_count;
prop_norm_percpu(pg, pl);
*numerator = percpu_counter_read_positive(&pl->events);
global_count = percpu_counter_read(&pg->events);
*denominator = period_2 + (global_count & counter_mask);
prop_put_global(pd, pg);
}
/*
* SINGLE
*/
int prop_local_init_single(struct prop_local_single *pl)
{
spin_lock_init(&pl->lock);
pl->shift = 0;
pl->period = 0;
pl->events = 0;
return 0;
}
void prop_local_destroy_single(struct prop_local_single *pl)
{
}
/*
* Catch up with missed period expirations.
*/
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
unsigned long period = 1UL << (pg->shift - 1);
unsigned long period_mask = ~(period - 1);
unsigned long global_period;
unsigned long flags;
global_period = percpu_counter_read(&pg->events);
global_period &= period_mask;
/*
* Fast path - check if the local and global period count still match
* outside of the lock.
*/
if (pl->period == global_period)
return;
spin_lock_irqsave(&pl->lock, flags);
prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
/*
* For each missed period, we half the local counter.
*/
period = (global_period - pl->period) >> (pg->shift - 1);
if (likely(period < BITS_PER_LONG))
pl->events >>= period;
else
pl->events = 0;
pl->period = global_period;
spin_unlock_irqrestore(&pl->lock, flags);
}
/*
* ++x_{j}, ++t
*/
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
struct prop_global *pg = prop_get_global(pd);
prop_norm_single(pg, pl);
pl->events++;
percpu_counter_add(&pg->events, 1);
prop_put_global(pd, pg);
}
/*
* Obtain a fraction of this proportion
*
* p_{j} = x_{j} / (period/2 + t % period/2)
*/
void prop_fraction_single(struct prop_descriptor *pd,
struct prop_local_single *pl,
long *numerator, long *denominator)
{
struct prop_global *pg = prop_get_global(pd);
unsigned long period_2 = 1UL << (pg->shift - 1);
unsigned long counter_mask = period_2 - 1;
unsigned long global_count;
prop_norm_single(pg, pl);
*numerator = pl->events;
global_count = percpu_counter_read(&pg->events);
*denominator = period_2 + (global_count & counter_mask);
prop_put_global(pd, pg);
}