sched: add /proc/sys/kernel/sched_compat_yield

add /proc/sys/kernel/sched_compat_yield to make sys_sched_yield()
more aggressive, by moving the yielding task to the last position
in the rbtree.

with sched_compat_yield=0:

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
  2539 mingo     20   0  1576  252  204 R   50  0.0   0:02.03 loop_yield
  2541 mingo     20   0  1576  244  196 R   50  0.0   0:02.05 loop

with sched_compat_yield=1:

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
  2584 mingo     20   0  1576  248  196 R   99  0.0   0:52.45 loop
  2582 mingo     20   0  1576  256  204 R    0  0.0   0:00.00 loop_yield

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
This commit is contained in:
Ingo Molnar 2007-09-19 23:34:46 +02:00
parent a88a8eff1e
commit 1799e35d5b
4 changed files with 67 additions and 10 deletions

View file

@ -1406,6 +1406,7 @@ extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_batch_wakeup_granularity;
extern unsigned int sysctl_sched_stat_granularity;
extern unsigned int sysctl_sched_runtime_limit;
extern unsigned int sysctl_sched_compat_yield;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features;

View file

@ -4550,9 +4550,6 @@ asmlinkage long sys_sched_yield(void)
struct rq *rq = this_rq_lock();
schedstat_inc(rq, yld_cnt);
if (unlikely(rq->nr_running == 1))
schedstat_inc(rq, yld_act_empty);
else
current->sched_class->yield_task(rq, current);
/*

View file

@ -42,6 +42,14 @@ unsigned int sysctl_sched_latency __read_mostly = 20000000ULL;
*/
unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL;
/*
* sys_sched_yield() compat mode
*
* This option switches the aggressive yield implementation of the
* old scheduler back on.
*/
unsigned int __read_mostly sysctl_sched_compat_yield;
/*
* SCHED_BATCH wake-up granularity.
* (default: 25 msec, units: nanoseconds)
@ -897,12 +905,24 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
}
/*
* sched_yield() support is very simple - we dequeue and enqueue
* sched_yield() support is very simple - we dequeue and enqueue.
*
* If compat_yield is turned on then we requeue to the end of the tree.
*/
static void yield_task_fair(struct rq *rq, struct task_struct *p)
{
struct cfs_rq *cfs_rq = task_cfs_rq(p);
struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
struct sched_entity *rightmost, *se = &p->se;
struct rb_node *parent;
/*
* Are we the only task in the tree?
*/
if (unlikely(cfs_rq->nr_running == 1))
return;
if (likely(!sysctl_sched_compat_yield)) {
__update_rq_clock(rq);
/*
* Dequeue and enqueue the task to update its
@ -910,6 +930,37 @@ static void yield_task_fair(struct rq *rq, struct task_struct *p)
*/
dequeue_entity(cfs_rq, &p->se, 0);
enqueue_entity(cfs_rq, &p->se, 0);
return;
}
/*
* Find the rightmost entry in the rbtree:
*/
do {
parent = *link;
link = &parent->rb_right;
} while (*link);
rightmost = rb_entry(parent, struct sched_entity, run_node);
/*
* Already in the rightmost position?
*/
if (unlikely(rightmost == se))
return;
/*
* Minimally necessary key value to be last in the tree:
*/
se->fair_key = rightmost->fair_key + 1;
if (cfs_rq->rb_leftmost == &se->run_node)
cfs_rq->rb_leftmost = rb_next(&se->run_node);
/*
* Relink the task to the rightmost position:
*/
rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
rb_link_node(&se->run_node, parent, link);
rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
}
/*

View file

@ -303,6 +303,14 @@ static ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
{
.ctl_name = CTL_UNNUMBERED,
.procname = "sched_compat_yield",
.data = &sysctl_sched_compat_yield,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#ifdef CONFIG_PROVE_LOCKING
{
.ctl_name = CTL_UNNUMBERED,