/**
* @file kernel/sched/core.c
*
* @brief the core scheduling code
*/
#include <kernel/err.h>
#include <kernel/kthread.h>
#include <kernel/sched.h>
#include <kernel/init.h>
#include <kernel/tick.h>
#include <kernel/smp.h>
#include <asm-generic/irqflags.h>
#include <asm-generic/spinlock.h>
#include <asm/switch_to.h>
#include <string.h>
#define MSG "SCHEDULER: "
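/* all registered schedulers, kept sorted by descending priority */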
static LIST_HEAD(kernel_schedulers);
static bool sched_enabled[CONFIG_SMP_CPUS_MAX];
/* XXX: per-cpu... */
extern struct thread_info *current_set[];
/**
* @brief update the remaining runtime of the current thread and set
* state from TASK_BUSY to TASK_RUN
*/
static void sched_update_runtime(struct task_struct *task, ktime now)
{
ktime rt;
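	/* wall-clock time consumed since the task was dispatched */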
rt = ktime_sub(now, task->exec_start);
task->exec_stop = now;
task->runtime = ktime_sub(task->runtime, rt);
task->total = ktime_add(task->total, rt);
if (task->state == TASK_BUSY)
task->state = TASK_RUN;
}
/**
* @brief find the next task to execute
*
 * @returns the time slice until the next scheduling event
*/
static ktime sched_find_next_task(struct task_struct **task, int cpu, ktime now)
{
struct scheduler *sched;
	struct task_struct *next = NULL;
ktime slice;
/* our schedulers are sorted by priority, so the highest-priority
* scheduler with some task to run gets to call dibs on the cpu
*/
list_for_each_entry(sched, &kernel_schedulers, node) {
next = sched->pick_next_task(sched->tq, cpu, now);
if (next) {
/* we found something to execute, off we go */
slice = next->sched->timeslice_ns(next);
break;
}
}
	/* NOTE: _next_ can never be NULL at this point: at the very least,
	 * the initial kernel bootup thread must be present on each CPU,
	 * otherwise scheduling must be disabled altogether
	 */
BUG_ON(!next);
	/* Determine the most pressing ready time. If the remaining runtime
	 * of a thread is smaller than the wakeup timeout for a given
	 * scheduler priority, we restrict ourselves to the remaining
	 * runtime. This is particularly needed for strictly periodic
	 * schedulers, e.g. EDF
	 */
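	/* For example: if the picked task may run for another 200 us, but a
	 * scheduler of equal or higher priority reports a task becoming
	 * ready in 80 us, the effective slice is clamped to 80 us so that
	 * task is not dispatched late.
	 */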
list_for_each_entry(sched, &kernel_schedulers, node) {
ktime ready;
/* NOTE: non-periodic, non-real-time schedulers (e.g. round
* robin, fifo, ...) are supposed to return a zero-timeout
* for next task readiness, since their tasks are
* always ready to run
*/
ready = sched->task_ready_ns(sched->tq, cpu, now);
		/* TODO: raise a kernel alarm if ready < 0; this would imply
		 * that a real-time requirement has been violated
		 */
BUG_ON(ready < 0);
if (!ready)
continue;
if (ready >= slice)
continue;
if (sched->priority >= next->sched->priority)
slice = ready;
else
break;
}
(*task) = next;
return slice;
}
/**
* @brief schedule and execute the next task
*/
void schedule(void)
{
int cpu;
ktime now;
ktime tick;
ktime slice;
struct task_struct *next;
cpu = smp_cpu_id();
if (!sched_enabled[cpu])
return;
/* booted yet? */
BUG_ON(!current_set[cpu]);
arch_local_irq_disable();
now = ktime_get();
sched_update_runtime(current_set[cpu]->task, now);
tick = (ktime) tick_get_period_min_ns();
while (1) {
slice = sched_find_next_task(&next, cpu, now);
if (slice > tick)
break;
/* keep trying until we can find a task which can actually
* execute given the system overhead
*/
now = ktime_get();
}
next->exec_start = now;
	/*
	 * We subtract a tick period here to account for the approximate
	 * overhead of the scheduling function itself, which is about twice
	 * the ISR processing time. This could be improved, but it appears
	 * to be sufficient even for very high-load, high-frequency task sets.
	 *
	 * On a GR712 @ 80 MHz, this makes the following EDF configuration
	 * not miss any deadlines:
	 *	1) P = 1000 ms, D = 999 ms, W = 300 ms
	 *	2) P =  140 us, D = 115 us, W =  90 us
	 *	3) P = 1000 ms, D = 999 ms, W = 300 ms
	 *	4) P =  140 us, D = 115 us, W =  90 us
	 *
	 * This configuration puts a load of 94.3% on each CPU, running tasks
	 * 1+2 and 3+4 respectively. The remaining runtime is allocated to the
	 * kernel's main boot thread on each CPU, which runs in RR mode.
	 *
	 * Note: This means that the scheduling overhead comes out of the
	 *	 run-time budget of each task, no matter the scheduler type.
	 *	 In the above example, the high-frequency tasks cannot use
	 *	 more than about 93 percent of their actual WCET; the loss is
	 *	 significantly smaller for tasks with longer periods/WCETs.
	 */
/* set next wakeup */
tick_set_next_ns(ktime_sub(slice, tick));
prepare_arch_switch(1);
switch_to(next);
arch_local_irq_enable();
}
/**
* @brief yield remaining runtime and reschedule
*/
void sched_yield(void)
{
struct task_struct *tsk;
tsk = current_set[smp_cpu_id()]->task;
tsk->runtime = 0;
schedule();
}
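/*
 * Illustrative use, assuming a hypothetical do_periodic_work(): a worker
 * that finishes its job early returns the remainder of its budget rather
 * than spinning until the end of its slice:
 *
 *	while (1) {
 *		do_periodic_work();
 *		sched_yield();
 *	}
 */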
/**
* @brief wake up a task
*/
int sched_wake(struct task_struct *task, ktime now)
{
if (!task)
return -EINVAL;
if (!task->sched) {
pr_err(MSG "no scheduler configured for task %s\n", task->name);
return -EINVAL;
}
return task->sched->wake_task(task, now);
}
/**
* @brief enqueue a task
*/
int sched_enqueue(struct task_struct *task)
{
int ret;
if (!task)
return -EINVAL;
if (!task->sched) {
pr_err(MSG "no scheduler configured for task %s\n", task->name);
return -EINVAL;
}
ret = task->sched->check_sched_attr(&task->attr);
if (ret)
return ret;
ret = task->sched->enqueue_task(task);
if (ret)
return ret;
return 0;
}
/**
* @brief set a scheduling attribute for a task
*
* @returns 0 on success, < 0 on error
*
* XXX: should implement list of all threads, so we can use pid_t pid
*
* XXX: no error checking of attr params
*/
int sched_set_attr(struct task_struct *task, struct sched_attr *attr)
{
	struct scheduler *sched;

	/* do not jump to the error label here, it dereferences task */
	if (!task || !attr)
		return -EINVAL;

	list_for_each_entry(sched, &kernel_schedulers, node) {
		if (sched->policy == attr->policy) {
			if (sched->check_sched_attr(attr))
				goto error;

			/* commit the attributes only once they check out */
			memcpy(&task->attr, attr, sizeof(struct sched_attr));
			task->sched = sched;

			return 0;
		}
	}

	pr_crit(MSG "specified policy %d not available\n", attr->policy);

error:
	task->sched = NULL;
	return -EINVAL;
}
/**
* @brief get a scheduling attribute for a task
* XXX: should implement list of all threads, so we can use pid_t pid
*/
int sched_get_attr(struct task_struct *task, struct sched_attr *attr)
{
if (!task)
return -EINVAL;
if (!attr)
return -EINVAL;
memcpy(attr, &task->attr, sizeof(struct sched_attr));
return 0;
}
/**
* @brief set a task to the default scheduling policy
*/
int sched_set_policy_default(struct task_struct *task)
{
struct sched_attr attr = {.policy = SCHED_RR,
.priority = 100};
return sched_set_attr(task, &attr);
}
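/*
 * For a non-default policy, the intended call sequence is sketched below.
 * SCHED_EDF and the period/deadline_rel/wcet attribute names, as well as
 * the us_to_ktime() helper, are assumptions for illustration; see
 * kernel/sched.h for the actual definitions:
 *
 *	struct sched_attr attr;
 *
 *	sched_get_attr(task, &attr);
 *	attr.policy       = SCHED_EDF;
 *	attr.period       = us_to_ktime(140);
 *	attr.deadline_rel = us_to_ktime(115);
 *	attr.wcet         = us_to_ktime(90);
 *	sched_set_attr(task, &attr);
 *
 *	sched_enqueue(task);
 *	sched_wake(task, ktime_get());
 */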
/**
* @brief register a new scheduler
*
* @returns 0 on success,
* -EPERM if the scheduler instance is already registered,
* -EINVAL otherwise
*
* XXX locking
*/
int sched_register(struct scheduler *sched)
{
int swap;
struct scheduler *elem;
struct scheduler *tmp;
if (!sched)
return -EINVAL;
list_for_each_entry(elem, &kernel_schedulers, node) {
if (elem == sched) {
pr_err(MSG "scheduler instance already registered\n");
return -EPERM;
}
}
list_add_tail(&sched->node, &kernel_schedulers);
/* bubble-sort by priority */
do {
swap = 0;
list_for_each_entry_safe(elem, tmp, &kernel_schedulers, node) {
			struct scheduler *next = list_next_entry(elem, node);

			/* the list head itself is not an entry, stop there */
			if (&next->node == &kernel_schedulers)
				break;

			if (elem->priority < next->priority) {
list_swap(&elem->node, &next->node);
swap = 1;
}
}
} while (swap);
return 0;
}
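/*
 * For illustration, a sketch of how a scheduler implementation might
 * register itself. The struct scheduler members follow their use in this
 * file; the rr_*() callbacks and the init hook are hypothetical:
 *
 *	static struct scheduler sched_rr = {
 *		.policy           = SCHED_RR,
 *		.priority         = 0,
 *		.pick_next_task   = rr_pick_next_task,
 *		.timeslice_ns     = rr_timeslice_ns,
 *		.task_ready_ns    = rr_task_ready_ns,
 *		.wake_task        = rr_wake_task,
 *		.enqueue_task     = rr_enqueue_task,
 *		.check_sched_attr = rr_check_sched_attr,
 *	};
 *
 *	static int sched_rr_init(void)
 *	{
 *		return sched_register(&sched_rr);
 *	}
 */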
/**
* @brief enable scheduling on the current cpu
*/
void sched_enable(void)
{
sched_enabled[smp_cpu_id()] = true;
}
/**
* @brief disable scheduling on the current cpu
*/
void sched_disable(void)
{
sched_enabled[smp_cpu_id()] = false;
}
/**
* @brief scheduler initcall
*
* @note sets tick mode to oneshot
*/
static int sched_init(void)
{
tick_set_mode(TICK_MODE_ONESHOT);
return 0;
}
late_initcall(sched_init);