From 4c6b45b64342e510842e7da6e6ba515287a37336 Mon Sep 17 00:00:00 2001
From: Armin Luntzer <armin.luntzer@univie.ac.at>
Date: Wed, 13 Nov 2019 16:43:22 +0100
Subject: [PATCH] consolidate RR and EDF schedulers, as well as core scheduling
 code

 - add list_swap()
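
   list_swap() exchanges the positions of two entries of the same list;
   the entries need not be adjacent. A minimal sketch of the semantics
   (struct and variable names below are made up for illustration and are
   not part of this patch): given a list A -> B -> C, swapping B and C
   yields A -> C -> B.

	struct item {
		int val;
		struct list_head node;
	};

	/* b and c are linked into the same list; after the call they have
	 * traded places
	 */
	list_swap(&b->node, &c->node);

   sched_register() relies on this to keep the list of schedulers sorted
   by descending priority.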
---
 arch/sparc/kernel/thread.c |   8 +-
 include/kernel/kthread.h   |  18 +-
 include/kernel/sched.h     |  21 +-
 include/list.h             |  17 +
 init/Kconfig               |   8 +
 init/main.c                | 311 ++++---------
 kernel/kthread.c           | 194 +++-----
 kernel/sched/core.c        | 336 +++++++++-----
 kernel/sched/edf.c         | 879 +++++++++++++++++++++----------------
 kernel/sched/rr.c          | 161 ++++---
 kernel/tick.c              |   6 +-
 11 files changed, 1024 insertions(+), 935 deletions(-)
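
Note on the demo task set in init/main.c (values taken from this patch; the
arithmetic below is only a sanity check against the basic EDF utilisation
bound used by edf_schedulable(), with UTIL_MAX = 0.98):

	"task":  P = 1000 ms, D = 999 ms, W = 300 ms  ->  U = 300/1000  = 0.300
	"task1": P =  140 us, D = 115 us, W =  90 us  ->  U =  90/140  ~= 0.643

	per-CPU utilisation: 0.300 + 0.643 ~= 0.943 < 0.98

This matches the ~94.3 % per-CPU load quoted in the comment added to
kernel/sched/core.c; the remaining ~5.7 % is left to the RR-mode kernel
boot threads.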

diff --git a/arch/sparc/kernel/thread.c b/arch/sparc/kernel/thread.c
index 85a526d..15f5c5e 100644
--- a/arch/sparc/kernel/thread.c
+++ b/arch/sparc/kernel/thread.c
@@ -40,12 +40,16 @@ extern struct thread_info *current_set[];
 #include <kernel/time.h>
 static void th_starter(void)
 {
+	unsigned long flags;
 	struct task_struct *task = current_set[leon3_cpuid()]->task;
 
 	struct timespec ts;
+
 	double start;
 	double stop;
 
+
+
 	ts = get_uptime();
 
 	start = (double) ts.tv_sec + (double) ts.tv_nsec / 1e9;
@@ -57,9 +61,9 @@ static void th_starter(void)
 
 //	printk("thread: %p returned after %gs\n", task->stack, stop-start);
 
-	arch_local_irq_disable();
+	flags = arch_local_irq_save();
 	task->state = TASK_DEAD;
-	arch_local_irq_enable();
+	arch_local_irq_restore(flags);
 
 
 	schedule();
diff --git a/include/kernel/kthread.h b/include/kernel/kthread.h
index 863dfc0..6118a7b 100644
--- a/include/kernel/kthread.h
+++ b/include/kernel/kthread.h
@@ -13,15 +13,10 @@
 #include <kernel/time.h>
 #include <kernel/sched.h>
 
+#include <compiler.h>
+#include <generated/autoconf.h>
 
-
-struct remove_this_declaration {
-	struct list_head new;
-	struct list_head run;
-	struct list_head wake;
-	struct list_head dead;
-};
-
+compile_time_assert(!(CONFIG_STACK_SIZE & STACK_ALIGN), STACK_SIZE_UNALIGNED);
 
 
 #define KTHREAD_CPU_AFFINITY_NONE	(-1)
@@ -112,12 +107,7 @@ struct task_struct *kthread_create(int (*thread_fn)(void *data),
 struct task_struct *kthread_init_main(void);
 int kthread_wake_up(struct task_struct *task);
 
-/* XXX dummy */
-void switch_to(struct task_struct *next);
-void schedule(void);
-void sched_yield(void);
-
-void sched_print_edf_list(void);
+void kthread_free(struct task_struct *task);
 
 void kthread_set_sched_edf(struct task_struct *task, unsigned long period_us,
 			  unsigned long wcet_us, unsigned long deadline_rel_us);
diff --git a/include/kernel/sched.h b/include/kernel/sched.h
index e7ae967..a60ceac 100644
--- a/include/kernel/sched.h
+++ b/include/kernel/sched.h
@@ -9,15 +9,18 @@
 #include <generated/autoconf.h>	/*XXX */
 
 
+/* scheduler priority levels */
+#define SCHED_PRIORITY_RR	0
+#define SCHED_PRIORITY_EDF	1
 
 enum sched_policy {
 	SCHED_RR,
 	SCHED_EDF,
-	SCHED_FIFO,
 	SCHED_OTHER,
 };
 
 
+
 struct sched_attr {
 	enum sched_policy	policy;
 
@@ -47,9 +50,8 @@ struct rq {
 
 
 struct task_queue {
-	struct list_head new;
-	struct list_head run;
 	struct list_head wake;
+	struct list_head run;
 	struct list_head dead;
 };
 
@@ -69,17 +71,15 @@ struct scheduler {
 	struct task_struct *(*pick_next_task)(struct task_queue tq[], int cpu,
 					      ktime now);
 
-	/* XXX: sucks */
-	void (*wake_next_task)  (struct task_queue tq[], int cpu, ktime now);
-	int  (*enqueue_task)    (struct task_queue tq[],
-			         struct task_struct *task);
+	int (*wake_task)    (struct task_struct *task, ktime now);
+	int (*enqueue_task) (struct task_struct *task);
 
 	ktime (*timeslice_ns)   (struct task_struct *task);
 	ktime (*task_ready_ns)  (struct task_queue tq[], int cpu, ktime now);
 
 	int (*check_sched_attr) (struct sched_attr *attr);
 
-	unsigned long sched_priority;		/* scheduler priority */
+	unsigned long priority;		/* scheduler priority */
 	struct list_head	node;
 #if 0
 	const struct sched_class *next;
@@ -110,7 +110,11 @@ struct scheduler {
 #endif
 
 
+void switch_to(struct task_struct *next);
+void schedule(void);
+void sched_yield(void);
 
+void sched_print_edf_list(void);
 
 
 int sched_set_attr(struct task_struct *task, struct sched_attr *attr);
@@ -118,6 +122,7 @@ int sched_get_attr(struct task_struct *task, struct sched_attr *attr);
 
 int sched_set_policy_default(struct task_struct *task);
 int sched_enqueue(struct task_struct *task);
+int sched_wake(struct task_struct *task, ktime now);
 int sched_register(struct scheduler *sched);
 
 void sched_enable(void);
diff --git a/include/list.h b/include/list.h
index abb1c01..78bc5f0 100644
--- a/include/list.h
+++ b/include/list.h
@@ -366,6 +366,23 @@ static inline void list_replace(struct list_head *old,
 }
 
 
+/**
+ * @brief replace entry1 with entry2 and re-add entry1 at entry2's position
+ * @param entry1: the location to place entry2
+ * @param entry2: the location to place entry1
+ */
+static inline void list_swap(struct list_head *entry1,
+			     struct list_head *entry2)
+{
+	struct list_head *pos = entry2->prev;
+
+	list_del(entry2);
+	list_replace(entry1, entry2);
+	if (pos == entry1)
+		pos = entry2;
+	list_add(entry1, pos);
+}
+
 /**
  * @brief tests whether a list is empty
  * @param head: the list to test.
diff --git a/init/Kconfig b/init/Kconfig
index 9f9165e..a786cac 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -20,6 +20,14 @@ config SMP_CPUS_MAX
 	 is smaller or larger than this number, either the former or the latter
 	 will determine the actual number of CPUs used at runtime.
 
+config STACK_SIZE
+	int "Stack size per thread"
+	default 4096
+	range 1024 32768
+	help
+	 Set the stack size allocated to a thread at runtime. It's probably
+	 wise to set this to a power of two.
+
 config KALLSYMS
 	bool "Generate a kernel symbol table"
 	default y
diff --git a/init/main.c b/init/main.c
index c412f4c..8d57935 100644
--- a/init/main.c
+++ b/init/main.c
@@ -19,6 +19,8 @@
 #include <kernel/kernel.h>
 #include <kernel/kthread.h>
 #include <kernel/time.h>
+#include <kernel/err.h>
+#include <kernel/sysctl.h>
 #include <modules-image.h>
 
 #include <kernel/string.h>
@@ -44,178 +46,78 @@
 #endif /* __OPTIMIZE__ */
 #endif /* GCC_VERSION */
 
-volatile int inc;
-volatile unsigned int xa, xb, xc, xd;
-
-int task2(void *data);
-
-int task0(void *data)
-{
-
-	while (1) {
-
-		xd++;
-
-		if (xd % 1000000000 == 0)
-			sched_yield();
 
+/**
+ * @brief kernel initialisation routines
+ */
 
-	}
-}
-
-int task1(void *data)
+static int kernel_init(void)
 {
-	xa = 0;
-	while (1) {
+	setup_arch();
 
-		xa++;
+	/* free_bootmem() */
+	/* run_init_process() */
 
-	}
+	return 0;
 }
-#define QUIT_AT		100000
+arch_initcall(kernel_init);
 
-int task2(void *data)
-{
 
-	iowrite32be(0xdeadbeef, &xb);
-	return 0;
-
-	while (1) {
-		xb++;
 
+/** XXX dummy **/
+extern int cpu1_ready;
 
-		if (xb > QUIT_AT) {
-			printk("EXITING\n");
-			xb = 0xdeadbeef;
-			return 0;
-		}
-	}
-}
-extern int threadcnt;
-int task_rr(void *data);
-int task3(void *data)
-{
-	while (1) {
-		xc++;
-		task_rr(NULL);
-	}
-}
 
-int xf;
-int task4(void *data)
+volatile int xp;
+int task1(void *p)
 {
 	while (1) {
-		xf++;
+		xp++;
 	}
 }
 
-
-int task_restart(void *data)
+volatile int xd;
+int task2(void *p)
 {
-	struct task_struct *t = NULL;
-	struct sched_attr attr;
-
-	xb = 0xdeadbeef;
-	while(1) {
-		/* "fake" single shot reset */
-
-
-#if 1
-		if (ioread32be(&xb) == 0xdeadbeef)
-		{
-			xb = 0;
-
-			t = kthread_create(task2, NULL, KTHREAD_CPU_AFFINITY_NONE, "task7");
-
-		//	printk("now at %p %d\n", t, threadcnt);
-			sched_get_attr(t, &attr);
-			attr.policy = SCHED_EDF;
-
-			attr.period       = us_to_ktime(0);
-			attr.deadline_rel = us_to_ktime(100);
-			attr.wcet         = us_to_ktime(30);
-
-			sched_set_attr(t, &attr);
-			barrier();
-			BUG_ON (kthread_wake_up(t) < 0);
-			barrier();
-
-
-		}
-			sched_yield();
-#endif
-	}
+	while (1)
+		xd++;
 }
 
-
-#include <kernel/sysctl.h>
-extern ktime sched_last_time;
-	void sched_print_edf_list_internal(struct task_queue *tq, int cpu, ktime now);
-extern uint32_t sched_ev;
-
-
-extern struct scheduler sched_edf;
-int task_rr(void *data)
+int task(void *p)
 {
 	char buf1[64];
 	char buf2[64];
 	char buf3[64];
 
-
 	struct sysobj *sys_irq = NULL;
 
 
-
-	bzero(buf1, 64);
 	sys_irq = sysset_find_obj(sys_set, "/sys/irl/primary");
 
+	if (!sys_irq) {
+		printk("Error locating sysctl entry\n");
+		return -1;
+	}
 
 	while (1) {
 
-		if (sys_irq) {
-			sysobj_show_attr(sys_irq, "irl", buf1);
-			sysobj_show_attr(sys_irq, "8", buf2);
-			sysobj_show_attr(sys_irq, "9", buf3);
-			printk("IRQs: %s timer1 %s timer2 %s threads created: %d\n", buf1, buf2, buf3, threadcnt);
-		}
-
-	//	sched_print_edf_list_internal(&sched_edf.tq[0], 0, ktime_get());
-	//	sched_print_edf_list_internal(&sched_edf.tq[1], 1, ktime_get());
+		sysobj_show_attr(sys_irq, "irl", buf1);
+		sysobj_show_attr(sys_irq, "8", buf2);
+		sysobj_show_attr(sys_irq, "9", buf3);
 
+		printk("IRQ total: %s timer1: %s timer2: %s, %d %d\n",
+		       buf1, buf2, buf3, ioread32be(&xp), xd);
 
-
-
-		sched_yield();
+//		sched_yield();
 	}
-}
-
-
-
-/**
- * @brief kernel initialisation routines
- */
-
-static int kernel_init(void)
-{
-	setup_arch();
-
-	/* free_bootmem() */
-	/* run_init_process() */
 
 	return 0;
 }
-arch_initcall(kernel_init);
 
 
-
-/** XXX dummy **/
-extern int cpu1_ready;
 /**
  * @brief kernel main functionputchar( *((char *) data) );
  */
-#define MAX_TASKS 0
-#include <kernel/clockevent.h>
-#include <kernel/tick.h>
 int kernel_main(void)
 {
 	struct task_struct *t;
@@ -276,7 +178,6 @@ int kernel_main(void)
 
 	/* elevate boot thread */
 	kthread_init_main();
-	tick_set_next_ns(1000000);
 
 	/* wait for cpus */
 	cpu1_ready = 2;
@@ -287,120 +188,72 @@ int kernel_main(void)
 	printk(MSG "Boot complete\n");
 
 
+	t = kthread_create(task, NULL, KTHREAD_CPU_AFFINITY_NONE, "task");
+	if (!IS_ERR(t)) {
+		sched_get_attr(t, &attr);
+		attr.policy = SCHED_EDF;
+		attr.period       = ms_to_ktime(1000);
+		attr.deadline_rel = ms_to_ktime(999);
+		attr.wcet         = ms_to_ktime(300);
+		sched_set_attr(t, &attr);
+		if (kthread_wake_up(t) < 0)
+			printk("---- %s NOT SCHEDUL-ABLE---\n", t->name);
+	} else {
+		printk("Got an error in kthread_create!");
+	}
 
-
-#if 0
-	t = kthread_create(task2, NULL, KTHREAD_CPU_AFFINITY_NONE, "print1");
-	sched_get_attr(t, &attr);
-	attr.policy = SCHED_EDF;
-	attr.period       = ms_to_ktime(1000);
-	attr.deadline_rel = ms_to_ktime(900);
-	attr.wcet         = ms_to_ktime(200);
-	sched_set_attr(t, &attr);
-	kthread_wake_up(t);
-#endif
-
-#if 1
-
-	//t = kthread_create(task0, NULL, KTHREAD_CPU_AFFINITY_NONE, "task0");
-	t = kthread_create(task_restart, NULL, KTHREAD_CPU_AFFINITY_NONE, "task_restart");
-	sched_get_attr(t, &attr);
-	attr.policy = SCHED_EDF;
-	attr.period       = ms_to_ktime(10);
-	attr.deadline_rel = ms_to_ktime(9);
-	attr.wcet         = ms_to_ktime(5);
-	sched_set_attr(t, &attr);
-	if (kthread_wake_up(t) < 0)
-		printk("---- %s NOT SCHEDUL-ABLE---\n", t->name);
-#endif
-
+	t = kthread_create(task1, NULL, KTHREAD_CPU_AFFINITY_NONE, "task1");
+	if (!IS_ERR(t)) {
+		sched_get_attr(t, &attr);
+		attr.policy = SCHED_EDF;
+		attr.period       = us_to_ktime(140);
+		attr.deadline_rel = us_to_ktime(115);
+		attr.wcet         = us_to_ktime(90);
+		sched_set_attr(t, &attr);
+		if (kthread_wake_up(t) < 0)
+			printk("---- %s NOT SCHEDUL-ABLE---\n", t->name);
+	} else {
+		printk("Got an error in kthread_create!");
+	}
 
 
-#if 0
-	t = kthread_create(task1, NULL, KTHREAD_CPU_AFFINITY_NONE, "task1");
-	sched_get_attr(t, &attr);
-	attr.policy = SCHED_EDF;
-	attr.period       = us_to_ktime(50000);
-	attr.deadline_rel = us_to_ktime(40000);
-	attr.wcet         = us_to_ktime(33000);
-	sched_set_attr(t, &attr);
-	if (kthread_wake_up(t) < 0)
-		printk("---- %s NOT SCHEDUL-ABLE---\n", t->name);
-#endif
+	t = kthread_create(task, NULL, KTHREAD_CPU_AFFINITY_NONE, "task");
+	if (!IS_ERR(t)) {
+		sched_get_attr(t, &attr);
+		attr.policy = SCHED_EDF;
+		attr.period       = ms_to_ktime(1000);
+		attr.deadline_rel = ms_to_ktime(999);
+		attr.wcet         = ms_to_ktime(300);
+		sched_set_attr(t, &attr);
+		if (kthread_wake_up(t) < 0)
+			printk("---- %s NOT SCHEDUL-ABLE---\n", t->name);
+	} else {
+		printk("Got an error in kthread_create!");
+	}
 
-#if 0
 	t = kthread_create(task2, NULL, KTHREAD_CPU_AFFINITY_NONE, "task2");
-	sched_get_attr(t, &attr);
-	attr.policy = SCHED_EDF;
-	attr.period       = us_to_ktime(200);
-	attr.deadline_rel = us_to_ktime(110);
-	attr.wcet         = us_to_ktime(95);
-	sched_set_attr(t, &attr);
-	if (kthread_wake_up(t) < 0) {
-		printk("---- %s NOT SCHEDUL-ABLE---\n", t->name);
-		BUG();
+	if (!IS_ERR(t)) {
+		sched_get_attr(t, &attr);
+		attr.policy = SCHED_EDF;
+		attr.period       = us_to_ktime(140);
+		attr.deadline_rel = us_to_ktime(115);
+		attr.wcet         = us_to_ktime(90);
+		sched_set_attr(t, &attr);
+		if (kthread_wake_up(t) < 0)
+			printk("---- %s NOT SCHEDUL-ABLE---\n", t->name);
+	} else {
+		printk("Got an error in kthread_create!");
 	}
-#endif
 
-#if 1
-	t = kthread_create(task3, NULL, KTHREAD_CPU_AFFINITY_NONE, "task3");
-	sched_get_attr(t, &attr);
-	attr.policy = SCHED_EDF;
-	attr.period       = ms_to_ktime(1000);
-	attr.deadline_rel = ms_to_ktime(999);
-	attr.wcet         = ms_to_ktime(300);
-	sched_set_attr(t, &attr);
-	if (kthread_wake_up(t) < 0)
-		printk("---- %s NOT SCHEDUL-ABLE---\n", t->name);
-#endif
 
 
-#if 0
-	t = kthread_create(task_rr, NULL, KTHREAD_CPU_AFFINITY_NONE, "task_rr");
-	sched_get_attr(t, &attr);
-	attr.policy = SCHED_RR;
-	attr.priority = 1;
-	sched_set_attr(t, &attr);
-	kthread_wake_up(t);
-#endif
 
 
-#if 0
-	t = kthread_create(task_restart, NULL, KTHREAD_CPU_AFFINITY_NONE, "task_restart");
-	sched_get_attr(t, &attr);
-	attr.policy = SCHED_RR;
-	attr.priority = 1;
-	sched_set_attr(t, &attr);
-	kthread_wake_up(t);
-#endif
-//	xb = 0xdeadbeef;
 	while(1) {
-		/* "fake" single shot reset */
-
-	//	task_rr(NULL);
-#if 0
-		if (xb == 0xdeadbeef)
-		{
-			xb = 0;
-			t = kthread_create(task2, NULL, KTHREAD_CPU_AFFINITY_NONE, "task2");
-			sched_get_attr(t, &attr);
-			attr.policy = SCHED_EDF;
-
-			attr.period       = us_to_ktime(0);
-			attr.deadline_rel = us_to_ktime(100);
-			attr.wcet         = us_to_ktime(60);
-
-			sched_set_attr(t, &attr);
-			BUG_ON (kthread_wake_up(t) < 0);
-
-		}
-#endif
-
 		cpu_relax();
 	}
 
-	while (1)
-		cpu_relax();
+
 	/* never reached */
 	BUG();
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 3f3726a..0b42a26 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -20,29 +20,13 @@
 
 #include <kernel/tick.h>
 
-#include <generated/autoconf.h> /* XXX need common CPU include */
-
-
 
 #define MSG "KTHREAD: "
 
-
-struct remove_this_declaration /*{
-	struct list_head new;
-	struct list_head run;
-	struct list_head wake;
-	struct list_head dead;
-}*/ _kthreads = {
-	.new  = LIST_HEAD_INIT(_kthreads.new),
-	.run  = LIST_HEAD_INIT(_kthreads.run),
-	.wake = LIST_HEAD_INIT(_kthreads.wake),
-	.dead = LIST_HEAD_INIT(_kthreads.dead)
-};
+#define TASK_NAME_LEN	64
 
 static struct spinlock kthread_spinlock;
 
-
-
 struct thread_info *current_set[CONFIG_SMP_CPUS_MAX]; /* XXX */
 
 
@@ -50,7 +34,7 @@ struct thread_info *current_set[CONFIG_SMP_CPUS_MAX]; /* XXX */
  * @brief lock critical kthread section
  */
 
- void kthread_lock(void)
+static void kthread_lock(void)
 {
 	spin_lock_raw(&kthread_spinlock);
 }
@@ -60,67 +44,43 @@ struct thread_info *current_set[CONFIG_SMP_CPUS_MAX]; /* XXX */
  * @brief unlock critical kthread section
  */
 
-void kthread_unlock(void)
+static void kthread_unlock(void)
 {
 	spin_unlock(&kthread_spinlock);
 }
 
 
-/* this should be a thread with a semaphore
- * that is unlocked by schedule() if dead tasks
- * were added
- * (need to irq_disable/kthread_lock)
+/* we should have a thread with a semaphore which is unlocked by schedule()
+ * if dead tasks were added to the "dead" list
  */
 
-void kthread_cleanup_dead(void)
+void kthread_free(struct task_struct *task)
 {
-	struct task_struct *p_elem;
-	struct task_struct *p_tmp;
-
-	list_for_each_entry_safe(p_elem, p_tmp, &_kthreads.dead, node) {
-		list_del(&p_elem->node);
-		kfree(p_elem->stack);
-		kfree(p_elem->name);
-		kfree(p_elem);
-	}
-}
-
+	if (task->flags & TASK_NO_CLEAN) /* delete from list as well */
+		return;
 
-
-void sched_yield(void)
-{
-	struct task_struct *tsk;
-
-	tsk = current_set[smp_cpu_id()]->task;
-//	if (tsk->attr.policy == SCHED_EDF)
-	tsk->runtime = 0;
-
-	schedule();
-}
-
-
-__attribute__((unused))
-/* static */ void kthread_set_sched_policy(struct task_struct *task,
-				     enum sched_policy policy)
-{
-	arch_local_irq_disable();
-	kthread_lock();
-	task->attr.policy = policy;
-	kthread_unlock();
-	arch_local_irq_enable();
+	kfree(task->stack);
+	kfree(task->name);
+	kfree(task);
 }
 
 
+/**
+ * @brief wake up a kthread
+ *
+ */
 
-int threadcnt;
 int kthread_wake_up(struct task_struct *task)
 {
 	int ret = 0;
 
+	unsigned long flags;
+
 	ktime now;
 
 
-	threadcnt++;
+	if (!task)
+		return -EINVAL;
 
 	if (task->state != TASK_NEW)
 		return -EINVAL;
@@ -128,73 +88,77 @@ int kthread_wake_up(struct task_struct *task)
 	ret = sched_enqueue(task);
 	if(ret)
 		return ret;
-#if 1
+
+	flags = arch_local_irq_save();
 	kthread_lock();
-	arch_local_irq_disable();
 	now = ktime_get();
-#if 1
-	/* XXX need function in sched.c to do that */
-	task->sched->wake_next_task(task->sched->tq, task->on_cpu, now);
 
+	sched_wake(task, ktime_get());
+
+	/* this may be a critical task, send reschedule */
 	if (task->on_cpu != KTHREAD_CPU_AFFINITY_NONE)
 		smp_send_reschedule(task->on_cpu);
-#endif
 
-	arch_local_irq_enable();
 	kthread_unlock();
-#endif
+	arch_local_irq_restore(flags);
 
 	return 0;
 }
 
 
+/**
+ * @brief convert the boot path to a thread
+ *
+ * @note this function sets the initial task for any cpu; if a task has already
+ *	 been set, the attempt will be rejected
+ */
+
 struct task_struct *kthread_init_main(void)
 {
 	int cpu;
 
+	unsigned long flags;
+
 	struct task_struct *task;
 
 
 	cpu = smp_cpu_id();
+	if (current_set[cpu])
+		return ERR_PTR(-EPERM);
 
 	task = kmalloc(sizeof(*task));
-
-
 	if (!task)
 		return ERR_PTR(-ENOMEM);
 
-	/* XXX accessors */
-	task->attr.policy = SCHED_RR; /* default */
-	task->attr.priority = 1;
+
+	sched_set_policy_default(task);
+
+	task->state  = TASK_NEW;
+	task->name   = strdup("KERNEL");
 	task->on_cpu = cpu;
 
 	arch_promote_to_task(task);
 
-	task->name = "KERNEL";
-	BUG_ON(sched_set_policy_default(task));
-
-	arch_local_irq_disable();
+	flags = arch_local_irq_save();
 	kthread_lock();
 
 	current_set[cpu] = &task->thread_info;
 
-
-	task->state = TASK_RUN;
-
 	sched_enqueue(task);
-	/*list_add_tail(&task->node, &_kthreads.run);*/
+	sched_wake(task, ktime_get());
 
 	smp_send_reschedule(cpu);
 
-
 	kthread_unlock();
-	arch_local_irq_enable();
+	arch_local_irq_restore(flags);
 
 	return task;
 }
 
 
-
+/**
+ * @brief create a new thread
+ */
 
 static struct task_struct *kthread_create_internal(int (*thread_fn)(void *data),
 						   void *data, int cpu,
@@ -203,75 +167,49 @@ static struct task_struct *kthread_create_internal(int (*thread_fn)(void *data),
 {
 	struct task_struct *task;
 
-	task = kzalloc(sizeof(*task));
-
 
+	task = kzalloc(sizeof(struct task_struct));
 	if (!task)
 		return ERR_PTR(-ENOMEM);
 
+	/* NOTE: we require that malloc always returns properly aligned memory,
+	 * i.e. aligned to the largest possible memory access instruction
+	 * (which is typically 64 bits)
+	 */
 
-	/* XXX: need stack size detection and realloc/migration code */
-
-	task->stack = kzalloc(8192 + STACK_ALIGN); /* XXX */
-
-	BUG_ON((int) task->stack > (0x40800000 - 4096 + 1));
-
+	task->stack = kmalloc(CONFIG_STACK_SIZE);
 	if (!task->stack) {
 		kfree(task);
 		return ERR_PTR(-ENOMEM);
 	}
 
+	/* initialise stack with pattern, makes detection of errors easier */
+	memset32(task->stack, 0xdeadbeef, CONFIG_STACK_SIZE / sizeof(uint32_t));
 
-	task->stack_bottom = task->stack; /* XXX */
-	task->stack_top = ALIGN_PTR(task->stack, STACK_ALIGN) +8192/4; /* XXX */
-	BUG_ON(task->stack_top > (task->stack + (8192/4 + STACK_ALIGN/4)));
+	task->stack_bottom = task->stack;
+	task->stack_top    = (void *) ((uint8_t *) task->stack
+						   + CONFIG_STACK_SIZE);
 
-#if 0
-	/* XXX: need wmemset() */
-	memset(task->stack, 0xab, 8192 + STACK_ALIGN);
-#else
-#if 0
-	{
-		int i;
-		for (i = 0; i < (8192 + STACK_ALIGN) / 4; i++)
-			((int *) task->stack)[i] = 0xdeadbeef;
-
-	}
-#endif
-#endif
-
-	/* dummy */
-	task->name = kmalloc(32);
-	BUG_ON(!task->name);
-	vsnprintf(task->name, 32, namefmt, args);
+	task->name = kmalloc(TASK_NAME_LEN);
+	vsnprintf(task->name, TASK_NAME_LEN, namefmt, args);
 
 	if (sched_set_policy_default(task)) {
-		pr_crit("KTHREAD: must destroy task at this point\n");
-		BUG();
+		pr_crit("KTHREAD: task policy error\n");
+		kthread_free(task);
+		return NULL;
 	}
 
-	task->total = 0;
+	task->total  = 0;
 	task->slices = 0;
 	task->on_cpu = cpu;
-	arch_init_task(task, thread_fn, data);
-
-	task->state = TASK_NEW;
+	task->state  = TASK_NEW;
 
-	arch_local_irq_disable();
-	kthread_lock();
-
-	/** XXX **/ /*sched_enqueue(task); */
-	//list_add_tail(&task->node, &_kthreads.new);
-
-	kthread_unlock();
-	arch_local_irq_enable();
+	arch_init_task(task, thread_fn, data);
 
 	return task;
 }
 
 
-
-
 /**
  * @brief create a new kernel thread
  *
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 63a8039..dd1052c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -19,169 +19,228 @@
 #include <string.h>
 
 
-
 #define MSG "SCHEDULER: "
 
+
 static LIST_HEAD(kernel_schedulers);
+static bool sched_enabled[2] = {false, false};
 
 
 /* XXX: per-cpu... */
-
 extern struct thread_info *current_set[];
 
-static bool sched_enabled[2] = {false, false};
-
 
+/**
+ * @brief update the remaining runtime of the current thread and set
+ *	  state from TASK_BUSY to TASK_RUN
+ */
 
-void schedule(void)
+static void sched_update_runtime(struct task_struct *task, ktime now)
 {
-	int cpu;
+	ktime rt;
+
+	rt = ktime_sub(now, task->exec_start);
+
+	task->runtime = ktime_sub(task->runtime, rt);
+	task->total   = ktime_add(task->total, rt);
+	task->state   = TASK_RUN;
+}
 
+/**
+ * @brief find the next task to execute
+ *
+ * @returns the runtime to the next scheduling event
+ */
+
+static ktime sched_find_next_task(struct task_struct **task, int cpu, ktime now)
+{
 	struct scheduler *sched;
 
-	struct task_struct *next = NULL;
+	struct task_struct *next;
 
-	struct task_struct *current;
-	int64_t slot_ns = 1000000LL;
-	int64_t wake_ns = 1000000000;
+	ktime slice;
 
-	ktime rt;
-	ktime now;
 
+	/* our schedulers are sorted by priority, so the highest-priority
+	 * scheduler with some task to run gets to call dibs on the cpu
+	 */
+	list_for_each_entry(sched, &kernel_schedulers, node) {
 
-	cpu = smp_cpu_id();
+		next = sched->pick_next_task(sched->tq, cpu, now);
 
-	if (!sched_enabled[cpu])
-		return;
-#if 1
-	/* booted yet? */
-	if (!current_set[cpu])
-		return;
-#endif
+		if (next) {
+			/* we found something to execute, off we go */
+			slice = next->sched->timeslice_ns(next);
+			break;
+		}
+	}
 
+	/* NOTE: _next_ can never be NULL, as there must always be at least
+	 * the initial kernel bootup thread present or scheduling must be
+	 * disabled altogether
+	 */
+	BUG_ON(!next);
 
-	arch_local_irq_disable();
 
+	/* Determine the most pressing ready time: if a scheduler of equal or
+	 * higher priority has a task becoming ready before the selected
+	 * task's time slice expires, restrict the slice to that ready time.
+	 * This is particularly needed for strictly periodic schedulers,
+	 * e.g. EDF
+	 */
+	list_for_each_entry(sched, &kernel_schedulers, node) {
 
-	/* get the current task for this CPU */
-	current = current_set[cpu]->task;
+		ktime ready;
 
 
+		/* NOTE: non-periodic, non-real-time schedulers (e.g. round
+		 *       robin, fifo, ...) are supposed to return a zero-timeout
+		 *       for next task readiness, since their tasks are
+		 *       always ready to run
+		 */
 
-	now = ktime_get();
+		ready = sched->task_ready_ns(sched->tq, cpu, now);
 
-	rt = ktime_sub(now, current->exec_start);
+		/* TODO raise kernel alarm if ready < 0, this would imply a
+		 *	real-time requirement has been violated
+		 */
 
-	/** XXX need timeslice_update callback for schedulers */
-	/* update remaining runtime of current thread */
+		BUG_ON(ready < 0);
 
-	current->runtime = ktime_sub(current->runtime, rt);
-	current->total = ktime_add(current->total, rt);
+		if (!ready)
+			continue;
 
-	/* XXX */
-	if (current->state == TASK_BUSY)
-		current->state = TASK_RUN;
+		if (ready >= slice)
+			continue;
 
-retry:
-	next = NULL;
-	wake_ns = 1000000000;
+		if (sched->priority >= next->sched->priority)
+			slice = ready;
+		else
+			break;
+	}
 
 
-	/* XXX need sorted list: highest->lowest scheduler priority, e.g.:
-	 * EDF -> RMS -> FIFO -> RR
-	 * TODO: scheduler priority value
-	 */
+	(*task) = next;
 
-	list_for_each_entry(sched, &kernel_schedulers, node) {
+	return slice;
+}
 
 
+/**
+ * @brief schedule and execute the next task
+ */
 
-		/* if one of the schedulers have a task which needs to run now,
-		 * next is non-NULL
-		 */
-		next = sched->pick_next_task(sched->tq, cpu, now);
+void schedule(void)
+{
+	int cpu;
 
-		/* check if we need to limit the next tasks timeslice;
-		 * since our scheduler list is sorted by scheduler priority,
-		 * only update the value if wake_next is not set;
-		 * XXX ---wrong description for implementation ---
-		 * because our schedulers are sorted, this means that if next
-		 * is set, the highest priority scheduler will both tell us
-		 * whether it has another task pending soon. If next is not set,
-		 * a lower-priority scheduler may set the next thread to run,
-		 * but we will take the smallest timeout from high to low
-		 * priority schedulers, so we enter this function again when
-		 * the timeslice of the next thread is over and we can determine
-		 * what needs to be run in the following scheduling cycle. This
-		 * way, we will distribute CPU time even to the lowest priority
-		 * scheduler, if available, but guarantee, that the highest
-		 * priority threads are always ranked and executed on time
-		 *
-		 * we assume that the timeslice is reasonable; if not fix it in
-		 * the corresponding scheduler
-		 */
+	ktime now;
+	ktime tick;
+	ktime slice;
 
-		if (next) {
-			/* we found something to execute, off we go */
-			slot_ns = next->sched->timeslice_ns(next);
-			break;
-		}
-	}
+	struct task_struct *next;
 
 
-	if (!next) {
-		/* there is absolutely nothing nothing to do, check again later */
-		tick_set_next_ns(wake_ns);
-		goto exit;
-	}
 
-	/* see if the remaining runtime in a thread is smaller than the wakeup
-	 * timeout. In this case, we will restrict ourselves to the remaining
-	 * runtime. This is particularly needeed for strictly periodic
-	 * schedulers, e.g. EDF
-	 */
+	cpu = smp_cpu_id();
 
-	/* XXX should go through sched list in reverse to pick most pressing
-	 * wakeup time
-	 */
-	//	list_for_each_entry(sched, &kernel_schedulers, node) {
-	sched = list_first_entry(&kernel_schedulers, struct scheduler, node);
-	wake_ns = sched->task_ready_ns(sched->tq, cpu, now);
+	if (!sched_enabled[cpu])
+		return;
 
-	BUG_ON(wake_ns < 0);
+	/* booted yet? */
+	BUG_ON(!current_set[cpu]);
 
-	if (wake_ns < slot_ns)
-		slot_ns  = wake_ns;
 
-	/* ALWAYS get current time here */
-	next->exec_start = now;;
-	next->state = TASK_BUSY;
+	arch_local_irq_disable();
 
 
+	now = ktime_get();
+	sched_update_runtime(current_set[cpu]->task, now);
+
+
+	tick = (ktime) tick_get_period_min_ns();
+
+	while (1) {
+
+		slice = sched_find_next_task(&next, cpu, now);
+		if (slice > tick)
+			break;
+
+		/* keep trying until we can find a task which can actually
+		 * execute given the system overhead
+		 */
 
-#if 1
-	if (slot_ns < 10000UL) {
-		//	printk("wake %lld slot %lld %s\n", wake_ns, slot_ns, next->name);
 		now = ktime_get();
-	//	BUG();
-		goto retry;
 	}
-#endif
 
-	/* subtract readout overhead */
-	tick_set_next_ns(ktime_sub(slot_ns, 9000LL));
-	//tick_set_next_ns(slot_ns);
+	next->exec_start = now;
+
+	/*
+	 * We subtract a tick period here to account for the approximate
+	 * overhead of the scheduling function, which is about twice the
+	 * ISR processing time. This could be improved, but it appears
+	 * to be sufficient for very high load, high frequency tasks
+	 *
+	 * On a GR712 @80 MHz, this makes the following EDF configuration
+	 * not miss any deadlines:
+	 *	1) P = 1000 ms, D = 999 ms, W = 300 ms
+	 *	2) P =  140 us, D = 115 us, W =  90 us
+	 *	3) P = 1000 ms, D = 999 ms, W = 300 ms
+	 *	4) P =  140 us, D = 115 us, W =  90 us
+	 *
+	 * This configuration puts a load of 94.3% on each CPU, running tasks
+	 * 1+2, 3+4 respectively. The remaining runtime is allocated to the
+	 * RR-mode threads of the kernel main boot threads per CPU.
+	 *
+	 * Note: This means that the scheduling overhead comes out of the
+	 *	 run-time budget of each task, no matter the scheduler type.
+	 *	 In the above example, the high-frequency tasks cannot use
+	 *	 more than about 93 percent of their actual WCET. Obviously,
+	 *	 this will be significantly less for longer period/WCET tasks.
+	 */
 
+	/* set next wakeup */
+	tick_set_next_ns(ktime_sub(slice, tick));
 
 
 	prepare_arch_switch(1);
 	switch_to(next);
 
-exit:
 	arch_local_irq_enable();
 }
 
 
+/**
+ * @brief yield remaining runtime and reschedule
+ */
+
+void sched_yield(void)
+{
+	struct task_struct *tsk;
+
+	tsk = current_set[smp_cpu_id()]->task;
+	tsk->runtime = 0;
+
+	schedule();
+}
+
+
+/**
+ * @brief wake up a task
+ */
+
+int sched_wake(struct task_struct *task, ktime now)
+{
+	if (!task)
+		return -EINVAL;
+
+	if (!task->sched) {
+		pr_err(MSG "no scheduler configured for task %s\n", task->name);
+		return -EINVAL;
+	}
+
+	return task->sched->wake_task(task, now);
+}
 
 
 /**
@@ -190,16 +249,22 @@ exit:
 
 int sched_enqueue(struct task_struct *task)
 {
+	int ret;
+
+
+	if (!task)
+		return -EINVAL;
+
 	if (!task->sched) {
 		pr_err(MSG "no scheduler configured for task %s\n", task->name);
 		return -EINVAL;
 	}
 
-	/** XXX retval **/
-	if (task->sched->check_sched_attr(&task->attr))
-		return -EINVAL;
+	ret = task->sched->check_sched_attr(&task->attr);
+	if (ret)
+		return ret;
 
-	task->sched->enqueue_task(task->sched->tq, task);
+	task->sched->enqueue_task(task);
 
 	return 0;
 }
@@ -236,9 +301,7 @@ int sched_set_attr(struct task_struct *task, struct sched_attr *attr)
 			if (sched->check_sched_attr(attr))
 				goto error;
 
-			task->sched  = sched;
-
-			/* XXX other stuff */
+			task->sched = sched;
 
 			return 0;
 		}
@@ -248,6 +311,7 @@ int sched_set_attr(struct task_struct *task, struct sched_attr *attr)
 
 error:
 	task->sched = NULL;
+
 	return -EINVAL;
 }
 
@@ -259,7 +323,6 @@ error:
 
 int sched_get_attr(struct task_struct *task, struct sched_attr *attr)
 {
-
 	if (!task)
 		return -EINVAL;
 
@@ -280,8 +343,9 @@ int sched_get_attr(struct task_struct *task, struct sched_attr *attr)
 
 int sched_set_policy_default(struct task_struct *task)
 {
-	struct sched_attr attr = {.policy = SCHED_RR,
-		.priority = 1};
+	struct sched_attr attr = {.policy   = SCHED_RR,
+				  .priority = 1};
+
 
 	return sched_set_attr(task, &attr);
 }
@@ -289,18 +353,56 @@ int sched_set_policy_default(struct task_struct *task)
 
 /**
  * @brief register a new scheduler
+ *
+ * @returns 0 on success,
+ *	   -EPERM if the scheduler instance is already registered,
+ *	   -EINVAL otherwise
+ *
+ * XXX locking
  */
 
 int sched_register(struct scheduler *sched)
 {
-	/* XXX locks */
+	int swap;
+
+	struct scheduler *elem;
+	struct scheduler *tmp;
+
+
+
+	if (!sched)
+		return -EINVAL;
+
+
+	list_for_each_entry(elem, &kernel_schedulers, node) {
+		if (elem == sched) {
+			pr_err(MSG "scheduler instance already registered\n");
+			return -EPERM;
+		}
+	}
+
+	list_add_tail(&sched->node, &kernel_schedulers);
+
+
+	/* bubble-sort by priority */
+	do {
+		swap = 0;
+
+		list_for_each_entry_safe(elem, tmp, &kernel_schedulers, node) {
+
+			struct scheduler *next = list_next_entry(elem, node);
+
+			if (elem->priority < next->priority) {
+
+				list_swap(&elem->node, &next->node);
+
+				swap = 1;
+			}
+		}
+
+	} while (swap);
 
 
-	/* XXX stupid */
-	if (!sched->sched_priority)
-		list_add_tail(&sched->node, &kernel_schedulers);
-	else
-		list_add(&sched->node, &kernel_schedulers);
 
 	return 0;
 }
diff --git a/kernel/sched/edf.c b/kernel/sched/edf.c
index d2e685b..91d7154 100644
--- a/kernel/sched/edf.c
+++ b/kernel/sched/edf.c
@@ -13,13 +13,36 @@
 #include <kernel/init.h>
 #include <kernel/smp.h>
 
-#include <generated/autoconf.h> /* XXX need common CPU include */
+#include <asm/spinlock.h>
+#include <asm-generic/irqflags.h>
+
 
 
 #define MSG "SCHED_EDF: "
 
 #define UTIL_MAX 0.98 /* XXX should be config option, also should be adaptive depending on RT load */
 
+static struct spinlock edf_spinlock;
+
+/**
+ * @brief lock critical edf section
+ */
+
+static void edf_lock(void)
+{
+	spin_lock_raw(&edf_spinlock);
+}
+
+
+/**
+ * @brief unlock critical edf section
+ */
+
+static void edf_unlock(void)
+{
+	spin_unlock(&edf_spinlock);
+}
+
 
 void sched_print_edf_list_internal(struct task_queue *tq, int cpu, ktime now)
 {
@@ -36,7 +59,7 @@ void sched_print_edf_list_internal(struct task_queue *tq, int cpu, ktime now)
 	ktime wake;
 
 
-	printk("\nktime: %lld CPU %d\n", ktime_to_ms(now), cpu);
+	printk("\nktime: %lld CPU %d\n", ktime_to_us(now), cpu);
 	printk("S\tDeadline\tWakeup\t\tt_rem\ttotal\tslices\tName\t\twcet\tavg(us)\n");
 	printk("------------------------------------------------------------------\n");
 	list_for_each_entry_safe(tsk, tmp, &tq->run, node) {
@@ -73,34 +96,14 @@ void sched_print_edf_list_internal(struct task_queue *tq, int cpu, ktime now)
 }
 
 
-#include <asm/spinlock.h>
-static struct spinlock edf_spinlock;
-
-
-/**
- * @brief lock critical rr section
- */
-
- void edf_lock(void)
-{
-	return;
-	spin_lock_raw(&edf_spinlock);
-}
-
-
-/**
- * @brief unlock critical rr section
- */
-
-void edf_unlock(void)
-{
-	return;
-	spin_unlock(&edf_spinlock);
-}
-
-
-
 /**
+ * @brief check if an EDF task can still execute given its deadline and
+ *        remaining runtime
+ *
+ *
+ * @returns true if can still execute before deadline
+ *
+ *
  * Our EDF task scheduling timeline:
  *
  *
@@ -122,30 +125,22 @@ void edf_unlock(void)
  *   |---------------- period ------------------|
  */
 
-
-/**
- * @brief check if an EDF task can still execute given its deadline and
- *        remaining runtime
- *
- *
- * @returns true if can still execute before deadline
- */
-
 static inline bool schedule_edf_can_execute(struct task_struct *tsk, int cpu, ktime now)
 {
-	int64_t to_deadline;
+	ktime tick;
+	ktime to_deadline;
 
 
 
+	/* consider twice the min tick period for overhead */
+	tick = tick_get_period_min_ns() << 1;
 
-	/* should to consider twice the min tick period for overhead */
-	if (tsk->runtime <= (tick_get_period_min_ns() << 1))
+	if (tsk->runtime <= tick)
 		return false;
 
 	to_deadline = ktime_delta(tsk->deadline, now);
 
-	/* should to consider twice the min tick period for overhead */
-	if (ktime_delta(tsk->deadline, now) <= (tick_get_period_min_ns() << 1))
+	if (ktime_delta(tsk->deadline, now) <= tick)
 		return false;
 
 
@@ -153,10 +148,17 @@ static inline bool schedule_edf_can_execute(struct task_struct *tsk, int cpu, kt
 }
 
 
+/**
+ * @brief reinitialise a task
+ */
+
 static inline void schedule_edf_reinit_task(struct task_struct *tsk, ktime now)
 {
 	ktime new_wake;
 
+
+
+	/* XXX this task never ran, we're in serious trouble */
 	if (tsk->runtime == tsk->attr.wcet) {
 		printk("T == WCET!! %s\n", tsk->name);
 		__asm__ __volatile__ ("ta 0\n\t");
@@ -164,20 +166,25 @@ static inline void schedule_edf_reinit_task(struct task_struct *tsk, ktime now)
 
 
 	new_wake = ktime_add(tsk->wakeup, tsk->attr.period);
-#if 1
-	/* need FDIR procedure for this situation: report and wind
-	 * wakeup/deadline forward */
 
-	if (ktime_after(now, new_wake)){ /* deadline missed earlier? */
-		printk("%s violated, rt: %lld, next wake: %lld (%lld)\n", tsk->name,
-		       tsk->runtime, tsk->wakeup, new_wake);
-		sched_print_edf_list_internal(&tsk->sched->tq[tsk->on_cpu], tsk->on_cpu, now);
-		__asm__ __volatile__ ("ta 0\n\t");
+	/* deadline missed earlier?
+	 * XXX need FDIR procedure for this situation: report and wind
+	 *     wakeup/deadline forward
+	 */
+
+	if (ktime_after(now, new_wake)) {
+
+		printk("%s violated, rt: %lld, next wake: %lld (%lld)\n",
+		       tsk->name, tsk->runtime, tsk->wakeup, new_wake);
+
+		sched_print_edf_list_internal(&tsk->sched->tq[tsk->on_cpu],
+					      tsk->on_cpu, now);
 
 		/* XXX raise kernel alarm and attempt to recover wakeup */
-		BUG();
+		__asm__ __volatile__ ("ta 0\n\t");
 	}
-#endif
+
+
 	if (tsk->flags & TASK_RUN_ONCE) {
 		tsk->state = TASK_DEAD;
 		return;
@@ -254,272 +261,224 @@ static ktime edf_hyperperiod(struct task_queue tq[], int cpu, const struct task_
 }
 
 
-
 /**
- * @brief EDF schedulability test
- *
- * @returns the cpu to run on if schedulable, -EINVAL otherwise
- *
- *
- * * 1) determine task with longest period
- *
- *	T1: (P=50, D=20, R=10)
- *
- * 2) calculate unused head and tail (before and after deadline)
- *
- *	UH = D1 - R1				(= 20) (Hyperperiod)
- *	UT = P1 - D1				(= 60)
- *
- * 3) loop over other tasks (Period < Deadline of Task 1)
- *
- *	calculate slots usage before deadline of Task 1:
- *
- *	H * Ri * D1 / Pi		(T2: 10, T5: 10)
- *
- *	update head slots UH = UH - 20 = 0 -> all used
- *
- *
- *	calculate slot usage after deadline of Task2:
+ * @brief basic EDF utilisation
  *
- *	H * Ri * F1 / Pi		(T2: 15, T5: 15)
- *
- *	update tail slots: UT = UT - 30 = 30
+ * @param task may be NULL to exclude it from the sum
  *
- *	-> need hyperperiod factor H = 2
- *
- *
- * ####
- *
- * Note: EDF in SMP configurations is not an optimal algorithm, and deadlines
- *	 cannot be guaranteed even for utilisation values just above 1.0
- *	 (Dhall's effect). In order to mitigate this for EDF tasks with no
- *	 CPU affinity set (KTHREAD_CPU_AFFINITY_NONE), we search the per-cpu
- *	 queues until we find one which is below the utilisation limit and
- *	 force the affinity of the task to that particular CPU
- *
- *
- *	 XXX function needs adaptation
  */
 
-
-static int edf_schedulable(struct task_queue tq[], const struct task_struct *task)
+static double edf_utilisation(struct task_queue tq[], int cpu,
+			      const struct task_struct *task)
 {
-	struct task_struct *tsk = NULL;
+	double u = 0.0;
+
+	struct task_struct *t;
 	struct task_struct *tmp;
 
-	int cpu = -EINVAL;
 
-	double u = 0.0;	/* utilisation */
+	/* add new task */
+	if (task)
+		u = (double)  task->attr.wcet / (double)  task->attr.period;
 
+	/* add tasks queued in wakeup */
+	list_for_each_entry_safe(t, tmp, &tq[cpu].wake, node)
+		u += (double) t->attr.wcet / (double) t->attr.period;
 
+	/* add all running */
+	list_for_each_entry_safe(t, tmp, &tq[cpu].run, node)
+		u += (double) t->attr.wcet / (double) t->attr.period;
 
+	return u;
+}
 
-	if (task->on_cpu == KTHREAD_CPU_AFFINITY_NONE) {
-		int i;
-		double util_max = 0.0;
 
-		for (i = 0; i < 2; i++) {
-			/* XXX look for best fit */
-			double util;
-			/* add new task */
-			util= (double) (int32_t) task->attr.wcet / (double) (int32_t) task->attr.period;
+static int edf_find_cpu(struct task_queue tq[], const struct task_struct *task)
+{
+	int cpu = -ENODEV;
+	int i;
+	double u;
+	double u_max = 0.0;
 
 
-			/* add tasks queued in wakeup */
-			if (!list_empty(&tq[i].wake)) {
-				list_for_each_entry_safe(tsk, tmp, &tq[i].wake, node) {
-					util += (double) (int32_t) tsk->attr.wcet / (double) (int32_t) tsk->attr.period;
-				}
 
-			}
+	/* XXX need cpu_nr_online() */
+	for (i = 0; i < CONFIG_SMP_CPUS_MAX; i++) {
 
-			/* add all running */
-			if (!list_empty(&tq[i].run)) {
-				list_for_each_entry_safe(tsk, tmp, &tq[i].run, node)
-					util += (double) (int32_t) tsk->attr.wcet / (double) (int32_t) tsk->attr.period;
-			}
+		u = edf_utilisation(tq, i, task);
 
+		if (u > UTIL_MAX)
+			continue;
 
-		//	printk("UTIL %g\n", util);
-			if (util > UTIL_MAX)
-				continue;
+		if (u > u_max) {
+			u_max = u;
+			cpu = i;
+		}
+	}
 
-			if (util > util_max) {
-				util_max = util;
-				cpu = i;
-			}
+	return cpu;
+}
 
+/**
+ * @brief returns the longest period task
+ */
 
+static struct task_struct *
+edf_longest_period_task(struct task_queue tq[], int cpu,
+			const struct task_struct *task)
+{
+	ktime max = 0;
+
+	struct task_struct *t;
+	struct task_struct *tmp;
+
+	struct task_struct *t0 = NULL;
+
+
+
+	t0 = (struct task_struct *) task;
+	max = task->attr.period;
+
+	list_for_each_entry_safe(t, tmp, &tq[cpu].wake, node) {
+		if (t->attr.period > max) {
+			t0 = t;
+			max = t->attr.period;
 		}
-		if (cpu == -EINVAL) {
-	//		printk("---- WILL NOT FIT ----\n");
-			return -EINVAL;
-		}
+	}
 
-	//	printk("best fit is %d\n", cpu);
-	} else {
-		cpu = task->on_cpu;
+	list_for_each_entry_safe(t, tmp, &tq[cpu].run, node) {
+		if (t->attr.period > max) {
+			t0 = t;
+			max = t->attr.period;
+		}
 	}
 
+	return t0;
+}
+
+/**
+ * @brief performs a more complex slot utilisation test
+ *
+ * @returns  0 if the new task is schedulable
+ */
 
-retry:
-	/******/
-if (1)
+static int edf_test_slot_utilisation(struct task_queue tq[], int cpu,
+				     const struct task_struct *task)
 {
-	int nogo = 0;
-	//printk("\n\n\n");
 	ktime p;
 	ktime h;
-	ktime max = 0;
 
 	ktime uh, ut, f1;
 	ktime sh = 0, st = 0;
-        ktime stmin = 0x7fffffffffffffULL;
-        ktime shmin = 0x7fffffffffffffULL;
+	ktime stmin = 0x7fffffffffffffULL;
+	ktime shmin = 0x7fffffffffffffULL;
 
 	struct task_struct *t0;
+	struct task_struct *tsk;
+	struct task_struct *tmp;
 
-	//printk("hyper? %s %lld\n", task->name, ktime_to_ms(task->attr.period));
-	p = edf_hyperperiod(tq, cpu, task);
-	//printk("hyper %llu\n", ktime_to_ms(p));
 
 
-	/* new */
-	t0 = (struct task_struct *) task;
-	max = task->attr.period;
+	t0 = edf_longest_period_task(tq, cpu, task);
+	if (!t0)
+		return 0;
 
-	/* add tasks queued in wakeup */
-	if (!list_empty(&tq[cpu].wake)) {
-		list_for_each_entry_safe(tsk, tmp, &tq[cpu].wake, node) {
-			 if (tsk->attr.period > max) {
-				 t0 = tsk;
-				 max = tsk->attr.period;
-			 }
-		}
-	}
-
-	/* add tasks queued in run */
-	if (!list_empty(&tq[cpu].run)) {
-		list_for_each_entry_safe(tsk, tmp, &tq[cpu].run, node) {
-			 if (tsk->attr.period > max) {
-				 t0 = tsk;
-				 max = tsk->attr.period;
-			 }
-		}
-	}
 
-	//printk("t0: %s (cpu %d)\n", t0->name, cpu);
-	h = p / t0->attr.period;
+	p = edf_hyperperiod(tq, cpu, task);
 
+	/* XXX don't know why h=1 would work, needs proper testing */
+#if 1
+	h = p / t0->attr.period; /* period factor */
+#else
 	h = 1;
-	//printk("Period factor %lld, duration %lld actual period: %lld\n", h, ktime_to_ms(p), ktime_to_ms(t0->attr.period));
+#endif
 
 
+	/* max available head and tail slots before and after deadline of
+	 * longest task
+	 */
 	uh = h * (t0->attr.deadline_rel - t0->attr.wcet);
 	ut = h * (t0->attr.period - t0->attr.deadline_rel);
 	f1 = ut/h;
 
-	//printk("max UH: %lld, UT: %lld\n", ktime_to_ms(uh), ktime_to_ms(ut));
-
 
 	/* tasks queued in wakeup */
-	if (!list_empty(&tq[cpu].wake)) {
-		list_for_each_entry_safe(tsk, tmp, &tq[cpu].wake, node) {
+	list_for_each_entry_safe(tsk, tmp, &tq[cpu].wake, node) {
 
-			if (tsk == t0)
-				continue;
+		if (tsk == t0)
+			continue;
 
-			//printk("tsk wake: %s\n", tsk->name);
-			if (tsk->attr.deadline_rel <= t0->attr.deadline_rel) {
-
-				/* slots before deadline of  T0 */
-				sh = h * tsk->attr.wcet * t0->attr.deadline_rel / tsk->attr.period;
-				if (sh < shmin)
-					shmin = sh;
-				if (sh > uh) {
-					//printk("WARN: NOT SCHEDULABLE in head: %s\n", tsk->name);
-				nogo = 1;
-				}
-				uh = uh - sh;
-			}
+		if (tsk->attr.deadline_rel <= t0->attr.deadline_rel) {
 
-			/* slots after deadline of T0 */
-			st = h * tsk->attr.wcet * f1 / tsk->attr.period;
-			if (st < stmin)
-				stmin = st;
+			/* slots before deadline of T0 */
+			sh = h * tsk->attr.wcet * t0->attr.deadline_rel / tsk->attr.period;
 
-			if (st > ut) {
-				//printk("WARN: NOT SCHEDULABLE in tail: %s\n", tsk->name);
-				nogo = 1;
-			}
+			if (sh < shmin)
+				shmin = sh;
 
-			ut = ut - st;
+			if (sh > uh)
+				return -1;
 
-			//printk("w %s UH: %lld, UT: %lld\n", tsk->name, ktime_to_ms(uh), ktime_to_ms(ut));
+			uh = uh - sh;
+		}
 
-			//printk("w %s SH: %lld, ST: %lld\n", tsk->name, ktime_to_ms(sh), ktime_to_ms(st));
+		/* slots after deadline of T0 */
+		st = h * tsk->attr.wcet * f1 / tsk->attr.period;
+		if (st < stmin)
+			stmin = st;
 
-		}
+		if (st > ut)
+			return -2;	/* not schedulable in remaining tail */
+
+		ut = ut - st;		/* update tail utilisation */
 	}
 
 
 	/* tasks queued in run */
-	if (!list_empty(&tq[cpu].run)) {
-		list_for_each_entry_safe(tsk, tmp, &tq[cpu].run, node) {
-
-			if (tsk == t0)
-				continue;
-
-			//printk("tsk run: %s\n", tsk->name);
-			if (tsk->attr.deadline_rel <= t0->attr.deadline_rel) {
-
-				/* slots before deadline of  T0 */
-				sh = h * tsk->attr.wcet * t0->attr.deadline_rel / tsk->attr.period;
-				if (sh < shmin)
-					shmin = sh;
-				if (sh > uh) {
-					//printk("WARN: NOT SCHEDULABLE in head: %s\n", tsk->name);
-				nogo = 1;
-				}
-				uh = uh - sh;
-			}
+	list_for_each_entry_safe(tsk, tmp, &tq[cpu].run, node) {
 
-			/* slots after deadline of T0 */
-			st = h * tsk->attr.wcet * f1 / tsk->attr.period;
-			if (st < stmin)
-				stmin = st;
+		if (tsk == t0)
+			continue;
 
-			if (st > ut) {
-				//printk("WARN: NOT SCHEDULABLE in tail: %s\n", tsk->name);
-				nogo = 1;
-			}
+		if (tsk->attr.deadline_rel <= t0->attr.deadline_rel) {
 
-			ut = ut - st;
+			/* slots before deadline of T0 */
+			sh = h * tsk->attr.wcet * t0->attr.deadline_rel / tsk->attr.period;
 
-			//printk("w %s UH: %lld, UT: %lld\n", tsk->name, ktime_to_ms(uh), ktime_to_ms(ut));
+			if (sh < shmin)
+				shmin = sh;
 
-			//printk("w %s SH: %lld, ST: %lld\n", tsk->name, ktime_to_ms(sh), ktime_to_ms(st));
+			if (sh > uh)
+				return -1;
 
+			uh = uh - sh;
 		}
-	}
 
+		/* slots after deadline of T0 */
+		st = h * tsk->attr.wcet * f1 / tsk->attr.period;
+		if (st < stmin)
+			stmin = st;
 
+		if (st > ut)
+			return -2;	/* not schedulable in remaining tail */
 
+		ut = ut - st;		/* update tail utilisation */
+	}
 
 
-	if (task != t0) {
 
+	if (task != t0) {
 		if (task->attr.deadline_rel <= t0->attr.deadline_rel) {
-			//printk("task: %s\n", task->name);
 
-			/* slots before deadline of  T0 */
+			/* slots before deadline of T0 */
 			sh = h * task->attr.wcet * t0->attr.deadline_rel / task->attr.period;
+
 			if (sh < shmin)
 				shmin = sh;
-			if (sh > uh) {
-				//printk("xWARN: NOT SCHEDULABLE in head: %s\n", task->name);
-				nogo = 1;
-			}
+
+			if (sh > uh)
+				return -1;
+
 			uh = uh - sh;
 		}
 
@@ -528,78 +487,127 @@ if (1)
 		if (st < stmin)
 			stmin = st;
 
-		if (st > ut) {
-			//printk("xWARN: NOT SCHEDULABLE in tail: %s\n", task->name);
-			nogo = 1;
-		}
+		if (st > ut)
+			return -2;	/* not schedulable in remaining tail */
 
-		ut = ut - st;
+		ut = ut - st;		/* update tail utilisation */
+	}
 
-		//printk("x %s UH: %lld, UT: %lld\n", task->name, ktime_to_ms(uh), ktime_to_ms(ut));
 
-		//printk("x %s SH: %lld, ST: %lld\n", task->name, ktime_to_ms(sh), ktime_to_ms(st));
+	return 0;
+}
 
-	}
 
-	if (nogo == 1) {
-		if (cpu == 0) {
-			cpu = 1;
-			//printk("RETRY\n");
-			goto retry;
-		} else {
-			//printk("RETURN: I am NOT schedul-ableh: %f ", u);
-			return -EINVAL;
-		}
-	}
+/**
+ * @brief EDF schedulability test
+ *
+ * @returns the cpu to run on if schedulable, -EINVAL otherwise
+ *
+ *
+ * We perform two tests, the first is the very basic
+ *
+ *        __  WCET_i
+ *        \   ------  <= 1
+ *        /_   P_i
+ *
+ * the other one is slightly more complex (with example values):
+ *
+ *
+ * 1) determine task with longest period
+ *
+ *	T1: (P=50, D=20, R=10)
+ *
+ * 2) calculate unused head and tail (before and after deadline)
+ *
+ *	UH = D1 - R1				(= 20) (Hyperperiod)
+ *	UT = P1 - D1				(= 60)
+ *
+ * 3) loop over other tasks (Period < Deadline of Task 1)
+ *
+ *	calculate slots usage before deadline of Task 1:
+ *
+ *	H * Ri * D1 / Pi		(T2: 10, T5: 10)
+ *
+ *	update head slots UH = UH - 20 = 0 -> all used
+ *
+ *
+ *	calculate slot usage after deadline of Task2:
+ *
+ *	H * Ri * F1 / Pi		(T2: 15, T5: 15)
+ *
+ *	update tail slots: UT = UT - 30 = 30
+ *
+ *	-> need hyperperiod factor H = 2
+ *
+ *
+ * Note: EDF in SMP configurations is not an optimal algorithm, and deadlines
+ *	 cannot be guaranteed even for utilisation values just above 1.0
+ *	 (Dhall's effect). In order to mitigate this for EDF tasks with no
+ *	 CPU affinity set (KTHREAD_CPU_AFFINITY_NONE), we search the per-cpu
+ *	 queues until we find one which is below the utilisation limit and
+ *	 force the affinity of the task to that particular CPU
+ */
 
+static int edf_schedulable(struct task_queue tq[], const struct task_struct *task)
+{
+	int cpu = -ENODEV;
 
-	//printk("\n\n\n");
-}
-	/*******/
 
+	if (task->on_cpu == KTHREAD_CPU_AFFINITY_NONE) {
 
+		cpu = edf_find_cpu(tq, task);
+		if (cpu < 0)
+			return cpu;
 
+	} else {
+		cpu = task->on_cpu;
+	}
 
-	/* add new task */
-	u += (double) (int32_t) task->attr.wcet / (double) (int32_t) task->attr.period;
 
 
+	/* try to locate a CPU which could fit the task
+	 *
+	 * XXX this needs some rework, also we must consider only
+	 * cpus which are actually online
+	 */
+	if (edf_test_slot_utilisation(tq, cpu, task)) {
 
-	/* add tasks queued in wakeup */
-	if (!list_empty(&tq[cpu].wake)) {
-		list_for_each_entry_safe(tsk, tmp, &tq[cpu].wake, node) {
-			u += (double) (int32_t) tsk->attr.wcet / (double) (int32_t) tsk->attr.period;
-		}
+		int i;
 
-	}
+		for (i = 0; i < CONFIG_SMP_CPUS_MAX; i++) {
 
-	/* add all running */
-	if (!list_empty(&tq[cpu].run)) {
-		list_for_each_entry_safe(tsk, tmp, &tq[cpu].run, node)
-			u += (double) (int32_t) tsk->attr.wcet / (double) (int32_t) tsk->attr.period;
-	}
+			if (i == cpu)
+				continue;
 
-	if (u > UTIL_MAX) {
-//		printk("I am NOT schedul-ableh: %f ", u);
-		BUG();
-		return -EINVAL;
-		printk("changed task mode to RR\n", u);
-	}
+			if (edf_utilisation(tq, cpu, task) > UTIL_MAX)
+				continue;
+
+			if (edf_test_slot_utilisation(tq, i, task))
+				continue;
 
-//	printk("Utilisation: %g CPU %d\n", u, cpu);
+			return i; /* found one */
+		}
+
+	}
 
+	if (edf_utilisation(tq, cpu, task) > UTIL_MAX)
+		return -ENODEV;
 
-	/* TODO check against projected interrupt rate, we really need a limit
-	 * there */
+	/* TODO check utilisation against projected interrupt rate */
 
 	return cpu;
 }
 
 
+/**
+ * @brief select the next task to run
+ */
+
 static struct task_struct *edf_pick_next(struct task_queue *tq, int cpu,
 					 ktime now)
 {
-	int64_t delta;
+	ktime tick;
+	ktime delta;
 
 	struct task_struct *tsk;
 	struct task_struct *tmp;
@@ -610,10 +618,12 @@ static struct task_struct *edf_pick_next(struct task_queue *tq, int cpu,
 	if (list_empty(&tq[cpu].run))
 		return NULL;
 
-	edf_lock();
 
+	/* we use twice the tick period as minimum time to a wakeup */
+	tick = (ktime) tick_get_period_min_ns() << 1;
+
+	edf_lock();
 
-	/* XXX need to lock run list for wakeup() */
 
 	list_for_each_entry_safe(tsk, tmp, &tq[cpu].run, node) {
 
@@ -621,8 +631,8 @@ static struct task_struct *edf_pick_next(struct task_queue *tq, int cpu,
 		/* time to wake up yet? */
 		delta = ktime_delta(tsk->wakeup, now);
 
-		/* not yet XXX min period to variable */
-		if (delta > (tick_get_period_min_ns() << 1))
+		/* not yet... */
+		if (delta > tick)
 			continue;
 
 
@@ -680,19 +690,17 @@ static struct task_struct *edf_pick_next(struct task_queue *tq, int cpu,
 
 		if (tsk->state == TASK_DEAD){ /* XXX need other mechanism */
 			list_del(&tsk->node);
-			kfree(tsk->stack);
-			kfree(tsk->name);
-			kfree(tsk);
+			kthread_free(tsk);
 			continue;
 		}
-
-
-
 	}
 
 
 	first = list_first_entry(&tq[cpu].run, struct task_struct, node);
+
+
 	edf_unlock();
+
 	if (first->state == TASK_RUN)
 		return first;
 
@@ -700,138 +708,239 @@ static struct task_struct *edf_pick_next(struct task_queue *tq, int cpu,
 }
 
 
+/**
+ * @brief verify that a task is in the wake queue
+ */
 
-#include <asm-generic/irqflags.h>
-static void edf_wake_next(struct task_queue *tq, int cpu, ktime now)
+static int edf_task_in_wake_queue(struct task_struct *task,
+				  struct task_queue tq[],
+				  int cpu)
 {
-	ktime last;
+	int found = 0;
 
-	struct task_struct *tmp;
-	struct task_struct *task;
-	struct task_struct *first = NULL;
 	struct task_struct *t;
+	struct task_struct *tmp;
 
 
-	ktime max = 0;
+	list_for_each_entry_safe(t, tmp, &tq[cpu].wake, node) {
 
+		if (t != task)
+			continue;
 
-	struct task_struct *after = NULL;
+		found = 1;
+		break;
+	}
 
+	return found;
+}
 
-	if (list_empty(&tq[cpu].wake))
-		return;
 
-	edf_lock();
-	last = now;
+/**
+ * @brief determine the earliest sensible wakeup for a periodic task given
+ *	  the current run queue
+ */
 
-	/* no period, run it asap */
-	task = list_first_entry(&tq[cpu].wake, struct task_struct, node);
-	if (task->flags & TASK_RUN_ONCE)
-		goto insert;
+static ktime edf_get_earliest_wakeup(struct task_queue tq[],
+				     int cpu, ktime now)
+{
+	ktime max  = 0;
+	ktime wakeup = now;
+
+	struct task_struct *t;
+	struct task_struct *tmp;
+	struct task_struct *after = NULL;
 
 
-	list_for_each_entry_safe(task, tmp, &tq->run, node) {
+	list_for_each_entry_safe(t, tmp, &tq[cpu].run, node) {
 
-			/* XXX need other mechanism */
-			if (task->state == TASK_DEAD) {
-				list_del(&task->node);
-				kfree(task->stack);
-				kfree(task->name);
-				kfree(task);
-				continue;
-			}
+		if (t->state == TASK_DEAD)
+			continue;
 
-			if (task->flags & TASK_RUN_ONCE)
-				continue;
+		if (t->flags & TASK_RUN_ONCE)
+			continue;
 
-		if (max > task->attr.period)
+		if (max > t->attr.period)
 			continue;
 
-		max = task->attr.period;
-		after = task;
+		max   = t->attr.period;
+		after = t;
 	}
 
-
 	if (after)
-		last = ktime_add(after->wakeup, after->attr.period);
+		wakeup = ktime_add(after->wakeup, after->attr.period);
+
+	return wakeup;
+}
 
 
-	task = list_first_entry(&tq[cpu].wake, struct task_struct, node);
+/**
+ * @brief sort run queue in order of urgency of wakeup
+ */
 
-	/* XXX */
-	BUG_ON(task->on_cpu == KTHREAD_CPU_AFFINITY_NONE);
+static void edf_sort_queue_by_urgency(struct task_queue tq[],
+				      int cpu, ktime now)
+{
+	struct task_struct *t;
+	struct task_struct *tmp;
+	struct task_struct *first;
 
+	list_for_each_entry_safe(t, tmp, &tq[cpu].run, node) {
 
-	if (!list_empty(&tq[cpu].run)) {
+		if (t->flags & TASK_RUN_ONCE)
+			continue;
 
-		/* reorder */
+		if (t->state == TASK_DEAD)
+			continue;
 
-		list_for_each_entry_safe(t, tmp, &tq[cpu].run, node) {
+		first = list_first_entry(&tq[cpu].run, struct task_struct, node);
 
+		if (ktime_before (t->wakeup, now)) {
 
-			if (t->flags & TASK_RUN_ONCE)
-				continue;
+			ktime latest_wake;
 
-			if (t->state == TASK_DEAD)
-				continue;
+			latest_wake = ktime_delta(t->deadline, t->runtime);
 
+			if (ktime_before(latest_wake, first->deadline))
+				list_move(&t->node, &tq[cpu].run);
 
-			first = list_first_entry(&tq[cpu].run, struct task_struct, node);
-			if (ktime_before (t->wakeup, now)) {
-				if (ktime_before (t->deadline - t->runtime, first->deadline)) {
-					list_move(&t->node, &tq[cpu].run);
-				}
-			}
 		}
+	}
+}
 
-		list_for_each_entry_safe(t, tmp, &tq[cpu].run, node) {
 
-			if (t->flags & TASK_RUN_ONCE)
-				continue;
+/**
+ * @brief if possible, adjust the wakeup for a given periodic task
+ *
+ * @param wakeup the previous best estimate of the wakeup time
+ */
 
-			if (t->state != TASK_IDLE)
-				continue;
+static ktime edf_get_best_wakeup(struct task_struct *task,
+				 ktime wakeup,
+				 struct task_queue tq[],
+				 int cpu, ktime now)
+{
+	struct task_struct *t;
+	struct task_struct *tmp;
 
-			if (ktime_before (t->wakeup, now))
-				continue;
 
-			/* if the relative deadline of task-to-wake can fit in between the unused
-			 * timeslice of this running task, insert after the next wakeup
-			 */
-			if (task->attr.deadline_rel < ktime_sub(t->deadline, t->wakeup)) {
-				last = t->wakeup;
-				break;
-			}
+	/* locate the best position within the run queue's hyperperiod
+	 * for the task-to-wake
+	 */
+	list_for_each_entry_safe(t, tmp, &tq[cpu].run, node) {
 
-			if (task->attr.wcet < ktime_sub(t->deadline, t->wakeup)) {
-				last = t->deadline;
-				break;
-			}
+		ktime delta;
+
+		if (t->flags & TASK_RUN_ONCE)
+			continue;
+
+		if (t->state != TASK_IDLE)
+			continue;
+
+		if (ktime_before(t->wakeup, now))
+			continue;
+
+		/* if the relative deadline of our task-to-wake can fit in
+		 * between the unused timeslices of a running task, insert
+		 * it after its next wakeup
+		 */
+
+		delta = ktime_delta(t->deadline, t->wakeup);
+
+		if (task->attr.deadline_rel < delta) {
+			wakeup = t->wakeup;
+			break;
+		}
+
+		if (task->attr.wcet < delta) {
+			wakeup = t->deadline;
+			break;
 		}
 	}
 
+	return wakeup;
+}
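
The two comparisons above can be restated as a standalone helper; the flat
parameter list and the demo_ prefix are assumptions of this sketch and do not
mirror the kernel interfaces. The slack is the gap between an idle task's
next wakeup and its deadline: if the new task's whole relative deadline fits
into it, the new task is placed at that wakeup; if only its WCET fits, it is
placed at that deadline.

#include <stdint.h>

typedef int64_t ktime_ns;

/* propose a wakeup for a new task given one idle task's slack window;
 * returns the fallback if neither its relative deadline nor its WCET fit
 */
ktime_ns demo_best_wakeup(ktime_ns idle_wakeup, ktime_ns idle_deadline,
			  ktime_ns new_deadline_rel, ktime_ns new_wcet,
			  ktime_ns fallback)
{
	ktime_ns slack = idle_deadline - idle_wakeup;

	if (new_deadline_rel < slack)
		return idle_wakeup;

	if (new_wcet < slack)
		return idle_deadline;

	return fallback;
}
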
+
 
-insert:
-	/* initially furthest deadline as wakeup */
-	last  = ktime_add(last, 30000ULL); /* XXX minimum wakeup shift for overheads */
-	task->wakeup     = ktime_add(last, task->attr.period);
-	task->deadline   = ktime_add(task->wakeup, task->attr.deadline_rel);
+/**
+ * @brief wake up a task by inserting it into the run queue
+ */
+
+static int edf_wake(struct task_struct *task, ktime now)
+{
+	int cpu;
+
+	ktime wakeup;
+
+	unsigned long flags;
+
+	struct task_queue *tq;
+
+
+	if (!task)
+		return -EINVAL;
+
+	if (task->attr.policy != SCHED_EDF)
+		return -EINVAL;
+
+
+	tq  = task->sched->tq;
+	cpu = task->on_cpu;
+
+	if (cpu == KTHREAD_CPU_AFFINITY_NONE)
+		return -EINVAL;
+
+	if (list_empty(&tq[cpu].wake))
+		return -EINVAL;
+
+	if (!edf_task_in_wake_queue(task, tq, cpu))
+		return -EINVAL;
+
+
+	wakeup = now;
+
+	flags = arch_local_irq_save();
+	edf_lock();
+
+	/* if this is the first task and it is non-periodic, run it ASAP;
+	 * otherwise try to find a good insertion point
+	 */
+	if (!list_empty(&tq[cpu].run) || !(task->flags & TASK_RUN_ONCE)) {
+		wakeup = edf_get_earliest_wakeup(tq, cpu, now);
+		edf_sort_queue_by_urgency(tq, cpu, now);
+		wakeup = edf_get_best_wakeup(task, wakeup, tq, cpu, now);
+	}
+
+
+	/* shift wakeup by minimum tick period */
+	wakeup         = ktime_add(wakeup, tick_get_period_min_ns());
+	task->wakeup   = ktime_add(wakeup, task->attr.period);
+	task->deadline = ktime_add(task->wakeup, task->attr.deadline_rel);
 
 	/* reset runtime to full */
 	task->runtime = task->attr.wcet;
-	task->state = TASK_IDLE;
+	task->state   = TASK_IDLE;
 
 	list_move_tail(&task->node, &tq[cpu].run);
 
 	edf_unlock();
-}
+	arch_local_irq_restore(flags);
 
+	return 0;
+}
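
The wakeup and deadline assignments at the end of edf_wake() are plain
additions; a worked example with illustrative numbers only (not the kernel's
actual tick period or task attributes):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* illustrative values in nanoseconds */
	int64_t base         = 1000000;	/* best wakeup estimate: 1 ms */
	int64_t tick_min     =   10000;	/* minimum tick period: 10 us */
	int64_t period       = 5000000;	/* task period: 5 ms          */
	int64_t deadline_rel = 4000000;	/* relative deadline: 4 ms    */

	int64_t wakeup        = base + tick_min;		/*  1.01 ms */
	int64_t task_wakeup   = wakeup + period;		/*  6.01 ms */
	int64_t task_deadline = task_wakeup + deadline_rel;	/* 10.01 ms */

	printf("wakeup at %lld ns, deadline at %lld ns\n",
	       (long long) task_wakeup, (long long) task_deadline);

	return 0;
}
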
 
 
+/**
+ * @brief enqueue a task
+ *
+ * @returns 0 if the task can be scheduled, -ENOSCHED otherwise
+ */
 
-static int edf_enqueue(struct task_queue tq[], struct task_struct *task)
+static int edf_enqueue(struct task_struct *task)
 {
 	int cpu;
+	unsigned long flags;
+
+	struct task_queue *tq = task->sched->tq;
 
 
 	if (!task->attr.period) {
@@ -840,25 +949,42 @@ static int edf_enqueue(struct task_queue tq[], struct task_struct *task)
 	} else
 		task->flags &= ~TASK_RUN_ONCE;
 
+	flags = arch_local_irq_save();
+	edf_lock();
+
 	cpu = edf_schedulable(tq, task);
-	if (cpu < 0)
+	if (cpu < 0) {
+		edf_unlock();
+		arch_local_irq_restore(flags);
 		return -ENOSCHED;
+	}
 
 	task->on_cpu = cpu;
 
 	list_add_tail(&task->node, &tq[cpu].wake);
 
+	edf_unlock();
+	arch_local_irq_restore(flags);
 
 
 	return 0;
 }
 
 
+/**
+ * @brief get remaining time slice for a given task
+ */
+
 static ktime edf_timeslice_ns(struct task_struct *task)
 {
 	return (ktime) (task->runtime);
 }
 
+
+/**
+ * @brief verify scheduling attributes
+ */
+
 static int edf_check_sched_attr(struct sched_attr *attr)
 {
 	ktime tick_min;
@@ -913,9 +1039,6 @@ static int edf_check_sched_attr(struct sched_attr *attr)
 			       tick_min);
 			goto error;
 		}
-
-
-
 	}
 
 
@@ -934,10 +1057,13 @@ error:
 }
 
 
-/* called after pick_next() */
+/**
+ * @brief return the next point in time when a task will become ready
+ */
 
 ktime edf_task_ready_ns(struct task_queue *tq, int cpu, ktime now)
 {
+	ktime tick;
 	ktime delta;
 	ktime ready = (unsigned int) ~0 >> 1;
 
@@ -945,6 +1071,10 @@ ktime edf_task_ready_ns(struct task_queue *tq, int cpu, ktime now)
 	struct task_struct *tmp;
 
 
+
+	/* we use twice the tick period as minimum time to a wakeup */
+	tick = (ktime) tick_get_period_min_ns() << 1;
+
 	list_for_each_entry_safe(tsk, tmp, &tq[cpu].run, node) {
 
 		if (tsk->state == TASK_BUSY)
@@ -952,7 +1082,7 @@ ktime edf_task_ready_ns(struct task_queue *tq, int cpu, ktime now)
 
 		delta = ktime_delta(tsk->wakeup, now);
 
-		if (delta <= (tick_get_period_min_ns() << 1)) /* XXX init once */
+		if (delta <= tick)
 			continue;
 
 		if (ready > delta)
@@ -967,12 +1097,12 @@ ktime edf_task_ready_ns(struct task_queue *tq, int cpu, ktime now)
 struct scheduler sched_edf = {
 	.policy           = SCHED_EDF,
 	.pick_next_task   = edf_pick_next,
-	.wake_next_task   = edf_wake_next,
+	.wake_task        = edf_wake,
 	.enqueue_task     = edf_enqueue,
 	.timeslice_ns     = edf_timeslice_ns,
 	.task_ready_ns    = edf_task_ready_ns,
 	.check_sched_attr = edf_check_sched_attr,
-	.sched_priority   = 1,
+	.priority         = SCHED_PRIORITY_EDF,
 };
 
 
@@ -983,7 +1113,6 @@ static int sched_edf_init(void)
 
 
 	for (i = 0; i < CONFIG_SMP_CPUS_MAX; i++) {
-		INIT_LIST_HEAD(&sched_edf.tq[i].new);
 		INIT_LIST_HEAD(&sched_edf.tq[i].wake);
 		INIT_LIST_HEAD(&sched_edf.tq[i].run);
 		INIT_LIST_HEAD(&sched_edf.tq[i].dead);
diff --git a/kernel/sched/rr.c b/kernel/sched/rr.c
index d8d622f..f47020d 100644
--- a/kernel/sched/rr.c
+++ b/kernel/sched/rr.c
@@ -2,6 +2,15 @@
  * @file kernel/sched/round_robin.c
  *
  * @brief round-robin scheduler
+ *
+ * Selects the first non-busy task that can run on the current CPU.
+ * If a task has used up its runtime, the runtime is reset and the task is
+ * moved to the end of the queue.
+ *
+ * Task runtimes are calculated from their priority value, which acts as a
+ * multiplier for a minimum slice; the minimum slice is itself a multiple
+ * of the minimum tick device period.
+ *
  */
 
 
@@ -13,8 +22,9 @@
 
 #define MSG "SCHED_RR: "
 
-#define MIN_RR_SLICE_NS		1000000
 
+/* radix-2 shift for min tick */
+#define RR_MIN_TICK_SHIFT	4
 
 static struct spinlock rr_spinlock;
 
@@ -39,117 +49,148 @@ static void rr_unlock(void)
 }
 
 
+/**
+ * @brief select the next task to run
+ */
+
 static struct task_struct *rr_pick_next(struct task_queue tq[], int cpu,
 					ktime now)
 {
-	struct task_struct *next = NULL;
+	ktime tick;
+
+	struct task_struct *tsk;
 	struct task_struct *tmp;
+	struct task_struct *next = NULL;
+
 
 
 	if (list_empty(&tq[0].run))
 		return NULL;
 
+
+	/* runtimes at or below twice the minimum tick period are reset */
+	tick = (ktime) tick_get_period_min_ns() << 1;
+
 	rr_lock();
-	list_for_each_entry_safe(next, tmp, &tq[0].run, node) {
 
+	list_for_each_entry_safe(tsk, tmp, &tq[0].run, node) {
 
-		if (next->on_cpu == KTHREAD_CPU_AFFINITY_NONE
-		    || next->on_cpu == cpu) {
+		if (tsk->on_cpu != cpu &&
+		    tsk->on_cpu != KTHREAD_CPU_AFFINITY_NONE)
+			continue;
 
-		if (next->state == TASK_RUN) {
-			/* XXX: must pick head first, then move tail on put()
-			 * following a scheduling event. for now, just force
-			 * round robin
-			 */
-			list_move_tail(&next->node, &tq[0].run);
 
-			/* reset runtime */
-			next->runtime = (next->attr.priority * MIN_RR_SLICE_NS);
+		if (tsk->state == TASK_RUN) {
 
+			if (tsk->runtime <= tick) {
+				/* reset runtime and queue up at the end */
+				tsk->runtime = tsk->attr.wcet;
+				list_move_tail(&tsk->node, &tq[0].run);
+				next = tsk;
+				continue;
+			}
 
+			next = tsk;
+			break;
+		}
 
+		if (tsk->state == TASK_DEAD) {
+			list_del(&tsk->node);
+			kthread_free(tsk);
 		}
+	}
 
-		if (next->state == TASK_IDLE)
-			list_move_tail(&next->node, &tq[0].run);
+	if (next)
+		next->state = TASK_BUSY;
 
-		if (next->state == TASK_DEAD)
-			list_move_tail(&next->node, &tq[0].dead);
+	rr_unlock();
 
-		break;
+	return next;
+}
 
 
-		} else {
-			next = NULL;
-			continue;
-		}
+/**
+ * @brief wake up a task by inserting it into the run queue
+ */
 
+static int rr_wake(struct task_struct *task, ktime now)
+{
+	int found = 0;
 
-	}
+	ktime tick;
 
-	rr_unlock();
+	struct task_struct *elem;
+	struct task_struct *tmp;
 
-	return next;
-}
+	struct task_queue *tq;
 
 
-/* this sucks, wrong place. keep for now */
-static void rr_wake_next(struct task_queue tq[], int cpu, ktime now)
-{
+	if (!task)
+		return -EINVAL;
+
+	if (task->attr.policy != SCHED_RR)
+		return -EINVAL;
 
-	struct task_struct *task;
 
+	tq = task->sched->tq;
 	if (list_empty(&tq[0].wake))
-		return;
+		return -EINVAL;
+
 
+	list_for_each_entry_safe(elem, tmp, &tq[0].wake, node) {
 
-	task = list_entry(tq[0].wake.next, struct task_struct, node);
+		if (elem != task)
+			continue;
 
-	BUG_ON(task->attr.policy != SCHED_RR);
+		found = 1;
+		break;
+	}
 
-	task->state = TASK_RUN;
+	if (!found)
+		return -EINVAL;
+
+
+	/* XXX assumes tick periods do not differ significantly between CPUs */
+	tick = (ktime) tick_get_period_min_ns() << RR_MIN_TICK_SHIFT;
+
+	task->attr.wcet = task->attr.priority * tick;
+	task->runtime   = task->attr.wcet;
+	task->state     = TASK_RUN;
 
 	rr_lock();
 	list_move(&task->node, &tq[0].run);
 	rr_unlock();
+
+
+	return 0;
 }
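
The slice computed in rr_wake() is the task priority times a minimum slice of
the minimum tick period shifted left by RR_MIN_TICK_SHIFT, i.e. 16 tick
periods per priority level; with an assumed 1 ms minimum tick period,
priority 1 yields a 16 ms slice and priority 4 yields 64 ms. A standalone
restatement for illustration (the demo_ names are assumptions of this
sketch, not kernel interfaces):

#include <stdint.h>

#define DEMO_RR_MIN_TICK_SHIFT	4	/* mirrors RR_MIN_TICK_SHIFT */

/* an RR slice is priority * (minimum tick period << 4) nanoseconds */
int64_t demo_rr_slice(unsigned long priority, int64_t tick_min_ns)
{
	int64_t slice = tick_min_ns << DEMO_RR_MIN_TICK_SHIFT;

	return (int64_t) priority * slice;
}
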
 
 
-static int rr_enqueue(struct task_queue tq[], struct task_struct *task)
-{
 
-	task->runtime = (task->attr.priority * MIN_RR_SLICE_NS);
+/**
+ * @brief enqueue a task
+ */
 
+static int rr_enqueue(struct task_struct *task)
+{
 	rr_lock();
-	if (task->state == TASK_RUN)
-		list_add_tail(&task->node, &tq[0].run);
-	else
-		list_add_tail(&task->node, &tq[0].wake);
-
+	list_add_tail(&task->node, &task->sched->tq[0].wake);
 	rr_unlock();
 
 	return 0;
 }
 
+
 /**
  * @brief get the requested timeslice of a task
- *
- * @note RR timeslices are determined from their configured priority
- *	 XXX: must not be 0
- *
- * @note for now, just take the minimum tick period to fit as many RR slices
- *	 as possible. This will jack up the IRQ rate, but RR tasks only run when
- *	 the system is not otherwise busy;
- *	 still, a larger (configurable) extra factor may be desirable
  */
 
 static ktime rr_timeslice_ns(struct task_struct *task)
 {
-	return (ktime) (task->attr.priority * MIN_RR_SLICE_NS);
+	return task->runtime;
 }
 
 
-
 /**
  * @brief sanity-check sched_attr configuration
  *
@@ -158,13 +199,18 @@ static ktime rr_timeslice_ns(struct task_struct *task)
 
 static int rr_check_sched_attr(struct sched_attr *attr)
 {
+
+	if (!attr)
+		return -EINVAL;
+
 	if (attr->policy != SCHED_RR) {
 		pr_err(MSG "attribute policy is %d, expected SCHED_RR (%d)\n",
-			attr->policy, SCHED_RR);
+		            attr->policy, SCHED_RR);
 		return -EINVAL;
 	}
 
 	if (!attr->priority) {
+		attr->priority = 1;
 		pr_warn(MSG "minimum priority is 1, adjusted\n");
 	}
 
@@ -172,7 +218,6 @@ static int rr_check_sched_attr(struct sched_attr *attr)
 }
 
 
-
 /**
  * @brief return the time until the next task is ready
  *
@@ -186,17 +231,15 @@ ktime rr_task_ready_ns(struct task_queue tq[], int cpu, ktime now)
 }
 
 
-
-
 static struct scheduler sched_rr = {
 	.policy           = SCHED_RR,
 	.pick_next_task   = rr_pick_next,
-	.wake_next_task   = rr_wake_next,
+	.wake_task        = rr_wake,
 	.enqueue_task     = rr_enqueue,
 	.timeslice_ns     = rr_timeslice_ns,
 	.task_ready_ns    = rr_task_ready_ns,
 	.check_sched_attr = rr_check_sched_attr,
-	.sched_priority   = 0,
+	.priority         = SCHED_PRIORITY_RR,
 };
 
 
@@ -205,10 +248,8 @@ static int sched_rr_init(void)
 {
 	int i;
 
-	/* XXX */
 
 	for (i = 0; i < CONFIG_SMP_CPUS_MAX; i++) {
-		INIT_LIST_HEAD(&sched_rr.tq[i].new);
 		INIT_LIST_HEAD(&sched_rr.tq[i].wake);
 		INIT_LIST_HEAD(&sched_rr.tq[i].run);
 		INIT_LIST_HEAD(&sched_rr.tq[i].dead);
diff --git a/kernel/tick.c b/kernel/tick.c
index 46a926e..12a33f4 100644
--- a/kernel/tick.c
+++ b/kernel/tick.c
@@ -313,8 +313,10 @@ void tick_check_device(struct clock_event_device *dev)
 
 	tick_setup_device(dev, cpu);
 
-	/* XXX should inform scheduler to recalculate any deadline-related
-	 * timeouts of tasks */
+	/* XXX should inform scheduler to re-evaluate all schedules,
+	 * i.e. move all current tasks for the given CPU(s) from run to wakeup
+	 * queues and wake all tasks one-by-one
+	 */
 }
 
 
-- 
GitLab