diff --git a/arch/sparc/include/asm/thread.h b/arch/sparc/include/asm/thread.h
new file mode 100644
index 0000000000000000000000000000000000000000..0e8b431f387dacd7374a9c66afe78ed4adab21f3
--- /dev/null
+++ b/arch/sparc/include/asm/thread.h
@@ -0,0 +1,69 @@
+/**
+ * @file    sparc/include/asm/thread.h
+ *
+ * @copyright GPLv2
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * @brief architecture-specific thread definitions
+ */
+
+
+#ifndef _ARCH_SPARC_ASM_THREAD_H_
+#define _ARCH_SPARC_ASM_THREAD_H_
+
+#include <kernel/types.h>
+#include <kernel/kernel.h>
+#include <stack.h>
+
+
+
+#define NSWINS 8	/* number of register windows implemented (8 on the LEON) */
+struct thread_info {
+	unsigned long		uwinmask;
+	struct task_struct	*task;		/* main task structure */
+	unsigned long		flags;		/* low level flags */
+	int			cpu;		/* cpu we're on */
+	int			preempt_count;	/* 0 => preemptable,
+						   <0 => BUG */
+	int			softirq_count;
+	int			hardirq_count;
+
+	uint32_t __unused;	/* padding: keeps the context switch fields below doubleword aligned */
+
+	/* Context switch saved kernel state. */
+	unsigned long ksp;	/* ... ksp __attribute__ ((aligned (8))); */
+	unsigned long kpc;
+	unsigned long kpsr;
+	unsigned long kwim;
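+
+	/* NOTE: the context switch code saves and restores ksp/kpc and
+	 * kpsr/kwim pairwise with std/ldd, so each pair must stay adjacent
+	 * and doubleword aligned
+	 */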
+
+	/* A place to store user windows and stack pointers
+	 * when the stack needs inspection.
+	 */
+	struct leon_reg_win	reg_window[NSWINS];	/* align for ldd! */
+	unsigned long		rwbuf_stkptrs[NSWINS];
+	unsigned long		w_saved;
+};
+
+
+
+
+#define TI_TASK	(offset_of(struct thread_info, task))
+#define TI_KSP	(offset_of(struct thread_info, ksp))
+#define TI_KPC  (offset_of(struct thread_info, kpc))
+#define TI_KPSR (offset_of(struct thread_info, kpsr))
+#define TI_KWIM (offset_of(struct thread_info, kwim))
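+
+/* These TI_* byte offsets are used as immediate ("i") operands in the context
+ * switch inline assembly in kernel/kthread.c, e.g. (illustrative only):
+ *
+ *	std	%sp, [%g6 + TI_KSP]	! save the kernel %sp (and %o7) of the old thread
+ *
+ * so they must evaluate to compile-time constants.
+ */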
+
+#if 0
+compile_time_assert((!(TI_KSP & 0x7UL)),
+		    SPARC_THREAD_INFO_THREAD_STATE_NOT_DWORD_ALIGNED);
+#endif
+
+#endif /* _ARCH_SPARC_ASM_THREAD_H_ */
diff --git a/arch/sparc/kernel/irqtrap.S b/arch/sparc/kernel/irqtrap.S
index 739133d3d480af2ba2fdab936d391d244136bc2a..8d1e0a9b3716884d6c591a957b75765c2dd85730 100644
--- a/arch/sparc/kernel/irqtrap.S
+++ b/arch/sparc/kernel/irqtrap.S
@@ -51,6 +51,8 @@ __interrupt_entry:
 #else /* CONFIG_SPARC_NESTED_IRQ */
 	 or %t_psr, PSR_PIL, %t_tmp
 
+	call	schedule		! run the scheduler from the interrupt entry path
+	 nop				! delay slot
 	wr	%t_tmp, PSR_ET, %psr	! we can't be preemted here, so set PIL
 					! to max and xor ET to enable
 					! delayed-write; nops not needed if
diff --git a/arch/sparc/kernel/rtrap.S b/arch/sparc/kernel/rtrap.S
index 196d68ea79a7bd904a1b3cfe3a07f9d6ca28cd31..4b0900229fd6177ccc50e7dc53001c684e6096f3 100644
--- a/arch/sparc/kernel/rtrap.S
+++ b/arch/sparc/kernel/rtrap.S
@@ -34,6 +34,7 @@ ret_trap_entry:
 
 
 ret_trap_kernel:
+
 	/* Will the rett land us in the invalid window? */
 	mov	2, %g1
 	sll	%g1, %t_psr, %g1
diff --git a/include/asm-generic/thread.h b/include/asm-generic/thread.h
new file mode 100644
index 0000000000000000000000000000000000000000..ed6e97a307114682a604eb8f65e9296996eaf785
--- /dev/null
+++ b/include/asm-generic/thread.h
@@ -0,0 +1,23 @@
+/**
+ * @file    include/asm-generic/thread.h
+ *
+ * @copyright GPLv2
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _ASM_GENERIC_THREAD_H_
+#define _ASM_GENERIC_THREAD_H_
+
+#include <asm/thread.h>
+
+#endif /* _ASM_GENERIC_THREAD_H_ */
+
diff --git a/include/kernel/kthread.h b/include/kernel/kthread.h
new file mode 100644
index 0000000000000000000000000000000000000000..4f4048a963729251777b2217d0ecc332e6826bb2
--- /dev/null
+++ b/include/kernel/kthread.h
@@ -0,0 +1,67 @@
+/**
+ * @file include/kernel/kthread.h
+ */
+
+
+#ifndef _KERNEL_KTHREAD_H_
+#define _KERNEL_KTHREAD_H_
+
+
+#include <stdarg.h>
+#include <list.h>
+#include <asm-generic/thread.h>
+
+
+#define KTHREAD_CPU_AFFINITY_NONE	(-1)
+
+
+
+
+struct task_struct {
+
+	struct thread_info thread_info;
+
+
+	/* -1 unrunnable, 0 runnable, >0 stopped: */
+	volatile long			state;
+
+	void				*stack;
+
+	int				on_cpu;
+	int (*thread_fn)(void *data);
+	void *data;
+
+
+	/* XXX
+	 * We can place a guard pattern in this canary to detect whether the
+	 * stack was corrupted. Since we need safety rather than security,
+	 * any fixed pattern will do (TBD).
+	 */
+	unsigned long stack_canary;
+
+
+	/* Tasks may have a parent and any number of siblings or children.
+	 * If the parent is killed or terminated, so are all siblings and
+	 * children.
+	 */
+	struct task_struct		*parent;
+	struct list_head		sibling;
+	struct list_head		children;
+
+
+
+};
+
+
+struct task_struct *kthread_create(int (*thread_fn)(void *data),
+				   void *data, int cpu,
+				   const char *namefmt,
+				   ...);
+
+struct task_struct *kthread_init_main(void);
+void kthread_wake_up(struct task_struct *task);
+/* XXX dummy */
+void switch_to(struct task_struct *next);
+void schedule(void);
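+
+/* Minimal usage sketch (the worker function and names here are examples only;
+ * error checking via IS_ERR() assumed to be available from kernel/err.h):
+ *
+ *	static int worker(void *data)
+ *	{
+ *		while (1)
+ *			cpu_relax();
+ *
+ *		return 0;
+ *	}
+ *
+ *	struct task_struct *t;
+ *
+ *	t = kthread_create(worker, NULL, KTHREAD_CPU_AFFINITY_NONE, "worker");
+ *	if (!IS_ERR(t))
+ *		kthread_wake_up(t);
+ */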
+
+#endif /* _KERNEL_KTHREAD_H_ */
diff --git a/init/main.c b/init/main.c
index d9b32d6da7eee2679aa7ea77b1d6367df62f9b7d..ba32a761e8c635afcc915ba53e04a50aae5ba1db 100644
--- a/init/main.c
+++ b/init/main.c
@@ -16,6 +16,8 @@
 #include <kernel/module.h>
 #include <kernel/ksym.h>
 #include <kernel/printk.h>
+#include <kernel/kernel.h>
+#include <kernel/kthread.h>
 #include <modules-image.h>
 
 #include <asm/processor.h>
@@ -41,6 +43,8 @@
 
 irqreturn_t dummy(unsigned int irq, void *userdata)
 {
+	//printk("IRQ!\n");
+	//schedule();
 	return 0;
 }
 
@@ -48,7 +52,7 @@ irqreturn_t dummy(unsigned int irq, void *userdata)
 /**
  * @brief do something useless
  */
-
+__attribute__((unused))
 static void twiddle(void)
 {
 	static int i;
@@ -60,6 +64,51 @@ static void twiddle(void)
 }
 
 
+
+#define TREADY 4	/* transmitter-ready bit in the UART status register */
+
+/* crude polled debug output via the LEON UART data (console[0]) and
+ * status (console[1]) registers
+ */
+static volatile int *console = (int *)0x80000100;
+
+static int putchar(int c)
+{
+	while (!(console[1] & TREADY));
+
+	console[0] = 0x0ff & c;
+
+	if (c == '\n') {
+		while (!(console[1] & TREADY));
+		console[0] = (int) '\r';
+	}
+
+	return c;
+}
+
+
+extern struct task_struct *kernel;
+struct task_struct *tsk1;
+struct task_struct *tsk2;
+
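+/* demo threads: each busy-loops and prints a marker character through the
+ * polled UART above, so context switches are visible on the console
+ */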
+int thread1(void *data)
+{
+
+	while(1) {
+		//printk(".");
+		putchar('.');
+		//twiddle();
+		cpu_relax();
+	}
+}
+
+int thread2(void *data)
+{
+
+	while(1) {
+		//printk("o");
+		putchar('o');
+		cpu_relax();
+	}
+}
+
 /**
  * @brief kernel initialisation routines
  */
@@ -126,7 +175,7 @@ int kernel_main(void)
 #ifdef CONFIG_MPPB
 	/* The mppbv2 LEON's cache would really benefit from cache sniffing...
 	 * Interactions with DMA or Xentiums are a pain when using the lower
-	 * half of the AHB SDRAM memory bank and since we don't create
+	 * half of the AHB SDRAM memory bank and since we don't create
 	 * a complete memory configuration for this demonstrator, we'll
 	 * to just disable the dcache entirely >:(
 	 */
@@ -139,30 +188,43 @@ int kernel_main(void)
 #endif
 	printk(MSG "Boot complete, spinning idly.\n");
 
-	{
+
 #define GR712_IRL1_GPTIMER_2    10
 
 #define LEON3_TIMER_EN 0x00000001       /* enable counting */
 #define LEON3_TIMER_RL 0x00000002       /* reload at 0     */
 #define LEON3_TIMER_LD 0x00000004       /* load counter    */
 #define LEON3_TIMER_IE 0x00000008       /* irq enable      */
-
+	{
 	struct gptimer_unit *mtu = (struct gptimer_unit *) 0x80000300;
 
+	printk("%s() entered\n", __func__);
+
 	irq_request(8,  ISR_PRIORITY_NOW, dummy, NULL);
 
-        mtu->scaler_reload = 5;
+	mtu->scaler_reload = 5;
 
-        mtu->timer[0].reload = 10000000 / (mtu->scaler_reload + 1);
-        mtu->timer[0].value = mtu->timer[0].reload;
-        mtu->timer[0].ctrl = LEON3_TIMER_LD | LEON3_TIMER_EN
-                             | LEON3_TIMER_RL | LEON3_TIMER_IE;
+	mtu->timer[0].reload = 800 / (mtu->scaler_reload + 1);
+	mtu->timer[0].value = mtu->timer[0].reload;
+	mtu->timer[0].ctrl = LEON3_TIMER_LD | LEON3_TIMER_EN
+		| LEON3_TIMER_RL | LEON3_TIMER_IE;
 	}
 
+
+
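+	/* With "call schedule" added to the interrupt entry path, the periodic
+	 * timer interrupt configured above effectively drives the round-robin
+	 * switch between these demo threads.
+	 */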
+	tsk1 = kthread_create(thread1, NULL, KTHREAD_CPU_AFFINITY_NONE, "Thread1");
+	tsk2 = kthread_create(thread2, NULL, KTHREAD_CPU_AFFINITY_NONE, "Thread2");
+	//kthread_wake_up(tsk2);
+
+	kernel = kthread_init_main();
 	while(1) {
-		twiddle();
+		//printk("-");
+		putchar('-');
 		cpu_relax();
 	}
+	/* never reached */
+	BUG();
 
 	return 0;
 }
diff --git a/kernel/Makefile b/kernel/Makefile
index c06a73f62b11e6bdcd3898469262b7d735b5e50b..dfe58af469d4713843ff5dc6938c70c0334a7564 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -6,3 +6,4 @@ obj-y += module.o
 obj-$(CONFIG_NOC_DMA) += noc_dma.o
 obj-y += irq.o
 obj-$(CONFIG_XENTIUM) += xentium.o
+obj-y += kthread.o
diff --git a/kernel/kthread.c b/kernel/kthread.c
new file mode 100644
index 0000000000000000000000000000000000000000..d41192885daf53cc9ccccee68ecd0d7c88594dd8
--- /dev/null
+++ b/kernel/kthread.c
@@ -0,0 +1,604 @@
+/**
+ * @file kernel/kthread.c
+ */
+
+
+#include <kernel/kthread.h>
+#include <kernel/export.h>
+#include <kernel/kmem.h>
+#include <kernel/err.h>
+
+
+/* read the SPARC Processor State Register */
+static inline unsigned int get_psr(void)
+{
+	unsigned int psr;
+	__asm__ __volatile__(
+		"rd	%%psr, %0\n\t"
+		"nop\n\t"
+		"nop\n\t"
+		"nop\n\t"
+	: "=r" (psr)
+	: /* no inputs */
+	: "memory");
+
+	return psr;
+}
+
+/* write the PSR; the three nops cover the write-delay slots of wr */
+static inline void put_psr(unsigned int new_psr)
+{
+	__asm__ __volatile__(
+		"wr	%0, 0x0, %%psr\n\t"
+		"nop\n\t"
+		"nop\n\t"
+		"nop\n\t"
+	: /* no outputs */
+	: "r" (new_psr)
+	: "memory", "cc");
+}
+
+
+
+struct task_struct *kernel;
+
+
+/* XXX fixed three-slot run "queue" used by the prototype round-robin in schedule() */
+struct {
+	struct task_struct *current;
+	struct task_struct *second;
+	struct task_struct *third;
+} tasks;
+
+
+struct thread_info *current_set[1]; // = {kernel->thread_info};
+
+
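+/* Flush the current task's register windows to its stack: nesting NSWINS - 1
+ * saves forces window overflow traps, which spill the occupied windows to the
+ * stack; the restores then unwind back to the original window.
+ */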
+#define prepare_arch_switch(next) do { \
+	__asm__ __volatile__( \
+	"save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
+	"save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
+	"save %sp, -0x40, %sp\n\t" \
+	"restore; restore; restore; restore; restore; restore; restore"); \
+} while(0)
+
+
+
+void schedule(void)
+{
+	struct task_struct *tmp;
+
+	/* we need all three slots filled before we can rotate */
+	if (!(tasks.current && tasks.second && tasks.third))
+		return;
+
+	/* rotate the slots: second -> current, third -> second, current -> third */
+	tmp = tasks.current;
+	tasks.current = tasks.second;
+	tasks.second  = tasks.third;
+	tasks.third   = tmp;
+
+//	printk("new task: %p\n", tasks.current);
+
+
+	prepare_arch_switch(1);
+	__asm__ __volatile__("/*#define curptr    g6*/"
+			   "sethi	%%hi(here - 0x8), %%o7\n\t"	/* save the program counter just at the jump below as the return address */
+			    "or	%%o7, %%lo(here - 0x8), %%o7\n\t"       /* so the old thread will hop over this section when it returns */
+			   "rd	%%psr, %%g4\n\t"
+			   "std %%sp, [%%g6 + %2]\n\t" //store %sp and skip %pc to current thread's KSP
+			   "rd	%%wim, %%g5\n\t"	// read wim
+			   "wr	%%g4, 0x00000020, %%psr\n\t" // toggle ET bit (should be off at this point!
+			   "nop\n\t"
+			   "nop\n\t"
+			   "nop\n\t"
+			   "std	%%g4, [%%g6 + %4]\n\t"	// store %psr to KPSR and %wim to KWIM
+			   "ldd	[%1 + %4], %%g4\n\t"	// load KPSR + KWIM into %g4, %g5 from new thread
+			   "mov	%1, %%g6\n\t"		// and set the new thread as current
+			   "st	%1, [%0]\n\t"		// and to current_set[]
+			   "wr	%%g4, 0x20, %%psr\n\t"	// set new PSR and toggle ET (should be off)
+			   "nop; nop; nop\n\t"		// wait for bits to settle, so we are in the proper window
+			   "ldd	[%%g6 + %2], %%sp\n\t"	// and and load KSP to %sp (%o6) and KPC to %o7 (all of these MUST be aligned to dbl)
+			   "wr	%%g5, 0x0, %%wim\n\t"	//set the new KWIM (from double load above)
+
+			   "ldd	[%%sp + 0x00], %%l0\n\t" //load  %l0 (%t_psr and %pc
+			   "ldd	[%%sp + 0x38], %%i6\n\t"	// load %fp and %i7 (return address)
+			   "wr	%%g4, 0x0, %%psr\n\t"		// set the original PSR (without traps)
+			   "jmpl %%o7 + 0x8, %%g0\n\t"		// as the thread is switched in, it will jump to the "here" marker and continue
+				"nop\n"
+			   "here:\n"
+			   :
+			   : "r" (&(current_set[0])),
+			     "r" (&(tasks.current->thread_info)),
+				"i" (TI_KSP),
+				"i" (TI_KPC),
+				"i" (TI_KPSR)
+					:       "g1", "g2", "g3", "g4", "g5",       "g7",
+				"l0", "l1",       "l3", "l4", "l5", "l6", "l7",
+				"i0", "i1", "i2", "i3", "i4", "i5",
+				"o0", "o1", "o2", "o3",                   "o7");
+
+
+}
+
+
+#define curptr g6
+
+/* this is executed from an interrupt exit  */
+void __attribute__((always_inline)) switch_to(struct task_struct *next)
+{
+	//struct task_struct *task;
+	//struct thread_info *ti;
+
+	printk("Switch!\n");
+	prepare_arch_switch(1);
+
+
+
+
+	/* NOTE: we don't strictly require the PSR_ET toggle, but if we get
+	 * unaligned accesses (or access traps), it is a really good idea,
+	 * otherwise we'll die */
+	/* NOTE: this assumes a mixed kernel/user mapping in the MMU (if we are
+	 * using it at all), otherwise we might not be able to load the new
+	 * thread's data. Oh, and we would have to switch user->kernel->new
+	 * user, or we'll run into the same issue with different user contexts */
+
+	/* first, store the current thread */
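+	/* XXX: the window/PSR switch below is currently disabled (#if 0); the
+	 * live version of this code is in schedule() above
+	 */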
+#if 0
+	__asm__ __volatile__("/*#define curptr    g6*/"
+			   "sethi	%%hi(here - 0x8), %%o7\n\t"	/* save the program counter just at the jump below as the return address */
+			    "or	%%o7, %%lo(here - 0x8), %%o7\n\t"       /* so the old thread will hop over this section when it returns */
+			   "rd	%%psr, %%g4\n\t"
+			   "std %%sp, [%%g6 + %2]\n\t" //store %sp and skip %pc to current thread's KSP
+			   "rd	%%wim, %%g5\n\t"	// read wim
+			   "wr	%%g4, 0x00000020, %%psr\n\t" // toggle ET bit (should be off at this point!
+			   "nop\n\t"
+			   //? pause
+			   "std	%%g4, [%%g6 + %4]\n\t"	// store %psr to KPSR and %wim to KWIM
+
+			   "ldd	[%1 + %4], %%g4\n\t"	// load KPSR + KWIM into %g4, %g5 from new thread
+			   "mov	%1, %%g6\n\t"		// and set the new thread as current
+			   "st	%1, [%0]\n\t"		// and to current_set[]
+			   "wr	%%g4, 0x20, %%psr\n\t"	// set new PSR and toggle ET (should be off)
+			   "nop; nop; nop\n\t"		// wait for bits to settle, so we are in the proper window
+			   "ldd	[%%g6 + %2], %%sp\n\t"	// and and load KSP to %sp (%o6) and KPC to %o7 (all of these MUST be aligned to dbl)
+			   "wr	%%g5, 0x0, %%wim\n\t"	//set the new KWIM (from double load above)
+
+			   "ldd	[%%sp + 0x00], %%l0\n\t" //load  %l0 (%t_psr and %pc
+			   "ldd	[%%sp + 0x38], %%i6\n\t"	// load %fp and %i7 (return address)
+			   "wr	%%g4, 0x0, %%psr\n\t"		// set the original PSR (without traps)
+			   "jmpl %%o7 + 0x8, %%g0\n\t"		// as the thread is switched in, it will jump to the "here" marker and continue
+				"nop\n"
+			   "here:\n"
+			   :
+			   : "r" (&(current_set[0])),
+			     "r" (&(next->thread_info)),
+				"i" (TI_KSP),
+				"i" (TI_KPC),
+				"i" (TI_KPSR)
+					:       "g1", "g2", "g3", "g4", "g5",       "g7",
+				"l0", "l1",       "l3", "l4", "l5", "l6", "l7",
+				"i0", "i1", "i2", "i3", "i4", "i5",
+				"o0", "o1", "o2", "o3",                   "o7");
+
+#endif
+
+}
+
+#if 0
+	/* XXX scratch note: how a freshly started thread could set up its
+	 * initial frame/stack pointer before calling thread_fn(data)
+	 */
+
+       __asm__ __volatile__(
+                       "mov %0, %%fp      \n\t"
+                       "sub %%fp, 96, %%sp\n\t"
+                       :
+                       : "r" (task->stack)
+                       : "memory");
+
+       thread_fn(data);
+#endif
+
+#include <asm/leon.h>
+
+/* XXX dummy: for now this simply runs the thread function in the caller's
+ * context instead of marking the task runnable
+ */
+void kthread_wake_up(struct task_struct *task)
+{
+	printk("running thread function\n");
+
+	task->thread_fn(task->data);
+}
+
+__attribute__((unused))
+static void kthread_exit(void)
+{
+	printk("thread leaving\n");
+}
+
+struct task_struct *kthread_init_main(void)
+{
+	struct task_struct *task;
+
+	task = kmalloc(sizeof(*task));
+
+	if (!task)
+		return ERR_PTR(-ENOMEM);
+
+	/* XXX dummy */
+	current_set[0] = &task->thread_info;	/* note: "kernel" is not assigned yet at this point */
+
+
+#define PSR_CWP     0x0000001f
+
+	task->thread_info.ksp = (unsigned long) leon_get_fp();
+	task->thread_info.kpc = (unsigned long) __builtin_return_address(1) - 8;
+	task->thread_info.kpsr = get_psr();
+	task->thread_info.kwim = 1 << (((get_psr() & PSR_CWP) + 1) % NSWINS);	/* invalidate the window above the current one */
+	task->thread_info.task = task;
+
+	task->thread_fn = NULL;
+	task->data      = NULL;
+
+
+		printk("kernel stack %x\n", leon_get_fp());
+	/* dummy */
+	tasks.current = task;
+
+	__asm__ __volatile__("mov	%0, %%g6\n\t"
+			     :: "r"(&(tasks.current->thread_info)) : "memory");		// and set the new thread as current
+
+	return task;
+}
+
+
+
+static struct task_struct *kthread_create_internal(int (*thread_fn)(void *data),
+						   void *data, int cpu,
+						   const char *namefmt,
+						   va_list args)
+{
+	struct task_struct *task;
+
+
+	task = kmalloc(sizeof(*task));
+
+	if (!task)
+		return ERR_PTR(-ENOMEM);
+
+
+	/* XXX: need stack size detection and realloc/migration code */
+
+	task->stack = kmalloc(8192); /* XXX */
+
+	if (!task->stack) {
+		kfree(task);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* stacks grow downwards; point to the top of the allocation */
+	task->stack += 8192;
+
+
+#define STACKFRAME_SZ	96	/* minimum SPARC stack frame */
+#define PTREG_SZ	80	/* room for a saved register set (pt_regs) */
+#define PSR_CWP     0x0000001f	/* current window pointer mask */
+	/* initial kernel stack pointer, leaving room for a frame and saved registers */
+	task->thread_info.ksp = (unsigned long) task->stack - (STACKFRAME_SZ + PTREG_SZ);
+	/* thread entry point; the switch code resumes execution at kpc + 8 */
+	task->thread_info.kpc = (unsigned long) thread_fn - 8;
+	task->thread_info.kpsr = get_psr();
+	task->thread_info.kwim = 1 << (((get_psr() & PSR_CWP) + 1) % NSWINS);	/* invalidate the window above the current one */
+	task->thread_info.task = task;
+
+	task->thread_fn = thread_fn;
+	task->data      = data;
+
+
+		printk("%s is next at %p stack %p\n", namefmt, &task->thread_info, task->stack);
+		if (!tasks.second)
+			tasks.second = task;
+		else
+			tasks.third = task;
+
+	/* wake up */
+
+
+
+#if 0
+	struct kthread_create_info *create = kmalloc(sizeof(*create),
+						     GFP_KERNEL);
+
+	if (!create)
+		return ERR_PTR(-ENOMEM);
+	create->threadfn = threadfn;
+	create->data = data;
+	create->node = node;
+	create->done = &done;
+	spin_lock(&kthread_create_lock);
+	list_add_tail(&create->list, &kthread_create_list);
+	spin_unlock(&kthread_create_lock);
+
+	wake_up_process(kthreadd_task);
+	/*
+	 * Wait for completion in killable state, for I might be chosen by
+	 * the OOM killer while kthreadd is trying to allocate memory for
+	 * new kernel thread.
+	 */
+	if (unlikely(wait_for_completion_killable(&done))) {
+		/*
+		 * If I was SIGKILLed before kthreadd (or new kernel thread)
+		 * calls complete(), leave the cleanup of this structure to
+		 * that thread.
+		 */
+		if (xchg(&create->done, NULL))
+			return ERR_PTR(-EINTR);
+		/*
+		 * kthreadd (or new kernel thread) will call complete()
+		 * shortly.
+		 */
+		wait_for_completion(&done);
+	}
+	task = create->result;
+	if (!IS_ERR(task)) {
+		static const struct sched_param param = { .sched_priority = 0 };
+
+		vsnprintf(task->comm, sizeof(task->comm), namefmt, args);
+		/*
+		 * root may have changed our (kthreadd's) priority or CPU mask.
+		 * The kernel thread should not inherit these properties.
+		 */
+		sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
+		set_cpus_allowed_ptr(task, cpu_all_mask);
+	}
+	kfree(create);
+#endif
+	return task;
+}
+
+
+
+
+
+/**
+ * wake_up_thread_internal - wake up a thread
+ * @p: the thread to be awakened
+ * @state: the mask of task states that can be woken
+ * @wake_flags: wake modifier flags (WF_*)
+ *
+ * If (@state & @p->state) @p->state = TASK_RUNNING.
+ *
+ * If the task was not queued/runnable, also place it back on a runqueue.
+ *
+ * Atomic against schedule() which would dequeue a task, also see
+ * set_current_state().
+ *
+ * Return: %true if @p->state changes (an actual wakeup was done),
+ *	   %false otherwise.
+ */
+static int
+wake_up_thread_internal(struct task_struct *p, unsigned int state, int wake_flags)
+{
+	//unsigned long flags;
+	//int cpu = 0;
+	int success = 0;
+	/* XXX stub: the wakeup path below (adapted from the Linux
+	 * try_to_wake_up() path) is kept for reference only and is disabled
+	 */
+#if 0
+	/*
+	 * If we are going to wake up a thread waiting for CONDITION we
+	 * need to ensure that CONDITION=1 done by the caller can not be
+	 * reordered with p->state check below. This pairs with mb() in
+	 * set_current_state() the waiting thread does.
+	 */
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	smp_mb__after_spinlock();
+	if (!(p->state & state))
+		goto out;
+
+	trace_sched_waking(p);
+
+	/* We're going to change ->state: */
+	success = 1;
+	cpu = task_cpu(p);
+
+	/*
+	 * Ensure we load p->on_rq _after_ p->state, otherwise it would
+	 * be possible to, falsely, observe p->on_rq == 0 and get stuck
+	 * in smp_cond_load_acquire() below.
+	 *
+	 * sched_ttwu_pending()                 try_to_wake_up()
+	 *   [S] p->on_rq = 1;                  [L] P->state
+	 *       UNLOCK rq->lock  -----.
+	 *                              \
+	 *				 +---   RMB
+	 * schedule()                   /
+	 *       LOCK rq->lock    -----'
+	 *       UNLOCK rq->lock
+	 *
+	 * [task p]
+	 *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
+	 *
+	 * Pairs with the UNLOCK+LOCK on rq->lock from the
+	 * last wakeup of our task and the schedule that got our task
+	 * current.
+	 */
+	smp_rmb();
+	if (p->on_rq && ttwu_remote(p, wake_flags))
+		goto stat;
+
+#ifdef CONFIG_SMP
+	/*
+	 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
+	 * possible to, falsely, observe p->on_cpu == 0.
+	 *
+	 * One must be running (->on_cpu == 1) in order to remove oneself
+	 * from the runqueue.
+	 *
+	 *  [S] ->on_cpu = 1;	[L] ->on_rq
+	 *      UNLOCK rq->lock
+	 *			RMB
+	 *      LOCK   rq->lock
+	 *  [S] ->on_rq = 0;    [L] ->on_cpu
+	 *
+	 * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock
+	 * from the consecutive calls to schedule(); the first switching to our
+	 * task, the second putting it to sleep.
+	 */
+	smp_rmb();
+
+	/*
+	 * If the owning (remote) CPU is still in the middle of schedule() with
+	 * this task as prev, wait until its done referencing the task.
+	 *
+	 * Pairs with the smp_store_release() in finish_task().
+	 *
+	 * This ensures that tasks getting woken will be fully ordered against
+	 * their previous state and preserve Program Order.
+	 */
+	smp_cond_load_acquire(&p->on_cpu, !VAL);
+
+	p->sched_contributes_to_load = !!task_contributes_to_load(p);
+	p->state = TASK_WAKING;
+
+	if (p->in_iowait) {
+		delayacct_blkio_end(p);
+		atomic_dec(&task_rq(p)->nr_iowait);
+	}
+
+	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
+	if (task_cpu(p) != cpu) {
+		wake_flags |= WF_MIGRATED;
+		set_task_cpu(p, cpu);
+	}
+
+#else /* CONFIG_SMP */
+
+	if (p->in_iowait) {
+		delayacct_blkio_end(p);
+		atomic_dec(&task_rq(p)->nr_iowait);
+	}
+
+#endif /* CONFIG_SMP */
+
+	ttwu_queue(p, cpu, wake_flags);
+stat:
+	ttwu_stat(p, cpu, wake_flags);
+out:
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+#endif
+	return success;
+}
+
+/**
+ * wake_up_thread - wake up a specific thread
+ * @p: the thread to be woken up
+ *
+ * Attempt to wake up the nominated thread and move it to the set of runnable
+ * threads.
+ *
+ * Return: 1 if the thread was woken up, 0 if it was already running.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
+ */
+/* Used in tsk->state: */
+#define TASK_RUNNING			0x0000
+#define TASK_INTERRUPTIBLE		0x0001
+#define TASK_UNINTERRUPTIBLE		0x0002
+#define __TASK_STOPPED			0x0004
+#define __TASK_TRACED			0x0008
+
+#define TASK_NORMAL			(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
+int wake_up_thread(struct task_struct *p)
+{
+	return wake_up_thread_internal(p, TASK_NORMAL, 0);
+}
+EXPORT_SYMBOL(wake_up_thread);
+
+
+/**
+ *
+ * @brief create a new kernel thread
+ *
+ * @param thread_fn the function to run in the thread
+ * @param data a user data pointer for thread_fn, may be NULL
+ *
+ * @param cpu set the cpu affinity
+ *
+ * @param namefmt a printf format string name for the thread
+ *
+ * @param ... parameters to the format string
+ *
+ * Create a named kernel thread. The thread will be initially stopped.
+ * Use wake_up_thread() to activate it.
+ *
+ * If cpu is set to KTHREAD_CPU_AFFINITY_NONE, the thread may run on any CPU.
+ * If the selected CPU index exceeds the number of available CPUs, it will
+ * default to KTHREAD_CPU_AFFINITY_NONE, otherwise the thread will be bound
+ * to that CPU.
+ *
+ * When woken, the thread will run thread_fn() with data as its argument.
+ *
+ * Returns a task_struct pointer or ERR_PTR(-ENOMEM).
+ */
+
+struct task_struct *kthread_create(int (*thread_fn)(void *data),
+				   void *data, int cpu,
+				   const char *namefmt,
+				   ...)
+{
+	struct task_struct *task;
+	va_list args;
+
+	va_start(args, namefmt);
+	task = kthread_create_internal(thread_fn, data, cpu, namefmt, args);
+	va_end(args);
+
+	return task;
+}
+EXPORT_SYMBOL(kthread_create);