diff --git a/arch/sparc/include/stack.h b/arch/sparc/include/stack.h
index 42d013e10e316cf01b87298114f8007f48cb8ff9..51386c2aa5d190295984ef2fd731ba8d84a93583 100644
--- a/arch/sparc/include/stack.h
+++ b/arch/sparc/include/stack.h
@@ -2,24 +2,7 @@
 #define _SPARC_STACK_H_
 
 #include <kernel/types.h>
-
-/* reg window offset */
-#define RW_L0     0x00
-#define RW_L1     0x04
-#define RW_L2     0x08
-#define RW_L3     0x0c
-#define RW_L4     0x10
-#define RW_L5     0x14
-#define RW_L6     0x18
-#define RW_L7     0x1c
-#define RW_I0     0x20
-#define RW_I1     0x24
-#define RW_I2     0x28
-#define RW_I3     0x2c
-#define RW_I4     0x30
-#define RW_I5     0x34
-#define RW_I6     0x38
-#define RW_I7     0x3c
+#include <asm/win.h>
 
 /* stack frame offsets */
 #define SF_L0     0x00
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index d1915c93da083f0adf332a0536211b9839bf6e9a..5cfce14d28ff8e86a4fdc8d6e1f9534a54884537 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -1,8 +1,11 @@
 CHECKFLAGS     += -D__sparc__
 
 extra-$(CONFIG_ARCH_CUSTOM_BOOT_CODE) += kernel.lds
+
 obj-$(CONFIG_ARCH_CUSTOM_BOOT_CODE) += ttable.o
-obj-$(CONFIG_ARCH_CUSTOM_BOOT_CODE)  += head.o
+obj-$(CONFIG_ARCH_CUSTOM_BOOT_CODE) += head.o
+obj-$(CONFIG_ARCH_CUSTOM_BOOT_CODE) += win_uflow.o
+obj-$(CONFIG_ARCH_CUSTOM_BOOT_CODE) += win_oflow.o
 
 obj-y += setup.o
 obj-y += init.o
diff --git a/arch/sparc/kernel/head.S b/arch/sparc/kernel/head.S
index 5cf8eadd5d2c906c098b41e33a9ec93cad8ab1af..c26edb74494d4ef8171ce5c8dcd62a9f7029a7ce 100644
--- a/arch/sparc/kernel/head.S
+++ b/arch/sparc/kernel/head.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 
 #ifndef CONFIG_SPARC_CPU_REG_WINDOWS
 #define NWINDOWS 8
diff --git a/arch/sparc/kernel/ttable.S b/arch/sparc/kernel/ttable.S
index 1876278f02983a10ff429f6ad7a803a9a32c2f1b..7faa4c64cafdc77adb87afc57c75a951fa7f588f 100644
--- a/arch/sparc/kernel/ttable.S
+++ b/arch/sparc/kernel/ttable.S
@@ -131,10 +131,6 @@ priv_instruction_trap_handler:
 leonbare_trapreturn:
 	.global fpdis_trap_handler
 fpdis_trap_handler:
-	.global __wim_overflow
-__wim_overflow:
-	.global __wim_underflow
-__wim_underflow:
 	.global mna_trap_handler
 mna_trap_handler:
 	.global fpe_trap_handler
diff --git a/arch/sparc/kernel/win_oflow.S b/arch/sparc/kernel/win_oflow.S
new file mode 100644
index 0000000000000000000000000000000000000000..3cc7fce46db1d3e98732c62a8f39918eda9b7ec6
--- /dev/null
+++ b/arch/sparc/kernel/win_oflow.S
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* XXX: there is no handling of trapping from user space at this time,
+ *	we'll add that when we implement threads
+ *
+ * what we probably will do:
+ * - maintain a list of thread info pointers (one per CPU)
+ * - identify the CPU, fetch the pointer to the current thread's structure
+ *   from the list and store it in one of %g6-%g7
+ *   (these are not used by the compiler), so we can track the info
+ *   across windows
+ */
+
+#include <asm/win.h>
+
+/* As we enter this trap handler, we expect the following setup:
+ * PSR in l0
+ * PC  in l1 (set by CPU)
+ * NPC in l2 (set by CPU)
+ * WIM in l3
+ * PSR_PS bit condition code set by trap (i.e. andcc %l0, PSR_PS, %g0)
+ *
+ * We will use %l4 as a temporary local register.
+ * We will store the next WIM in %g5, since we need it across windows
+ *  - the previous %g5 will be stored in %l5 temporarily.
+ */
+
+#ifndef CONFIG_SPARC_CPU_REG_WINDOWS
+#define NWINDOWS 8
+#else
+#define NWINDOWS CONFIG_SPARC_CPU_REG_WINDOWS
+#endif /* CONFIG_SPARC_CPU_REG_WINDOWS */
+
+#if (NWINDOWS < 3)
+#error "This code will not work on CPUs with < 3 register windows implemented."
+#endif
+
+	.text
+	.align	4
+	.global	__wim_overflow
+
+__wim_overflow:
+	/* window overflow trap: store the next window to memory, rotate WIM */
+	/* we use %g5 to store the new WIM, store original in %l5 */
+	mov	%g5, %l5
+
+	/* First, we rotate the window invalid mask to the right by one and
+	 * to the left by (NWINDOWS - 1), then OR the results into %g5 (the
+	 * WIM register itself is written after the save below), so our WIM
+	 * rotates modulo NWINDOWS just like the CWP in the PSR
+	 */
+
+        srl     %l3, 1, %l4
+        sll     %l3, (NWINDOWS - 1), %g5
+	or	%l4, %g5, %g5	! store new WIM globally
+	!and	%g5, ((1 << NWINDOWS) - 1), %g5
+
+	/* At this point, we proceed depending on the state of the previous
+	 * supervisor (PS) bit. The condition code has been set by the trap
+	 * entry.
+	 */
+	/* XXX: not implemented */
+	be,a win_trap_from_user	! trapped from user mode
+	 save			! annulled if branch not taken
+
+	/* XXX: user window mask check and handling goes here */
+
+	/* Otherwise, change to the window which has to be saved and store it
+	 * to memory
+	 */
+	save			! move to window
+	wr	%g5, %g0, %wim 	! delayed-write instruction
+				! delay cycles are covered in store block
+
+	STORE_WINDOW(sp)	! store to memory
+
+	restore			! return to trap window
+
+	mov 	%l5, %g5	! restore original %g5
+
+        wr      %l0, %g0, %psr	! restore PSR, delayed-write instruction
+	nop			! need 2 delays, rett modifies CWP
+	nop			! 3rd delay cycle is filled by jmp instr
+
+	/* jump to the saved %pc in %l1 to re-execute the trapped save
+	 * instruction; rett continues with the saved %npc from %l2
+	 */
+        jmp     %l1
+        rett    %l2
+
+
+
+/* XXX: placeholder, overflow traps from user space are not handled yet */
+win_trap_from_user:
+	ta 0	! not implemented, halt CPU
diff --git a/arch/sparc/kernel/win_uflow.S b/arch/sparc/kernel/win_uflow.S
new file mode 100644
index 0000000000000000000000000000000000000000..db81242f8dcddfa271edc6d267ce5c3a75dc0dd7
--- /dev/null
+++ b/arch/sparc/kernel/win_uflow.S
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/win.h>
+
+/* As we enter this trap handler, we expect the following setup:
+ * PSR in l0
+ * PC  in l1 (set by CPU)
+ * NPC in l2 (set by CPU)
+ * WIM in l3
+ * PSR_PS bit condition code set by trap (i.e. andcc %l0, PSR_PS, %g0)
+ *
+ * We will use %l4 and %l5 as temporary registers.
+ */
+
+#ifndef CONFIG_SPARC_CPU_REG_WINDOWS
+#define NWINDOWS 8
+#else
+#define NWINDOWS CONFIG_SPARC_CPU_REG_WINDOWS
+#endif /* CONFIG_SPARC_CPU_REG_WINDOWS */
+
+#if (NWINDOWS < 3)
+#error "This code will not work on CPUs with < 3 register windows implemented."
+#endif
+
+	.text
+	.align	4
+	.global	__wim_underflow
+
+__wim_underflow:
+	/* A trap always decrements the CWP, so we are now 2 windows away from
+	 * the window we want to get into.
+	 *
+	 * First, we rotate the window invalid mask to the left by one and
+	 * to the right by (NWINDOWS - 1). The two results are combined by
+	 * the WR instruction that writes %wim, which XORs its operands; this
+	 * equals an OR here, as the shifted values share no bits (assuming
+	 * unimplemented WIM bits read as zero), so our WIM rotates modulo
+	 * NWINDOWS just like the CWP in the PSR
+	 */
+
+        sll     %l3, 1, %l4
+        srl     %l3, (NWINDOWS - 1), %l5
+
+        wr      %l4, %l5, %wim	! delayed-write instruction
+	nop			! need 2 delays, restore modifies CWP
+	nop			! 3rd cycle is filled by branch instruction
+
+	/* At this point, we proceed depending on the state of the previous
+	 * supervisor (PS) bit. The condition code has been set by the trap
+	 * entry.
+	 */
+	be win_trap_from_user
+	 restore		! first restore, we'll need another to restore
+				! to the target window (user->caller)
+win_trap_from_superuser:
+	restore			! second restore
+
+win_trap_finish:
+	LOAD_WINDOW(sp)		! load stack window from memory
+
+	save			! return to trap window
+        save
+
+        wr      %l0, %g0, %psr	! restore PSR, delayed-write instruction
+	nop			! need 2 delays, rett modifies CWP
+	nop			! 3rd delay cycle is filled by jmp instr
+
+	/* jump to the saved %pc in %l1 to re-execute the trapped restore
+	 * instruction; rett continues with the saved %npc from %l2
+	 */
+        jmp     %l1
+        rett    %l2
+
+
+
+/* XXX: basic structure, add more checks as needed */
+win_trap_from_user:
+	restore			! second restore
+
+	/* users cannot be trusted, so we'll perform a basic sanity check for
+	 * doubleword (8 byte) alignment of the user's stack pointer
+	 */
+	! b win_trap_stack_check
+	andcc %sp, (8 - 1), %g0	! Z set if %sp is doubleword-aligned
+
+win_trap_stack_check:
+
+	/* evaluate check 1: stack alignment */
+	bne win_trap_user_stack_error
+	 nop
+
+#ifdef CONFIG_MMU
+	/* XXX: add check 2: stack memory address in user-space mapped VMA? */
+#endif
+	/* XXX: maybe check 3: stack memory address in user-allocated page?
+	 *	(e.g. to catch cross-process stack jumps)
+	 */
+
+	/* all checks passed, finish up */
+	b win_trap_finish
+	 nop
+
+/* XXX: do something in case of user stack error, e.g. terminate process */
+win_trap_user_stack_error:
+	ta 0	! trap again to halt