diff --git a/dsp/xentium/Makefile b/dsp/xentium/Makefile
index b83f1b31ed7854d206f69049dede4ec85044a8e5..e63613811b9b9b52f71daa86a8232af5aa2b36b1 100644
--- a/dsp/xentium/Makefile
+++ b/dsp/xentium/Makefile
@@ -25,7 +25,7 @@ obj- := dummy.o
 HOSTCC := xentium-clang
 HOSTLD := xentium-ld
 
-HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2
+HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O3
 HOSTCFLAGS +=-Idsp/xentium/include
 HOSTCFLAGS += $(KERNELINCLUDE)
 
@@ -33,7 +33,7 @@ HOSTLDFLAGS += -Tdsp/xentium/sysroot/lib/default.ld
 HOSTLDFLAGS += --sysroot=dsp/xentium/sysroot
 
 xen_libs:
-xen_libs-objs := xen_printf.o data_proc_task.o lib/xen.o
+xen_libs-objs := xen_printf.o data_proc_task.o lib/xen.o lib/dma.o
 
 xen_dummy.xen : xen_libs
 HOSTLOADLIBES_xen_dummy.xen :=
diff --git a/dsp/xentium/include/dma.h b/dsp/xentium/include/dma.h
new file mode 100644
index 0000000000000000000000000000000000000000..232d7015d8a8cfc1161ffcc611890305cce55018
--- /dev/null
+++ b/dsp/xentium/include/dma.h
@@ -0,0 +1,26 @@
+/**
+ * @file dsp/xentium/include/dma.h
+ */
+
+#ifndef _DSP_XENTIUM_DMA_H_
+#define _DSP_XENTIUM_DMA_H_
+
+#include <stddef.h>
+#include <noc_dma.h>
+
+
+
+int xen_noc_dma_req_xfer(struct noc_dma_channel *c,
+			 void *src, void *dst, uint16_t x_elem, uint16_t y_elem,
+			 enum noc_dma_elem_size elem_size,
+			 int16_t x_stride_src, int16_t x_stride_dst,
+			 int16_t y_stride_src, int16_t y_stride_dst,
+			 enum noc_dma_priority dma_priority, uint16_t mtu);
+
+
+int xen_noc_dma_req_lin_xfer(struct noc_dma_channel *c,
+			     void *src, void *dst,
+			     uint16_t elem, enum noc_dma_elem_size elem_size,
+			     enum noc_dma_priority dma_priority, uint16_t mtu);
+
+#endif /* _DSP_XENTIUM_DMA_H_ */
diff --git a/dsp/xentium/include/xen.h b/dsp/xentium/include/xen.h
index 1eec01effb6b0b7d196842f8a4efe7005ab860bf..c8a17df3e0b00461b5754c3ad3565cebd799e025 100644
--- a/dsp/xentium/include/xen.h
+++ b/dsp/xentium/include/xen.h
@@ -17,10 +17,14 @@ void xen_set_mail(size_t mbox, unsigned long msg);
 
 unsigned long xen_get_mail(size_t mbox);
 
-struct xen_msg_data *xen_wait_msg(void);
+struct xen_msg_data *xen_wait_cmd(void);
+void xen_send_msg(struct xen_msg_data *m);
 
 void xen_wait_timer(int timer, unsigned long cycles);
 
+void xen_wait_dma(void);
+
+void *xen_get_base_addr(size_t xen_id);
 
 
 #endif /* _DSP_XENTIUM_XEN_H_ */
diff --git a/dsp/xentium/lib/dma.c b/dsp/xentium/lib/dma.c
new file mode 100644
index 0000000000000000000000000000000000000000..ed7d6041ed3488a8dbe5ff784026f1076c33cc78
--- /dev/null
+++ b/dsp/xentium/lib/dma.c
@@ -0,0 +1,384 @@
+/**
+ * @file dsp/xentium/lib/dma.c
+ *
+ * These are mostly just duplicates of the noc_dma access functions...
+ */
+
+#include <errno.h>
+#include <dma.h>
+
+
+#define ioread32(X)                   __raw_readl(X)
+#define iowrite32(val,X)              __raw_writel(val,X)
+
+/* volatile 32 bit load from addr; this is what ioread32() expands to */
+static inline uint32_t __raw_readl(const volatile void *addr)
+{
+        return (*(const volatile uint32_t *) addr);
+}
+
+/* volatile 32 bit store of l to addr; this is what iowrite32() expands to */
+static inline void __raw_writel(uint32_t l, volatile void *addr)
+{
+        (*(volatile uint32_t *) addr) = l;
+}
+
+
+/**
+ * NoC DMA channel control register layout, usable as one word or by field
+ * @see MPPB datasheet v4.03, p64
+ */
+
+__extension__
+struct noc_dma_channel_ctrl {
+	union {
+		uint32_t ctrl;			/* the register as one word */
+		struct {
+			uint32_t priority   :1;	/* transfer priority */
+			uint32_t irq_fwd    :2;	/* irq notification mode */
+			uint32_t acc_sz     :2;	/* transfer element size */
+			uint32_t reserved   :11;
+			uint32_t max_pkt_sz :16; /* maximum packet size */
+		}__attribute__((packed));
+	};
+};
+
+
+/**
+ * NoC DMA channel configuration register layout (16 words per channel)
+ * @see MPPB datasheet v4.03, p63
+ */
+__extension__
+struct noc_dma_channel {
+
+	union {
+		uint32_t start;		/* written to launch a transfer */
+		uint32_t status;	/* read to test the busy bit */
+		struct {
+			uint32_t start_status:1;
+			uint32_t res01	     :31;	/* reserved */
+		}__attribute__((packed));
+	};
+
+	union {
+		uint32_t control;	/* the configuration as one word */
+		struct noc_dma_channel_ctrl ctrl;
+	};
+
+	uint32_t dst;			/* destination address */
+
+	uint32_t src;			/* source address */
+
+	union {
+		uint32_t sizes;		/* element counts in x and y */
+		struct {
+			uint16_t sz_y;
+			uint16_t sz_x;
+		}__attribute__((packed));
+	};
+
+	union {
+		uint32_t strides_x;	/* source/destination strides in x */
+		struct {
+			uint16_t str_x_src;
+			uint16_t str_x_dst;
+		}__attribute__((packed));
+	};
+
+	union {
+		uint32_t strides_y;	/* source/destination strides in y */
+		struct {
+			uint16_t str_y_src;
+			uint16_t str_y_dst;
+		}__attribute__((packed));
+	};
+
+	uint32_t res02[9];		/* reserved, 7 + 9 = 16 words total */
+
+}__attribute__((packed));
+
+
+
+
+
+/**
+ * @brief check if a NoC DMA channel is in use
+ *
+ * @param chan a struct noc_dma_channel
+ *
+ * @returns 0 if not busy, otherwise the set NOC_DMA_CHANNEL_BUSY bit(s)
+ */
+
+static int noc_dma_channel_busy(struct noc_dma_channel *chan)
+{
+	return ioread32(&chan->status) & NOC_DMA_CHANNEL_BUSY;
+}
+
+
+/**
+ * @brief set source and destination strides in x with one register write
+ *
+ * @param chan a struct noc_dma_channel
+ * @param stride_src the source stride (signed, 16 bit)
+ * @param stride_dst the destination stride (signed, 16 bit)
+ */
+
+static void noc_dma_set_strides_x(struct noc_dma_channel *chan,
+				  int16_t stride_src, int16_t stride_dst)
+{
+	iowrite32(NOC_DMA_STRIDES(stride_src, stride_dst), &chan->strides_x);
+}
+
+
+/**
+ * @brief set source and destination strides in y with one register write
+ *
+ * @param chan a struct noc_dma_channel
+ * @param stride_src the source stride (signed, 16 bit)
+ * @param stride_dst the destination stride (signed, 16 bit)
+ */
+
+static void noc_dma_set_strides_y(struct noc_dma_channel *chan,
+				  int16_t stride_src, int16_t stride_dst)
+{
+	iowrite32(NOC_DMA_STRIDES(stride_src, stride_dst), &chan->strides_y);
+}
+
+
+/**
+ * @brief set transfer sizes in x and y with one register write
+ *
+ * @param chan a struct noc_dma_channel
+ * @param size_x the number of elements to transfer in x (unsigned, 16 bit)
+ * @param size_y the number of elements to transfer in y (unsigned, 16 bit)
+ */
+
+static void noc_dma_set_sizes(struct noc_dma_channel *chan,
+			      uint16_t size_x, uint16_t size_y)
+{
+	iowrite32(NOC_DMA_SIZES(size_x, size_y), &chan->sizes);
+}
+
+
+/**
+ * @brief set transfer source address
+ *
+ * @param chan a struct noc_dma_channel
+ * @param src the source memory location, written as a 32 bit address
+ */
+
+static void noc_dma_set_src(struct noc_dma_channel *chan, void *src)
+{
+	iowrite32((uint32_t) src, &chan->src);
+}
+
+
+/**
+ * @brief set transfer destination address
+ *
+ * @param chan a struct noc_dma_channel
+ * @param dst the destination memory location, written as a 32 bit address
+ */
+
+static void noc_dma_set_dst(struct noc_dma_channel *chan, void *dst)
+{
+	iowrite32((uint32_t) dst, &chan->dst);
+}
+
+
+/**
+ * @brief start a DMA transfer and busy-wait until the channel is idle again
+ * @param chan a struct noc_dma_channel
+ *
+ * @returns 0 on success, -EBUSY if the channel was already active on entry
+ */
+
+static int noc_dma_start_transfer(struct noc_dma_channel *chan)
+{
+	if (noc_dma_channel_busy(chan))
+		return -EBUSY;
+
+	iowrite32(NOC_DMA_CHANNEL_START, &chan->start);
+
+	/* XXX remove once we figure out how to properly use the Xentium's
+	 * DMA status bits
+	 */
+	while (noc_dma_channel_busy(chan));
+
+	return 0;
+}
+
+
+/**
+ * @brief sets the channel configuration for a transfer
+ *
+ * @param chan a struct noc_dma_channel
+ * @param max_pkt_size the maximum packet size to use during the transfer
+ * @param elem_size the size of a transfer element
+ * @param irq_fwd the irq notification mode on transfer completion
+ * @param priority the transfer priority
+ *
+ * @returns 0 on success, -EBUSY if channel is active
+ */
+
+static int noc_dma_set_config(struct noc_dma_channel *chan,
+			      uint16_t max_pkt_size,
+			      enum noc_dma_elem_size elem_size,
+			      enum noc_dma_irq_fwd irq_fwd,
+			      enum noc_dma_priority priority)
+{
+	struct noc_dma_channel_ctrl ctrl;
+
+	if (noc_dma_channel_busy(chan))
+		return -EBUSY;
+
+	ctrl.ctrl       = 0;	/* reserved bits must not be stack garbage */
+	ctrl.max_pkt_sz = max_pkt_size;
+	ctrl.acc_sz     = elem_size;
+	ctrl.irq_fwd    = irq_fwd;
+	ctrl.priority   = priority;
+
+	iowrite32(ctrl.ctrl, &chan->ctrl);
+
+	return 0;
+}
+
+
+/**
+ * @brief initialises the channel registers with the parameters of a transfer
+ *
+ * @param c a struct noc_dma_channel
+ * @param t a struct noc_dma_transfer
+ *
+ * @returns 0 on success, -EINVAL on error, -EBUSY if channel is busy
+ */
+
+static int noc_dma_init_transfer(struct noc_dma_channel  *c,
+				 struct noc_dma_transfer *t)
+{
+
+	if (noc_dma_channel_busy(c))
+		return -EBUSY;
+
+	/* src == dst memory will result in a stuck DMA channel */
+	if (((int) t->dst & 0xF0000000) == ((int) t->src & 0xF0000000))
+		return -EINVAL;
+
+	/* no need to verify stride/size limits, they are implicit by type */
+
+	noc_dma_set_src(c, t->src);
+	noc_dma_set_dst(c, t->dst);
+
+	noc_dma_set_strides_x(c, t->x_stride_src, t->x_stride_dst);
+	noc_dma_set_strides_y(c, t->y_stride_src, t->y_stride_dst);
+
+	noc_dma_set_sizes(c, t->x_elem, t->y_elem);
+
+	/* set_config checks the busy bit again, so pass its result on */
+	return noc_dma_set_config(c, t->mtu, t->elem_size, t->irq_fwd,
+				  t->priority);
+}
+
+
+
+/**
+ * @brief request an arbitrary DMA transfer
+ *
+ * @param c the DMA channel to use
+ *
+ * @param src  the source address
+ * @param dst the destination address
+ *
+ * @param x_elem the number of elements in x
+ * @param y_elem the number of elements in y
+ * @param elem_size the element size (BYTE, HALFWORD, WORD, DOUBLEWORD)
+ *
+ * @param x_stride_src the width of stride in source x
+ * @param x_stride_dst the width of stride in destination x
+ *
+ * @param y_stride_src the width of stride in source y
+ * @param y_stride_dst the width of stride in destination y
+ * @param dma_priority the priority of the transfer
+ * @param mtu the maximum transfer unit of a NoC packet
+ *
+ * @returns 0 on success, <0 on error (-EINVAL, -EBUSY)
+ */
+
+int xen_noc_dma_req_xfer(struct noc_dma_channel *c,
+			 void *src, void *dst, uint16_t x_elem, uint16_t y_elem,
+			 enum noc_dma_elem_size elem_size,
+			 int16_t x_stride_src, int16_t x_stride_dst,
+			 int16_t y_stride_src, int16_t y_stride_dst,
+			 enum noc_dma_priority dma_priority, uint16_t mtu)
+{
+	int ret;
+	struct noc_dma_transfer t;
+
+	if (!c)
+		return -EINVAL;
+
+	if (!src)
+		return -EINVAL;
+
+	if (!dst)
+		return -EINVAL;
+
+	if (!x_elem)
+		return -EINVAL;
+
+	if (!y_elem)
+		return -EINVAL;
+
+	t.src = src;
+	t.dst = dst;
+
+	t.x_elem = x_elem;
+	t.y_elem = y_elem;
+
+	t.elem_size = elem_size;
+
+	t.x_stride_src = x_stride_src;
+	t.x_stride_dst = x_stride_dst;
+
+	t.y_stride_src = y_stride_src;
+	t.y_stride_dst = y_stride_dst;
+
+	t.mtu = mtu;
+
+	t.irq_fwd = IN;
+
+	t.priority = dma_priority;
+
+	/* program the channel registers first, then launch the transfer */
+	ret = noc_dma_init_transfer(c, &t);
+	if (ret)
+		return ret;
+
+	return noc_dma_start_transfer(c);
+}
+
+
+/**
+ * @brief request a linear array DMA transfer
+ *
+ * @param c the DMA channel to use
+ *
+ * @param src  the source address
+ * @param dst the destination address
+ *
+ * @param elem the number of elements
+ * @param elem_size the element size (BYTE, HALFWORD, WORD, DOUBLEWORD)
+ * @param dma_priority the priority of the transfer
+ * @param mtu the maximum transfer unit of a NoC packet
+ *
+ * @returns 0 on success, <0 on error
+ */
+
+int xen_noc_dma_req_lin_xfer(struct noc_dma_channel *c,
+			     void *src, void *dst,
+			     uint16_t elem, enum noc_dma_elem_size elem_size,
+			     enum noc_dma_priority dma_priority, uint16_t mtu)
+{
+	return xen_noc_dma_req_xfer(c, src, dst, elem, 1, elem_size,
+				    1, 1, 1, 1, dma_priority, mtu);
+}
diff --git a/dsp/xentium/lib/xen.c b/dsp/xentium/lib/xen.c
index e264e8791a57ef3b9b99b2a9b6359fbfa69dbe21..da658d628c3469b8d36ed593746b2d132c8907ef 100644
--- a/dsp/xentium/lib/xen.c
+++ b/dsp/xentium/lib/xen.c
@@ -48,23 +48,36 @@ unsigned long xen_get_mail(size_t mbox)
 
 
 /**
- * @brief wait for host to pass a data message for processing
+ * @brief wait for host to pass a command data message for processing
  *
  * @returns a data message pointer
  */
 
-struct xen_msg_data *xen_wait_msg(void)
+struct xen_msg_data *xen_wait_cmd(void)
 {
 	struct xen_msg_data *m;
 
-	x_wait(XEN_MSG_MBOX_MASK);
+	x_wait(XEN_CMD_MBOX_MASK);
 
-	m = (struct xen_msg_data *) xen_get_mail(XEN_MSG_MBOX);
+	m = (struct xen_msg_data *) xen_get_mail(XEN_CMD_MBOX);
 
 	return m;
 }
 
 
+/**
+ * @brief store a data message in XEN_MSG_MBOX, then signal the host
+ *
+ * @param m the message to pass; its address is written to the mailbox
+ */
+
+void xen_send_msg(struct xen_msg_data *m)
+{
+	xen_set_mail(XEN_MSG_MBOX, (unsigned long) m);
+	xen_signal_host();
+}
+
+
 /**
  * @brief wait for a timer to count to 0
  *
@@ -83,3 +96,28 @@ void xen_wait_timer(int timer, unsigned long cycles)
 	while(xen_dev_local->timer[timer]);
 }
 
+
+/**
+ * @brief placeholder for waiting on DMA completion, currently does nothing
+ * @note meant to wait until the DMA status bit is low (transfer complete)
+ */
+void xen_wait_dma(void)
+{
+	/* XXX this doesn't seem to work sensibly, we can't x_wait() on the
+	 * dma irq status bit, because it is set UNTIL we clear the
+	 * dma_irq, and we'd apparently have to also clear the latter
+	 * _before_ we even start the transfer. For now, it's easier to just
+	 * wait on the channel status bit in noc_dma_start_transfer()
+	 */
+}
+
+/**
+ * @brief get the base address of a Xentium by its numeric index
+ * @param idx the index of the Xentium (not range-checked here)
+ * @returns the address XEN_BASE_ADDR + XEN_BLOCK_SIZE * idx
+ */
+
+void *xen_get_base_addr(size_t idx)
+{
+	return (void *) (XEN_BASE_ADDR + XEN_BLOCK_SIZE * idx);
+}
diff --git a/dsp/xentium/otherkernel.c b/dsp/xentium/otherkernel.c
new file mode 100644
index 0000000000000000000000000000000000000000..41b06961f612a417be673b28c581a36711d0d972
--- /dev/null
+++ b/dsp/xentium/otherkernel.c
@@ -0,0 +1,82 @@
+#include <xen.h>
+#include <dma.h>
+#include <xen_printf.h>
+#include <data_proc_net.h>
+
+#define KERN_NAME		"otherkernel"
+#define KERN_STORAGE_BYTES	0
+#define KERN_OP_CODE		0xb19b00b5
+#define KERN_CRIT_TASK_LVL	12
+
+/* actual configuration; "used" keeps the object even if unreferenced */
+struct xen_kernel_cfg _xen_kernel_param  __attribute__((used)) = {
+	KERN_NAME, KERN_OP_CODE,
+	KERN_CRIT_TASK_LVL,
+	NULL, KERN_STORAGE_BYTES,
+};
+
+/* double each word of the task's data; DMA moves the data in and out */
+static void process_task(struct xen_msg_data *m)
+{
+	size_t n;
+	size_t i;
+
+	unsigned int *p;
+	volatile unsigned int *b;
+
+	struct xen_tcm *tcm_ext;
+
+	/* the processing loop below accesses the data via xen_tcm_local */
+	b = (volatile unsigned int *) xen_tcm_local;
+	/* the DMA transfers use the address from xen_get_base_addr() */
+	tcm_ext = xen_get_base_addr(m->xen_id);
+
+	/* without a task there is nothing to do but report TASK_DESTROY */
+	if (!m->t) {
+		m->cmd = TASK_DESTROY;
+		return;
+	}
+
+	/* NOTE(review): n is a size_t, the DMA element count a uint16_t */
+	n = pt_get_nmemb(m->t);
+	p = (unsigned int *) pt_get_data(m->t);
+
+	/* retrieve data to TCM; XXX the return value is not checked */
+	xen_noc_dma_req_lin_xfer(m->dma, p, tcm_ext, n, WORD, LOW, 256);
+
+	/* process */
+	for (i = 0; i < n; i++) {
+		b[i] *= 2;
+	}
+
+	/* back to main memory; XXX the return value is not checked */
+	xen_noc_dma_req_lin_xfer(m->dma, tcm_ext, p, n, WORD, LOW, 256);
+
+	m->cmd = TASK_SUCCESS;
+}
+
+
+
+int main(void)
+{
+	struct xen_msg_data *msg;
+
+	/* serve commands from the host until it requests a stop */
+	for (;;) {
+		msg = xen_wait_cmd();
+
+		if (msg->cmd == TASK_STOP)
+			break;
+
+		/* process_task() stores the result code in msg->cmd */
+		process_task(msg);
+
+		/* hand the message back to the host */
+		xen_send_msg(msg);
+	}
+
+	/* confirm abort */
+	xen_send_msg(msg);
+
+	return 0;
+}
diff --git a/dsp/xentium/xen_dummy.c b/dsp/xentium/xen_dummy.c
index a8e84318cca7d7ce0a093da7adf917437a65c613..cf384890697164c250773a53696aba94d64d5d90 100644
--- a/dsp/xentium/xen_dummy.c
+++ b/dsp/xentium/xen_dummy.c
@@ -1,10 +1,9 @@
-
 #include <xen.h>
+#include <dma.h>
 #include <xen_printf.h>
-
-
 #include <data_proc_net.h>
 
+
 #define KERN_NAME		"xen_dummy"
 #define KERN_STORAGE_BYTES	0
 #define KERN_OP_CODE		0xdeadbeef
@@ -21,34 +20,40 @@ struct xen_kernel_cfg _xen_kernel_param = {
 static void process_task(struct xen_msg_data *m)
 {
 	size_t n;
-	size_t i, j;
+	size_t i;
 
 	unsigned int *p;
+	volatile unsigned int *b;
+
+	struct xen_tcm *tcm_ext;
+
+	
+	b = (volatile unsigned int *) xen_tcm_local;
+
+	tcm_ext = xen_get_base_addr(m->xen_id);
 
 
 	if (!m->t) {
 		m->cmd = TASK_DESTROY;
 		return;
 	}
-#if 0
-	/* op code doesn't match, pass it on */
-	if (pt_get_pend_step_op_code(m->t) != _xen_kernel_param.op_code) {
-		m->cmd = TASK_SUCCESS;
-		return;
-	}
 
-#endif
+
 	n = pt_get_nmemb(m->t);
 	p = (unsigned int *) pt_get_data(m->t);
 
-#if 0
-	for (j = 0; j < 1000; j++) {
-		for (i = 0; i < n; i++) {
-			p[i] += 1230;
-		}
+	/* retrieve data to TCM XXX retval */
+	xen_noc_dma_req_lin_xfer(m->dma, p, tcm_ext, n, WORD, LOW, 256);
+
+	/* process */
+	for (i = 0; i < n; i++) {
+		b[i] += 1;
 	}
-#endif
-	m->cmd = TASK_SUCCESS;	
+	
+	/* back to main memory XXX retval */
+	xen_noc_dma_req_lin_xfer(m->dma, tcm_ext, p, n, WORD, LOW, 256);
+
+	m->cmd = TASK_SUCCESS;
 }
 
 
@@ -57,27 +62,22 @@ int main(void)
 {
 	struct xen_msg_data *m;
 
-
 	while (1) {
-		m = xen_wait_msg();
+		m = xen_wait_cmd();
 
 		switch (m->cmd) {
 			case TASK_STOP:
-				/* confirm abort */	
-				x_printf("%s says bye!\n", _xen_kernel_param.name);
-				xen_signal_host();
+				/* confirm abort */
+				xen_send_msg(m);
 				return 0;
 			default:
 				break;
 		}
-
-		//x_printf("Hello there %s\n", _xen_kernel_param.name);
 		process_task(m);
 
-		xen_set_mail(XEN_MSG_MBOX, (unsigned long) m);
-		xen_signal_host();
+		xen_send_msg(m);
 	}
-	
+
 
 	return 0;
 }