diff --git a/include/byteorder.h b/include/byteorder.h
new file mode 100644
index 0000000000000000000000000000000000000000..b0397059a8dfb1f632f2df24db380ec1d46beab9
--- /dev/null
+++ b/include/byteorder.h
@@ -0,0 +1,273 @@
+/**
+ * @file   byteorder.h
+ * @author Armin Luntzer (armin.luntzer@univie.ac.at),
+ * @date   2015
+ *
+ * @copyright GPLv2
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * This is a set of macros for consistent endianness conversion. They work
+ * for both little and big endian cpus.
+ *
+ * conversion of XX-bit integers (16- or 32-) between native CPU format
+ * and little/big endian format:
+ *	cpu_to_[bl]eXX(uintXX_t x)
+ *	[bl]eXX_to_cpu(uintXX_t x)
+ *
+ * the same, but change in situ (these take a pointer to the value):
+ *	cpu_to_[bl]eXXs(uintXX_t *x)
+ *	[bl]eXX_to_cpus(uintXX_t *x)
+ *
+ *
+ * This is based on the byte order macros from the linux kernel, see:
+ * include/linux/byteorder/generic.h
+ * include/uapi/linux/swab.h
+ * include/uapi/linux/byteorder/big_endian.h
+ * include/uapi/linux/byteorder/little_endian.h
+ * by @author Linus Torvalds et al.
+ *
+ */
+#ifndef BYTEORDER_H
+#define BYTEORDER_H
+
+#include <stdint.h>
+
+
+
+/*
+ * These names may already be defined by system headers (glibc's <endian.h>
+ * defines both of them). Drop them, so that below exactly one of the two is
+ * defined and "#ifdef"/"#if" on either name selects the cpu byte order.
+ */
+#undef __BIG_ENDIAN
+#undef __LITTLE_ENDIAN
+/* known targets first, then the GCC/Clang predefined __BYTE_ORDER__ */
+#if defined(__sparc__)
+#define __BIG_ENDIAN 4321
+#elif defined(__i386__) || defined(__x86_64__)
+#define __LITTLE_ENDIAN 1234
+#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define __BIG_ENDIAN 4321
+#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#define __LITTLE_ENDIAN 1234
+#else
+#error "byteorder.h: unable to determine the byte order of the target cpu"
+#endif
+
+
+/* byteswap by shift and mask; note that x is expanded more than once */
+#define ___constant_swab16(x) ((uint16_t)(			\
+	(((uint16_t)(x) & (uint16_t)0x00ffU) << 8) |		\
+	(((uint16_t)(x) & (uint16_t)0xff00U) >> 8)))
+
+#define ___constant_swab32(x) ((uint32_t)(			\
+	(((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) |	\
+	(((uint32_t)(x) & (uint32_t)0x0000ff00UL) <<  8) |	\
+	(((uint32_t)(x) & (uint32_t)0x00ff0000UL) >>  8) |	\
+	(((uint32_t)(x) & (uint32_t)0xff000000UL) >> 24)))
+
+/* GCC_VERSION is not defined by this header, so derive it from __GNUC__ */
+#if defined(USE_BUILTIN_BSWAP) && defined(__GNUC__)
+#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40400
+#define __HAVE_BUILTIN_BSWAP32__
+#endif
+#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40800
+#define __HAVE_BUILTIN_BSWAP16__
+#endif
+#endif /* USE_BUILTIN_BSWAP */
+
+/* byteswap val; uses __builtin_bswap16() if __HAVE_BUILTIN_BSWAP16__ is set */
+static inline __attribute__((const)) uint16_t __fswab16(uint16_t val)
+{
+#ifdef __HAVE_BUILTIN_BSWAP16__
+	return __builtin_bswap16(val);
+#else
+	return ___constant_swab16(val);
+#endif
+}
+
+/* byteswap val; uses __builtin_bswap32() if __HAVE_BUILTIN_BSWAP32__ is set */
+static inline __attribute__((const)) uint32_t __fswab32(uint32_t val)
+{
+#ifdef __HAVE_BUILTIN_BSWAP32__
+	return __builtin_bswap32(val);
+#else
+	return ___constant_swab32(val);
+#endif
+}
+
+/**
+ * @brief return a byteswapped 16-bit value
+ * @param x value to byteswap
+ * @note a constant x expands to ___constant_swab16(), else __fswab16() is called
+ */
+#define __swab16(x)				\
+	(__builtin_constant_p((uint16_t)(x)) ?	\
+	___constant_swab16(x) :			\
+	__fswab16(x))
+
+
+/**
+ * @brief return a byteswapped 32-bit value
+ * @param x a value to byteswap
+ * @note a constant x expands to ___constant_swab32(), else __fswab32() is called
+ */
+#define __swab32(x)				\
+	(__builtin_constant_p((uint32_t)(x)) ?	\
+	___constant_swab32(x) :			\
+	__fswab32(x))
+
+
+/**
+ * @brief return a byteswapped 16-bit value from a pointer
+ * @param p a pointer to a naturally-aligned 16-bit value
+ * @returns the byteswapped copy of *p; *p itself is not modified
+ */
+static inline uint16_t __swab16p(const uint16_t *p)
+{
+	return __swab16(*p);
+}
+
+/**
+ * @brief return a byteswapped 32-bit value from a pointer
+ * @param p a pointer to a naturally-aligned 32-bit value
+ * @returns the byteswapped copy of *p; *p itself is not modified
+ */
+static inline uint32_t __swab32p(const uint32_t *p)
+{
+	return __swab32(*p);
+}
+
+/**
+ * @brief byteswap a 16-bit value in-place
+ * @param p a pointer to a naturally-aligned 16-bit value
+ * @note reads *p via __swab16p() and stores the swapped value back to *p
+ */
+static inline void __swab16s(uint16_t *p)
+{
+	*p = __swab16p(p);
+}
+
+
+/**
+ * @brief byteswap a 32-bit value in-place
+ * @param p a pointer to a naturally-aligned 32-bit value
+ * @note reads *p via __swab32p() and stores the swapped value back to *p
+ */
+static inline void __swab32s(uint32_t *p)
+{
+	*p = __swab32p(p);
+}
+
+
+
+#ifdef __BIG_ENDIAN
+/* no-op in-place variants are do/while(0) so they form a single statement */
+#define __cpu_to_le16(x)   ((uint16_t)__swab16((x)))
+#define __cpu_to_le32(x)   ((uint32_t)__swab32((x)))
+
+#define __cpu_to_le16s(x)  __swab16s((x))
+#define __cpu_to_le32s(x)  __swab32s((x))
+
+#define __cpu_to_be16(x)   ((uint16_t)(x))
+#define __cpu_to_be32(x)   ((uint32_t)(x))
+
+#define __cpu_to_be16s(x)  do { (void)(x); } while (0)
+#define __cpu_to_be32s(x)  do { (void)(x); } while (0)
+
+
+
+#define __le16_to_cpu(x)   __swab16((uint16_t)(x))
+#define __le32_to_cpu(x)   __swab32((uint32_t)(x))
+
+#define __le16_to_cpus(x)  __swab16s((x))
+#define __le32_to_cpus(x)  __swab32s((x))
+
+#define __be16_to_cpu(x)   ((uint16_t)(x))
+#define __be32_to_cpu(x)   ((uint32_t)(x))
+
+#define __be16_to_cpus(x)  do { (void)(x); } while (0)
+#define __be32_to_cpus(x)  do { (void)(x); } while (0)
+
+#endif /* __BIG_ENDIAN */
+
+
+#ifdef __LITTLE_ENDIAN
+/* no-op in-place variants are do/while(0) so they form a single statement */
+#define __cpu_to_le16(x)   ((uint16_t)(x))
+#define __cpu_to_le32(x)   ((uint32_t)(x))
+
+#define __cpu_to_le16s(x)  do { (void)(x); } while (0)
+#define __cpu_to_le32s(x)  do { (void)(x); } while (0)
+
+#define __cpu_to_be16(x)   ((uint16_t)__swab16((x)))
+#define __cpu_to_be32(x)   ((uint32_t)__swab32((x)))
+
+#define __cpu_to_be16s(x)  __swab16s((x))
+#define __cpu_to_be32s(x)  __swab32s((x))
+
+
+
+#define __le16_to_cpu(x)  ((uint16_t)(x))
+#define __le32_to_cpu(x)  ((uint32_t)(x))
+
+#define __le16_to_cpus(x) do { (void)(x); } while (0)
+#define __le32_to_cpus(x) do { (void)(x); } while (0)
+
+#define __be16_to_cpu(x)  __swab16((uint16_t)(x))
+#define __be32_to_cpu(x)  __swab32((uint32_t)(x))
+
+#define __be16_to_cpus(x) __swab16s((x))
+#define __be32_to_cpus(x) __swab32s((x))
+
+#endif /* __LITTLE_ENDIAN */
+
+
+
+/** these are the public conversion macros */
+
+/** convert cpu order to little endian */
+#define cpu_to_le16  __cpu_to_le16
+#define cpu_to_le32  __cpu_to_le32
+
+/** in-place convert cpu order to little endian (takes a pointer) */
+#define cpu_to_le16s __cpu_to_le16s
+#define cpu_to_le32s __cpu_to_le32s
+
+/** convert cpu order to big endian */
+#define cpu_to_be16  __cpu_to_be16
+#define cpu_to_be32  __cpu_to_be32
+
+/** in-place convert cpu order to big endian (takes a pointer) */
+#define cpu_to_be16s __cpu_to_be16s
+#define cpu_to_be32s __cpu_to_be32s
+
+
+/* the same, but in the reverse direction */
+
+/** convert little endian to cpu order */
+#define le16_to_cpu  __le16_to_cpu
+#define le32_to_cpu  __le32_to_cpu
+
+/** in-place convert little endian to cpu order (takes a pointer) */
+#define le16_to_cpus __le16_to_cpus
+#define le32_to_cpus __le32_to_cpus
+
+/** convert big endian to cpu order */
+#define be16_to_cpu  __be16_to_cpu
+#define be32_to_cpu  __be32_to_cpu
+
+/** in-place convert big endian to cpu order (takes a pointer) */
+#define be16_to_cpus __be16_to_cpus
+#define be32_to_cpus __be32_to_cpus
+
+
+
+#endif /* BYTEORDER_H */
diff --git a/include/rdcu_ctrl.h b/include/rdcu_ctrl.h
index a008943bdf48c14a97111002f51d6c6f9f1c55d6..54e47a78fa21b9543b649fd8a8b8fd5aa5fb1b83 100644
--- a/include/rdcu_ctrl.h
+++ b/include/rdcu_ctrl.h
@@ -272,7 +272,9 @@ uint8_t rdcu_edac_get_scrub_info(void);
 /* SRAM */
 int rdcu_read_sram(void *buf, uint32_t addr, uint32_t size);
 int rdcu_write_sram(void *buf, uint32_t addr, uint32_t size);
-
+int rdcu_write_sram_8(uint8_t *buf, uint32_t addr, uint32_t size);
+int rdcu_write_sram_16(uint16_t *buf, uint32_t addr, uint32_t size);
+int rdcu_write_sram_32(uint32_t *buf, uint32_t addr, uint32_t size);
 
 
 void rdcu_ctrl_init(void);
diff --git a/lib/rdcu_ctrl.c b/lib/rdcu_ctrl.c
index 60e5e1c76f8d4b9072d8b70095832fe549dbc450..7117567335e63130442ae29b86aa0581cb6c50aa 100644
--- a/lib/rdcu_ctrl.c
+++ b/lib/rdcu_ctrl.c
@@ -61,6 +61,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <byteorder.h>
 #include <rmap.h>
 #include <rdcu_cmd.h>
 #include <rdcu_ctrl.h>
@@ -462,7 +463,7 @@ uint32_t rdcu_get_adc_logic_enabled(void)
 }
 
 
-/*
+/**
  * @brief get RDCU Interrupt status
  * @see RDCU-FRS-FN-0632
  *
@@ -1453,7 +1454,7 @@ uint8_t rdcu_edac_get_scrub_info(void)
 
 
 /**
- * @brief read data from SRAM
+ * @brief read data from the local SRAM mirror
  *
  * @param buf the buffer to read to (if NULL, the required size is returned)
  *
@@ -1477,12 +1478,12 @@ int rdcu_read_sram(void *buf, uint32_t addr, uint32_t size)
 	if (buf)
 		memcpy(buf, &rdcu->sram[addr], size);
 
-	return size; /* lol */
+	return (int)size; /* lol */
 }
 
 
 /**
- * @brief write data to SRAM
+ * @brief write arbitrary big-endian data to the local SRAM mirror
  *
  * @param buf the buffer to read from
  *
@@ -1509,7 +1510,117 @@ int rdcu_write_sram(void *buf, uint32_t addr, uint32_t size)
 	if (buf)
 		memcpy(&rdcu->sram[addr], buf, size);
 
-	return size; /* lol */
+	return (int)size; /* lol */
+}
+
+
+/**
+ * @brief write uint8_t formatted data to the local SRAM mirror. (This function
+ *	is endian-safe.)
+ *
+ * @param buf the buffer to read from
+ *
+ * @param addr an address within the RDCU SRAM
+ * @param size the number of bytes to write
+ *
+ * @returns the number of bytes written, < 0 on error
+ * @note forwards all arguments unchanged to rdcu_write_sram()
+ */
+int rdcu_write_sram_8(uint8_t *buf, uint32_t addr, uint32_t size)
+{
+	return rdcu_write_sram(buf, addr, size);
+}
+
+
+/**
+ * @brief write uint16_t formatted data to the local SRAM mirror. This function
+ *	is endian-safe.
+ *
+ * @param buf the buffer to read from (values in cpu byte order)
+ *
+ * @param addr an address within the RDCU SRAM
+ * @param size the number of bytes to write (must be a multiple of 2)
+ *
+ * @returns the number of bytes written (0 if buf is NULL), < 0 on error
+ */
+
+int rdcu_write_sram_16(uint16_t *buf, uint32_t addr, uint32_t size)
+{
+	if (!buf)
+		return 0;
+
+	if (size & 0x1)
+		return -1;
+
+	if (addr > RDCU_SRAM_END)
+		return -1;
+
+	if (size > RDCU_SRAM_SIZE)
+		return -1;
+
+	if (addr + size > RDCU_SRAM_END)
+		return -1;
+
+#if __BIG_ENDIAN
+	return rdcu_write_sram(buf, addr, size);
+#else
+	{
+		uint32_t i;
+		uint8_t *dst = (uint8_t *)&rdcu->sram[addr];
+		/* store via memcpy: addr is not checked for 2-byte alignment */
+		for (i = 0; i < size/sizeof(uint16_t); i++) {
+			uint16_t be = cpu_to_be16(buf[i]);
+			memcpy(&dst[i * sizeof(be)], &be, sizeof(be));
+		}
+	}
+	return (int)size; /* lol */
+#endif /* __BIG_ENDIAN */
+}
+
+
+/**
+ * @brief write uint32_t formatted data to the local SRAM mirror. This function
+ *	is endian-safe.
+ *
+ * @param buf the buffer to read from (values in cpu byte order)
+ *
+ * @param addr an address within the RDCU SRAM
+ * @param size the number of bytes to write (must be a multiple of 4)
+ *
+ * @returns the number of bytes written (0 if buf is NULL), < 0 on error
+ */
+
+int rdcu_write_sram_32(uint32_t *buf, uint32_t addr, uint32_t size)
+{
+	if (!buf)
+		return 0;
+
+	if (size & 0x3)
+		return -1;
+
+	if (addr > RDCU_SRAM_END)
+		return -1;
+
+	if (size > RDCU_SRAM_SIZE)
+		return -1;
+
+	if (addr + size > RDCU_SRAM_END)
+		return -1;
+
+#if __BIG_ENDIAN
+	return rdcu_write_sram(buf, addr, size);
+#else
+	{
+		uint32_t i;
+		uint8_t *dst = (uint8_t *)&rdcu->sram[addr];
+		/* store via memcpy: addr is not checked for 4-byte alignment */
+		for (i = 0; i < size/sizeof(uint32_t); i++) {
+			uint32_t be = cpu_to_be32(buf[i]);
+			memcpy(&dst[i * sizeof(be)], &be, sizeof(be));
+		}
+	}
+	return (int)size; /* lol */
+#endif /* __BIG_ENDIAN */
 }
 
 
@@ -2050,7 +2161,7 @@ int rdcu_sync_mirror_to_sram(uint32_t addr, uint32_t size, uint32_t mtu)
 
 	tx_bytes = size;
 
-	while(tx_bytes >= mtu) {
+	while (tx_bytes >= mtu) {
 
 		ret = rdcu_sync_data(rdcu_write_cmd_data, addr + sent,
 				     &rdcu->sram[addr + sent], mtu, 0);
@@ -2124,13 +2235,14 @@ int rdcu_sync_sram_to_mirror(uint32_t addr, uint32_t size, uint32_t mtu)
 
 	rx_bytes = size;
 
-	while(rx_bytes >= mtu) {
+	while (rx_bytes >= mtu) {
 
 		ret = rdcu_sync_data(rdcu_read_cmd_data, addr + recv,
 				     &rdcu->sram[addr + recv], mtu, 1);
 
 #if 1
-		while (rdcu_rmap_sync_status() > 3);
+		while (rdcu_rmap_sync_status() > 3)
+			;
 #endif
 
 		if (ret > 0)
diff --git a/lib/rdcu_rmap.c b/lib/rdcu_rmap.c
index 70745605a4446a4b88fbb464645cb35dcc8fd834..7e20572d4d49cccb607e3bd6757f0a206789ec8a 100644
--- a/lib/rdcu_rmap.c
+++ b/lib/rdcu_rmap.c
@@ -66,6 +66,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <byteorder.h>
 #include <rmap.h>
 #include <rdcu_rmap.h>
 
@@ -229,7 +230,7 @@ static int rdcu_process_rx(void)
 	int n;
 	int cnt = 0;
 
-	void *local_addr;
+	uint32_t *local_addr;
 
 	uint8_t *spw_pckt;
 
@@ -243,13 +244,14 @@ static int rdcu_process_rx(void)
 	while ((n = rmap_rx(NULL))) {
 		/* we received something, allocate enough space for the packet */
 		spw_pckt = (uint8_t *) malloc(n);
-		if(!spw_pckt) {
+		if (!spw_pckt) {
 			printf("malloc() for packet failed!\n");
 			return -1;
 		}
 
 		/* read the packet */
 		n = rmap_rx(spw_pckt);
+
 		if (!n) {
 			printf("Unknown error in rmap_rx()\n");
 			free(spw_pckt);
@@ -273,14 +275,27 @@ static int rdcu_process_rx(void)
 		local_addr = trans_log_get_addr(rp->tr_id);
 
 		if (!local_addr) {
-			printf("warning: response packet received not in"
+			printf("warning: response packet received not in "
 			       "transaction log\n");
 			rmap_erase_packet(rp);
 			continue;
 		}
 
-		if (rp->data_len)
+		if (rp->data_len) {
 			memcpy(local_addr, rp->data, rp->data_len);
+#if __LITTLE_ENDIAN
+			if (rp->data_len & 0x3)
+				printf("warning: length of response packet "
+				       "received is not a multiple of 4 bytes\n");
+			{
+				uint32_t i;
+				/* only whole 32-bit words are swapped */
+				for (i = 0; i < rp->data_len/4; i++)
+					be32_to_cpus(&local_addr[i]);
+			}
+#endif /* __LITTLE_ENDIAN */
+		}
+
 
 		trans_log_release_slot(rp->tr_id);
 		rmap_erase_packet(rp);
@@ -314,7 +329,7 @@ int rdcu_submit_tx(const uint8_t *cmd,  int cmd_size,
 		printf("Transmitting RMAP command\n");
 
 	if (rmap_tx(cmd, cmd_size, dpath_len, data, data_size)) {
-		printf("rmap_tx() returned error!");
+		printf("rmap_tx() returned error!\n");
 		return -1;
 	}
 
@@ -401,8 +416,7 @@ int rdcu_sync(int (*fn)(uint16_t trans_id, uint8_t *cmd),
 	int slot;
 
 	uint8_t *rmap_cmd;
-
-
+	uint8_t *data = addr;
 
 	slot = trans_log_grab_slot(addr);
 	if (slot < 0)
@@ -426,8 +440,26 @@ int rdcu_sync(int (*fn)(uint16_t trans_id, uint8_t *cmd),
 		return -1;
 	}
 
-	n = rdcu_submit_tx(rmap_cmd, n, addr, data_len);
+#if __LITTLE_ENDIAN
+	if (data_len & 0x3)
+		printf("warning: length of send packet is not a multiple of "
+		       "4 bytes\n");
+	if (data_len > 0) {
+		int i;
+		data = (uint8_t *) malloc(data_len);
+		if (!data) {
+			free(rmap_cmd);
+			return -1;
+		}
+		memcpy(data, addr, data_len);	/* incl. trailing bytes */
+		for (i = 0; i < data_len/4; i++)
+			cpu_to_be32s(&((uint32_t *)data)[i]);
+	}
+#endif /* __LITTLE_ENDIAN */
+	n = rdcu_submit_tx(rmap_cmd, n, data, data_len);
 	free(rmap_cmd);
+	if (data != addr)	/* never free the caller's buffer */
+		free(data);
 
 	return n;
 }