From 9579fdc73a1a8a11aef10c02d90f9cf4c3882b00 Mon Sep 17 00:00:00 2001
From: Dominik Loidolt <dominik.loidolt@univie.ac.at>
Date: Mon, 15 Jan 2024 17:01:48 +0100
Subject: [PATCH] Restructuring of the reading of compressed data bitstream

Relaxation of the requirement that the data to be decompressed must be 4-byte aligned
---
 lib/common/cmp_entity.c                    |    6 +-
 lib/common/cmp_support.c                   |   16 +-
 lib/common/cmp_support.h                   |    1 +
 lib/decompress/decmp.c                     | 1449 ++++++++------------
 lib/decompress/read_bitstream.h            |  314 ++++-
 lib/rdcu_compress/cmp_rdcu.c               |    2 +-
 programs/cmp_tool.c                        |   34 +-
 test/cmp_decmp/test_cmp_decmp.c            |   19 +-
 test/cmp_entity/test_cmp_entity.c          |    4 +-
 test/cmp_tool/cmp_tool_integration_test.py |   42 +-
 test/decmp/test_decmp.c                    | 1152 ++++++++--------
 11 files changed, 1528 insertions(+), 1511 deletions(-)

diff --git a/lib/common/cmp_entity.c b/lib/common/cmp_entity.c
index 5ee24e9..4517dc8 100644
--- a/lib/common/cmp_entity.c
+++ b/lib/common/cmp_entity.c
@@ -1816,7 +1816,7 @@ int cmp_ent_write_cmp_pars(struct cmp_entity *ent, const struct cmp_cfg *cfg,
 	ent_cmp_data_size = cmp_ent_get_cmp_data_size(ent);
 
 	/* check if the entity can hold the compressed data */
-	if (ent_cmp_data_size < cmp_bit_to_4byte((unsigned int)cmp_size_bits)) {
+	if (ent_cmp_data_size < cmp_bit_to_byte((unsigned int)cmp_size_bits)) {
 		debug_print("Error: The entity size is to small to hold the compressed data.\n");
 		return -2;
 	}
@@ -1985,7 +1985,7 @@ int cmp_ent_write_rdcu_cmp_pars(struct cmp_entity *ent, const struct cmp_info *i
 
 	/* check if the entity can hold the compressed data */
 	ent_cmp_data_size = cmp_ent_get_cmp_data_size(ent);
-	if (ent_cmp_data_size < cmp_bit_to_4byte(info->cmp_size)) {
+	if (ent_cmp_data_size < cmp_bit_to_byte(info->cmp_size)) {
 		debug_print("Error: The entity size is to small to hold the compressed data.\n");
 		return -2;
 	}
@@ -2106,7 +2106,7 @@ uint32_t cmp_ent_build(struct cmp_entity *ent, uint32_t version_id,
 		       uint64_t start_time, uint64_t end_time, uint16_t model_id,
 		       uint8_t model_counter, const struct cmp_cfg *cfg, int cmp_size_bits)
 {
-	uint32_t cmp_size_bytes = cmp_bit_to_4byte((unsigned int)cmp_size_bits); /* TODO: do we need to round up to 4 bytes? */
+	uint32_t cmp_size_bytes = cmp_bit_to_byte((unsigned int)cmp_size_bits);
 	uint32_t hdr_size;
 
 	if (!cfg)
diff --git a/lib/common/cmp_support.c b/lib/common/cmp_support.c
index 24cfaaa..e13040d 100644
--- a/lib/common/cmp_support.c
+++ b/lib/common/cmp_support.c
@@ -366,12 +366,26 @@ uint32_t cmp_icu_max_spill(unsigned int cmp_par)
  * @param cmp_size_bit	compressed data size, measured in bits
  *
  * @returns the size in bytes to store the hole bitstream
+ */
+
+unsigned int cmp_bit_to_byte(unsigned int cmp_size_bit)
+{
+	return (cmp_size_bit + 7) / 8;
+}
+
+
+/**
+ * @brief calculate the number of bytes needed to hold a bitstream
  * @note we round up the result to multiples of 4 bytes
+ *
+ * @param cmp_size_bit	compressed data size, measured in bits
+ *
+ * @returns the size in bytes to store the whole bitstream
  */
 
 unsigned int cmp_bit_to_4byte(unsigned int cmp_size_bit)
 {
-	return (((cmp_size_bit + 7) / 8) + 3) & ~0x3UL;
+	return (cmp_bit_to_byte(cmp_size_bit) + 3) & ~0x3UL;
 }
 
 
diff --git a/lib/common/cmp_support.h b/lib/common/cmp_support.h
index aee7bbb..6e65e21 100644
--- a/lib/common/cmp_support.h
+++ b/lib/common/cmp_support.h
@@ -269,6 +269,7 @@ struct fx_cob_par {
 int is_a_pow_of_2(unsigned int v);
 unsigned int ilog_2(uint32_t x);
 
+unsigned int cmp_bit_to_byte(unsigned int cmp_size_bit);
 unsigned int cmp_bit_to_4byte(unsigned int cmp_size_bit);
 
 int cmp_cfg_icu_is_invalid(const struct cmp_cfg *cfg);
diff --git a/lib/decompress/decmp.c b/lib/decompress/decmp.c
index 9d616c8..9481577 100644
--- a/lib/decompress/decmp.c
+++ b/lib/decompress/decmp.c
@@ -31,16 +31,15 @@
 #include "../common/byteorder.h"
 #include "../common/compiler.h"
 
+#include "read_bitstream.h"
+#include "cmp_max_used_bits_list.h"
 #include "../common/cmp_debug.h"
 #include "../common/cmp_support.h"
 #include "../common/cmp_entity.h"
 #include "../common/cmp_max_used_bits.h"
-#include "read_bitstream.h"
-#include "cmp_max_used_bits_list.h"
 
 
-#define MAX_CW_LEN_RDCU 16 /* maximum RDCU Golomb code word bit length */
-#define MAX_CW_LEN_ICU 32 /* maximum ICU Golomb code word bit length */
+#define CORRUPTION_DETECTED -1
 
 
 static const char *please_check_str = "Please check that the compression parameters match those used to compress the data and that the compressed data are not corrupted.\n";
@@ -50,153 +49,141 @@ static const char *please_check_str = "Please check that the compression paramet
  * @brief function pointer to a code word decoder function
  */
 
-typedef unsigned int(*decoder_ptr)(uint32_t, uint32_t, uint32_t, uint32_t *);
+typedef uint32_t(*decoder_ptr)(struct bit_decoder *, uint32_t, uint32_t);
 
 
 /**
- * @brief structure to hold a setup to encode a value
+ * @brief structure to hold all parameters to decode a value
  */
 
 struct decoder_setup {
-	decoder_ptr decode_cw_f; /* pointer to the code word decoder (Golomb/Rice)*/
-	int (*decode_method_f)(uint32_t *decoded_value, int stream_pos,
-			       const struct decoder_setup *setup); /* pointer to the decoding function */
-	uint32_t *bitstream_adr; /* start address of the compressed data bitstream */
-	uint32_t max_stream_len; /* maximum length of the bitstream/icu_output_buf in bits */
-	uint32_t encoder_par1; /* encoding parameter 1 */
-	uint32_t encoder_par2; /* encoding parameter 2 */
-	uint32_t outlier_par; /* outlier parameter */
-	uint32_t lossy_par; /* lossy compression parameter */
-	uint32_t max_data_bits; /* bit length of the decoded value */
-	uint32_t max_cw_len; /* bit length of the longest possible code word */
+	int (*decode_method_f)(const struct decoder_setup *setup,
+			       uint32_t *decoded_value); /* pointer to the decoding function with escape mechanism */
+	decoder_ptr decode_cw_f; /* pointer to the code word decoder function (Golomb/Rice/unary) */
+	struct bit_decoder *dec; /* pointer to a bit_decoder context */
+	uint32_t encoder_par1;   /* encoding parameter 1 */
+	uint32_t encoder_par2;   /* encoding parameter 2 */
+	uint32_t outlier_par;    /* outlier parameter */
+	uint32_t lossy_par;      /* lossy compression parameter */
+	uint32_t max_data_bits;  /* bit length of the decoded value */
 };
 
 
 /**
- * @brief decode a unary code word
+ * @brief decode the next unary code word in the bitstream
  *
- * @param dec		a pointer to a bit_DStream_t context
+ * @param dec		a pointer to a bit_decoder context
  * @param unused_1	this parameter is not used
  * @param unused_2	this parameter is not used
- * @param decoded_cw	pointer where decoded value is written
+ * @note: Can be used to decode a code word with compression parameter m = 1 (log2_m = 0)
  *
- * @returns the length of the decoded code word in bits (NOT the decoded value);
- *	failure if the return value is larger than 32
+ * @returns the decoded value
  */
 
 static __inline uint32_t unary_decoder(struct bit_decoder *dec, uint32_t unused_1,
-				       uint32_t unused_2, uint32_t *decoded_cw)
+				       uint32_t unused_2)
 {
-	uint32_t cw_len;
+	uint32_t const decoded_cw = bit_peek_leading_ones(dec); /* decode unary coding */
+	uint32_t const cw_len = decoded_cw + 1; /* Number of 1's + following 0 */
 
 	UNUSED(unused_1); /* we don't need this parameter */
-	UNUSED(unused_2); /* we don't need this parameter */
+	UNUSED(unused_2); /* and this parameter */
 
-	*decoded_cw = bit_count_leading_ones(dec); /* decode unary coding */
-	cw_len = *decoded_cw + 1; /* Number of 1's + following 0 */
 	bit_consume_bits(dec, cw_len);
 
-	return cw_len;
+	return decoded_cw;
 }
 
 
 /**
- * @brief decode a Rice code word
+ * @brief decode the next Rice code word in the bitstream
  *
- * @param code_word	Rice code word bitstream starting at the MSb
- * @param m		Golomb parameter (not used)
- * @param log2_m	Rice parameter, must be the same used for encoding; is ilog_2(m)
- * @param decoded_cw	pointer where decoded value is written
+ * @param dec		a pointer to a bit_decoder context
+ * @param m		Golomb parameter, must be the same used for encoding
+ * @param log2_m	Rice parameter, is ilog_2(m), must be larger than 0
+ * @note the Golomb parameter (m) must be a power of 2
+ * @warning the Rice parameter (log2_m) must be greater than 0! If you want to
+ *	use a Rice parameter equal to 0, use the unary_decoder instead.
  *
- * @returns the length of the decoded code word in bits (NOT the decoded value);
- *	failure if the return value is larger than 32
+ * @returns the decoded value
  */
 
-static unsigned int rice_decoder(struct bit_decoder *dec, uint32_t m, uint32_t log2_m,
-				 uint32_t *decoded_cw)
+static uint32_t rice_decoder(struct bit_decoder *dec, uint32_t m, uint32_t log2_m)
 {
-	uint32_t q; /* quotient code */
-	uint32_t ql; /* length of the quotient code */
-	uint32_t r; /* remainder code */
-	uint32_t rl = log2_m; /* length of the remainder code */
+	uint32_t q;  /* quotient */
+	uint32_t r;  /* remainder */
 
-	/* decode quotient unary code part */
-	ql = unary_decoder(dec, m, log2_m, &q);
-
-	/* get remainder code  */
-	r = (uint32_t)bit_read_bits(dec, rl);
+	assert(log2_m > 0 && log2_m < 32);
 
-	*decoded_cw = (q << rl) + r;
-
-	return ql + rl;
+	q = unary_decoder(dec, m, log2_m); /* decode quotient unary code part */
+	r = bit_read_bits32(dec, log2_m); /* get remainder */
 
+	return (q << log2_m) + r; /* calculate decoded value (q*m+r) */
 }
 
 
 /**
- * @brief decode a Golomb code word
+ * @brief decode the next Golomb code word in the bitstream
  *
- * @param code_word	Golomb code word bitstream starting at the MSb
+ * @param dec		a pointer to a bit_decoder context
  * @param m		Golomb parameter (have to be bigger than 0)
  * @param log2_m	is ilog_2(m) calculate outside function for better
  *			performance
- * @param decoded_cw	pointer where decoded value is written
  *
- * @returns the length of the decoded code word in bits (NOT the decoded value);
- *	failure if the return value is larger than 32
+ * @returns the decoded value
  */
 
-static unsigned int golomb_decoder(struct bit_decoder *dec, uint32_t m,
-				   uint32_t log2_m, uint32_t *decoded_cw)
+static uint32_t golomb_decoder(struct bit_decoder *dec, uint32_t m, uint32_t log2_m)
 {
-	uint32_t q;  /* quotient code */
-	uint32_t r1; /* remainder code case 1 */
-	uint32_t r2; /* remainder code case 2 */
-	uint32_t r;  /* remainder code */
+	uint32_t q;  /* quotient */
+	uint32_t r1; /* remainder case 1 */
+	uint32_t r2; /* remainder case 2 */
+	uint32_t r;  /* remainder */
 	uint32_t cutoff; /* cutoff between group 1 and 2 */
-	uint32_t cw_len; /* length of the decoded code word in bits */
 
 	assert(m > 0);
-	assert(log2_m == ilog_2(m) && log2_m < 32);
-	assert(decoded_cw != NULL);
+	assert(log2_m == ilog_2(m));
 
 	/* decode quotient unary code part */
-	ql = unary_decoder(dec, m, log2_m, &q);
+	q = unary_decoder(dec, m, log2_m);
 
 	/* get the remainder code for both cases */
-	r2 = (uint32_t)bit_peek_bits(dec, log2_m +1);
+	r2 = (uint32_t)bit_peek_bits(dec, log2_m+1);
 	r1 = r2 >> 1;
 
+	/* calculate cutoff between case 1 and 2 */
 	cutoff = (0x2U << log2_m) - m; /* = 2^(log2_m+1)-m */
 
 	if (r1 < cutoff) { /* remainder case 1: remainder length=log2_m */
-		cw_len = ql + log2_m;
+		bit_consume_bits(dec, log2_m);
 		r = r1;
 	} else { /* remainder case 2: remainder length = log2_m+1 */
-		cw_len = ql + log2_m + 1;
+		bit_consume_bits(dec, log2_m+1);
 		r = r2 - cutoff;
 	}
 
-	*decoded_cw = q*m + r;
-
-	return cw_len;
+	return q*m + r;
 }
 
 
 /**
  * @brief select the decoder based on the used Golomb parameter
- * @note if the Golomb parameter is a power of 2 we can use the faster Rice
- *	decoder
  *
- * @param golomb_par	Golomb parameter (have to be bigger than 0)
+ * @param golomb_par	Golomb parameter, has to be greater than 0
  *
- * @returns function pointer to the select decoder function; NULL on failure
+ * @note if the Golomb parameter is a power of 2 we can use the faster Rice decoder
+ * @note if the Golomb parameter is 1 we can use the even faster unary decoder
+ *
+ * @returns function pointer to the selected code word decoder function
  */
 
 static decoder_ptr select_decoder(uint32_t golomb_par)
 {
 	assert(golomb_par > 0);
 
+	if (golomb_par == 1)
+		return &unary_decoder;
+
 	if (is_a_pow_of_2(golomb_par))
 		return &rice_decoder;
 	else
@@ -205,207 +192,83 @@ static decoder_ptr select_decoder(uint32_t golomb_par)
 
 
 /**
- * @brief read a value of up to 32 bits from a big-endian bitstream
- *
- * @param p_value		pointer to the read value, the read value will
- *				be converted to the system endianness
- * @param n_bits		number of bits to read from the bitstream
- * @param bit_offset		bit index where the bits will be read, seen from
- *				the very beginning of the bitstream
- * @param bitstream_adr		pointer to the beginning of the bitstream
- * @param max_stream_len	maximum length of the bitstream in bits
- *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative in case of erroneous input; returns CMP_ERROR_SMALL_BUF
- *	if the bitstream buffer is too small to read the value from the
- *	bitstream
- */
-
-static int get_n_bits32(uint32_t *p_value, unsigned int n_bits, int bit_offset,
-			uint32_t *bitstream_adr, unsigned int max_stream_len)
-{
-	/* separate the bit_offset into word offset (local_adr pointer) and
-	 * local bit offset (bits_left)
-	 */
-	uint32_t *local_adr = bitstream_adr + (bit_offset >> 5);
-	unsigned int bits_left = bit_offset & 0x1f;
-	unsigned int bits_right = 32 - n_bits;
-	unsigned int local_end_pos = bits_left + n_bits;
-	int stream_len = (int)(n_bits + (unsigned int)bit_offset); /* overflow results in a negative return value */
-
-	assert(p_value != NULL);
-	assert(n_bits > 0 && n_bits <= 32);
-	assert(bit_offset >= 0);
-	assert(bitstream_adr != NULL);
-
-	/* Check if the bitstream buffer is large enough */
-	if (unlikely((unsigned int)stream_len > max_stream_len)) {
-			debug_print("Error: The end of the compressed bit stream has been exceeded. %s", please_check_str);
-			return CMP_ERROR_SMALL_BUF;
-	}
-
-	*p_value = (cpu_to_be32(*local_adr) << bits_left) >> bits_right;
-
-	if (local_end_pos > 32) { /* part 2: */
-		local_adr += 1;   /* adjust address */
-		bits_right = 64 - local_end_pos;
-		*p_value |= cpu_to_be32(*local_adr) >> bits_right;
-	}
-
-	return stream_len;
-}
-
-
-/**
- * @brief decode a Golomb/Rice encoded code word from the bitstream
+ * @brief decode the next code word with zero escape symbol mechanism from the bitstream
  *
- * @param decoded_value	pointer to the decoded value
- * @param stream_pos	start bit position code word to be decoded in the bitstream
  * @param setup		pointer to the decoder setup
+ * @param decoded_value	points to the location where the decoded value is stored
  *
- * @returns bit index of the next code word in the bitstream on success; returns
- *	negative in case of erroneous input; returns CMP_ERROR_SMALL_BUF if the
- *	bitstream buffer is too small to read the value from the bitstream
- */
-
-static int decode_normal(uint32_t *decoded_value, int stream_pos,
-			 const struct decoder_setup *setup)
-{
-	unsigned int n_read_bits = setup->max_cw_len;
-	int stream_pos_read;
-	uint32_t read_val;
-	unsigned int cw_len;
-
-	/* check if we can read max_cw_len or less; we do not know how long the
-	 * code word actually is so we try to read the maximum cw length */
-	if (setup->max_cw_len > setup->max_stream_len - (unsigned int)stream_pos) {
-		n_read_bits = setup->max_stream_len - (unsigned int)stream_pos;
-		if (n_read_bits == 0) {
-			debug_print("Error: The end of the compressed bit stream has been exceeded. %s", please_check_str);
-			return CMP_ERROR_SMALL_BUF;
-		}
-	}
-
-	stream_pos_read = get_n_bits32(&read_val, n_read_bits, stream_pos,
-				       setup->bitstream_adr, setup->max_stream_len);
-	if (stream_pos_read < 0)
-		return stream_pos_read;
-
-	/* if we read less than 32, we shift the bitstream so that it starts at the MSb */
-	read_val = read_val << (32 - n_read_bits);
-
-	cw_len = setup->decode_cw_f(read_val, setup->encoder_par1,
-				    setup->encoder_par2, decoded_value);
-	/* consistency check: The bit length of the codeword cannot be greater
-	 * than the bits read from the bitstream.  */
-	if (cw_len > n_read_bits) {
-		debug_print("Error: Data consistency check failed. Unable to decode the codeword. %s", please_check_str);
-		return -1;
-	}
-
-	return stream_pos + (int)cw_len;
-}
-
-
-/**
- * @brief decode a Golomb/Rice encoded code word with zero escape system
- *	mechanism from the bitstream
- *
- * @param decoded_value	pointer to the decoded value
- * @param stream_pos	start bit position code word to be decoded in the bitstream
- * @param setup		pointer to the decoder setup
- *
- * @returns bit index of the next code word in the bitstream on success; returns
- *	negative in case of erroneous input; returns CMP_ERROR_SMALL_BUF if the
- *	bitstream buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decode_zero(uint32_t *decoded_value, int stream_pos,
-		       const struct decoder_setup *setup)
+static int decode_zero(const struct decoder_setup *setup, uint32_t *decoded_value)
 {
-	stream_pos = decode_normal(decoded_value, stream_pos, setup);
-	if (stream_pos < 0)
-		return stream_pos;
-
-	/* consistency check: values larger than the outlier parameter should not
-	 * be Golomb/Rice encoded */
-	if (*decoded_value > setup->outlier_par) {
-		debug_print("Error: Data consistency check failed. Decoded value lager than the outlier parameter. %s", please_check_str);
-		return -1;
-	}
+	/* Decode the next value in the bitstream with the Golomb/Rice/unary decoder */
+	*decoded_value = setup->decode_cw_f(setup->dec, setup->encoder_par1, setup->encoder_par2);
 
-	if (*decoded_value == 0) {
-		/* escape symbol mechanism was used; read unencoded value */
-		uint32_t unencoded_val;
-
-		stream_pos = get_n_bits32(&unencoded_val, setup->max_data_bits, stream_pos,
-					  setup->bitstream_adr, setup->max_stream_len);
-		if (stream_pos < 0)
-			return stream_pos;
-		/* consistency check: outliers must be bigger than the outlier_par */
-		if (unencoded_val < setup->outlier_par && unencoded_val != 0) {
-			debug_print("Error: Data consistency check failed. Outlier small than the outlier parameter. %s", please_check_str);
-			return -1;
+	if (*decoded_value != 0) { /* no escape symbol detected */
+		if (*decoded_value >= setup->outlier_par) {
+			debug_print("Error: Data consistency check failed. Non-outlier decoded value greater or equal than the outlier parameter. %s", please_check_str);
+			return CORRUPTION_DETECTED;
+		}
+		*decoded_value -= 1;
+	} else {
+		/* the zero escape symbol mechanism was used; read unencoded value */
+		bit_refill(setup->dec);
+		*decoded_value = bit_read_bits32_sub_1(setup->dec, setup->max_data_bits);
+
+		if (*decoded_value < setup->outlier_par - 1) { /* -1 because 1 has already been subtracted from *decoded_value */
+			if (bit_refill(setup->dec) != BIT_OVERFLOW)
+				debug_print("Error: Data consistency check failed. Outlier small than the outlier parameter. %s", please_check_str);
+			return CORRUPTION_DETECTED;
 		}
-
-		*decoded_value = unencoded_val;
 	}
-
-	(*decoded_value)--;
-	if (*decoded_value == 0xFFFFFFFF) /* catch underflow */
-		(*decoded_value) >>=  (32 - setup->max_data_bits);
-
-	return stream_pos;
+	return bit_refill(setup->dec) == BIT_OVERFLOW;
 }
 
 
 /**
- * @brief decode a Golomb/Rice encoded code word with the multi escape mechanism
- *	from the bitstream
+ * @brief decode the next code word with the multi escape mechanism from the bitstream
  *
- * @param decoded_value	pointer to the decoded value
- * @param stream_pos	start bit position code word to be decoded in the bitstream
  * @param setup		pointer to the decoder setup
+ * @param decoded_value	points to the location where the decoded value is stored
  *
- * @returns bit index of the next code word in the bitstream on success; returns
- *	negative in case of erroneous input; returns CMP_ERROR_SMALL_BUF if the
- *	bitstream buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decode_multi(uint32_t *decoded_value, int stream_pos,
-			const struct decoder_setup *setup)
+static int decode_multi(const struct decoder_setup *setup, uint32_t *decoded_value)
 {
-	stream_pos = decode_normal(decoded_value, stream_pos, setup);
-	if (stream_pos < 0)
-		return stream_pos;
+	/* Decode the next value in the bitstream with the Golomb/Rice/unary decoder */
+	*decoded_value = setup->decode_cw_f(setup->dec, setup->encoder_par1, setup->encoder_par2);
 
-	if (*decoded_value >= setup->outlier_par) {
-		/* escape symbol mechanism was used; read unencoded value */
-		unsigned int unencoded_len = (*decoded_value - setup->outlier_par + 1) << 1;
-		uint32_t *unencoded_val = decoded_value;
+	if (*decoded_value >= setup->outlier_par) { /* escape symbol mechanism detected */
+		uint32_t const unencoded_len = (*decoded_value - setup->outlier_par + 1) << 1;
 
-		/* consistency check: length of the unencoded value can not be bigger than the maximum data length */
 		if (unencoded_len > ((setup->max_data_bits+1) & -2U)) { /* round up max_data_bits to the nearest multiple of 2 */
 			debug_print("Error: Data consistency check failed. Multi escape symbol higher than expected. %s", please_check_str);
-			return -1;
+			return CORRUPTION_DETECTED;
 		}
 
-		stream_pos = get_n_bits32(unencoded_val, unencoded_len, stream_pos,
-					  setup->bitstream_adr, setup->max_stream_len);
-		if (stream_pos < 0)
-			return stream_pos;
+		/* read unencoded value */
+		bit_refill(setup->dec);
+		*decoded_value = bit_read_bits32(setup->dec, unencoded_len);
 
-		/* consistency check: check if the unencoded value used the bits expected */
-		if (*unencoded_val >> (unencoded_len-2) == 0) { /* check if at least one bit of the two highest is set. */
-			if (unencoded_len > 2) { /* Exception: if we code outlier_par, no set bit is expected */
-				debug_print("Error: Data consistency check failed. Unencoded value after escape symbol to small. %s", please_check_str);
-				return -1;
+		if (*decoded_value >> (unencoded_len-2) == 0) { /* check if at least one bit of the two highest is set. */
+			if (unencoded_len > 2) { /* Exception: if we code outlier_par as outlier, no set bit is expected */
+				if (bit_refill(setup->dec) != BIT_OVERFLOW)
+					debug_print("Error: Data consistency check failed. Unencoded data after multi escape symbol to small. %s", please_check_str);
+				return CORRUPTION_DETECTED;
 			}
 		}
 
-		*decoded_value = *unencoded_val + setup->outlier_par;
+		*decoded_value += setup->outlier_par;
+
+		if ((*decoded_value & BIT_MASK[setup->max_data_bits]) < setup->outlier_par) { /* check for overflow in addition */
+			if (bit_refill(setup->dec) != BIT_OVERFLOW)
+				debug_print("Error: Data consistency check failed. Outlier small than the outlier parameter. %s", please_check_str);
+			return CORRUPTION_DETECTED;
+		}
 	}
-	return stream_pos;
+	return bit_refill(setup->dec) == BIT_OVERFLOW;
 }
 
 
@@ -413,23 +276,17 @@ static int decode_multi(uint32_t *decoded_value, int stream_pos,
  * @brief get the value unencoded with setup->cmp_par_1 bits without any
  *	additional changes from the bitstream
  *
- * @param decoded_value	pointer to the decoded value
- * @param stream_pos	start bit position code word to be decoded in the bitstream
  * @param setup		pointer to the decoder setup
+ * @param decoded_value	points to the location where the decoded value is stored
  *
- * @returns bit index of the next code word in the bitstream on success; returns
- *	negative in case of erroneous input; returns CMP_ERROR_SMALL_BUF if the
- *	bitstream buffer is too small to read the value from the bitstream
- *
+ * @returns 0 on success; otherwise error
  */
 
-static int decode_none(uint32_t *decoded_value, int stream_pos,
-		       const struct decoder_setup *setup)
+static int decode_none(const struct decoder_setup *setup, uint32_t *decoded_value)
 {
-	stream_pos = get_n_bits32(decoded_value, setup->encoder_par1, stream_pos,
-				  setup->bitstream_adr, setup->max_stream_len);
+	*decoded_value = bit_read_bits32(setup->dec, setup->encoder_par1);
 
-	return stream_pos;
+	return bit_refill(setup->dec) == BIT_OVERFLOW;
 }
 
 
@@ -442,12 +299,13 @@ static int decode_none(uint32_t *decoded_value, int stream_pos,
  * @returns the signed remapped value
  */
 
-static uint32_t re_map_to_pos(uint32_t value_to_unmap)
+static __inline uint32_t re_map_to_pos(uint32_t value_to_unmap)
 {
 	if (value_to_unmap & 0x1) { /* if uneven */
-		if (value_to_unmap == 0xFFFFFFFF) /* catch overflow */
-			return 0x80000000;
-		return -((value_to_unmap + 1) / 2);
+		/* uint64_t to prevent overflow if value_to_unmap == 0xFFFFFFFF */
+		uint64_t tmp64 = value_to_unmap;
+
+		return (uint32_t)(-((tmp64 + 1) / 2));
 	} else {
 		return value_to_unmap / 2;
 	}
@@ -458,29 +316,22 @@ static uint32_t re_map_to_pos(uint32_t value_to_unmap)
  * @brief decompress the next code word in the bitstream and decorate it with
  *	the model
  *
- * @param decoded_value	pointer to the decoded value
- * @param model		model of the decoded_value (0 if not used)
- * @param stream_pos	start bit position code word to be decoded in the bitstream
  * @param setup		pointer to the decoder setup
+ * @param decoded_value	points to the location where the decoded value is stored
+ * @param model		model of the decoded_value (0 if not used)
  *
- * @returns bit index of the next code word in the bitstream on success; returns
- *	negative in case of erroneous input; returns CMP_ERROR_SMALL_BUF if the
- *	bitstream buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decode_value(uint32_t *decoded_value, uint32_t model,
-			int stream_pos, const struct decoder_setup *setup)
+static int decode_value(const struct decoder_setup *setup, uint32_t *decoded_value,
+			uint32_t model)
 {
-	uint32_t mask = (~0U >> (32 - setup->max_data_bits)); /* mask the used bits */
-
 	/* decode the next value from the bitstream */
-	stream_pos = setup->decode_method_f(decoded_value, stream_pos, setup);
-	if (stream_pos <= 0)
-		return stream_pos;
+	int err = setup->decode_method_f(setup, decoded_value);
 
 	if (setup->decode_method_f == decode_none)
 		/* we are done here in stuff mode */
-		return stream_pos;
+		return err;
 
 	/* map the unsigned decode value back to a signed value */
 	*decoded_value = re_map_to_pos(*decoded_value);
@@ -489,12 +340,12 @@ static int decode_value(uint32_t *decoded_value, uint32_t model,
 	*decoded_value += round_fwd(model, setup->lossy_par);
 
 	/* we mask only the used bits in case there is an overflow when adding the model */
-	*decoded_value &= mask;
+	*decoded_value &= BIT_MASK[setup->max_data_bits];
 
 	/* inverse step of the lossy compression */
 	*decoded_value = round_inv(*decoded_value, setup->lossy_par);
 
-	return stream_pos;
+	return err;
 }
 
 
@@ -502,52 +353,41 @@ static int decode_value(uint32_t *decoded_value, uint32_t model,
  * @brief configure a decoder setup structure to have a setup to decode a vale
  *
  * @param setup		pointer to the decoder setup
+ * @param dec		pointer to a bit_decoder context
+ * @param cmp_mode	compression mode
  * @param cmp_par	compression parameter
  * @param spillover	spillover_par parameter
  * @param lossy_par	lossy compression parameter
  * @param max_data_bits	how many bits are needed to represent the highest possible value
- * @param cfg		pointer to the compression configuration structure
- *
- * @returns 0 on success; otherwise error
  */
 
-static int configure_decoder_setup(struct decoder_setup *setup,
-				   uint32_t cmp_par, uint32_t spillover,
-				   uint32_t lossy_par, uint32_t max_data_bits,
-				   const struct cmp_cfg *cfg)
+static void configure_decoder_setup(struct decoder_setup *setup, struct bit_decoder *dec,
+				    enum cmp_mode cmp_mode, uint32_t cmp_par,
+				    uint32_t spillover, uint32_t lossy_par,
+				    uint32_t max_data_bits)
 {
-	if (multi_escape_mech_is_used(cfg->cmp_mode))
+	assert(setup != NULL);
+	assert(dec != NULL);
+	assert(cmp_par != 0);
+	assert(max_data_bits > 0 && max_data_bits <= 32);
+
+	if (multi_escape_mech_is_used(cmp_mode))
 		setup->decode_method_f = &decode_multi;
-	else if (zero_escape_mech_is_used(cfg->cmp_mode))
+	else if (zero_escape_mech_is_used(cmp_mode))
 		setup->decode_method_f = &decode_zero;
-	else if (cfg->cmp_mode == CMP_MODE_STUFF)
+	else if (cmp_mode == CMP_MODE_STUFF)
 		setup->decode_method_f = &decode_none;
 	else {
-		setup->decode_method_f = NULL;
 		debug_print("Error: Compression mode not supported.\n");
-		return -1;
+		assert(0);
 	}
-
-	setup->bitstream_adr = cfg->icu_output_buf; /* start address of the compressed data bitstream */
-	if (cfg->buffer_length & 0x3) {
-		debug_print("Error: The length of the compressed data is not a multiple of 4 bytes.\n");
-		return -1;
-	}
-	setup->max_stream_len = (cfg->buffer_length) * CHAR_BIT;  /* maximum length of the bitstream/icu_output_buf in bits */
+	setup->decode_cw_f = select_decoder(cmp_par);
+	setup->dec = dec;
 	setup->encoder_par1 = cmp_par; /* encoding parameter 1 */
-	if (ilog_2(cmp_par) == -1U)
-		return -1;
 	setup->encoder_par2 = ilog_2(cmp_par); /* encoding parameter 2 */
 	setup->outlier_par = spillover; /* outlier parameter */
 	setup->lossy_par = lossy_par; /* lossy compression parameter */
 	setup->max_data_bits = max_data_bits; /* how many bits are needed to represent the highest possible value */
-	setup->decode_cw_f = select_decoder(cmp_par);
-	if (rdcu_supported_data_type_is_used(cfg->data_type))
-		setup->max_cw_len = MAX_CW_LEN_RDCU;
-	else
-		setup->max_cw_len = MAX_CW_LEN_ICU;
-
-	return 0;
 }
 
 
@@ -555,16 +395,15 @@ static int configure_decoder_setup(struct decoder_setup *setup,
  * @brief decompress imagette data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_imagette(struct cmp_cfg *cfg)
+static int decompress_imagette(struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	uint32_t max_data_bits;
 	struct decoder_setup setup;
@@ -600,14 +439,13 @@ static int decompress_imagette(struct cmp_cfg *cfg)
 		break;
 	}
 
-	if (configure_decoder_setup(&setup, cfg->golomb_par, cfg->spill,
-				    cfg->round, max_data_bits, cfg))
-		return -1;
+	configure_decoder_setup(&setup, dec, cfg->cmp_mode, cfg->golomb_par,
+				cfg->spill, cfg->round, max_data_bits);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model, stream_pos, &setup);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup, &decoded_value, model);
+		if (err)
+			break;
 		data_buf[i] = (__typeof__(data_buf[i]))decoded_value;
 
 		if (up_model_buf)
@@ -619,8 +457,7 @@ static int decompress_imagette(struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-
-	return stream_pos;
+	return err;
 }
 
 
@@ -671,16 +508,15 @@ static int decompress_multi_entry_hdr(void **data, void **model, void **up_model
  * @brief decompress short normal light flux (S_FX) data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_s_fx(const struct cmp_cfg *cfg)
+static int decompress_s_fx(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_exp_flags, setup_fx;
 	struct s_fx *data_buf = cfg->input_buf;
@@ -692,8 +528,9 @@ static int decompress_s_fx(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -703,24 +540,20 @@ static int decompress_s_fx(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_exp_flags, cfg->cmp_par_exp_flags, cfg->spill_exp_flags,
-				    cfg->round, cfg->max_used_bits->s_exp_flags, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_fx, cfg->cmp_par_fx, cfg->spill_fx,
-				    cfg->round, cfg->max_used_bits->s_fx, cfg))
-		return -1;
+	configure_decoder_setup(&setup_exp_flags, dec, cfg->cmp_mode, cfg->cmp_par_exp_flags,
+				cfg->spill_exp_flags, cfg->round, cfg->max_used_bits->s_exp_flags);
+	configure_decoder_setup(&setup_fx, dec, cfg->cmp_mode, cfg->cmp_par_fx,
+				cfg->spill_fx, cfg->round, cfg->max_used_bits->s_fx);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.exp_flags,
-					  stream_pos, &setup_exp_flags);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_exp_flags, &decoded_value, model.exp_flags);
+		if (err)
+			break;
 		data_buf[i].exp_flags = (__typeof__(data_buf[i].exp_flags))decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.fx, stream_pos,
-					  &setup_fx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx, &decoded_value, model.fx);
+		if (err)
+			break;
 		data_buf[i].fx = decoded_value;
 
 		if (up_model_buf) {
@@ -735,7 +568,7 @@ static int decompress_s_fx(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -743,16 +576,15 @@ static int decompress_s_fx(const struct cmp_cfg *cfg)
  * @brief decompress S_FX_EFX data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_s_fx_efx(const struct cmp_cfg *cfg)
+static int decompress_s_fx_efx(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_exp_flags, setup_fx, setup_efx;
 	struct s_fx_efx *data_buf = cfg->input_buf;
@@ -764,8 +596,9 @@ static int decompress_s_fx_efx(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -775,33 +608,27 @@ static int decompress_s_fx_efx(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_exp_flags, cfg->cmp_par_exp_flags, cfg->spill_exp_flags,
-				    cfg->round, cfg->max_used_bits->s_exp_flags, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_fx, cfg->cmp_par_fx, cfg->spill_fx,
-				    cfg->round, cfg->max_used_bits->s_fx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_efx, cfg->cmp_par_efx, cfg->spill_efx,
-				    cfg->round, cfg->max_used_bits->s_efx, cfg))
-		return -1;
+	configure_decoder_setup(&setup_exp_flags, dec, cfg->cmp_mode, cfg->cmp_par_exp_flags,
+				cfg->spill_exp_flags, cfg->round, cfg->max_used_bits->s_exp_flags);
+	configure_decoder_setup(&setup_fx, dec, cfg->cmp_mode, cfg->cmp_par_fx,
+				cfg->spill_fx, cfg->round, cfg->max_used_bits->s_fx);
+	configure_decoder_setup(&setup_efx, dec, cfg->cmp_mode, cfg->cmp_par_efx,
+				cfg->spill_efx, cfg->round, cfg->max_used_bits->s_efx);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.exp_flags,
-					  stream_pos, &setup_exp_flags);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_exp_flags, &decoded_value, model.exp_flags);
+		if (err)
+			break;
 		data_buf[i].exp_flags = (__typeof__(data_buf[i].exp_flags)) decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.fx, stream_pos,
-					  &setup_fx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx, &decoded_value, model.fx);
+		if (err)
+			break;
 		data_buf[i].fx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.efx, stream_pos,
-					  &setup_efx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_efx, &decoded_value, model.efx);
+		if (err)
+			break;
 		data_buf[i].efx = decoded_value;
 
 		if (up_model_buf) {
@@ -818,7 +645,7 @@ static int decompress_s_fx_efx(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -826,16 +653,15 @@ static int decompress_s_fx_efx(const struct cmp_cfg *cfg)
  * @brief decompress short S_FX_NCOB data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_s_fx_ncob(const struct cmp_cfg *cfg)
+static int decompress_s_fx_ncob(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_exp_flags, setup_fx, setup_ncob;
 	struct s_fx_ncob *data_buf = cfg->input_buf;
@@ -847,8 +673,9 @@ static int decompress_s_fx_ncob(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -858,39 +685,32 @@ static int decompress_s_fx_ncob(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_exp_flags, cfg->cmp_par_exp_flags, cfg->spill_exp_flags,
-				    cfg->round, cfg->max_used_bits->s_exp_flags, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_fx, cfg->cmp_par_fx, cfg->spill_fx,
-				    cfg->round, cfg->max_used_bits->s_fx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_ncob, cfg->cmp_par_ncob, cfg->spill_ncob,
-				    cfg->round, cfg->max_used_bits->s_ncob, cfg))
-		return -1;
+	configure_decoder_setup(&setup_exp_flags, dec, cfg->cmp_mode, cfg->cmp_par_exp_flags,
+				cfg->spill_exp_flags, cfg->round, cfg->max_used_bits->s_exp_flags);
+	configure_decoder_setup(&setup_fx, dec, cfg->cmp_mode, cfg->cmp_par_fx,
+				cfg->spill_fx, cfg->round, cfg->max_used_bits->s_fx);
+	configure_decoder_setup(&setup_ncob, dec, cfg->cmp_mode, cfg->cmp_par_ncob,
+				cfg->spill_ncob, cfg->round, cfg->max_used_bits->s_ncob);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.exp_flags,
-					  stream_pos, &setup_exp_flags);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_exp_flags, &decoded_value, model.exp_flags);
+		if (err)
+			break;
 		data_buf[i].exp_flags = (__typeof__(data_buf[i].exp_flags)) decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.fx, stream_pos,
-					  &setup_fx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx, &decoded_value, model.fx);
+		if (err)
+			break;
 		data_buf[i].fx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ncob_x, stream_pos,
-					  &setup_ncob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ncob, &decoded_value, model.ncob_x);
+		if (err)
+			break;
 		data_buf[i].ncob_x = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ncob_y, stream_pos,
-					  &setup_ncob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ncob, &decoded_value, model.ncob_y);
+		if (err)
+			break;
 		data_buf[i].ncob_y = decoded_value;
 
 		if (up_model_buf) {
@@ -909,7 +729,7 @@ static int decompress_s_fx_ncob(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -917,16 +737,15 @@ static int decompress_s_fx_ncob(const struct cmp_cfg *cfg)
  * @brief decompress short S_FX_NCOB_ECOB data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_s_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
+static int decompress_s_fx_efx_ncob_ecob(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_exp_flags, setup_fx, setup_ncob, setup_efx, setup_ecob;
 	struct s_fx_efx_ncob_ecob *data_buf = cfg->input_buf;
@@ -938,8 +757,9 @@ static int decompress_s_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -949,63 +769,51 @@ static int decompress_s_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_exp_flags, cfg->cmp_par_exp_flags, cfg->spill_exp_flags,
-				    cfg->round, cfg->max_used_bits->s_exp_flags, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_fx, cfg->cmp_par_fx, cfg->spill_fx,
-				    cfg->round, cfg->max_used_bits->s_fx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_ncob, cfg->cmp_par_ncob, cfg->spill_ncob,
-				    cfg->round, cfg->max_used_bits->s_ncob, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_efx, cfg->cmp_par_efx, cfg->spill_efx,
-				    cfg->round, cfg->max_used_bits->s_efx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_ecob, cfg->cmp_par_ecob, cfg->spill_ecob,
-				    cfg->round, cfg->max_used_bits->s_ecob, cfg))
-		return -1;
+	configure_decoder_setup(&setup_exp_flags, dec, cfg->cmp_mode, cfg->cmp_par_exp_flags,
+				cfg->spill_exp_flags, cfg->round, cfg->max_used_bits->s_exp_flags);
+	configure_decoder_setup(&setup_fx, dec, cfg->cmp_mode, cfg->cmp_par_fx, cfg->spill_fx,
+				cfg->round, cfg->max_used_bits->s_fx);
+	configure_decoder_setup(&setup_ncob, dec, cfg->cmp_mode, cfg->cmp_par_ncob, cfg->spill_ncob,
+				cfg->round, cfg->max_used_bits->s_ncob);
+	configure_decoder_setup(&setup_efx, dec, cfg->cmp_mode, cfg->cmp_par_efx, cfg->spill_efx,
+				cfg->round, cfg->max_used_bits->s_efx);
+	configure_decoder_setup(&setup_ecob, dec, cfg->cmp_mode, cfg->cmp_par_ecob, cfg->spill_ecob,
+				cfg->round, cfg->max_used_bits->s_ecob);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.exp_flags,
-					  stream_pos, &setup_exp_flags);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_exp_flags, &decoded_value, model.exp_flags);
+		if (err)
+			break;
 		data_buf[i].exp_flags = (__typeof__(data_buf[i].exp_flags)) decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.fx, stream_pos,
-					  &setup_fx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx, &decoded_value, model.fx);
+		if (err)
+			break;
 		data_buf[i].fx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ncob_x, stream_pos,
-					  &setup_ncob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ncob, &decoded_value, model.ncob_x);
+		if (err)
+			break;
 		data_buf[i].ncob_x = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ncob_y, stream_pos,
-					  &setup_ncob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ncob, &decoded_value, model.ncob_y);
+		if (err)
+			break;
 		data_buf[i].ncob_y = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.efx, stream_pos,
-					  &setup_efx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_efx, &decoded_value, model.efx);
+		if (err)
+			break;
 		data_buf[i].efx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ecob_x, stream_pos,
-					  &setup_ecob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ecob, &decoded_value, model.ecob_x);
+		if (err)
+			break;
 		data_buf[i].ecob_x = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ecob_y, stream_pos,
-					  &setup_ecob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ecob, &decoded_value, model.ecob_y);
+		if (err)
+			break;
 		data_buf[i].ecob_y = decoded_value;
 
 		if (up_model_buf) {
@@ -1030,7 +838,7 @@ static int decompress_s_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -1038,16 +846,15 @@ static int decompress_s_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
  * @brief decompress fast normal light flux (F_FX) data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_f_fx(const struct cmp_cfg *cfg)
+static int decompress_f_fx(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_fx;
 	struct f_fx *data_buf = cfg->input_buf;
@@ -1059,8 +866,9 @@ static int decompress_f_fx(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -1070,15 +878,13 @@ static int decompress_f_fx(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_fx, cfg->cmp_par_fx, cfg->spill_fx,
-				    cfg->round, cfg->max_used_bits->f_fx, cfg))
-		return -1;
+	configure_decoder_setup(&setup_fx, dec, cfg->cmp_mode, cfg->cmp_par_fx, cfg->spill_fx,
+				cfg->round, cfg->max_used_bits->f_fx);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.fx, stream_pos,
-					  &setup_fx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx, &decoded_value, model.fx);
+		if (err)
+			break;
 		data_buf[i].fx = decoded_value;
 
 		if (up_model_buf)
@@ -1090,7 +896,7 @@ static int decompress_f_fx(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -1098,16 +904,15 @@ static int decompress_f_fx(const struct cmp_cfg *cfg)
  * @brief decompress F_FX_EFX data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_f_fx_efx(const struct cmp_cfg *cfg)
+static int decompress_f_fx_efx(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_fx, setup_efx;
 	struct f_fx_efx *data_buf = cfg->input_buf;
@@ -1119,8 +924,9 @@ static int decompress_f_fx_efx(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -1130,24 +936,20 @@ static int decompress_f_fx_efx(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_fx, cfg->cmp_par_fx, cfg->spill_fx,
-				    cfg->round, cfg->max_used_bits->f_fx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_efx, cfg->cmp_par_efx, cfg->spill_efx,
-				    cfg->round, cfg->max_used_bits->f_efx, cfg))
-		return -1;
+	configure_decoder_setup(&setup_fx, dec, cfg->cmp_mode, cfg->cmp_par_fx, cfg->spill_fx,
+				cfg->round, cfg->max_used_bits->f_fx);
+	configure_decoder_setup(&setup_efx, dec, cfg->cmp_mode, cfg->cmp_par_efx, cfg->spill_efx,
+				cfg->round, cfg->max_used_bits->f_efx);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.fx, stream_pos,
-					  &setup_fx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx, &decoded_value, model.fx);
+		if (err)
+			break;
 		data_buf[i].fx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.efx, stream_pos,
-					  &setup_efx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_efx, &decoded_value, model.efx);
+		if (err)
+			break;
 		data_buf[i].efx = decoded_value;
 
 		if (up_model_buf) {
@@ -1162,7 +964,7 @@ static int decompress_f_fx_efx(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -1170,16 +972,15 @@ static int decompress_f_fx_efx(const struct cmp_cfg *cfg)
  * @brief decompress short F_FX_NCOB data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_f_fx_ncob(const struct cmp_cfg *cfg)
+static int decompress_f_fx_ncob(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_fx, setup_ncob;
 	struct f_fx_ncob *data_buf = cfg->input_buf;
@@ -1191,8 +992,9 @@ static int decompress_f_fx_ncob(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -1202,30 +1004,25 @@ static int decompress_f_fx_ncob(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_fx, cfg->cmp_par_fx, cfg->spill_fx,
-				    cfg->round, cfg->max_used_bits->f_fx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_ncob, cfg->cmp_par_ncob, cfg->spill_ncob,
-				    cfg->round, cfg->max_used_bits->f_ncob, cfg))
-		return -1;
+	configure_decoder_setup(&setup_fx, dec, cfg->cmp_mode, cfg->cmp_par_fx, cfg->spill_fx,
+				cfg->round, cfg->max_used_bits->f_fx);
+	configure_decoder_setup(&setup_ncob, dec, cfg->cmp_mode, cfg->cmp_par_ncob, cfg->spill_ncob,
+				cfg->round, cfg->max_used_bits->f_ncob);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.fx, stream_pos,
-					  &setup_fx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx, &decoded_value, model.fx);
+		if (err)
+			break;
 		data_buf[i].fx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ncob_x, stream_pos,
-					  &setup_ncob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ncob, &decoded_value, model.ncob_x);
+		if (err)
+			break;
 		data_buf[i].ncob_x = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ncob_y, stream_pos,
-					  &setup_ncob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ncob, &decoded_value, model.ncob_y);
+		if (err)
+			break;
 		data_buf[i].ncob_y = decoded_value;
 
 		if (up_model_buf) {
@@ -1242,7 +1039,7 @@ static int decompress_f_fx_ncob(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -1250,16 +1047,15 @@ static int decompress_f_fx_ncob(const struct cmp_cfg *cfg)
  * @brief decompress short F_FX_NCOB_ECOB data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_f_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
+static int decompress_f_fx_efx_ncob_ecob(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_fx, setup_ncob, setup_efx, setup_ecob;
 	struct f_fx_efx_ncob_ecob *data_buf = cfg->input_buf;
@@ -1271,8 +1067,9 @@ static int decompress_f_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -1282,54 +1079,44 @@ static int decompress_f_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_fx, cfg->cmp_par_fx, cfg->spill_fx,
-				    cfg->round, cfg->max_used_bits->f_fx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_ncob, cfg->cmp_par_ncob, cfg->spill_ncob,
-				    cfg->round, cfg->max_used_bits->f_ncob, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_efx, cfg->cmp_par_efx, cfg->spill_efx,
-				    cfg->round, cfg->max_used_bits->f_efx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_ecob, cfg->cmp_par_ecob, cfg->spill_ecob,
-				    cfg->round, cfg->max_used_bits->f_ecob, cfg))
-		return -1;
+	configure_decoder_setup(&setup_fx, dec, cfg->cmp_mode, cfg->cmp_par_fx, cfg->spill_fx,
+				cfg->round, cfg->max_used_bits->f_fx);
+	configure_decoder_setup(&setup_ncob, dec, cfg->cmp_mode, cfg->cmp_par_ncob, cfg->spill_ncob,
+				cfg->round, cfg->max_used_bits->f_ncob);
+	configure_decoder_setup(&setup_efx, dec, cfg->cmp_mode, cfg->cmp_par_efx, cfg->spill_efx,
+				cfg->round, cfg->max_used_bits->f_efx);
+	configure_decoder_setup(&setup_ecob, dec, cfg->cmp_mode, cfg->cmp_par_ecob, cfg->spill_ecob,
+				cfg->round, cfg->max_used_bits->f_ecob);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.fx, stream_pos,
-					  &setup_fx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx, &decoded_value, model.fx);
+		if (err)
+			break;
 		data_buf[i].fx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ncob_x, stream_pos,
-					  &setup_ncob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ncob, &decoded_value, model.ncob_x);
+		if (err)
+			break;
 		data_buf[i].ncob_x = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ncob_y, stream_pos,
-					  &setup_ncob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ncob, &decoded_value, model.ncob_y);
+		if (err)
+			break;
 		data_buf[i].ncob_y = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.efx, stream_pos,
-					  &setup_efx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_efx, &decoded_value, model.efx);
+		if (err)
+			break;
 		data_buf[i].efx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ecob_x, stream_pos,
-					  &setup_ecob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ecob, &decoded_value, model.ecob_x);
+		if (err)
+			break;
 		data_buf[i].ecob_x = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ecob_y, stream_pos,
-					  &setup_ecob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ecob, &decoded_value, model.ecob_y);
+		if (err)
+			break;
 		data_buf[i].ecob_y = decoded_value;
 
 		if (up_model_buf) {
@@ -1352,7 +1139,7 @@ static int decompress_f_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -1360,16 +1147,15 @@ static int decompress_f_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
  * @brief decompress long normal light flux (L_FX) data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_l_fx(const struct cmp_cfg *cfg)
+static int decompress_l_fx(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_exp_flags, setup_fx, setup_fx_var;
 	struct l_fx *data_buf = cfg->input_buf;
@@ -1381,8 +1167,9 @@ static int decompress_l_fx(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -1392,33 +1179,27 @@ static int decompress_l_fx(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_exp_flags, cfg->cmp_par_exp_flags, cfg->spill_exp_flags,
-				    cfg->round, cfg->max_used_bits->l_exp_flags, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_fx, cfg->cmp_par_fx, cfg->spill_fx,
-				    cfg->round, cfg->max_used_bits->l_fx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_fx_var, cfg->cmp_par_fx_cob_variance, cfg->spill_fx_cob_variance,
-				    cfg->round, cfg->max_used_bits->l_fx_variance, cfg))
-		return -1;
+	configure_decoder_setup(&setup_exp_flags, dec, cfg->cmp_mode, cfg->cmp_par_exp_flags, cfg->spill_exp_flags,
+				cfg->round, cfg->max_used_bits->l_exp_flags);
+	configure_decoder_setup(&setup_fx, dec, cfg->cmp_mode, cfg->cmp_par_fx, cfg->spill_fx,
+				cfg->round, cfg->max_used_bits->l_fx);
+	configure_decoder_setup(&setup_fx_var, dec, cfg->cmp_mode, cfg->cmp_par_fx_cob_variance, cfg->spill_fx_cob_variance,
+				cfg->round, cfg->max_used_bits->l_fx_variance);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.exp_flags,
-					  stream_pos, &setup_exp_flags);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_exp_flags, &decoded_value, model.exp_flags);
+		if (err)
+			break;
 		data_buf[i].exp_flags = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.fx, stream_pos,
-					  &setup_fx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx, &decoded_value, model.fx);
+		if (err)
+			break;
 		data_buf[i].fx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.fx_variance, stream_pos,
-					  &setup_fx_var);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx_var, &decoded_value, model.fx_variance);
+		if (err)
+			break;
 		data_buf[i].fx_variance = decoded_value;
 
 		if (up_model_buf) {
@@ -1435,7 +1216,7 @@ static int decompress_l_fx(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -1443,16 +1224,15 @@ static int decompress_l_fx(const struct cmp_cfg *cfg)
  * @brief decompress L_FX_EFX data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_l_fx_efx(const struct cmp_cfg *cfg)
+static int decompress_l_fx_efx(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_exp_flags, setup_fx, setup_efx, setup_fx_var;
 	struct l_fx_efx *data_buf = cfg->input_buf;
@@ -1464,8 +1244,9 @@ static int decompress_l_fx_efx(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -1475,42 +1256,34 @@ static int decompress_l_fx_efx(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_exp_flags, cfg->cmp_par_exp_flags, cfg->spill_exp_flags,
-				    cfg->round, cfg->max_used_bits->l_exp_flags, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_fx, cfg->cmp_par_fx, cfg->spill_fx,
-				    cfg->round, cfg->max_used_bits->l_fx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_efx, cfg->cmp_par_efx, cfg->spill_efx,
-				    cfg->round, cfg->max_used_bits->l_efx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_fx_var, cfg->cmp_par_fx_cob_variance, cfg->spill_fx_cob_variance,
-				    cfg->round, cfg->max_used_bits->l_fx_variance, cfg))
-		return -1;
+	configure_decoder_setup(&setup_exp_flags, dec, cfg->cmp_mode, cfg->cmp_par_exp_flags, cfg->spill_exp_flags,
+				cfg->round, cfg->max_used_bits->l_exp_flags);
+	configure_decoder_setup(&setup_fx, dec, cfg->cmp_mode, cfg->cmp_par_fx, cfg->spill_fx,
+				cfg->round, cfg->max_used_bits->l_fx);
+	configure_decoder_setup(&setup_efx, dec, cfg->cmp_mode, cfg->cmp_par_efx, cfg->spill_efx,
+				cfg->round, cfg->max_used_bits->l_efx);
+	configure_decoder_setup(&setup_fx_var, dec, cfg->cmp_mode, cfg->cmp_par_fx_cob_variance, cfg->spill_fx_cob_variance,
+				cfg->round, cfg->max_used_bits->l_fx_variance);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.exp_flags,
-					  stream_pos, &setup_exp_flags);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_exp_flags, &decoded_value, model.exp_flags);
+		if (err)
+			break;
 		data_buf[i].exp_flags = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.fx, stream_pos,
-					  &setup_fx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx, &decoded_value, model.fx);
+		if (err)
+			break;
 		data_buf[i].fx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.efx, stream_pos,
-					  &setup_efx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_efx, &decoded_value, model.efx);
+		if (err)
+			break;
 		data_buf[i].efx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.fx_variance, stream_pos,
-					  &setup_fx_var);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx_var, &decoded_value, model.fx_variance);
+		if (err)
+			break;
 		data_buf[i].fx_variance = decoded_value;
 
 		if (up_model_buf) {
@@ -1529,7 +1302,7 @@ static int decompress_l_fx_efx(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -1537,16 +1310,15 @@ static int decompress_l_fx_efx(const struct cmp_cfg *cfg)
  * @brief decompress L_FX_NCOB data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_l_fx_ncob(const struct cmp_cfg *cfg)
+static int decompress_l_fx_ncob(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_exp_flags, setup_fx, setup_ncob,
 			     setup_fx_var, setup_cob_var;
@@ -1559,8 +1331,9 @@ static int decompress_l_fx_ncob(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -1570,63 +1343,51 @@ static int decompress_l_fx_ncob(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_exp_flags, cfg->cmp_par_exp_flags, cfg->spill_exp_flags,
-				    cfg->round, cfg->max_used_bits->l_exp_flags, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_fx, cfg->cmp_par_fx, cfg->spill_fx,
-				    cfg->round, cfg->max_used_bits->l_fx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_ncob, cfg->cmp_par_ncob, cfg->spill_ncob,
-				    cfg->round, cfg->max_used_bits->l_ncob, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_fx_var, cfg->cmp_par_fx_cob_variance, cfg->spill_fx_cob_variance,
-				    cfg->round, cfg->max_used_bits->l_fx_variance, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_cob_var, cfg->cmp_par_fx_cob_variance, cfg->spill_fx_cob_variance,
-				    cfg->round, cfg->max_used_bits->l_cob_variance, cfg))
-		return -1;
+	configure_decoder_setup(&setup_exp_flags, dec, cfg->cmp_mode, cfg->cmp_par_exp_flags, cfg->spill_exp_flags,
+				cfg->round, cfg->max_used_bits->l_exp_flags);
+	configure_decoder_setup(&setup_fx, dec, cfg->cmp_mode, cfg->cmp_par_fx, cfg->spill_fx,
+				cfg->round, cfg->max_used_bits->l_fx);
+	configure_decoder_setup(&setup_ncob, dec, cfg->cmp_mode, cfg->cmp_par_ncob, cfg->spill_ncob,
+				cfg->round, cfg->max_used_bits->l_ncob);
+	configure_decoder_setup(&setup_fx_var, dec, cfg->cmp_mode, cfg->cmp_par_fx_cob_variance, cfg->spill_fx_cob_variance,
+				cfg->round, cfg->max_used_bits->l_fx_variance);
+	configure_decoder_setup(&setup_cob_var, dec, cfg->cmp_mode, cfg->cmp_par_fx_cob_variance, cfg->spill_fx_cob_variance,
+				cfg->round, cfg->max_used_bits->l_cob_variance);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.exp_flags,
-					  stream_pos, &setup_exp_flags);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_exp_flags, &decoded_value, model.exp_flags);
+		if (err)
+			break;
 		data_buf[i].exp_flags = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.fx, stream_pos,
-					  &setup_fx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx, &decoded_value, model.fx);
+		if (err)
+			break;
 		data_buf[i].fx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ncob_x,
-					  stream_pos, &setup_ncob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ncob, &decoded_value, model.ncob_x);
+		if (err)
+			break;
 		data_buf[i].ncob_x = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ncob_y,
-					  stream_pos, &setup_ncob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ncob, &decoded_value, model.ncob_y);
+		if (err)
+			break;
 		data_buf[i].ncob_y = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.fx_variance,
-					  stream_pos, &setup_fx_var);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx_var, &decoded_value, model.fx_variance);
+		if (err)
+			break;
 		data_buf[i].fx_variance = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.cob_x_variance,
-					  stream_pos, &setup_cob_var);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_cob_var, &decoded_value, model.cob_x_variance);
+		if (err)
+			break;
 		data_buf[i].cob_x_variance = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.cob_y_variance,
-					  stream_pos, &setup_cob_var);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_cob_var, &decoded_value, model.cob_y_variance);
+		if (err)
+			break;
 		data_buf[i].cob_y_variance = decoded_value;
 
 		if (up_model_buf) {
@@ -1651,7 +1412,7 @@ static int decompress_l_fx_ncob(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -1659,16 +1420,15 @@ static int decompress_l_fx_ncob(const struct cmp_cfg *cfg)
  * @brief decompress L_FX_EFX_NCOB_ECOB data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_l_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
+static int decompress_l_fx_efx_ncob_ecob(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_exp_flags, setup_fx, setup_ncob, setup_efx,
 			     setup_ecob, setup_fx_var, setup_cob_var;
@@ -1681,8 +1441,9 @@ static int decompress_l_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -1692,87 +1453,70 @@ static int decompress_l_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_exp_flags, cfg->cmp_par_exp_flags, cfg->spill_exp_flags,
-				    cfg->round, cfg->max_used_bits->l_exp_flags, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_fx, cfg->cmp_par_fx, cfg->spill_fx,
-				    cfg->round, cfg->max_used_bits->l_fx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_ncob, cfg->cmp_par_ncob, cfg->spill_ncob,
-				    cfg->round, cfg->max_used_bits->l_ncob, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_efx, cfg->cmp_par_efx, cfg->spill_efx,
-				    cfg->round, cfg->max_used_bits->l_efx, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_ecob, cfg->cmp_par_ecob, cfg->spill_ecob,
-				    cfg->round, cfg->max_used_bits->l_ecob, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_fx_var, cfg->cmp_par_fx_cob_variance, cfg->spill_fx_cob_variance,
-				    cfg->round, cfg->max_used_bits->l_fx_variance, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_cob_var, cfg->cmp_par_fx_cob_variance, cfg->spill_fx_cob_variance,
-				    cfg->round, cfg->max_used_bits->l_cob_variance, cfg))
-		return -1;
+	configure_decoder_setup(&setup_exp_flags, dec, cfg->cmp_mode, cfg->cmp_par_exp_flags, cfg->spill_exp_flags,
+				cfg->round, cfg->max_used_bits->l_exp_flags);
+	configure_decoder_setup(&setup_fx, dec, cfg->cmp_mode, cfg->cmp_par_fx, cfg->spill_fx,
+				cfg->round, cfg->max_used_bits->l_fx);
+	configure_decoder_setup(&setup_ncob, dec, cfg->cmp_mode, cfg->cmp_par_ncob, cfg->spill_ncob,
+				cfg->round, cfg->max_used_bits->l_ncob);
+	configure_decoder_setup(&setup_efx, dec, cfg->cmp_mode, cfg->cmp_par_efx, cfg->spill_efx,
+				cfg->round, cfg->max_used_bits->l_efx);
+	configure_decoder_setup(&setup_ecob, dec, cfg->cmp_mode, cfg->cmp_par_ecob, cfg->spill_ecob,
+				cfg->round, cfg->max_used_bits->l_ecob);
+	configure_decoder_setup(&setup_fx_var, dec, cfg->cmp_mode, cfg->cmp_par_fx_cob_variance, cfg->spill_fx_cob_variance,
+				cfg->round, cfg->max_used_bits->l_fx_variance);
+	configure_decoder_setup(&setup_cob_var, dec, cfg->cmp_mode, cfg->cmp_par_fx_cob_variance, cfg->spill_fx_cob_variance,
+				cfg->round, cfg->max_used_bits->l_cob_variance);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.exp_flags,
-					  stream_pos, &setup_exp_flags);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_exp_flags, &decoded_value, model.exp_flags);
+		if (err)
+			break;
 		data_buf[i].exp_flags = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.fx, stream_pos,
-					  &setup_fx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx, &decoded_value, model.fx);
+		if (err)
+			break;
 		data_buf[i].fx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ncob_x,
-					  stream_pos, &setup_ncob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ncob, &decoded_value, model.ncob_x);
+		if (err)
+			break;
 		data_buf[i].ncob_x = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ncob_y,
-					  stream_pos, &setup_ncob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ncob, &decoded_value, model.ncob_y);
+		if (err)
+			break;
 		data_buf[i].ncob_y = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.efx, stream_pos,
-					  &setup_efx);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_efx, &decoded_value, model.efx);
+		if (err)
+			break;
 		data_buf[i].efx = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ecob_x,
-					  stream_pos, &setup_ecob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ecob, &decoded_value, model.ecob_x);
+		if (err)
+			break;
 		data_buf[i].ecob_x = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.ecob_y,
-					  stream_pos, &setup_ecob);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_ecob, &decoded_value, model.ecob_y);
+		if (err)
+			break;
 		data_buf[i].ecob_y = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.fx_variance,
-					  stream_pos, &setup_fx_var);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_fx_var, &decoded_value, model.fx_variance);
+		if (err)
+			break;
 		data_buf[i].fx_variance = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.cob_x_variance,
-					  stream_pos, &setup_cob_var);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_cob_var, &decoded_value, model.cob_x_variance);
+		if (err)
+			break;
 		data_buf[i].cob_x_variance = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.cob_y_variance,
-					  stream_pos, &setup_cob_var);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_cob_var, &decoded_value, model.cob_y_variance);
+		if (err)
+			break;
 		data_buf[i].cob_y_variance = decoded_value;
 
 		if (up_model_buf) {
@@ -1803,7 +1547,7 @@ static int decompress_l_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -1811,16 +1555,15 @@ static int decompress_l_fx_efx_ncob_ecob(const struct cmp_cfg *cfg)
  * @brief decompress N-CAM and F-CAM offset data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_offset(const struct cmp_cfg *cfg)
+static int decompress_offset(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_mean, setup_var;
 	struct offset *data_buf = cfg->input_buf;
@@ -1832,8 +1575,9 @@ static int decompress_offset(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -1857,25 +1601,23 @@ static int decompress_offset(const struct cmp_cfg *cfg)
 			variance_bits_used = cfg->max_used_bits->nc_offset_variance;
 			break;
 		}
-		if (configure_decoder_setup(&setup_mean, cfg->cmp_par_mean, cfg->spill_mean,
-					    cfg->round, mean_bits_used, cfg))
-			return -1;
-		if (configure_decoder_setup(&setup_var, cfg->cmp_par_variance, cfg->spill_variance,
-					    cfg->round, variance_bits_used, cfg))
-			return -1;
+		configure_decoder_setup(&setup_mean, dec, cfg->cmp_mode, cfg->cmp_par_mean, cfg->spill_mean,
+					cfg->round, mean_bits_used);
+
+		configure_decoder_setup(&setup_var, dec, cfg->cmp_mode, cfg->cmp_par_variance, cfg->spill_variance,
+					cfg->round, variance_bits_used);
+
 	}
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.mean, stream_pos,
-					  &setup_mean);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_mean, &decoded_value, model.mean);
+		if (err)
+			break;
 		data_buf[i].mean = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.variance, stream_pos,
-					  &setup_var);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_var, &decoded_value, model.variance);
+		if (err)
+			break;
 		data_buf[i].variance = decoded_value;
 
 		if (up_model_buf) {
@@ -1890,7 +1632,7 @@ static int decompress_offset(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -1898,16 +1640,15 @@ static int decompress_offset(const struct cmp_cfg *cfg)
  * @brief decompress N-CAM background data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_background(const struct cmp_cfg *cfg)
+static int decompress_background(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_mean, setup_var, setup_pix;
 	struct background *data_buf = cfg->input_buf;
@@ -1919,8 +1660,9 @@ static int decompress_background(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -1946,34 +1688,31 @@ static int decompress_background(const struct cmp_cfg *cfg)
 			break;
 		}
 
-		if (configure_decoder_setup(&setup_mean, cfg->cmp_par_mean, cfg->spill_mean,
-					    cfg->round, mean_used_bits, cfg))
-			return -1;
-		if (configure_decoder_setup(&setup_var, cfg->cmp_par_variance, cfg->spill_variance,
-					    cfg->round, variance_used_bits, cfg))
-			return -1;
-		if (configure_decoder_setup(&setup_pix, cfg->cmp_par_pixels_error, cfg->spill_pixels_error,
-					    cfg->round, outlier_pixels_used_bits , cfg))
-			return -1;
+		configure_decoder_setup(&setup_mean, dec, cfg->cmp_mode, cfg->cmp_par_mean, cfg->spill_mean,
+					cfg->round, mean_used_bits);
+
+		configure_decoder_setup(&setup_var, dec, cfg->cmp_mode, cfg->cmp_par_variance, cfg->spill_variance,
+					cfg->round, variance_used_bits);
+
+		configure_decoder_setup(&setup_pix, dec, cfg->cmp_mode, cfg->cmp_par_pixels_error, cfg->spill_pixels_error,
+					cfg->round, outlier_pixels_used_bits);
+
 	}
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.mean, stream_pos,
-					  &setup_mean);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_mean, &decoded_value, model.mean);
+		if (err)
+			break;
 		data_buf[i].mean = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.variance, stream_pos,
-					  &setup_var);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_var, &decoded_value, model.variance);
+		if (err)
+			break;
 		data_buf[i].variance = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.outlier_pixels, stream_pos,
-					  &setup_pix);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_pix, &decoded_value, model.outlier_pixels);
+		if (err)
+			break;
 		data_buf[i].outlier_pixels = (__typeof__(data_buf[i].outlier_pixels))decoded_value;
 
 		if (up_model_buf) {
@@ -1990,7 +1729,7 @@ static int decompress_background(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -1998,16 +1737,15 @@ static int decompress_background(const struct cmp_cfg *cfg)
  * @brief decompress N-CAM smearing data
  *
  * @param cfg	pointer to the compression configuration structure
+ * @param dec	a pointer to a bit_decoder context
  *
- * @returns bit position of the last read bit in the bitstream on success;
- *	returns negative on error, returns CMP_ERROR_SMALL_BUF if the bitstream
- *	buffer is too small to read the value from the bitstream
+ * @returns 0 on success; otherwise error
  */
 
-static int decompress_smearing(const struct cmp_cfg *cfg)
+static int decompress_smearing(const struct cmp_cfg *cfg, struct bit_decoder *dec)
 {
 	size_t i;
-	int stream_pos = 0;
+	int err;
 	uint32_t decoded_value;
 	struct decoder_setup setup_mean, setup_var, setup_pix;
 	struct smearing *data_buf = cfg->input_buf;
@@ -2019,8 +1757,9 @@ static int decompress_smearing(const struct cmp_cfg *cfg)
 	if (model_mode_is_used(cfg->cmp_mode))
 		up_model_buf = cfg->icu_new_model_buf;
 
-	stream_pos = decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
-						(void **)&up_model_buf, cfg);
+	decompress_multi_entry_hdr((void **)&data_buf, (void **)&model_buf,
+				   (void **)&up_model_buf, cfg);
+	bit_init_decoder(dec, (uint8_t *)cfg->icu_output_buf+MULTI_ENTRY_HDR_SIZE, cfg->buffer_length-MULTI_ENTRY_HDR_SIZE);
 
 	if (model_mode_is_used(cfg->cmp_mode)) {
 		model = model_buf[0];
@@ -2030,33 +1769,27 @@ static int decompress_smearing(const struct cmp_cfg *cfg)
 		next_model_p = data_buf;
 	}
 
-	if (configure_decoder_setup(&setup_mean, cfg->cmp_par_mean, cfg->spill_mean,
-				    cfg->round, cfg->max_used_bits->smearing_mean, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_var, cfg->cmp_par_variance, cfg->spill_variance,
-				    cfg->round, cfg->max_used_bits->smearing_variance_mean, cfg))
-		return -1;
-	if (configure_decoder_setup(&setup_pix, cfg->cmp_par_pixels_error, cfg->spill_pixels_error,
-				    cfg->round, cfg->max_used_bits->smearing_outlier_pixels, cfg))
-		return -1;
+	configure_decoder_setup(&setup_mean, dec, cfg->cmp_mode, cfg->cmp_par_mean, cfg->spill_mean,
+				cfg->round, cfg->max_used_bits->smearing_mean);
+	configure_decoder_setup(&setup_var, dec, cfg->cmp_mode, cfg->cmp_par_variance, cfg->spill_variance,
+				cfg->round, cfg->max_used_bits->smearing_variance_mean);
+	configure_decoder_setup(&setup_pix, dec, cfg->cmp_mode, cfg->cmp_par_pixels_error, cfg->spill_pixels_error,
+				cfg->round, cfg->max_used_bits->smearing_outlier_pixels);
 
 	for (i = 0; ; i++) {
-		stream_pos = decode_value(&decoded_value, model.mean, stream_pos,
-					  &setup_mean);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_mean, &decoded_value, model.mean);
+		if (err)
+			break;
 		data_buf[i].mean = decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.variance_mean, stream_pos,
-					  &setup_var);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_var, &decoded_value, model.variance_mean);
+		if (err)
+			break;
 		data_buf[i].variance_mean = (__typeof__(data_buf[i].variance_mean))decoded_value;
 
-		stream_pos = decode_value(&decoded_value, model.outlier_pixels, stream_pos,
-					  &setup_pix);
-		if (stream_pos <= 0)
-			return stream_pos;
+		err = decode_value(&setup_pix, &decoded_value, model.outlier_pixels);
+		if (err)
+			break;
 		data_buf[i].outlier_pixels = (__typeof__(data_buf[i].outlier_pixels))decoded_value;
 
 		if (up_model_buf) {
@@ -2073,7 +1806,7 @@ static int decompress_smearing(const struct cmp_cfg *cfg)
 
 		model = next_model_p[i];
 	}
-	return stream_pos;
+	return err;
 }
 
 
@@ -2082,17 +1815,15 @@ static int decompress_smearing(const struct cmp_cfg *cfg)
  *
  * @param cfg	pointer to a compression configuration
  *
- * @note cfg->buffer_length is measured in bytes (instead of samples as by the
- *	compression)
+ * @note cfg->buffer_length is measured in bytes
  *
  * @returns the size of the decompressed data on success; returns negative on failure
- * TODO: change return type to int32_t
  */
 
 static int decompressed_data_internal(struct cmp_cfg *cfg)
 {
+	int err;
 	uint32_t data_size;
-	int strem_len_bit = -1;
 
 	if (!cfg)
 		return -1;
@@ -2121,6 +1852,7 @@ static int decompressed_data_internal(struct cmp_cfg *cfg)
 		if (!cfg->model_buf)
 			return -1;
 
+
 	if (cfg->cmp_mode == CMP_MODE_RAW) {
 
 		if (data_size < cfg->buffer_length/CHAR_BIT)
@@ -2130,10 +1862,14 @@ static int decompressed_data_internal(struct cmp_cfg *cfg)
 			memcpy(cfg->input_buf, cfg->icu_output_buf, data_size);
 			if (cmp_input_big_to_cpu_endianness(cfg->input_buf, data_size, cfg->data_type))
 				return -1;
-			strem_len_bit = (int)data_size * CHAR_BIT;
 		}
+		err = 0;
 
 	} else {
+		struct bit_decoder dec;
+
+		bit_init_decoder(&dec, cfg->icu_output_buf, cfg->buffer_length);
+
 		switch (cfg->data_type) {
 		case DATA_TYPE_IMAGETTE:
 		case DATA_TYPE_IMAGETTE_ADAPTIVE:
@@ -2141,67 +1877,84 @@ static int decompressed_data_internal(struct cmp_cfg *cfg)
 		case DATA_TYPE_SAT_IMAGETTE_ADAPTIVE:
 		case DATA_TYPE_F_CAM_IMAGETTE:
 		case DATA_TYPE_F_CAM_IMAGETTE_ADAPTIVE:
-			strem_len_bit = decompress_imagette(cfg);
+			err = decompress_imagette(cfg, &dec);
 			break;
 		case DATA_TYPE_S_FX:
-			strem_len_bit = decompress_s_fx(cfg);
+			err = decompress_s_fx(cfg, &dec);
 			break;
 		case DATA_TYPE_S_FX_EFX:
-			strem_len_bit = decompress_s_fx_efx(cfg);
+			err = decompress_s_fx_efx(cfg, &dec);
 			break;
 		case DATA_TYPE_S_FX_NCOB:
-			strem_len_bit = decompress_s_fx_ncob(cfg);
+			err = decompress_s_fx_ncob(cfg, &dec);
 			break;
 		case DATA_TYPE_S_FX_EFX_NCOB_ECOB:
-			strem_len_bit = decompress_s_fx_efx_ncob_ecob(cfg);
+			err = decompress_s_fx_efx_ncob_ecob(cfg, &dec);
 			break;
 
 		case DATA_TYPE_F_FX:
-			strem_len_bit = decompress_f_fx(cfg);
+			err = decompress_f_fx(cfg, &dec);
 			break;
 		case DATA_TYPE_F_FX_EFX:
-			strem_len_bit = decompress_f_fx_efx(cfg);
+			err = decompress_f_fx_efx(cfg, &dec);
 			break;
 		case DATA_TYPE_F_FX_NCOB:
-			strem_len_bit = decompress_f_fx_ncob(cfg);
+			err = decompress_f_fx_ncob(cfg, &dec);
 			break;
 		case DATA_TYPE_F_FX_EFX_NCOB_ECOB:
-			strem_len_bit = decompress_f_fx_efx_ncob_ecob(cfg);
+			err = decompress_f_fx_efx_ncob_ecob(cfg, &dec);
 			break;
 
 		case DATA_TYPE_L_FX:
-			strem_len_bit = decompress_l_fx(cfg);
+			err = decompress_l_fx(cfg, &dec);
 			break;
 		case DATA_TYPE_L_FX_EFX:
-			strem_len_bit = decompress_l_fx_efx(cfg);
+			err = decompress_l_fx_efx(cfg, &dec);
 			break;
 		case DATA_TYPE_L_FX_NCOB:
-			strem_len_bit = decompress_l_fx_ncob(cfg);
+			err = decompress_l_fx_ncob(cfg, &dec);
 			break;
 		case DATA_TYPE_L_FX_EFX_NCOB_ECOB:
-			strem_len_bit = decompress_l_fx_efx_ncob_ecob(cfg);
+			err = decompress_l_fx_efx_ncob_ecob(cfg, &dec);
 			break;
 
 		case DATA_TYPE_OFFSET:
 		case DATA_TYPE_F_CAM_OFFSET:
-			strem_len_bit = decompress_offset(cfg);
+			err = decompress_offset(cfg, &dec);
 			break;
 		case DATA_TYPE_BACKGROUND:
 		case DATA_TYPE_F_CAM_BACKGROUND:
-			strem_len_bit = decompress_background(cfg);
+			err = decompress_background(cfg, &dec);
 			break;
 		case DATA_TYPE_SMEARING:
-			strem_len_bit = decompress_smearing(cfg);
+			err = decompress_smearing(cfg, &dec);
 			break;
 
 		case DATA_TYPE_UNKNOWN:
 		default:
-			strem_len_bit = -1;
+			err = -1;
 			debug_print("Error: Compressed data type not supported.\n");
 			break;
 		}
+
+		switch (bit_refill(&dec)) {
+		case BIT_OVERFLOW:
+			if (dec.cursor == dec.limit_ptr)
+				debug_print("Error: The end of the compressed bit stream has been exceeded. Please check that the compression parameters match those used to compress the data and that the compressed data are not corrupted.\n");
+			else
+				debug_print("Error: Data consistency check failed. %s", please_check_str);
+			break;
+		case BIT_END_OF_BUFFER:
+			/* check if non consumed bit are zero */
+			if (bit_read_bits(&dec, sizeof(dec.bit_container)*8 - dec.bits_consumed) == 0)
+				break;
+			/* fall through */
+		case BIT_UNFINISHED:
+			debug_print("Warning: Not all compressed data are processed.\n");
+			break;
+		}
 	}
-	if (strem_len_bit <= 0)
+	if (err)
 		return -1;
 
 	return (int)data_size;
@@ -2407,7 +2160,7 @@ int decompress_rdcu_data(uint32_t *compressed_data, const struct cmp_info *info,
 	cfg.golomb_par = info->golomb_par_used;
 	cfg.samples = info->samples_used;
 	cfg.icu_output_buf = compressed_data;
-	cfg.buffer_length = cmp_bit_to_4byte(info->cmp_size);
+	cfg.buffer_length = (info->cmp_size+7)/8;
 	cfg.max_used_bits = &MAX_USED_BITS_SAFE;
 
 	return decompressed_data_internal(&cfg);
diff --git a/lib/decompress/read_bitstream.h b/lib/decompress/read_bitstream.h
index f9624e8..920540e 100644
--- a/lib/decompress/read_bitstream.h
+++ b/lib/decompress/read_bitstream.h
@@ -1,3 +1,41 @@
+/**
+ * @file   read_bitstream.h
+ * @author Dominik Loidolt (dominik.loidolt@univie.ac.at)
+ * @date   2023
+ *
+ * @copyright GPLv2
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * @brief this library handles the reading from an MSB-first bitstream
+ *
+ * This API consists of small unitary functions, which must be inlined for best performance.
+ * Since link-time optimization is not available for all compilers, these
+ * functions are defined in a header file to be included.
+ *
+ * Start by invoking bit_init_decoder(). A chunk of the bitstream is then stored
+ * in a local register. The local register size is 64 bits. You can then retrieve
+ * bit-fields stored in the local register. The local register is explicitly
+ * reloaded from memory with the bit_refill() function.
+ * A reload guarantees a minimum of 57 bits in the local register if the
+ * returned status is BIT_UNFINISHED.
+ * Otherwise, it can be less than that, so proceed accordingly.
+ * Checking if bit_decoder has reached its end can be performed with bit_end_of_stream().
+ *
+ * This is based on the bitstream part of the FiniteStateEntropy library, see:
+ * https://github.com/Cyan4973/FiniteStateEntropy/blob/dev/lib/bitstream.h
+ * by @author Yann Collet
+ * As well as some ideas from this blog post:
+ * https://fgiesen.wordpress.com/2018/02/20/reading-bits-in-far-too-many-ways-part-2/
+ * by @author Fabian Giesen
+ */
+
 #ifndef READ_BITSTREAM_H
 #define READ_BITSTREAM_H
 
@@ -9,68 +47,118 @@
 #include "../common/byteorder.h"
 
 
-static __inline uint64_t bit_read_unaligned_64(const void* ptr)
-{
-	typedef __attribute__((aligned(1))) uint64_t unalign64;
-	return *(const unalign64*)ptr;
-}
 
+/**
+ * @brief bitstream decoder context type
+ */
 
-static __inline uint64_t bit_read_unalingned_be64(const void* ptr)
-{
-	return cpu_to_be64(bit_read_unaligned_64(ptr));
-}
+struct bit_decoder {
+	uint64_t bit_container;
+	unsigned int bits_consumed;
+	const uint8_t *cursor;
+	const uint8_t *limit_ptr;
+};
 
 
 /**
- * @brief bitstream decoding context type
+ * @brief bitstream decoder status, return type of bit_refill()
  */
 
-struct bit_decoder
+enum bit_status {BIT_OVERFLOW, BIT_END_OF_BUFFER, BIT_ALL_READ_IN, BIT_UNFINISHED};
+
+
+/*
+ * bitstream decoder API
+ */
+
+static __inline size_t bit_init_decoder(struct bit_decoder *dec, const void *buf, size_t buf_size);
+static __inline uint64_t bit_peek_bits(const struct bit_decoder *dec, unsigned int nb_bits);
+static __inline void bit_consume_bits(struct bit_decoder *dec, unsigned int nb_bits);
+static __inline uint64_t bit_read_bits(struct bit_decoder *dec, unsigned int nb_bits);
+static __inline uint32_t bit_read_bits32(struct bit_decoder *dec, unsigned int nb_bits);
+static __inline uint32_t bit_read_bits32_sub_1(struct bit_decoder *dec, unsigned int nb_bits);
+static __inline unsigned int bit_end_of_stream(const struct bit_decoder *dec);
+static __inline int bit_refill(struct bit_decoder *dec);
+
+
+/*
+ * internal implementation
+ */
+
+static const uint32_t BIT_MASK[] = {
+	0,          1,          3,         7,         0xF,       0x1F,
+	0x3F,       0x7F,       0xFF,      0x1FF,     0x3FF,     0x7FF,
+	0xFFF,      0x1FFF,     0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
+	0x3FFFF,    0x7FFFF,    0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
+	0xFFFFFF,   0x1FFFFFF,  0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
+	0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF}; /* BIT_MASK[n] has the n lowest bits set, n = 0..32 */
+#define BIT_MASK_SIZE ARRAY_SIZE(BIT_MASK) /* number of table entries (33) */
+
+
+/**
+ * @brief read 8 bytes of big-endian data from an unaligned address
+ *
+ * @param ptr pointer to the data (can be unaligned)
+ *
+ * @returns 64 bit data at the ptr address, read in big-endian byte order
+ */
+
+static __inline uint64_t bit_read_unaligned_64be(const void *ptr)
 {
-	uint64_t bit_container;
-	unsigned int bits_consumed;
-	const uint8_t* cursor;
-	const uint8_t* limit_ptr;
-};
+	typedef __attribute__((aligned(1))) uint64_t unalign64;
+	return cpu_to_be64(*(const unalign64*)ptr);
+}
 
 
-static __inline size_t bit_init_decoder(struct bit_decoder *dec, const void* buf,
-				   size_t buf_size)
+/**
+ * @brief initialize a bit_decoder
+ *
+ * @param dec		a pointer to an already allocated bit_decoder structure
+ * @param buf		start address of the bitstream buffer
+ * @param buf_size	size of the bitstream in bytes
+ *
+ * @returns size of stream (== buf_size), or zero if a problem is detected
+ */
+
+static __inline size_t bit_init_decoder(struct bit_decoder *dec, const void *buf,
+					size_t buf_size)
 {
 	if (buf_size < 1) {
 		memset(dec, 0, sizeof(*dec));
+		dec->bits_consumed = sizeof(dec->bit_container)*8;
 		return 0;
 	}
 
 	dec->cursor = (const uint8_t *)buf;
 
-	if (buf_size >= sizeof(dec->bit_container)) {
+	if (buf_size >= sizeof(dec->bit_container)) {  /* normal case */
 		dec->bits_consumed = 0;
-		dec->bit_container = bit_read_unalingned_be64(dec->cursor);
+		dec->bit_container = bit_read_unaligned_64be(dec->cursor);
 		dec->limit_ptr = dec->cursor + buf_size - sizeof(dec->bit_container);
 	} else {
-		dec->bits_consumed = (unsigned int)(sizeof(dec->bit_container) - buf_size)*8;
+		const uint8_t *ui8_p = (const uint8_t *)(buf);
+
+		dec->bits_consumed = (unsigned int)(sizeof(dec->bit_container) - buf_size) * 8;
 
-		dec->bit_container = (uint64_t)(((const uint8_t*)(buf))[0]) << 56;
-		switch(buf_size) {
+		dec->bit_container = (uint64_t)ui8_p[0] << 56;
+		switch (buf_size) {
 		case 7:
-			dec->bit_container += (uint64_t)(((const uint8_t*)(buf))[6]) <<  8;
+			dec->bit_container += (uint64_t)ui8_p[6] <<  8;
 			/* fall-through */
 		case 6:
-			dec->bit_container += (uint64_t)(((const uint8_t*)(buf))[5]) << 16;
+			dec->bit_container += (uint64_t)ui8_p[5] << 16;
 			/* fall-through */
 		case 5:
-			dec->bit_container += (uint64_t)(((const uint8_t*)(buf))[4]) << 24;
+			dec->bit_container += (uint64_t)ui8_p[4] << 24;
 			/* fall-through */
 		case 4:
-			dec->bit_container += (uint64_t)(((const uint8_t*)(buf))[3]) << 32;
+			dec->bit_container += (uint64_t)ui8_p[3] << 32;
 			/* fall-through */
 		case 3:
-			dec->bit_container += (uint64_t)(((const uint8_t*)(buf))[2]) << 40;
+			dec->bit_container += (uint64_t)ui8_p[2] << 40;
 			/* fall-through */
 		case 2:
-			dec->bit_container += (uint64_t)(((const uint8_t*)(buf))[1]) << 48;
+			dec->bit_container += (uint64_t)ui8_p[1] << 48;
 			/* fall-through */
 		default:
 			break;
@@ -79,53 +167,78 @@ static __inline size_t bit_init_decoder(struct bit_decoder *dec, const void* buf
 
 		dec->limit_ptr = dec->cursor;
 	}
-
 	return buf_size;
 }
 
 
+/**
+ * @brief provides next n bits from local register; local register is not modified
+ *
+ * @param dec		a pointer to a bit_decoder context
+ * @param nb_bits	number of bits to look; only works if 1 <= nb_bits <= 57
+ *
+ * @returns extracted value
+ */
+
 static __inline uint64_t bit_peek_bits(const struct bit_decoder *dec, unsigned int nb_bits)
 {
-	/* mask for the shift value register to prevent undefined behavior */
+	/* mask for the shift value register to prevent undefined behaviour */
 	uint32_t const reg_mask = 0x3F;
 
-	assert(nb_bits >= 1 && nb_bits <= (64 - 7)); /* TODO: why -7 */
-	assert(dec->bits_consumed + nb_bits <= 64);
+	assert(nb_bits >= 1 && nb_bits <= (64 - 7));
+	/* why -7: after a refill up to 7 bits may already be consumed, so only 57 bits are guaranteed */
 
 	/* shift out consumed bits; return the top nb_bits bits we want to peek */
-	return (dec->bit_container << (dec->bits_consumed &reg_mask)) >> (64-nb_bits);
+	return (dec->bit_container << (dec->bits_consumed & reg_mask)) >> (64-nb_bits);
 }
 
 
 /**
  * @brief count the leading ones in the local register; local register is not modified
- * @warning if all bits are consumed in local register (bitD->bitsConsumed  >= 64),
- *	the result is undefined
- * @param dec	a pointer to a bit_DStream_t context
- * @returns number of leading ones; up to maximum 63
+ *
+ * @param dec	pointer to a bit_decoder context
+ *
+ * @returns number of leading ones; 64 if no zero bit is left after shifting out the consumed bits
  */
 
-static __inline unsigned int bit_count_leading_ones(const struct bit_decoder* dec)
+static __inline unsigned int bit_peek_leading_ones(const struct bit_decoder *dec)
 {
-	/* mask for the shift value register to prevent undefined behavior */
+	/* mask for the shift value register to prevent undefined behaviour */
 	uint32_t const reg_mask = 0x3F;
 	/* shift out the bits we've already consumed */
-	uint64_t remaining_flip = ~(dec->bit_container << (dec->bits_consumed & reg_mask));
-
-	/* clzll(0) is undefined behavior */
-	if (remaining_flip)
-		return sizeof(dec->bit_container)*8;
+	uint64_t const remaining_flip = ~(dec->bit_container << (dec->bits_consumed & reg_mask));
 
-	return (unsigned int)__builtin_clzll(remaining_flip);
+	/* clzll(0) is undefined behaviour */
+	return remaining_flip ? (unsigned int)__builtin_clzll(remaining_flip) :
+		sizeof(dec->bit_container)*8;
 }
 
 
+/**
+ * @brief mark next n bits in the local register as consumed
+ *
+ * @param dec		pointer to a bit_decoder context
+ * @param nb_bits	number of bits to skip
+ */
+
 static __inline void bit_consume_bits(struct bit_decoder *dec, unsigned int nb_bits)
 {
 	dec->bits_consumed += nb_bits;
 }
 
 
+/**
+ * @brief read and consume next n bits from the local register
+ * @warning do not read more bits than the local register has unconsumed bits.
+ *	If you do this, the bit_refill function will return the BIT_OVERFLOW
+ *	status the next time the register is refilled.
+ *
+ * @param dec		pointer to a bit_decoder context
+ * @param nb_bits	number of bits to read; only works if 1 <= nb_bits <= 57
+ *
+ * @returns extracted value
+ */
+
 static __inline uint64_t bit_read_bits(struct bit_decoder *dec, unsigned int nb_bits)
 {
 	uint64_t const read_bits = bit_peek_bits(dec, nb_bits);
@@ -136,49 +249,120 @@ static __inline uint64_t bit_read_bits(struct bit_decoder *dec, unsigned int nb_
 
 
 /**
- * @brief Check if the end of the bitstream has been reached
- * @param dec	a bitstream decoding context
- * @returns 1 if DStream has _exactly_ reached its end (all bits consumed).
+ * @brief same as bit_read_bits() but only returns 32 bit
+ * @warning do not read more bits than the local register has unconsumed bits.
+ *	If you do this, the bit_refill function will return the BIT_OVERFLOW
+ *	status the next time the register is refilled.
+ *
+ * @param dec		pointer to a bit_decoder context
+ * @param nb_bits	number of bits to read; only works if 1 <= nb_bits <= 32
+ *
+ * @returns extracted 32 bit value
+ */
+
+static __inline uint32_t bit_read_bits32(struct bit_decoder *dec, unsigned int nb_bits)
+{
+	assert(nb_bits <= 32);
+
+	return (uint32_t)bit_read_bits(dec, nb_bits);
+}
+
+
+/**
+ * @brief same as bit_read_bits32() but subtract 1 from the extracted value
+ * @warning do not read more bits than the local register has unconsumed bits.
+ *	If you do this, the bit_refill function will return the BIT_OVERFLOW
+ *	status the next time the register is refilled.
+ *
+ * @param dec		pointer to a bit_decoder context
+ * @param nb_bits	number of bits to read; only works if nb_bits <= 32
+ *
+ * @returns extracted 32 bit value minus 1
+ *
+ * @note The difference to the bit_read_bits32() function with subtraction is
+ *	that the subtracted value is masked with nb_bits. E.g. if you read 4
+ *	bits from the bitstream and get 0 and then subtract 1, you get 0xF
+ *	instead of 0xFFFFFFFF
  */
 
-static __inline unsigned int bit_end_of_stream(const struct bit_decoder* dec)
+static __inline uint32_t bit_read_bits32_sub_1(struct bit_decoder *dec, unsigned int nb_bits)
 {
-    return ((dec->cursor == dec->limit_ptr) &&
-	    (dec->bits_consumed == sizeof(dec->bit_container)*8));
+	/* mask for the shift value register to prevent undefined behaviour */
+	uint32_t const reg_mask = sizeof(dec->bit_container)*8 - 1;
+	unsigned int const shift_bits = (64 - dec->bits_consumed - nb_bits) & reg_mask;
+	uint32_t bits_unmask;
+
+	assert(nb_bits <= 32);
+
+	bits_unmask = (uint32_t)(dec->bit_container >> shift_bits);
+	bit_consume_bits(dec, nb_bits);
+	return (bits_unmask - 1) & BIT_MASK[nb_bits];
 }
 
 
-enum {BIT_OVERFLOW, BIT_END_OF_BUFFER, BIT_ALL_READ_IN, BIT_UNFINISHED};
+/**
+ * @brief refill the local register from the buffer previously set in
+ *	bit_init_decoder()
+ *
+ * @param dec	a bitstream decoding context
+ *
+ * @note this function is safe, it guarantees that it does not read beyond
+ *	the initialized buffer
+ *
+ * @returns the status of bit_decoder internal register;
+ *	BIT_UNFINISHED: internal register is filled with at least _57-bits_
+ *	BIT_END_OF_BUFFER: reached the end of the buffer, only some bits are left in the bitstream
+ *	BIT_ALL_READ_IN: _all_ bits of the buffer have been consumed
+ *	BIT_OVERFLOW: more bits have been consumed than contained in the local register
+ */
 
 static __inline int bit_refill(struct bit_decoder *dec)
 {
-	if (dec->bits_consumed > (sizeof(dec->bit_container)*8))
+	unsigned int const bytes_consumed = dec->bits_consumed >> 3;
+
+	if (dec->bits_consumed > sizeof(dec->bit_container)*8)
 		return BIT_OVERFLOW;
-	if (dec->cursor < dec->limit_ptr) {
+
+	if (dec->cursor + bytes_consumed < dec->limit_ptr) {
 		/* Advance the pointer by the number of full bytes we consumed */
-		dec->cursor += dec->bits_consumed >> 3;
+		dec->cursor += bytes_consumed;
 		/* Refill the bit container */
-		dec->bit_container = bit_read_unalingned_be64(dec->cursor);
-		/* The number of bits that we have already consumed in the current
-		 * byte, excluding the bits that formed a complete byte and were already
-		 * processed.
+		dec->bit_container = bit_read_unaligned_64be(dec->cursor);
+		/* The number of bits that we have already consumed in the
+		 * current byte, excluding the bits that formed a complete byte
+		 * and were already processed.
 		 */
 		dec->bits_consumed &= 0x7;
 		return BIT_UNFINISHED;
 	}
 
-	if (bit_end_of_stream(dec))
-		return BIT_ALL_READ_IN;
-	else
+	if (dec->cursor == dec->limit_ptr) {
+		if (dec->bits_consumed == sizeof(dec->bit_container)*8)
+			return BIT_ALL_READ_IN;
 		return BIT_END_OF_BUFFER;
+	}
 
-	/* limit_ptr < cursor < end */
+	/* limit_ptr < (cursor + bytes_consumed) < end */
 	dec->bits_consumed -= (dec->limit_ptr - dec->cursor)*8;
 	dec->cursor = dec->limit_ptr;
-	dec->bit_container = bit_read_unaligned_64(dec->cursor);
+	dec->bit_container = bit_read_unaligned_64be(dec->cursor);
 
 	return BIT_END_OF_BUFFER;
 }
 
 
+/**
+ * @brief Check if the end of the bitstream has been reached
+ *
+ * @param dec	a bitstream decoding context
+ *
+ * @returns 1 if bit_decoder has _exactly_ reached its end (all bits consumed)
+ */
+
+static __inline unsigned int bit_end_of_stream(const struct bit_decoder *dec)
+{
+	return ((dec->cursor == dec->limit_ptr) &&
+		(dec->bits_consumed == sizeof(dec->bit_container)*8));
+}
+
 #endif /* READ_BITSTREAM_H */
diff --git a/lib/rdcu_compress/cmp_rdcu.c b/lib/rdcu_compress/cmp_rdcu.c
index 133bffc..6354ee5 100644
--- a/lib/rdcu_compress/cmp_rdcu.c
+++ b/lib/rdcu_compress/cmp_rdcu.c
@@ -649,7 +649,7 @@ int rdcu_compress_data_parallel(const struct cmp_cfg *cfg,
 			return -1;
 
 		/* calculate the need bytes for the bitstream */
-		cmp_size_4byte = ((last_info->cmp_size >> 3) + 3) & ~0x3U;
+		cmp_size_4byte = cmp_bit_to_4byte(last_info->cmp_size);
 
 		/* parallel read compressed data and write input data from sram
 		 * to mirror */
diff --git a/programs/cmp_tool.c b/programs/cmp_tool.c
index fe49055..c70c7bf 100644
--- a/programs/cmp_tool.c
+++ b/programs/cmp_tool.c
@@ -299,14 +299,15 @@ int main(int argc, char **argv)
 	}
 
 	{
-		const char str[] = "### PLATO Compression/Decompression Tool Version " CMP_TOOL_VERSION " ###\n";
+		static const char str[] = "### PLATO Compression/Decompression Tool Version " CMP_TOOL_VERSION " ###\n";
 		size_t str_len = strlen(str) - 1; /* -1 for \n */
 		size_t i;
-		for (i = 0; i < str_len; ++i)
+
+		for (i = 0; i < str_len; i++)
 			printf("#");
 		printf("\n");
 		printf("%s", str);
-		for (i = 0; i < str_len; ++i)
+		for (i = 0; i < str_len; i++)
 			printf("#");
 		printf("\n");
 	}
@@ -372,7 +373,6 @@ int main(int argc, char **argv)
 		if (info_file_name) {
 			ssize_t f_size;
 			size_t ent_size;
-			uint32_t cmp_size_byte;
 
 			printf("Importing decompression information file %s ... ", info_file_name);
 			error  = cmp_info_read(info_file_name, &info, io_flags & CMP_IO_VERBOSE);
@@ -383,7 +383,7 @@ int main(int argc, char **argv)
 			printf("Importing compressed data file %s ... ", data_file_name);
 
 			ent_size = cmp_ent_create(NULL, DATA_TYPE_IMAGETTE, info.cmp_mode_used == CMP_MODE_RAW,
-						  cmp_bit_to_4byte(info.cmp_size));
+						  cmp_bit_to_byte(info.cmp_size));
 			if (!ent_size)
 				goto fail;
 			decomp_entity = calloc(1, ent_size);
@@ -392,13 +392,12 @@ int main(int argc, char **argv)
 				goto fail;
 			}
 			ent_size = cmp_ent_create(decomp_entity, DATA_TYPE_IMAGETTE, info.cmp_mode_used == CMP_MODE_RAW,
-						  cmp_bit_to_4byte(info.cmp_size));
+						  cmp_bit_to_byte(info.cmp_size));
 			if (!ent_size)
 				goto fail;
 
-			cmp_size_byte = (info.cmp_size+7)/CHAR_BIT;
 			f_size = read_file8(data_file_name, cmp_ent_get_data_buf(decomp_entity),
-					    cmp_size_byte, io_flags);
+					    cmp_bit_to_byte(info.cmp_size), io_flags);
 			if (f_size < 0)
 				goto fail;
 
@@ -417,10 +416,6 @@ int main(int argc, char **argv)
 			buf_size = (size_t)size;
 			if (buf_size < sizeof(struct cmp_entity))
 				buf_size = sizeof(struct cmp_entity);
-			/* The compressed data is read in 4-byte words, so our
-			 * data buffer must be a multiple of 4 bytes.
-			 */
-			buf_size = (buf_size + 3) & ~((size_t)0x3);
 
 			decomp_entity = calloc(1, buf_size);
 			if (!decomp_entity) {
@@ -432,11 +427,6 @@ int main(int argc, char **argv)
 			if (size < 0)
 				goto fail;
 
-			if (cmp_ent_get_size(decomp_entity) & 0x3) {
-				printf("\nThe size of the compression entity is not a multiple of 4 bytes. Padding the compression entity to a multiple of 4 bytes.\n");
-				cmp_ent_set_size(decomp_entity, (uint32_t)buf_size);
-			}
-
 			if (io_flags & CMP_IO_VERBOSE_EXTRA) {
 				cmp_ent_print(decomp_entity);
 				printf("\n");
@@ -600,7 +590,7 @@ static int guess_cmp_pars(struct cmp_cfg *cfg, const char *guess_cmp_mode,
 		return -1;
 
 	if (include_cmp_header)
-		cmp_size_bit = CHAR_BIT * (cmp_bit_to_4byte(cmp_size_bit) +
+		cmp_size_bit = CHAR_BIT * (cmp_bit_to_byte(cmp_size_bit) +
 			cmp_ent_cal_hdr_size(cfg->data_type, cfg->cmp_mode == CMP_MODE_RAW));
 
 	printf("DONE\n");
@@ -768,9 +758,8 @@ static int compression(struct cmp_cfg *cfg, struct cmp_info *info)
 	}
 
 	printf("Compress data ... ");
-	/* round up to a multiple of 4 */
-	out_buf_size = (cmp_cal_size_of_data(cfg->buffer_length, cfg->data_type) + 3) & ~0x3U;
 
+	out_buf_size = cmp_cal_size_of_data(cfg->buffer_length, cfg->data_type);
 	cmp_entity = calloc(1, out_buf_size + sizeof(struct cmp_entity));
 	if (cmp_entity == NULL) {
 		fprintf(stderr, "%s: Error allocating memory for output buffer.\n", PROGRAM_NAME);
@@ -821,10 +810,7 @@ static int compression(struct cmp_cfg *cfg, struct cmp_info *info)
 		if (cmp_gernate_rdcu_info(cfg, cmp_size, ap1_cmp_size, ap2_cmp_size, info))
 			goto error_cleanup;
 		data_to_write_to_file = cmp_ent_get_data_buf(cmp_entity);
-		if (cfg->cmp_mode == CMP_MODE_RAW)
-			cmp_size_byte = info->cmp_size/CHAR_BIT;
-		else
-			cmp_size_byte = cmp_bit_to_4byte(info->cmp_size);
+		cmp_size_byte = cmp_ent_get_cmp_data_size(cmp_entity);
 	}
 
 	printf("DONE\n");
diff --git a/test/cmp_decmp/test_cmp_decmp.c b/test/cmp_decmp/test_cmp_decmp.c
index e2c0d24..bc23a87 100644
--- a/test/cmp_decmp/test_cmp_decmp.c
+++ b/test/cmp_decmp/test_cmp_decmp.c
@@ -513,8 +513,21 @@ void compression_decompression(struct cmp_cfg *cfg)
 	TEST_ASSERT(cmp_size_bits > 0);
 
 	/* put the compression parameters in the entity header */
-	error = cmp_ent_write_cmp_pars(ent, cfg, cmp_size_bits);
-	TEST_ASSERT_FALSE(error);
+	{
+		/* mock values */
+		uint32_t version_id = ~0U;
+		uint64_t start_time = 32;
+		uint64_t end_time = 42;
+		uint16_t model_id = 0xCAFE;
+		uint8_t model_counter = 0;
+		uint32_t ent_size;
+
+		ent_size = cmp_ent_build(ent, version_id, start_time, end_time,
+					 model_id, model_counter, cfg, cmp_size_bits);
+		TEST_ASSERT_NOT_EQUAL_UINT(0, ent_size);
+		error = cmp_ent_set_size(ent, ent_size);
+		TEST_ASSERT_FALSE(error);
+	}
 
 	/* allocate the buffers for decompression */
 	TEST_ASSERT_NOT_EQUAL_INT(0, data_size);
@@ -604,8 +617,8 @@ void test_random_compression_decompression(void)
 	compression_decompression(NULL);
 }
 
-#define N_SAMPLES 5
 
+#define N_SAMPLES 5
 void test_random_compression_decompression2(void)
 {
 	struct cmp_cfg cfg;
diff --git a/test/cmp_entity/test_cmp_entity.c b/test/cmp_entity/test_cmp_entity.c
index 8694949..ec930e5 100644
--- a/test/cmp_entity/test_cmp_entity.c
+++ b/test/cmp_entity/test_cmp_entity.c
@@ -1860,10 +1860,10 @@ void test_cmp_ent_build(void)
 	cmp_size_bits = 2;
 	size = cmp_ent_build(NULL, version_id, start_time, end_time, model_id,
 			     model_counter, &cfg, cmp_size_bits);
-	TEST_ASSERT_EQUAL_UINT(IMAGETTE_ADAPTIVE_HEADER_SIZE+4, size);
+	TEST_ASSERT_EQUAL_UINT(IMAGETTE_ADAPTIVE_HEADER_SIZE+cmp_bit_to_byte((unsigned int)cmp_size_bits), size);
 	size = cmp_ent_build(ent, version_id, start_time, end_time, model_id,
 			     model_counter, &cfg, cmp_size_bits);
-	TEST_ASSERT_EQUAL_UINT(IMAGETTE_ADAPTIVE_HEADER_SIZE+4, size);
+	TEST_ASSERT_EQUAL_UINT(IMAGETTE_ADAPTIVE_HEADER_SIZE+cmp_bit_to_byte((unsigned int)cmp_size_bits), size);
 
 	/** error cases **/
 	/* cfg = NULL */
diff --git a/test/cmp_tool/cmp_tool_integration_test.py b/test/cmp_tool/cmp_tool_integration_test.py
index 403490b..5cadbf2 100755
--- a/test/cmp_tool/cmp_tool_integration_test.py
+++ b/test/cmp_tool/cmp_tool_integration_test.py
@@ -425,11 +425,11 @@ def test_compression_diff():
                             if arg == " --no_header" else "")(add_arg))
                    )
              # check compressed data
-            cmp_data = "44 44 40 00 \n"
+            cmp_data = "44 44 40 \n"
             with open(output_prefix+".cmp", encoding='utf-8') as f:
                 if add_arg == " --no_header":
                     # check compressed data file
-                    assert(f.read() == "44 44 40 00 \n")
+                    assert(f.read() == cmp_data)
                     # check info file
                     with open(output_prefix+".info", encoding='utf-8') as f:
                         info = parse_key_value(f.read())
@@ -444,7 +444,7 @@ def test_compression_diff():
                 else:
                     header = read_in_cmp_header(f.read())
                     assert(header['asw_version_id']['value'] == VERSION.split('-')[0])
-                    assert(header['cmp_ent_size']['value'] == IMAGETTE_HEADER_SIZE+4)
+                    assert(header['cmp_ent_size']['value'] == IMAGETTE_HEADER_SIZE+3)
                     assert(header['original_size']['value'] == 10)
                     # todo
                     assert(header['start_time']['value'] < cuc_timestamp(datetime.utcnow()))
@@ -458,7 +458,7 @@ def test_compression_diff():
                     assert(header['lossy_cmp_par_used']['value'] == 0)
                     assert(header['spill_used']['value'] == 60)
                     assert(header['golomb_par_used']['value'] == 7)
-                    assert(header['compressed_data']['value'] == "44444000")
+                    assert(header['compressed_data']['value'] == "444440")
 
             # decompression
             if add_arg == " --no_header":
@@ -553,7 +553,7 @@ def test_model_compression():
         if "--no_header" in add_arg:
             # check compressed data
             with open(output_prefix1+".cmp", encoding='utf-8') as f:
-                assert(f.read() == "49 24 00 00 \n")
+                assert(f.read() == "49 24 \n")
             # check info file
             with open(output_prefix1+".info", encoding='utf-8') as f:
                 info = parse_key_value(f.read())
@@ -579,7 +579,7 @@ def test_model_compression():
                         header = read_in_cmp_header(bytearray(f.read()).hex())
 
                 assert(header['asw_version_id']['value'] == VERSION.split('-')[0])
-                assert(header['cmp_ent_size']['value'] == IMAGETTE_ADAPTIVE_HEADER_SIZE+4)
+                assert(header['cmp_ent_size']['value'] == IMAGETTE_ADAPTIVE_HEADER_SIZE+2)
                 assert(header['original_size']['value'] == 10)
                 # todo
                 assert(header['start_time']['value'] < cuc_timestamp(datetime.utcnow()))
@@ -593,7 +593,7 @@ def test_model_compression():
                 assert(header['lossy_cmp_par_used']['value'] == int(cfg['round']))
                 assert(header['spill_used']['value'] == int(cfg['spill']))
                 assert(header['golomb_par_used']['value'] == int(cfg['golomb_par']))
-                assert(header['compressed_data']['value'] == "49240000")
+                assert(header['compressed_data']['value'] == "4924")
 
             # decompression
             if "--no_header" in add_arg:
@@ -688,7 +688,7 @@ def test_raw_mode_compression():
                 else:
                     header = read_in_cmp_header(f.read())
                     assert(header['asw_version_id']['value'] == VERSION.split('-')[0])
-                    assert(header['cmp_ent_size']['value'] == GENERIC_HEADER_SIZE+12)
+                    assert(header['cmp_ent_size']['value'] == GENERIC_HEADER_SIZE+10)
                     assert(header['original_size']['value'] == 10)
                     # todo
                     assert(header['start_time']['value'] < cuc_timestamp(datetime.utcnow()))
@@ -702,7 +702,7 @@ def test_raw_mode_compression():
                     assert(header['lossy_cmp_par_used']['value'] == 0)
                     # assert(header['spill_used']['value'] == 60)
                     # assert(header['golomb_par_used']['value'] == 7)
-                    assert(header['compressed_data']['value'] == data[:-1].replace(" ","")+"0000")
+                    assert(header['compressed_data']['value'] == data[:-1].replace(" ",""))
 
             # decompression
             if "--no_header" in arg:
@@ -768,13 +768,13 @@ def test_guess_option():
                     exp_out = ('', '2', '', '7.27')
                 elif sub_test == 'guess_RDCU_model':
                     exp_out = (
-                        'Importing model file model.dat ... DONE\n', '2', '', str(round((5*2)/(IMAGETTE_ADAPTIVE_HEADER_SIZE + 4), 2)))
-                        #cmp_size:15bit-> 4byte cmp_data + 40byte header -> 16bit*5/(44Byte*8) '5.33'
+                        'Importing model file model.dat ... DONE\n', '2', '', str(round((5*2)/(IMAGETTE_ADAPTIVE_HEADER_SIZE + 2), 2)))
+                        #cmp_size:15bit-> 2byte cmp_data + 40byte header -> 16bit*5/(42Byte*8)
                 elif sub_test == 'guess_level_3':
                     exp_out = (
                         '', '3', ' 0%... 6%... 13%... 19%... 25%... 32%... 38%... 44%... 50%... 57%... 64%... 72%... 80%... 88%... 94%... 100%',
-                        str(round((5*2)/(IMAGETTE_HEADER_SIZE + 4), 2))) #11.43
-                    # cmp_size:7 bit -> 4byte cmp_data + 34 byte header -> 16bit*5/(40Byte*8)
+                        str(round((5*2)/(IMAGETTE_HEADER_SIZE + 1), 3))) #11.43
+                    # cmp_size:7 bit -> 1byte cmp_data + 34 byte header -> 16bit*5/(35Byte*8)
                 else:
                     exp_out = ('', '', '')
 
@@ -1081,7 +1081,7 @@ def test_cmp_entity_not_4_byte_aligned():
 
     version_id = 0x8001_0042
     cmp_ent_size = IMAGETTE_HEADER_SIZE + len(cmp_data)//2
-    original_size = 0xA
+    original_size = 0xC
 
     start_time = cuc_timestamp(datetime.utcnow())
     end_time = cuc_timestamp(datetime.utcnow())
@@ -1103,9 +1103,9 @@ def test_cmp_entity_not_4_byte_aligned():
     ima_header = build_imagette_header(spill_used, golomb_par_used)
     cmp_data_header = generic_header + ima_header + cmp_data
 
-    data_exp = '00 01 00 02 00 03 00 04 00 05 \n'
+    data_exp = '00 01 00 02 00 03 00 04 00 05 00 06 \n'
     info = ("cmp_size = 20\n" + "golomb_par_used = 7\n" + "spill_used = 60\n"
-            + "cmp_mode_used = 2\n" +"samples_used=5\n")
+            + "cmp_mode_used = 2\n" +"samples_used=6\n")
 
     cmp_file_name = 'unaligned_cmp_size.cmp'
     info_file_name = 'unaligned_cmp_size.info'
@@ -1136,9 +1136,7 @@ def test_cmp_entity_not_4_byte_aligned():
                        "Write decompressed data to file %s.dat ... DONE\n" % (output_prefix))
             else:
                 assert(stdout == CMP_START_STR_DECMP +
-                       "Importing compressed data file %s ... \n" % (cmp_file_name) +
-                       "The size of the compression entity is not a multiple of 4 bytes. Padding the compression entity to a multiple of 4 bytes.\n" +
-                       "DONE\n" +
+                       "Importing compressed data file %s ... DONE\n" % (cmp_file_name) +
                        "Decompress data ... DONE\n"+
                        "Write decompressed data to file %s.dat ... DONE\n" % (output_prefix))
 
@@ -1173,7 +1171,7 @@ def test_header_wrong_formatted():
 def test_header_read_in():
     cmp_file_name = 'test_header_read_in.cmp'
 
-    cmp_data = '44444400'
+    cmp_data = '444444'
 
     version_id = 0x8001_0042
     cmp_ent_size = IMAGETTE_HEADER_SIZE + len(cmp_data)//2
@@ -1209,7 +1207,7 @@ def test_header_read_in():
                "Importing compressed data file %s ... FAILED\n" % (cmp_file_name))
         assert(stderr == "cmp_tool: %s: The size of the compression entity set in the "
                "header of the compression entity is not the same size as the read-in "
-               "file has. Expected: 0x28, has 0x26.\n" %(cmp_file_name))
+               "file has. Expected: 0x27, has 0x25.\n" %(cmp_file_name))
 
         # false data type
         data_type = 0x7FFE
@@ -1380,7 +1378,7 @@ def test_rdcu_pkt():
 
             # check compressed data
             with open(output_prefix1+".cmp", encoding='utf-8') as f:
-                assert(f.read() == "49 24 00 00 \n")
+                assert(f.read() == "49 24 \n")
             # check info file
             with open(output_prefix1+".info", encoding='utf-8') as f:
                 info = parse_key_value(f.read())
diff --git a/test/decmp/test_decmp.c b/test/decmp/test_decmp.c
index 695921b..cc5f071 100644
--- a/test/decmp/test_decmp.c
+++ b/test/decmp/test_decmp.c
@@ -25,51 +25,193 @@
 #include <compiler.h>
 #include <cmp_entity.h>
 #include "../../lib/icu_compress/cmp_icu.c" /* .c file included to test static functions */
-#include "../../lib/decompress//decmp.c" /* .c file included to test static functions */
+#include "../../lib/decompress/decmp.c" /* .c file included to test static functions */
 
 #define MAX_VALID_CW_LEM 32
 
 
+void test_bitstream(void)
+{
+	uint8_t i, data[12];
+	struct bit_decoder dec;
+	size_t ret;
+	int status;
+	uint32_t read_bits;
+
+	for (i = 0; i < sizeof(data); ++i)
+		data[i] = i; /* stream bytes: 00 01 02 ... 0B */
+
+	ret = bit_init_decoder(&dec, data, sizeof(data));
+	TEST_ASSERT_EQUAL_size_t(sizeof(data), ret);
+
+	read_bits = bit_read_bits32(&dec, 31);
+	TEST_ASSERT_EQUAL_HEX32(0x00010203>>1, read_bits);
+	TEST_ASSERT_EQUAL_INT(31, dec.bits_consumed);
+
+	status = bit_refill(&dec);
+	TEST_ASSERT_EQUAL_INT(BIT_UNFINISHED, status);
+	TEST_ASSERT_EQUAL_INT(7, dec.bits_consumed);
+	status = bit_refill(&dec); /* a repeated refill must not change the state */
+	TEST_ASSERT_EQUAL_INT(BIT_UNFINISHED, status);
+	TEST_ASSERT_EQUAL_INT(7, dec.bits_consumed);
+	TEST_ASSERT_FALSE(bit_end_of_stream(&dec));
+
+	read_bits = bit_read_bits32(&dec, 32);
+	TEST_ASSERT_EQUAL_HEX32(0x82028303, read_bits);
+	TEST_ASSERT_EQUAL_INT(39, dec.bits_consumed);
+	read_bits = bit_read_bits32(&dec, 1);
+	TEST_ASSERT_EQUAL_HEX32(1, read_bits);
+	TEST_ASSERT_EQUAL_INT(40, dec.bits_consumed);
+
+	status = bit_refill(&dec);
+	TEST_ASSERT_EQUAL_INT(BIT_END_OF_BUFFER, status);
+	TEST_ASSERT_EQUAL_INT(32, dec.bits_consumed);
+	status = bit_refill(&dec);
+	TEST_ASSERT_EQUAL_INT(BIT_END_OF_BUFFER, status);
+	TEST_ASSERT_EQUAL_INT(32, dec.bits_consumed);
+	TEST_ASSERT_FALSE(bit_end_of_stream(&dec));
+
+	read_bits = bit_read_bits32(&dec, 32);
+	TEST_ASSERT_EQUAL_HEX32(0x08090A0B, read_bits);
+	status = bit_refill(&dec);
+	TEST_ASSERT_EQUAL_INT(BIT_ALL_READ_IN, status);
+	status = bit_refill(&dec);
+	TEST_ASSERT_EQUAL_INT(BIT_ALL_READ_IN, status);
+	TEST_ASSERT_TRUE(bit_end_of_stream(&dec));
+
+	bit_read_bits32(&dec, 1); /* one bit more than the stream holds */
+	status = bit_refill(&dec);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, status);
+	status = bit_refill(&dec);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, status);
+	TEST_ASSERT_FALSE(bit_end_of_stream(&dec));
+
+	bit_read_bits(&dec, 57);
+	status = bit_refill(&dec);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, status);
+	bit_read_bits(&dec, 57);
+	bit_read_bits(&dec, 57);
+	bit_read_bits(&dec, 57);
+	bit_read_bits(&dec, 57);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, bit_refill(&dec)); /* query again; the old status value is stale */
+
+
+	/* short streams: every buffer length from 0 to 7 bytes is read back byte by byte */
+	{
+		uint8_t k, j;
+		uint8_t buf[9];
+		size_t s;
+
+		for (k = 0; k < 8; k++) {
+			memset(buf, 0, sizeof(buf)); /* clear the buffer that is handed to the decoder */
+			for (j = 0; j < k; j++)
+				buf[j] = j;
+			s = bit_init_decoder(&dec, buf, j); /* j == k after the fill loop */
+			TEST_ASSERT_EQUAL_size_t(j, s);
+			for (j = 0; j < k; j++)
+				TEST_ASSERT_EQUAL_UINT(j, bit_read_bits(&dec, 8));
+			TEST_ASSERT_TRUE(bit_end_of_stream(&dec));
+			TEST_ASSERT_EQUAL_INT(BIT_ALL_READ_IN, bit_refill(&dec));
+		}
+	}
+}
+
+
 /**
- * @test count_leading_ones
+ * @test unary_decoder
  */
 
-void test_count_leading_ones(void)
+void test_unary_decoder(void)
 {
-	unsigned int n_leading_bit;
+	uint32_t leading_ones;
+	struct bit_decoder dec;
+	uint32_t unused_1 = 0;
+	uint32_t unused_2 = 0;
 	uint32_t value;
+	size_t ret;
 
-	value = 0;
-	n_leading_bit = count_leading_ones(value);
-	TEST_ASSERT_EQUAL_INT(0, n_leading_bit);
-
-	value = 0x7FFFFFFF;
-	n_leading_bit = count_leading_ones(value);
-	TEST_ASSERT_EQUAL_INT(0, n_leading_bit);
-
-	value = 0x80000000;
-	n_leading_bit = count_leading_ones(value);
-	TEST_ASSERT_EQUAL_INT(1, n_leading_bit);
-
-	value = 0xBFFFFFFF;
-	n_leading_bit = count_leading_ones(value);
-	TEST_ASSERT_EQUAL_INT(1, n_leading_bit);
 
-	value = 0xFFFF0000;
-	n_leading_bit = count_leading_ones(value);
-	TEST_ASSERT_EQUAL_INT(16, n_leading_bit);
-
-	value = 0xFFFF7FFF;
-	n_leading_bit = count_leading_ones(value);
-	TEST_ASSERT_EQUAL_INT(16, n_leading_bit);
-
-	value = 0xFFFFFFFE;
-	n_leading_bit = count_leading_ones(value);
-	TEST_ASSERT_EQUAL_INT(31, n_leading_bit);
-
-	value = 0xFFFFFFFF;
-	n_leading_bit = count_leading_ones(value);
-	TEST_ASSERT_EQUAL_INT(32, n_leading_bit);
+	value = 0; /* only zero bits: every call decodes an empty run */
+	ret = bit_init_decoder(&dec, &value, sizeof(value));
+	TEST_ASSERT_EQUAL_size_t(sizeof(value), ret);
+	leading_ones = unary_decoder(&dec, unused_1, unused_2);
+	TEST_ASSERT_EQUAL_INT(0, leading_ones);
+	leading_ones = unary_decoder(&dec, unused_1, unused_2);
+	TEST_ASSERT_EQUAL_INT(0, leading_ones);
+	TEST_ASSERT_EQUAL_INT(BIT_END_OF_BUFFER, bit_refill(&dec));
+
+	value = cpu_to_be32(0x7FFFFFFF); /* host constant -> big-endian stream bytes */
+	ret = bit_init_decoder(&dec, &value, sizeof(value));
+	TEST_ASSERT_EQUAL_size_t(sizeof(value), ret);
+	leading_ones = unary_decoder(&dec, unused_1, unused_2);
+	TEST_ASSERT_EQUAL_INT(0, leading_ones);
+	leading_ones = unary_decoder(&dec, unused_1, unused_2);
+	TEST_ASSERT_EQUAL_INT(31, leading_ones);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, bit_refill(&dec));
+
+	value = cpu_to_be32(0x80000000);
+	ret = bit_init_decoder(&dec, &value, sizeof(value));
+	TEST_ASSERT_EQUAL_size_t(sizeof(value), ret);
+	leading_ones = unary_decoder(&dec, unused_1, unused_2);
+	TEST_ASSERT_EQUAL_INT(1, leading_ones);
+	TEST_ASSERT_EQUAL_INT(BIT_END_OF_BUFFER, bit_refill(&dec));
+	leading_ones = unary_decoder(&dec, unused_1, unused_2);
+	TEST_ASSERT_EQUAL_INT(0, leading_ones);
+	TEST_ASSERT_EQUAL_INT(BIT_END_OF_BUFFER, bit_refill(&dec));
+
+	value = cpu_to_be32(0xBFFFFFFF);
+	ret = bit_init_decoder(&dec, &value, sizeof(value));
+	TEST_ASSERT_EQUAL_size_t(sizeof(value), ret);
+	leading_ones = unary_decoder(&dec, unused_1, unused_2);
+	TEST_ASSERT_EQUAL_INT(1, leading_ones);
+	TEST_ASSERT_EQUAL_INT(BIT_END_OF_BUFFER, bit_refill(&dec));
+	leading_ones = unary_decoder(&dec, unused_1, unused_2);
+	TEST_ASSERT_EQUAL_INT(30, leading_ones);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, bit_refill(&dec));
+
+	value = cpu_to_be32(0xFFFF0000);
+	ret = bit_init_decoder(&dec, &value, sizeof(value));
+	TEST_ASSERT_EQUAL_size_t(sizeof(value), ret);
+	leading_ones = unary_decoder(&dec, unused_1, unused_2);
+	TEST_ASSERT_EQUAL_INT(16, leading_ones);
+	TEST_ASSERT_EQUAL_INT(BIT_END_OF_BUFFER, bit_refill(&dec));
+
+	value = cpu_to_be32(0xFFFF7FFF);
+	ret = bit_init_decoder(&dec, &value, sizeof(value));
+	TEST_ASSERT_EQUAL_size_t(sizeof(value), ret);
+	leading_ones = unary_decoder(&dec, unused_1, unused_2);
+	TEST_ASSERT_EQUAL_INT(16, leading_ones);
+	TEST_ASSERT_EQUAL_INT(BIT_END_OF_BUFFER, bit_refill(&dec));
+
+	value = cpu_to_be32(0xFFFFFFFE);
+	ret = bit_init_decoder(&dec, &value, sizeof(value));
+	TEST_ASSERT_EQUAL_size_t(sizeof(value), ret);
+	leading_ones = unary_decoder(&dec, unused_1, unused_2);
+	TEST_ASSERT_EQUAL_INT(31, leading_ones);
+	TEST_ASSERT_EQUAL_INT(BIT_ALL_READ_IN, bit_refill(&dec));
+
+	value = cpu_to_be32(0xFFFFFFFF); /* no terminating 0 bit inside the buffer */
+	ret = bit_init_decoder(&dec, &value, sizeof(value));
+	TEST_ASSERT_EQUAL_size_t(sizeof(value), ret);
+	leading_ones = unary_decoder(&dec, unused_1, unused_2);
+	TEST_ASSERT_EQUAL_INT(32, leading_ones);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, bit_refill(&dec));
+
+	{
+		uint64_t value64 = ~0ULL; /* all ones is byte-order independent */
+		ret = bit_init_decoder(&dec, &value64, sizeof(value64));
+		TEST_ASSERT_EQUAL_size_t(sizeof(value64), ret);
+		leading_ones = unary_decoder(&dec, unused_1, unused_2);
+		TEST_ASSERT_EQUAL_INT(64, leading_ones);
+		TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, bit_refill(&dec));
+
+		value64 = cpu_to_be64(0xFFFFFFFF00000000);
+		ret = bit_init_decoder(&dec, &value64, sizeof(value64));
+		TEST_ASSERT_EQUAL_size_t(sizeof(value64), ret);
+		leading_ones = unary_decoder(&dec, unused_1, unused_2);
+		TEST_ASSERT_EQUAL_INT(32, leading_ones);
+		/* TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, bit_refill(&dec)); */
+	}
 }
 
 
@@ -79,113 +221,163 @@ void test_count_leading_ones(void)
 
 void test_rice_decoder(void)
 {
-	unsigned int cw_len;
-	uint32_t code_word;
-	unsigned int m = ~0U;  /* we don't need this value */
-	unsigned int log2_m;
-	uint32_t decoded_cw = ~0U;
+	struct bit_decoder dec;
+	uint64_t bitstream;
+	uint32_t m, log2_m, decoded_cw;
+	size_t buf_size;
 
 	/* log2_m = 0 test */
-	log2_m = 0;
+	log2_m = 0; m = 1U << log2_m;
 
-	code_word = 0;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(1, cw_len);
+	bitstream = 0;
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = unary_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(1, dec.bits_consumed);
 	TEST_ASSERT_EQUAL(0, decoded_cw);
 
-	code_word = 0x7FFF; /* 0b0111...11 */
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(1, cw_len);
+	bitstream = cpu_to_be64(0x7FFFFFFFFFFFFFFF); /* 0b0111...11 */
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = unary_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(1, dec.bits_consumed);
 	TEST_ASSERT_EQUAL(0, decoded_cw);
 
-	code_word = 0x80000000;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(2, cw_len);
+	bitstream = cpu_to_be64(0x8000000000000000);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = unary_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(2, dec.bits_consumed);
 	TEST_ASSERT_EQUAL(1, decoded_cw);
 
-	code_word = 0xFFFFFFFE;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be64(0xFFFFFFFE00000000);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = unary_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed);
 	TEST_ASSERT_EQUAL(31, decoded_cw);
 
+	bitstream = cpu_to_be64(0xFFFFFFFFFFFFFFFE);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = unary_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(64, dec.bits_consumed);
+	TEST_ASSERT_EQUAL(63, decoded_cw);
+	TEST_ASSERT_EQUAL_INT(BIT_ALL_READ_IN, bit_refill(&dec));
+
+	bitstream = cpu_to_be64(0xFFFFFFFF00000000);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = unary_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(33, dec.bits_consumed);
+	TEST_ASSERT_EQUAL(32, decoded_cw);
+
 	/* invalid code word (longer than 32 bit) */
-	code_word = 0xFFFFFFFF;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, cw_len);
+	bitstream = cpu_to_be64(0xFFFFFFFFFFFFFFFF);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = unary_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(65, dec.bits_consumed);
+	TEST_ASSERT_EQUAL(64, decoded_cw);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, bit_refill(&dec));
 
 	/* log2_m = 1 test */
-	log2_m = 1;
+	log2_m = 1; m = 1U << log2_m;
 
-	code_word = 0;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(2, cw_len);
+	bitstream = 0;
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(2, dec.bits_consumed);
 	TEST_ASSERT_EQUAL(0, decoded_cw);
 
-	code_word = 0x40000000;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(2, cw_len);
+	bitstream = cpu_to_be64(0x4000000000000000);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(2, dec.bits_consumed);
 	TEST_ASSERT_EQUAL(1, decoded_cw);
 
-	code_word = 0XFFFFFFFC;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be64(0xFFFFFFFC00000000);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed);
 	TEST_ASSERT_EQUAL(60, decoded_cw);
 
-	code_word = 0XFFFFFFFD;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be64(0xFFFFFFFD00000000);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed);
 	TEST_ASSERT_EQUAL(61, decoded_cw);
 
 	/* invalid code word (longer than 32 bit) */
-	code_word = 0XFFFFFFFE;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, cw_len);
+	bitstream = cpu_to_be64(0xFFFFFFFE00000000);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(33, dec.bits_consumed);
+	TEST_ASSERT_EQUAL(62, decoded_cw);
+	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, dec.bits_consumed);
 
 	/* log2_m = 31 test */
-	log2_m = 31;
+	log2_m = 31; m = 1U << log2_m;
 
-	code_word = 0;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = 0;
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed);
 	TEST_ASSERT_EQUAL(0, decoded_cw);
 
-	code_word = 1;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be64(0x0000000100000000);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed);
 	TEST_ASSERT_EQUAL(1, decoded_cw);
 
-	code_word = 0X7FFFFFFE;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be64(0x7FFFFFFE00000000);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed);
 	TEST_ASSERT_EQUAL(0X7FFFFFFE, decoded_cw);
 
-	code_word = 0X7FFFFFFD;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be64(0x7FFFFFFD00000000);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	decoded_cw = rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed);
 	TEST_ASSERT_EQUAL(0X7FFFFFFD, decoded_cw);
 
 	/* invalid code word (longer than 32 bit) */
-	code_word = 0X80000000;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, cw_len);
+	bitstream = cpu_to_be64(0x8000000000000000);
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	TEST_ASSERT_EQUAL_size_t(sizeof(bitstream), buf_size);
+	rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, dec.bits_consumed);
 
 #if 0
-this case is prevented by an assert
+/* this case is prevented by an assert */
 
 	/* test log_2 to big */
-	log2_m = 32;
-	code_word = 0x00000000;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(0, cw_len);
+	log2_m = 32; m = 1 << log2_m;
+	bitstream = 0x00000000;
+	buf_size = bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, dec.bits_consumed);
+
+	bitstream = cpu_to_be64(0xE000000000000000); /* 64-bit constant: the 32-bit swap would truncate it to 0 */
+	log2_m = 33; m = 1 << log2_m;
+	decoded_cw = rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, dec.bits_consumed);
 
-	code_word = 0xE0000000;
-	log2_m = 33;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(0, cw_len);
 
-	log2_m = UINT_MAX;
-	cw_len = rice_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(0, cw_len);
+	log2_m = UINT_MAX; m = 1 << log2_m;
+	decoded_cw = rice_decoder(&dec, m, log2_m);
+	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, dec.bits_consumed);
 #endif
 }
 
@@ -196,8 +388,8 @@ this case is prevented by an assert
 
 void test_golomb_decoder(void)
 {
-	unsigned int cw_len;
-	uint32_t code_word;
+	struct bit_decoder dec;
+	uint32_t bitstream;
 	unsigned int m;
 	unsigned int log2_m;
 	uint32_t decoded_cw;
@@ -206,174 +398,208 @@ void test_golomb_decoder(void)
 	m = 1;
 	log2_m = ilog_2(m);
 
-	code_word = 0;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(1, cw_len);
+	bitstream = 0;
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(1, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(0, decoded_cw);
 
-	code_word = 0x7FFF; /* 0b0111...11 */
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(1, cw_len);
+	bitstream = cpu_to_be32(0x7FFFFFFF); /* 0b0111...11 */
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(1, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(0, decoded_cw);
 
-	code_word = 0x80000000;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(2, cw_len);
+	bitstream = cpu_to_be32(0x80000000);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(2, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(1, decoded_cw);
 
-	code_word = 0xFFFFFFFE;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be32(0xFFFFFFFE);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(31, decoded_cw);
 
 	/* invalid code word (longer than 32 bit) */
-	code_word = 0xFFFFFFFF;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, cw_len);
+	bitstream = cpu_to_be32(0xFFFFFFFF); bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	golomb_decoder(&dec, m, log2_m); /* decoder re-initialised above, like every other case, so the all-ones word is what gets decoded */
+	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, dec.bits_consumed-32);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, bit_refill(&dec));
 
 
 	/* m = 2 test */
 	m = 2;
 	log2_m = ilog_2(m);
 
-	code_word = 0;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(2, cw_len);
+	bitstream = 0;
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(2, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(0, decoded_cw);
 
-	code_word = 0x40000000;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(2, cw_len);
+	bitstream = cpu_to_be32(0x40000000);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(2, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(1, decoded_cw);
 
-	code_word = 0XFFFFFFFC;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be32(0xFFFFFFFC);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(60, decoded_cw);
 
-	code_word = 0XFFFFFFFD;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be32(0xFFFFFFFD);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(61, decoded_cw);
 
 	/* invalid code word (longer than 32 bit) */
-	code_word = 0XFFFFFFFE;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, cw_len);
+	bitstream = cpu_to_be32(0xFFFFFFFE);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, dec.bits_consumed-32);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, bit_refill(&dec));
 
 
 	/* m = 3 test */
 	m = 3;
 	log2_m = ilog_2(m);
 
-	code_word = 0;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(2, cw_len);
+	bitstream = 0;
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(2, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(0, decoded_cw);
 
-	code_word = 0x40000000;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(3, cw_len);
+	bitstream = cpu_to_be32(0x40000000);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(3, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(1, decoded_cw);
 
-	code_word = 0x60000000;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(3, cw_len);
+	bitstream = cpu_to_be32(0x60000000);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(3, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(2, decoded_cw);
 
-	code_word = 0x80000000;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(3, cw_len);
+	bitstream = cpu_to_be32(0x80000000);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(3, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(3, decoded_cw);
 
-	code_word = 0xA0000000;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(4, cw_len);
+	bitstream = cpu_to_be32(0xA0000000);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(4, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(4, decoded_cw);
 
-	code_word = 0XFFFFFFFB;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be32(0xFFFFFFFB);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(89, decoded_cw);
 
-	code_word = 0XFFFFFFFC;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be32(0xFFFFFFFC);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(90, decoded_cw);
 
 	/* invalid code word (longer than 32 bit) */
-	code_word = 0XFFFFFFFD;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, cw_len);
+	bitstream = cpu_to_be32(0xFFFFFFFD);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, dec.bits_consumed-32);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, bit_refill(&dec));
+
 
 
 	/* m = 0x7FFFFFFF test */
 	m = 0x7FFFFFFF;
 	log2_m = ilog_2(m);
 
-	code_word = 0;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(31, cw_len);
+	bitstream = 0;
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(31, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(0, decoded_cw);
 
-	code_word = 0x2;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be32(0x2);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(1, decoded_cw);
 
-	code_word = 0X7FFFFFFF;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
-	TEST_ASSERT_EQUAL(0X7FFFFFFE, decoded_cw);
+	bitstream = cpu_to_be32(0x7FFFFFFF);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed-32);
+	TEST_ASSERT_EQUAL(0x7FFFFFFE, decoded_cw);
 
-	code_word = 0X80000000;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
-	TEST_ASSERT_EQUAL(0X7FFFFFFF, decoded_cw);
+	bitstream = cpu_to_be32(0x80000000);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed-32);
+	TEST_ASSERT_EQUAL(0x7FFFFFFF, decoded_cw);
 
 	/* invalid code word (longer than 32 bit) */
-	code_word = 0X80000001;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, cw_len);
+	bitstream = cpu_to_be32(0X80000001);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, dec.bits_consumed-32);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, bit_refill(&dec));
 
 
 	/* m = 0x80000000 test */
 	m = 0x80000000;
 	log2_m = ilog_2(m);
 
-	code_word = 0;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = 0;
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(0, decoded_cw);
 
-	code_word = 1;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
+	bitstream = cpu_to_be32(1);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed-32);
 	TEST_ASSERT_EQUAL(1, decoded_cw);
 
-	code_word = 0X7FFFFFFE;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
-	TEST_ASSERT_EQUAL(0X7FFFFFFE, decoded_cw);
+	bitstream = cpu_to_be32(0x7FFFFFFE);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed-32);
+	TEST_ASSERT_EQUAL(0x7FFFFFFE, decoded_cw);
 
-	code_word = 0X7FFFFFFD;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(32, cw_len);
-	TEST_ASSERT_EQUAL(0X7FFFFFFD, decoded_cw);
+	bitstream = cpu_to_be32(0x7FFFFFFD);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(32, dec.bits_consumed-32);
+	TEST_ASSERT_EQUAL(0x7FFFFFFD, decoded_cw);
 
 	/* invalid code word (longer than 32 bit) */
-	code_word = 0X80000000;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, cw_len);
+	bitstream = cpu_to_be32(0x80000000);
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
+	golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_GREATER_THAN_UINT(MAX_VALID_CW_LEM, dec.bits_consumed-32);
+	TEST_ASSERT_EQUAL_INT(BIT_OVERFLOW, bit_refill(&dec));
 
 #if 0
 	this case is prevented by an assert
 
 	/* test log_2 to big */
-	code_word = 0x00000000;
+	bitstream = 0x00000000;
+	bit_init_decoder(&dec, &bitstream, sizeof(bitstream));
 	log2_m = 33;
-	cw_len = golomb_decoder(code_word, m, log2_m, &decoded_cw);
-	TEST_ASSERT_EQUAL(0, cw_len);
+	decoded_cw = golomb_decoder(&dec, m, log2_m);
+	TEST_ASSERT_EQUAL(0, dec.bits_consumed-32);
 #endif
 }
 
@@ -389,7 +615,7 @@ void test_select_decoder(void)
 
 	golomb_par = 1;
 	decoder = select_decoder(golomb_par);
-	TEST_ASSERT_EQUAL(rice_decoder, decoder);
+	TEST_ASSERT_EQUAL(unary_decoder, decoder);
 
 	golomb_par = 0x80000000;
 	decoder = select_decoder(golomb_par);
@@ -413,273 +639,74 @@ this case is prevented by an assert
 
 
 /**
- * @test get_n_bits32
- */
-
-void test_get_n_bits32(void)
-{
-	int bit_pos;
-	uint32_t read_value;
-	unsigned int n_bits;
-	int bit_offset;
-	uint32_t test_data_1[2] = {~0U, ~0U};
-	uint32_t test_data_2[2] = {0, 0};
-	uint32_t test_data_endianness[5]= {0};
-	unsigned int max_stream_len;
-
-	/* init test_data_endianness with big endian data */
-	test_data_endianness[0] = cpu_to_be32(0x01020304);
-	test_data_endianness[1] = cpu_to_be32(0x05060708);
-	test_data_endianness[2] = cpu_to_be32(0x090A0B0C);
-	test_data_endianness[3] = cpu_to_be32(0x0D0E0F10);
-	test_data_endianness[4] = cpu_to_be32(0x11121314);
-
-	/*  read 1 bit  */
-	/* left boarder */
-	read_value = ~0U;
-	n_bits = 1;
-	bit_offset = 0;
-	max_stream_len = 32;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_1, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(1, bit_pos);
-	TEST_ASSERT_EQUAL_HEX32(1, read_value);
-
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_2, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(1, bit_pos);
-	TEST_ASSERT_EQUAL_HEX32(0, read_value);
-
-	/* right boarder */
-	read_value = ~0U;
-	n_bits = 1;
-	bit_offset = 31;
-	max_stream_len = 32;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_1, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(32, bit_pos);
-	TEST_ASSERT_EQUAL_HEX32(1, read_value);
-
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_2, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(32, bit_pos);
-	TEST_ASSERT_EQUAL_HEX32(0, read_value);
-
-	/*  read 32 bit unsegmented */
-	read_value = ~0U;
-	n_bits = 32;
-	bit_offset = 0;
-	max_stream_len = 32;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_1, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(32, bit_pos);
-	TEST_ASSERT_EQUAL_HEX32(0xFFFFFFFF, read_value);
-
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_2, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(32, bit_pos);
-	TEST_ASSERT_EQUAL_HEX32(0, read_value);
-
-	/*  read 32 bit segmented */
-	read_value = ~0U;
-	n_bits = 32;
-	bit_offset = 16;
-	max_stream_len = 64;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_1, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(48, bit_pos);
-	TEST_ASSERT_EQUAL_HEX32(0xFFFFFFFF, read_value);
-
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_2, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(48, bit_pos);
-	TEST_ASSERT_EQUAL_HEX32(0, read_value);
-
-	/*  middle, read 2 bits  */
-	read_value = ~0U;
-	n_bits = 2;
-	bit_offset = 3;
-	max_stream_len = 32;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_1, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(5, bit_pos);
-	TEST_ASSERT_EQUAL_HEX32(0x3, read_value);
-
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_2, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(5, bit_pos);
-	TEST_ASSERT_EQUAL_HEX32(0, read_value);
-
-	/* read 5 bits, unsegmented ***/
-
-		/* left border, write 0 */
-
-	/* test endianness swap */
-	read_value = ~0U;
-	bit_offset = 0;
-	max_stream_len = 160;
-	n_bits = 4;
-	bit_offset = get_n_bits32(&read_value, n_bits, bit_offset, test_data_endianness, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(4, bit_offset);
-	TEST_ASSERT_EQUAL_HEX32(0x0, read_value);
-
-	n_bits = 8;
-	bit_offset = get_n_bits32(&read_value, n_bits, bit_offset, test_data_endianness, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(12, bit_offset);
-	TEST_ASSERT_EQUAL_HEX32(0x10, read_value);
-
-	n_bits = 12;
-	bit_offset = get_n_bits32(&read_value, n_bits, bit_offset, test_data_endianness, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(24, bit_offset);
-	TEST_ASSERT_EQUAL_HEX32(0x203, read_value);
-
-	n_bits = 16;
-	bit_offset = get_n_bits32(&read_value, n_bits, bit_offset, test_data_endianness, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(40, bit_offset);
-	TEST_ASSERT_EQUAL_HEX32(0x0405, read_value);
-
-	n_bits = 20;
-	bit_offset = get_n_bits32(&read_value, n_bits, bit_offset, test_data_endianness, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(60, bit_offset);
-	TEST_ASSERT_EQUAL_HEX32(0x06070, read_value);
-
-	n_bits = 24;
-	bit_offset = get_n_bits32(&read_value, n_bits, bit_offset, test_data_endianness, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(84, bit_offset);
-	TEST_ASSERT_EQUAL_HEX32(0x8090A0, read_value);
-
-	n_bits = 28;
-	bit_offset = get_n_bits32(&read_value, n_bits, bit_offset, test_data_endianness, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(112, bit_offset);
-	TEST_ASSERT_EQUAL_HEX32(0xB0C0D0E, read_value);
-
-	n_bits = 32;
-	bit_offset = get_n_bits32(&read_value, n_bits, bit_offset, test_data_endianness, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(144, bit_offset);
-	TEST_ASSERT_EQUAL_HEX32(0x0F101112, read_value);
-
-	/* read too match */
-	n_bits = 17;
-	bit_offset = get_n_bits32(&read_value, n_bits, bit_offset, test_data_endianness, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(CMP_ERROR_SMALL_BUF, bit_offset);
-
-
-	/* test error cases */
-
-	/* bit_offset lager than max_stream_len */
-	read_value = ~0U;
-	n_bits = 1;
-	max_stream_len = 32;
-	bit_offset = 33;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_1, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(CMP_ERROR_SMALL_BUF, bit_pos);
-
-	/* max_stream_len is 0 */
-	bit_offset = 0;
-	n_bits = 1;
-	max_stream_len = 0;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_1, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(CMP_ERROR_SMALL_BUF, bit_pos);
-
-	/* overflow test */
-	bit_offset = INT_MAX;
-	n_bits = 5;
-	max_stream_len = 64;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_1, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(CMP_ERROR_SMALL_BUF, bit_pos);
-
-#if 0
-this case is prevented by an assert
-
-	/* try to read 0 Bits */
-	read_value = ~0U;
-	n_bits = 0;
-	bit_offset = 0;
-	max_stream_len = sizeof(bitstream) * CHAR_BIT;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, bitstream, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(0, bit_pos);
-	TEST_ASSERT_EQUAL_HEX32(0, read_value);
-
-
-	/* test pointer to read value is NULL */
-	read_value = ~0U;
-	n_bits = 2;
-	bit_offset = 3;
-	max_stream_len = 32;
-
-	bit_pos = get_n_bits32(NULL, n_bits, bit_offset, test_data_1, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(-1, bit_pos);
-
-	/* n_bits = 0 */
-	n_bits = 0;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_1, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(-1, bit_pos);
-
-	/* n_bits = 33 */
-	n_bits = 33;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_1, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(-1, bit_pos);
-
-	/* negative bit_offset */
-	bit_offset = -1;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, test_data_1, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(-1, bit_pos);
-
-	/* bitstream address = NULL */
-	bit_offset = 3;
-	bit_pos = get_n_bits32(&read_value, n_bits, bit_offset, NULL, max_stream_len);
-	TEST_ASSERT_EQUAL_INT(-1, bit_pos);
-#endif
-
-}
-
-
-/**
- * @test decode_normal
+ * @test decode_zero
  */
 
-void test_decode_normal(void)
+void test_decode_zero(void)
 {
 	uint32_t decoded_value = ~0U;
-	int stream_pos, stream_pos_exp, sample;
-	 /* compressed data from 0 to 6; */
-	uint32_t cmp_data[1] = {0x5BBDF7E0};
+	uint64_t cmp_data = {0x88449FC000800000};
+	struct bit_decoder dec = {0};
 	struct decoder_setup setup = {0};
+	uint32_t spillover = 8;
+	int err;
 
-	cpu_to_be32s(cmp_data); /* compressed data are stored in big-endian */
-
-	setup.decode_cw_f = rice_decoder;
-	setup.encoder_par1 = 1;
-	setup.encoder_par2 = ilog_2(setup.encoder_par1);
-	setup.bitstream_adr = cmp_data;
-	setup.max_stream_len = 28;
-	setup.max_cw_len = 16;
-
-	stream_pos = 0;
-	stream_pos_exp = 0;
-	for (sample = 0; sample < 7; sample++) {
-		stream_pos_exp += sample+1;
-		stream_pos = decode_normal(&decoded_value, stream_pos, &setup);
-		TEST_ASSERT_EQUAL_INT(stream_pos_exp, stream_pos);
-		TEST_ASSERT_EQUAL_HEX(sample, decoded_value);
-	}
-
-
-	/* error cases*/
-	/* error exactly reading over max_stream_len*/
-	stream_pos = decode_normal(&decoded_value, stream_pos, &setup);
-	TEST_ASSERT_EQUAL_INT(CMP_ERROR_SMALL_BUF, stream_pos);
-
-	/* TODO: error non exactly reading over max_stream_len*/
-
-
-	stream_pos = 0;
-	cmp_data[0] = cpu_to_be32(0xFFFF0000); /* not a valid code word for max_cw_len = 16 */
-	setup.max_stream_len = 32;
-	stream_pos = decode_normal(&decoded_value, stream_pos, &setup);
-	TEST_ASSERT_EQUAL_INT(-1, stream_pos);
+	cpu_to_be64s(&cmp_data);
+	bit_init_decoder(&dec, &cmp_data, sizeof(cmp_data));
+	configure_decoder_setup(&setup, &dec, CMP_MODE_DIFF_ZERO, 1, spillover, CMP_LOSSLESS, 16);
 
-	stream_pos = 0;
-	cmp_data[0] = cpu_to_be32(0xFFFFFFFF); /* not a valid bitstream */
-	setup.max_stream_len = 32;
-	setup.max_cw_len = 32;
-	stream_pos = decode_normal(&decoded_value, stream_pos, &setup);
-	TEST_ASSERT_EQUAL_INT(-1, stream_pos);
 
-	/* stream_pos = 0; */
-	/* stream_pos = decode_normal(&decoded_value, stream_pos, &setup); */
+	err = decode_zero(&setup, &decoded_value);
+	TEST_ASSERT_EQUAL_HEX(0, decoded_value);
+	TEST_ASSERT_FALSE(err);
+	err = decode_zero(&setup, &decoded_value);
+	TEST_ASSERT_EQUAL_HEX(0x4223, decoded_value);
+	TEST_ASSERT_FALSE(err);
+	err = decode_zero(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
+	TEST_ASSERT_EQUAL_HEX(6, decoded_value);
+	err = decode_zero(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
+	TEST_ASSERT_EQUAL_HEX(7, decoded_value);
+	err = decode_zero(&setup, &decoded_value);
+	TEST_ASSERT_EQUAL_HEX(0xFFFF, decoded_value);
+	TEST_ASSERT_FALSE(err);
+	TEST_ASSERT_EQUAL_INT(BIT_END_OF_BUFFER, bit_refill(&dec));
+
+	/* error case: read over the cmp_data buffer 1 */
+	err = decode_zero(&setup, &decoded_value);
+	TEST_ASSERT_TRUE(err);
+
+	/* error case: read over the cmp_data buffer 2 */
+	cmp_data = cpu_to_be64(0x0001000000000000); /* 8 encoded > spill_over */
+	bit_init_decoder(&dec, &cmp_data, sizeof(cmp_data));
+	bit_consume_bits(&dec, 64);
+	err = decode_zero(&setup, &decoded_value);
+	TEST_ASSERT_TRUE(err);
+
+	 /* error case: decoded value larger than the outlier parameter */
+	cmp_data = cpu_to_be64(0xFF00000000000000); /* 8 encoded >= spill_over */
+	bit_init_decoder(&dec, &cmp_data, sizeof(cmp_data));
+	err = decode_zero(&setup, &decoded_value);
+	TEST_ASSERT_TRUE(err);
+	/* this should work */
+	cmp_data = cpu_to_be64(0xFE00000000000000); /* 7 encoded < spill_over */
+	bit_init_decoder(&dec, &cmp_data, sizeof(cmp_data));
+	err = decode_zero(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
+	TEST_ASSERT_EQUAL_HEX(6, decoded_value);
 
-	/* reading over compressed data length */
+	/* error case: value after escape symbol smaller than spillover */
+	cmp_data = cpu_to_be64(0x003000000000000); /* 0 encoded + 6 unencoded < spill_over */
+	bit_init_decoder(&dec, &cmp_data, sizeof(cmp_data));
+	err = decode_zero(&setup, &decoded_value);
+	TEST_ASSERT_TRUE(err);
+	/* this should work */
+	cmp_data = cpu_to_be64(0x004000000000000); /* 0 encoded + 7 unencoded < spill_over */
+	bit_init_decoder(&dec, &cmp_data, sizeof(cmp_data));
+	err = decode_zero(&setup, &decoded_value);
+	TEST_ASSERT_EQUAL_HEX(7, decoded_value);
+	TEST_ASSERT_FALSE(err);
 }
 
 
@@ -687,36 +714,27 @@ void test_decode_normal(void)
  * @test decode_zero
  */
 
-void test_decode_zero(void)
+void test_zero_refill_needed(void)
 {
 	uint32_t decoded_value = ~0U;
-	int stream_pos;
-	uint32_t cmp_data[] = {0x88449FE0};
+	uint64_t cmp_data[] = {0x0000000200000003, 0xFFFFFFFC00000000};
+	struct bit_decoder dec = {0};
 	struct decoder_setup setup = {0};
-	struct cmp_cfg cfg = {0};
+	uint32_t spillover = 8;
+	uint32_t m = 1<<30;
 	int err;
 
-	cpu_to_be32s(cmp_data);
-
-	cfg.data_type = DATA_TYPE_IMAGETTE;
-	cfg.cmp_mode = CMP_MODE_DIFF_ZERO;
-	cfg.icu_output_buf = cmp_data;
-	cfg.buffer_length = 4;
-
-	err = configure_decoder_setup(&setup, 1, 8, CMP_LOSSLESS, 16, &cfg);
-	TEST_ASSERT_FALSE(err);
-
-	stream_pos = 0;
+	cpu_to_be64s(&cmp_data[0]);
+	cpu_to_be64s(&cmp_data[1]);
+	bit_init_decoder(&dec, cmp_data, sizeof(cmp_data));
+	configure_decoder_setup(&setup, &dec, CMP_MODE_DIFF_ZERO, m, spillover, CMP_LOSSLESS, 32);
 
-	stream_pos = decode_zero(&decoded_value, stream_pos, &setup);
+	err = decode_zero(&setup, &decoded_value);
 	TEST_ASSERT_EQUAL_HEX(0, decoded_value);
-	stream_pos = decode_zero(&decoded_value, stream_pos, &setup);
-	TEST_ASSERT_EQUAL_HEX(0x4223, decoded_value);
-	stream_pos = decode_zero(&decoded_value, stream_pos, &setup);
-	TEST_ASSERT_EQUAL_HEX(7, decoded_value);
-	TEST_ASSERT_EQUAL_INT(28, stream_pos);
-
-	 /* TODO error case: negative stream_pos */
+	TEST_ASSERT_FALSE(err);
+	err = decode_zero(&setup, &decoded_value);
+	TEST_ASSERT_EQUAL_HEX(0xFFFFFFFE, decoded_value);
+	TEST_ASSERT_FALSE(err);
 }
 
 
@@ -727,103 +745,149 @@ void test_decode_zero(void)
 void test_decode_multi(void)
 {
 	uint32_t decoded_value = ~0U;
-	int stream_pos;
 	uint32_t cmp_data[] = {0x16B66DF8, 0x84360000};
 	struct decoder_setup setup = {0};
-	struct cmp_cfg cfg = {0};
+	struct bit_decoder dec = {0};
 	int err;
 
 	cpu_to_be32s(&cmp_data[0]);
 	cpu_to_be32s(&cmp_data[1]);
 
-	cfg.data_type = DATA_TYPE_IMAGETTE;
-	cfg.cmp_mode = CMP_MODE_DIFF_MULTI;
-	cfg.icu_output_buf = cmp_data;
-	cfg.buffer_length = 8;
-
-	err = configure_decoder_setup(&setup, 3, 8, CMP_LOSSLESS, 16, &cfg);
-	TEST_ASSERT_FALSE(err);
+	bit_init_decoder(&dec, cmp_data, sizeof(cmp_data));
+	configure_decoder_setup(&setup, &dec, CMP_MODE_DIFF_MULTI, 3, 8, CMP_LOSSLESS, 16);
 
-	stream_pos = 0;
 
-	stream_pos = decode_multi(&decoded_value, stream_pos, &setup);
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
 	TEST_ASSERT_EQUAL_HEX(0, decoded_value);
-	stream_pos = decode_multi(&decoded_value, stream_pos, &setup);
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
 	TEST_ASSERT_EQUAL_HEX(1, decoded_value);
-	stream_pos = decode_multi(&decoded_value, stream_pos, &setup);
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
 	TEST_ASSERT_EQUAL_HEX(7, decoded_value);
-	stream_pos = decode_multi(&decoded_value, stream_pos, &setup);
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
 	TEST_ASSERT_EQUAL_HEX(8, decoded_value);
-	stream_pos = decode_multi(&decoded_value, stream_pos, &setup);
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
 	TEST_ASSERT_EQUAL_HEX(9, decoded_value);
-	stream_pos = decode_multi(&decoded_value, stream_pos, &setup);
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
 	TEST_ASSERT_EQUAL_HEX(0x4223, decoded_value);
-	TEST_ASSERT_EQUAL_INT(47, stream_pos);
-
+	/* TEST_ASSERT_EQUAL_INT(47, stream_pos); */
 
 	/* error cases unencoded_len > 32 */
-	cfg.data_type = DATA_TYPE_IMAGETTE;
-	cfg.cmp_mode = CMP_MODE_DIFF_MULTI;
-	cfg.icu_output_buf = cmp_data;
-	cfg.buffer_length = 8;
-
-	err = configure_decoder_setup(&setup, 3, 8, CMP_LOSSLESS, 16, &cfg);
-	TEST_ASSERT_FALSE(err);
 
+	configure_decoder_setup(&setup, &dec, CMP_MODE_DIFF_MULTI, 3, 8, CMP_LOSSLESS, 16);
 
 	/* 0xFF -> 24 = spill(8)+16 -> unencoded_len = 34 bits */
-	stream_pos = 0;
-	cmp_data[0] = 0xFF000000;
-	cmp_data[1] = 0x00000000;
-	cpu_to_be32s(&cmp_data[0]);
-	cpu_to_be32s(&cmp_data[1]);
+	cmp_data[0] = cpu_to_be32(0xFF000000);
+	cmp_data[1] = cpu_to_be32(0x00000000);
+	bit_init_decoder(&dec, cmp_data, sizeof(cmp_data));
 
-	stream_pos = decode_multi(&decoded_value, stream_pos, &setup);
-	TEST_ASSERT_EQUAL_INT(-1, stream_pos);
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_EQUAL_INT(-1, err);
 
 
 	/* 0xFA -> 16 = spill(8)+8 -> unencoded_len = 17 bits -> larger than
 	 * 16 bit max_used_bits*/
-	stream_pos = 0;
-	cmp_data[0] = 0xFA000000;
-	cmp_data[1] = 0x00000000;
-	cpu_to_be32s(&cmp_data[0]);
-	cpu_to_be32s(&cmp_data[1]);
+	cmp_data[0] = cpu_to_be32(0xFA000000);
+	cmp_data[1] = cpu_to_be32(0x00000000);
+	bit_init_decoder(&dec, cmp_data, sizeof(cmp_data));
 
-	stream_pos = decode_multi(&decoded_value, stream_pos, &setup);
-	TEST_ASSERT_EQUAL_INT(-1, stream_pos);
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_EQUAL_INT(-1, err);
 
 	/* this should work */
-	stream_pos = 0;
-	cmp_data[0] = 0xF9000200;
-	cmp_data[1] = 0x00000000;
-	cpu_to_be32s(&cmp_data[0]);
-	cpu_to_be32s(&cmp_data[1]);
-	stream_pos = decode_multi(&decoded_value, stream_pos, &setup);
-	TEST_ASSERT_EQUAL_INT(7+16, stream_pos);
+	cmp_data[0] = cpu_to_be32(0xF9000200);
+	cmp_data[1] = cpu_to_be32(0x00000000);
+	bit_init_decoder(&dec, cmp_data, sizeof(cmp_data));
+
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
+	/* TEST_ASSERT_EQUAL_INT(7+16, stream_pos); */
 	TEST_ASSERT_EQUAL_HEX(0x8001+8, decoded_value);
 
 	/* error cases unencoded_val is not plausible */
 	/* unencoded_len = 4; unencoded_val =0b0011 */
-	stream_pos = 0;
-	cmp_data[0] = 0xEC000000;
-	cmp_data[1] = 0x00000000;
-	cpu_to_be32s(&cmp_data[0]);
-	cpu_to_be32s(&cmp_data[1]);
-	stream_pos = decode_multi(&decoded_value, stream_pos, &setup);
-	TEST_ASSERT_EQUAL_INT(-1, stream_pos);
+	cmp_data[0] = cpu_to_be32(0xEC000000);
+	cmp_data[1] = cpu_to_be32(0x00000000);
+	bit_init_decoder(&dec, cmp_data, sizeof(cmp_data));
+
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_EQUAL_INT(-1, err);
 
 	/* unencoded_len = 16; unencoded_val =0x3FFF */
-	stream_pos = 0;
-	cmp_data[0] = 0xF87FFE00;
-	cmp_data[1] = 0x00000000;
-	cpu_to_be32s(&cmp_data[0]);
-	cpu_to_be32s(&cmp_data[1]);
-	stream_pos = decode_multi(&decoded_value, stream_pos, &setup);
-	TEST_ASSERT_EQUAL_INT(-1, stream_pos);
+	cmp_data[0] = cpu_to_be32(0xF87FFE00);
+	cmp_data[1] = cpu_to_be32(0x00000000);
+	bit_init_decoder(&dec, cmp_data, sizeof(cmp_data));
+
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_EQUAL_INT(-1, err);
+	/* unencoded_len = 16; unencoded_val =0x3FFF */
+	cmp_data[0] = cpu_to_be32(0xF87FFE00);
+	cmp_data[1] = cpu_to_be32(0x00000000);
+	bit_init_decoder(&dec, cmp_data, sizeof(cmp_data));
+
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_EQUAL_INT(-1, err);
+
+	/* decoded value smaller than outlier */
+	cmp_data[0] = cpu_to_be32(0xF9FFFE00);
+	cmp_data[1] = cpu_to_be32(0x00000000);
+	bit_init_decoder(&dec, cmp_data, sizeof(cmp_data));
+
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_EQUAL_INT(-1, err);
 }
 
 
+/**
+ * @test decode_multi
+ */
+
+void test_multi_refill_needed(void)
+{
+	uint32_t decoded_value = ~0U;
+	uint8_t cmp_data[] = {0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xF7, 0xFF, 0xFF, 0xFF, 0xFF, 0x00};
+	uint32_t cmp_data2[2];
+	struct bit_decoder dec = {0};
+	struct decoder_setup setup = {0};
+	uint32_t spillover = 16;
+	uint32_t m = 1;
+	int err;
+
+	bit_init_decoder(&dec, cmp_data, sizeof(cmp_data));
+	configure_decoder_setup(&setup, &dec, CMP_MODE_DIFF_ZERO, m, spillover, CMP_LOSSLESS, 32);
+
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
+	TEST_ASSERT_EQUAL_HEX(0, decoded_value);
+	/* this only works with a 2nd refill */
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
+	TEST_ASSERT_EQUAL_HEX(0xFFFFFFFF, decoded_value);
+	/* 2nd refill should fail */
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_TRUE(err);
+
+
+	/* decoded value smaller than outlier */
+	configure_decoder_setup(&setup, &dec, CMP_MODE_DIFF_ZERO, m, spillover, CMP_LOSSLESS, 16);
+	cmp_data2[0] = cpu_to_be32(0xFF7FFFFF);
+	cmp_data2[1] = cpu_to_be32(0x7FFF8000);
+	bit_init_decoder(&dec, cmp_data2, 6); /* bitstream is too short */
+
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_FALSE(err);
+	TEST_ASSERT_EQUAL_HEX(8, decoded_value);
+
+	/* 2nd refill should fail and the outlier is smaller than the outlier trigger */
+	err = decode_multi(&setup, &decoded_value);
+	TEST_ASSERT_EQUAL_INT(CORRUPTION_DETECTED, err);
+}
+
 /**
  * @test re_map_to_pos
  */
@@ -918,7 +982,7 @@ size_t icu_compress_data_entity(struct cmp_entity *ent, const struct cmp_cfg *cf
 	/* XXX overwrite the size of the compression entity with the size of the actual
 	 * size of the compressed data; not all allocated memory is normally used */
 	s = cmp_ent_create(ent, cfg->data_type, cfg->cmp_mode == CMP_MODE_RAW,
-			   cmp_bit_to_4byte((unsigned int)cmp_size_bits));
+			   cmp_bit_to_byte((unsigned int)cmp_size_bits));
 
 	if (cmp_ent_write_cmp_pars(ent, cfg, cmp_size_bits))
 		return 0;
@@ -981,7 +1045,8 @@ void test_decompress_imagette_model(void)
 	uint16_t up_model[5]  = {0};
 	uint32_t cmp_data[] = {0};
 	struct cmp_cfg cfg = {0};
-	int stream_pos;
+	struct bit_decoder dec;
+	int err;
 
 	cmp_data[0] = cpu_to_be32(0x49240000);
 
@@ -998,8 +1063,11 @@ void test_decompress_imagette_model(void)
 	cfg.spill = 48;
 	cfg.max_used_bits = &MAX_USED_BITS_SAFE;
 
-	stream_pos = decompress_imagette(&cfg);
-	TEST_ASSERT_EQUAL_INT(15, stream_pos);
+	bit_init_decoder(&dec, cfg.icu_output_buf, cfg.buffer_length);
+
+	err = decompress_imagette(&cfg, &dec);
+	/* TEST_ASSERT_EQUAL_INT(15, stream_pos); */
+	TEST_ASSERT_FALSE(err);
 	TEST_ASSERT_EQUAL_HEX(1, data[0]);
 	TEST_ASSERT_EQUAL_HEX(2, data[1]);
 	TEST_ASSERT_EQUAL_HEX(3, data[2]);
@@ -1069,7 +1137,8 @@ void test_cmp_decmp_s_fx_diff(void)
 	decmp_size = decompress_cmp_entiy(ent, NULL, NULL, decompressed_data);
 	TEST_ASSERT_EQUAL_INT(cmp_cal_size_of_data(cfg.samples, cfg.data_type), decmp_size);
 
-	TEST_ASSERT_FALSE(memcmp(data_to_compress, decompressed_data, (size_t)decmp_size));
+	TEST_ASSERT_EQUAL_HEX8_ARRAY(data_to_compress, decompressed_data, decmp_size);
+	/* TEST_ASSERT_FALSE(memcmp(data_to_compress, decompressed_data, (size_t)decmp_size)); */
 	/* for (i = 0; i < samples; ++i) { */
 	/* 	printf("%u == %u (round: %u)\n", data[i], decompressed_data[i], round); */
 	/* 	uint32_t mask = ~0U << round; */
@@ -1741,7 +1810,6 @@ void test_cmp_ent_write_cmp_pars(void)
  */
 
 void test_cmp_ent_read_header_error_cases(void)
-
 {
 	int error;
 	uint32_t size;
@@ -1809,6 +1877,6 @@ void test_cmp_ent_read_header_error_cases(void)
 
 void test_decompression_error_cases(void)
 {
-	/* error cases model decompression without a model Buffer */
-	/* error cases wrong cmp parameter; model value; usw */
+	/* TODO: error cases model decompression without a model Buffer */
+	/* TODO: error cases wrong cmp parameter; model value; usw */
 }
-- 
GitLab