From 475c3e63defe0a3bccd526083b1827091b7bcd15 Mon Sep 17 00:00:00 2001
From: Dominik Loidolt <dominik.loidolt@univie.ac.at>
Date: Thu, 24 Oct 2024 18:00:47 +0200
Subject: [PATCH] Feature: add chunk-specific compression parameter guessing

- Add chunk-specific compression guessing functionality
- Support both RDCU and chunk data in guess operation
- Add compression parameter file I/O functionality

This change expands the compression parameter guessing system to handle
chunk data in addition to RDCU data. The new functionality helps users
find optimal compression parameters for different types of chunk data.
---
 README.md                                  |  39 ++--
 programs/cmp_guess.c                       | 223 +++++++++++++++++++--
 programs/cmp_guess.h                       |   6 +
 programs/cmp_io.c                          | 154 +++++++++++++-
 programs/cmp_io.h                          |   4 +
 programs/cmp_tool.c                        | 127 +++++++++---
 test/cmp_tool/cmp_tool_integration_test.py |   4 +-
 7 files changed, 490 insertions(+), 67 deletions(-)

diff --git a/README.md b/README.md
index 0051d3b..5dad24c 100644
--- a/README.md
+++ b/README.md
@@ -51,30 +51,37 @@ The generated packets can be found in the `TC_FILES` directory.
 | `-i <file>` | File containing the decompression information (required if --no_header was used)|
 
 ### Guessing Options
+| Options                 | Description                                                            |
+|:------------------------|:-----------------------------------------------------------------------|
+| `--guess <rdcu\|chunk>` | Search for a good configuration to compress RDCU or chunk data         |
+| `-d <file>`             | File containing the data to be compressed                              |
+| `-m <file>`             | File containing the model of the data to be compressed                 |
+| `--guess_level <level>` | Set guess level to \<level\> (optional)<sup>[4](#fnote4)</sup>         |
 
-| Options                 | Description                                                                     |
-|:------------------------|:--------------------------------------------------------------------------------|
-| `--guess <mode>`        | Search for a good configuration for compression \<mode\><sup>[4](#fnote4)</sup> |
-| `-d <file>`             | File containing the data to be compressed                                       |
-| `-m <file>`             | File containing the model of the data to be compressed                          |
-| `--guess_level <level>` | Set guess level to \<level\> (optional)<sup>[5](#fnote5)</sup>                  |
-
-<a name="fnote4">4</a>) **NOTE:** \<mode\> can be either the compression mode
-number or the keyword: `RDCU`. The RDCU mode automatically selects the correct
-RDCU-compatible compression mode depending on if the Model (-m) option is set.  
-<a name="fnote5">5</a>) **Supported levels:** 
+<a name="fnote4">4</a>) **Supported levels:** 
 
 | guess level | Description                     |
 |:------------|:--------------------------------|
-| `1`         | fast mode (not implemented yet) |
+| `1`         | fast mode                       |
 | `2`         | default mode                    |
-| `3`         | brute force                     |
+| `3`         | slow mode (better results)      |
+
+Lower values increase step size (coarser search), while higher values decrease step size (finer search).
+
+#### Examples of Compression Parameter guessing:
+
+```bash
+# RDCU data compression guessing
+./cmp_tool --guess rdcu -d test_data/test_data1.dat -o rdcu_guess
 
-**Example of Compression Parameter guessing:**
+# Chunk mode guessing
+./cmp_tool --guess chunk -d chunk_data.dat -o chunk_guess
 
-``./cmp_tool --guess RDCU -d test_data/test_data1.dat -o myguess``
+# Custom guess level with model
+./cmp_tool --guess chunk -d chunk_data.dat -m chunk_model.dat --guess_level 3 -o chunk_guess_3
+```
 
-This command creates the file `myguess.cfg` with the guessed compression parameters.
+Each command writes the guessed parameters to a file named after the `-o` prefix: a `.cfg` file with the RDCU compression configuration in RDCU mode, or a `.par` file containing all chunk compression parameters in chunk mode.
 
 ### Data Format
 The input data is formatted as hexadecimal numbers.
diff --git a/programs/cmp_guess.c b/programs/cmp_guess.c
index cada782..14b231f 100644
--- a/programs/cmp_guess.c
+++ b/programs/cmp_guess.c
@@ -13,20 +13,25 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * @brief helps the user to find a good compression parameters for a given
- *	dataset
+ * @brief helps the user find good compression parameters for a given dataset
  * @warning this part of the software is not intended to run on-board on the ICU.
  */
 
 #include <limits.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
+#include <cmp_error.h>
+#include <cmp_debug.h>
+#include <leon_inttypes.h>
 #include <cmp_data_types.h>
+#include <cmp_support.h>
 #include <cmp_icu.h>
 #include <cmp_chunk.h>
+#include <cmp_chunk_type.h>
 #include <cmp_guess.h>
-#include <leon_inttypes.h>
 
 #define CMP_GUESS_MAX_CAL_STEPS 20274
 
@@ -36,8 +41,7 @@ static int num_model_updates = CMP_GUESS_N_MODEL_UPDATE_DEF;
 
 
 /**
- * @brief sets how often the model is updated before model reset for the
- * cmp_guess function
+ * @brief sets how often the model is updated before the model is reset
  * @note the default value is CMP_GUESS_N_MODEL_UPDATE_DEF
  * @note this is needed to guess a good model_value
  *
@@ -81,7 +85,6 @@ uint16_t cmp_guess_model_value(int n_model_updates)
  * @param cmp_mode	compression mode
  *
  * @returns a good spill parameter (optimal for zero escape mechanism)
- * @warning icu compression not support yet!
  */
 
 uint32_t cmp_rdcu_get_good_spill(unsigned int golomb_par, enum cmp_mode cmp_mode)
@@ -166,7 +169,7 @@ static uint32_t brute_force(struct rdcu_cfg *rcfg)
 	uint32_t spill_best = 0;
 	uint32_t percent;
 
-	/* short cut for zero escape mechanism */
+	/* shortcut for zero escape mechanism */
 	if (zero_escape_mech_is_used(rcfg->cmp_mode))
 		return pre_cal_method(rcfg);
 
@@ -244,10 +247,10 @@ static void add_rdcu_pars_internal(struct rdcu_cfg *rcfg)
 
 /**
  * @brief guess a good compression configuration
- * @details use the referenced in the rcfg struct (samples, input_buf, model_buf
- * (optional)) and the cmp_mode to find a good set of compression parameters
+ * @details use the samples, input_buf, model_buf and the cmp_mode in rcfg to
+ *	find a good set of compression parameters
  * @note compression parameters in the rcfg struct (golomb_par, spill, model_value,
- * ap1_.., ap2_.., buffer_length, ...) are overwritten by this function
+ *	ap1_.., ap2_.., buffer_length, ...) are overwritten by this function
  *
  * @param rcfg	RDCU compression configuration structure
  * @param level	guess_level 1 -> fast; 2 -> default; 3 -> slow(brute force)
@@ -275,7 +278,7 @@ uint32_t cmp_guess(struct rdcu_cfg *rcfg, int level)
 		return 0;
 	}
 	/* make a working copy of the input data (and model) because the
-	 * following function works inplace
+	 * following function works in-place
 	 */
 	work_rcfg = *rcfg;
 	work_rcfg.icu_new_model_buf = NULL;
@@ -296,15 +299,14 @@ uint32_t cmp_guess(struct rdcu_cfg *rcfg, int level)
 		cmp_size = brute_force(&work_rcfg);
 		break;
 	case 1:
-		printf("guess level 1 not implied yet use guess level 2\n");
+		printf("guess level 1 not implied for RDCU data, I use guess level 2\n");
 		/* fall through */
 	case 2:
 		cmp_size = pre_cal_method(&work_rcfg);
 		break;
 	default:
-		fprintf(stderr, "cmp_tool: guess level not supported!\n");
+		fprintf(stderr, "cmp_tool: guess level not supported for RDCU guess mode!\n");
 		goto error;
-		break;
 	}
 	if (!cmp_size)
 		goto error;
@@ -316,10 +318,8 @@ uint32_t cmp_guess(struct rdcu_cfg *rcfg, int level)
 
 	rcfg->model_value = cmp_guess_model_value(num_model_updates);
 
-	/* if (rdcu_support_data_type_is_used(rcfg->data_type)) */
-		add_rdcu_pars_internal(rcfg);
+	add_rdcu_pars_internal(rcfg);
 
-	/* TODO: check that for non-imagette data */
 	rcfg->buffer_length = ((cmp_size + 32)&~0x1FU)/(size_of_a_sample(DATA_TYPE_IMAGETTE)*8);
 
 	return cmp_size;
@@ -329,3 +329,192 @@ error:
 	return 0;
 }
 
+
+/**
+ * @brief get the next Golomb parameter value to try based on the guess level
+ *
+ * @param cur_g		current Golomb parameter value
+ * @param guess_level	determines the granularity of the parameter search;
+ *			higher values decrease step size (finer search),
+ *			lower/negative values increase step size (coarser search);
+ *			decremented by one, then clamped to [-31, 31]; default: 2
+ *
+ * @returns next Golomb parameter value to try; never equal to cur_g
+ */
+
+static uint32_t get_next_g_par(uint32_t cur_g, int guess_level)
+{
+	uint32_t result = cur_g;
+
+	guess_level--; /* level 1 -> shift of 0; default level 2 -> shift of 1 */
+
+	if (guess_level > 31)
+		guess_level = 31;
+
+	if (guess_level < -31)
+		guess_level = -31;
+
+
+	if (guess_level >= 0) /* add (1 << ilog_2(cur_g)), scaled down by the level */
+		result += (1U << ilog_2(cur_g)) >> guess_level;
+	else /* negative (after the decrement): multiply cur_g by 2^-guess_level */
+		result = cur_g << -guess_level;
+
+	if (result == cur_g) /* the step was zero: still make progress */
+		result++;
+
+	return result;
+}
+
+
+/**
+ * @brief search good type-specific compression parameters for a given chunk
+ *
+ * @param chunk		pointer to the chunk data to analyse
+ * @param chunk_size	size of the chunk in bytes
+ * @param chunk_model	pointer to the model data (can be NULL)
+ * @param cmp_par	compression parameters to optimise in place; lossy_par and model_value are overwritten
+ * @param guess_level	controls the granularity of the parameter search; 2 is the default
+ *
+ * @returns the smallest compressed size found during the search; error code on
+ *	failure
+ */
+
+static uint32_t cmp_guess_chunk_par(const void *chunk, uint32_t chunk_size,
+				    const void *chunk_model, struct cmp_par *cmp_par,
+				    int guess_level)
+{
+	uint32_t *param_ptrs[7] = {0}; /* NULL-terminated list of the parameters to tune */
+	uint32_t cmp_size_best = ~0U;
+	int i;
+
+	if (cmp_par->lossy_par)
+		debug_print("Warning: lossy compression is not supported for chunk compression, lossy_par will be ignored.");
+	cmp_par->lossy_par = 0;
+	cmp_par->model_value = cmp_guess_model_value(num_model_updates);
+
+	switch (cmp_col_get_chunk_type(chunk)) {
+	case CHUNK_TYPE_NCAM_IMAGETTE:
+		param_ptrs[0] = &cmp_par->nc_imagette;
+		break;
+	case CHUNK_TYPE_SAT_IMAGETTE:
+		param_ptrs[0] = &cmp_par->saturated_imagette;
+		break;
+	case CHUNK_TYPE_SHORT_CADENCE:
+		param_ptrs[0] = &cmp_par->s_exp_flags;
+		param_ptrs[1] = &cmp_par->s_fx;
+		param_ptrs[2] = &cmp_par->s_ncob;
+		param_ptrs[3] = &cmp_par->s_efx;
+		param_ptrs[4] = &cmp_par->s_ecob;
+		break;
+	case CHUNK_TYPE_LONG_CADENCE:
+		param_ptrs[0] = &cmp_par->l_exp_flags;
+		param_ptrs[1] = &cmp_par->l_fx;
+		param_ptrs[2] = &cmp_par->l_ncob;
+		param_ptrs[3] = &cmp_par->l_efx;
+		param_ptrs[4] = &cmp_par->l_ecob;
+		param_ptrs[5] = &cmp_par->l_fx_cob_variance;
+		break;
+	case CHUNK_TYPE_OFFSET_BACKGROUND:
+		param_ptrs[0] = &cmp_par->nc_offset_mean;
+		param_ptrs[1] = &cmp_par->nc_offset_variance;
+		param_ptrs[2] = &cmp_par->nc_background_mean;
+		param_ptrs[3] = &cmp_par->nc_background_variance;
+		param_ptrs[4] = &cmp_par->nc_background_outlier_pixels;
+		break;
+	case CHUNK_TYPE_SMEARING:
+		param_ptrs[0] = &cmp_par->smearing_mean;
+		param_ptrs[1] = &cmp_par->smearing_variance_mean;
+		param_ptrs[2] = &cmp_par->smearing_outlier_pixels;
+		break;
+	case CHUNK_TYPE_F_CHAIN:
+		param_ptrs[0] = &cmp_par->fc_imagette;
+		param_ptrs[1] = &cmp_par->fc_offset_mean;
+		param_ptrs[2] = &cmp_par->fc_offset_variance;
+		param_ptrs[3] = &cmp_par->fc_background_mean;
+		param_ptrs[4] = &cmp_par->fc_background_variance;
+		param_ptrs[5] = &cmp_par->fc_background_outlier_pixels;
+		break;
+	case CHUNK_TYPE_UNKNOWN: /* nothing to tune: both loops are skipped and ~0U is returned */
+	default: /*
+		  * default case never reached because cmp_col_get_chunk_type
+		  * returns CHUNK_TYPE_UNKNOWN if the type is unknown
+		  */
+		break;
+	}
+
+	/* init: start every parameter of this chunk type at 1 */
+	for (i = 0; param_ptrs[i] != NULL; i++)
+		*param_ptrs[i] = 1;
+
+	for (i = 0; param_ptrs[i] != NULL; i++) { /* tune one parameter at a time; earlier ones keep their best value */
+		uint32_t best_g = *param_ptrs[i];
+		uint32_t g;
+
+		for (g = MIN_NON_IMA_GOLOMB_PAR; g < MAX_NON_IMA_GOLOMB_PAR; g =  get_next_g_par(g, guess_level)) {
+			uint32_t cmp_size;
+
+			*param_ptrs[i] = g;
+			cmp_size = compress_chunk(chunk, chunk_size, chunk_model,
+						  NULL, NULL, 0, cmp_par);
+			FORWARD_IF_ERROR(cmp_size, "");
+			if (cmp_size < cmp_size_best) {
+				cmp_size_best = cmp_size;
+				best_g = g;
+			}
+		}
+		*param_ptrs[i] = best_g;
+	}
+
+	return cmp_size_best;
+}
+
+
+/**
+ * @brief estimate good compression parameters for the given chunk
+ * @details tries a *_ZERO and a *_MULTI compression mode (MODEL modes if
+ *	chunk_model is set, DIFF modes otherwise) and keeps the smaller result
+ *
+ * @param chunk		pointer to the chunk data to analyse
+ * @param chunk_size	size of the chunk in bytes
+ * @param chunk_model	pointer to the model data (can be NULL)
+ * @param cmp_par	pointer to where to store the optimized compression parameters
+ * @param guess_level	controls the granularity of the parameter search; 2 is the default
+ *
+ * @returns the compressed size reached with the chosen parameters; error code on failure
+ */
+
+uint32_t cmp_guess_chunk(const void *chunk, uint32_t chunk_size,
+			 const void *chunk_model, struct cmp_par *cmp_par,
+			 int guess_level)
+{
+	uint32_t cmp_size_zero, cmp_size_multi;
+	struct cmp_par cmp_par_zero;
+	struct cmp_par cmp_par_multi;
+
+	memset(&cmp_par_zero, 0, sizeof(cmp_par_zero));
+	memset(&cmp_par_multi, 0, sizeof(cmp_par_multi));
+
+	if (chunk_model) { /* a model is available: both candidates use it */
+		cmp_par_zero.cmp_mode = CMP_MODE_MODEL_ZERO;
+		cmp_par_multi.cmp_mode = CMP_MODE_MODEL_MULTI;
+	} else {
+		cmp_par_zero.cmp_mode = CMP_MODE_DIFF_ZERO;
+		cmp_par_multi.cmp_mode = CMP_MODE_DIFF_MULTI;
+	}
+	cmp_size_zero = cmp_guess_chunk_par(chunk, chunk_size, chunk_model,
+					    &cmp_par_zero, guess_level);
+	FORWARD_IF_ERROR(cmp_size_zero, "");
+
+	cmp_size_multi = cmp_guess_chunk_par(chunk, chunk_size, chunk_model,
+					     &cmp_par_multi, guess_level);
+	FORWARD_IF_ERROR(cmp_size_multi, "");
+
+	if (cmp_size_zero <= cmp_size_multi) { /* ties go to the zero mode */
+		*cmp_par = cmp_par_zero;
+		return cmp_size_zero;
+	}
+
+	*cmp_par = cmp_par_multi;
+	return cmp_size_multi;
+}
diff --git a/programs/cmp_guess.h b/programs/cmp_guess.h
index 3de2cb6..0893a9a 100644
--- a/programs/cmp_guess.h
+++ b/programs/cmp_guess.h
@@ -21,6 +21,7 @@
 #define CMP_GUESS_H
 
 #include <cmp_support.h>
+#include <cmp_chunk.h>
 
 
 #define DEFAULT_GUESS_LEVEL 2
@@ -34,6 +35,11 @@
 #define CMP_GUESS_N_MODEL_UPDATE_DEF	8
 
 uint32_t cmp_guess(struct rdcu_cfg *rcfg, int level);
+
+uint32_t cmp_guess_chunk(const void *chunk, uint32_t chunk_size,
+			 const void *chunk_model, struct cmp_par *cmp_par,
+			 int guess_level);
+
 void cmp_guess_set_model_updates(int n_model_updates);
 
 uint32_t cmp_rdcu_get_good_spill(unsigned int golomb_par, enum cmp_mode cmp_mode);
diff --git a/programs/cmp_io.c b/programs/cmp_io.c
index 9678fb6..0bfd803 100644
--- a/programs/cmp_io.c
+++ b/programs/cmp_io.c
@@ -474,6 +474,41 @@ const char *data_type2string(enum cmp_data_type data_type)
 }
 
 
+/**
+ * @brief compares two strings, ignoring the case of the letters 'A' to 'Z'
+ *
+ * @param s1	the first string to compare
+ * @param s2	the second string to compare
+ *
+ * @returns an integer greater than, equal to, or less than 0, according as s1
+ *	is lexicographically greater than, equal to, or less than s2 after
+ *	translation of each upper-case letter 'A'..'Z' to lower-case; no other
+ *	characters are folded.  The strings themselves are not modified.
+ */
+
+int case_insensitive_compare(const char* s1, const char* s2)
+{
+	size_t i;
+
+	for (i=0; ; ++i) {
+		unsigned int x1 = (unsigned char)s1[i];
+		unsigned int x2 = (unsigned char)s2[i];
+		int r;
+
+		if (x1 - 'A' < 26U) /* unsigned wrap-around: true only for 'A'..'Z' */
+			x1 += 32; /* tolower */
+		if (x2 - 'A' < 26U)
+			x2 += 32; /* tolower */
+
+		r = (int)x1 - (int)x2;
+		if (r)
+			return r;
+
+		if (!x1) return 0; /* r == 0 here, so both strings ended together */
+	}
+}
+
+
 /**
  * @brief parse a compression mode value string to an integer
  * @note string can be either a number or the name of the compression mode
@@ -511,7 +546,7 @@ int cmp_mode_parse(const char *cmp_mode_str, enum cmp_mode *cmp_mode)
 		size_t j;
 
 		for (j = 0; j < ARRAY_SIZE(conversion); j++) {
-			if (!strcmp(cmp_mode_str, conversion[j].str)) {
+			if (!case_insensitive_compare(cmp_mode_str, conversion[j].str)) {
 				*cmp_mode = conversion[j].cmp_mode;
 				return 0;
 			}
@@ -1794,3 +1829,120 @@ int cmp_info_to_file(const struct cmp_info *info, const char *output_prefix,
 
 	return 0;
 }
+
+
+/**
+ * @brief write all chunk compression parameters as "name = value" lines to a stream
+ * @note internal use only! Does nothing if fp is NULL.
+ *
+ * @param fp	FILE pointer of the stream to write to
+ * @param par	compression parameters struct to write; if NULL, only a notice is written
+ */
+
+static void write_cmp_par_internal(FILE *fp, const struct cmp_par *par)
+{
+	if (!fp)
+		return;
+
+	if (!par) {
+		fprintf(fp, "Pointer to the compression parameters is NULL.\n");
+		return;
+	}
+
+
+	fprintf(fp, "#-------------------------------------------------------------------------------\n");
+	fprintf(fp, "\n");
+	fprintf(fp, "# Chunk compression parameters\n");
+	fprintf(fp, "\n");
+	fprintf(fp, "#-------------------------------------------------------------------------------\n");
+
+	fprintf(fp, "cmp_mode = %d\n", par->cmp_mode);
+	fprintf(fp, "model_value = %" PRIu32 "\n", par->model_value);
+	fprintf(fp, "lossy_par = %" PRIu32 "\n", par->lossy_par);
+	fprintf(fp, "#-------------------------------------------------------------------------------\n");
+
+	fprintf(fp, "nc_imagette = %" PRIu32 "\n", par->nc_imagette);
+	fprintf(fp, "#-------------------------------------------------------------------------------\n");
+
+	fprintf(fp, "s_exp_flags = %" PRIu32 "\n", par->s_exp_flags);
+	fprintf(fp, "s_fx = %" PRIu32 "\n", par->s_fx);
+	fprintf(fp, "s_ncob = %" PRIu32 "\n", par->s_ncob);
+	fprintf(fp, "s_efx = %" PRIu32 "\n", par->s_efx);
+	fprintf(fp, "s_ecob = %" PRIu32 "\n", par->s_ecob);
+	fprintf(fp, "#-------------------------------------------------------------------------------\n");
+
+	fprintf(fp, "l_exp_flags = %" PRIu32 "\n", par->l_exp_flags);
+	fprintf(fp, "l_fx = %" PRIu32 "\n", par->l_fx);
+	fprintf(fp, "l_ncob = %" PRIu32 "\n", par->l_ncob);
+	fprintf(fp, "l_efx = %" PRIu32 "\n", par->l_efx);
+	fprintf(fp, "l_ecob = %" PRIu32 "\n", par->l_ecob);
+	fprintf(fp, "l_fx_cob_variance = %" PRIu32 "\n", par->l_fx_cob_variance);
+	fprintf(fp, "#-------------------------------------------------------------------------------\n");
+
+	fprintf(fp, "saturated_imagette = %" PRIu32 "\n", par->saturated_imagette);
+	fprintf(fp, "#-------------------------------------------------------------------------------\n");
+
+	fprintf(fp, "nc_offset_mean = %" PRIu32 "\n", par->nc_offset_mean);
+	fprintf(fp, "nc_offset_variance = %" PRIu32 "\n", par->nc_offset_variance);
+	fprintf(fp, "nc_background_mean = %" PRIu32 "\n", par->nc_background_mean);
+	fprintf(fp, "nc_background_variance = %" PRIu32 "\n", par->nc_background_variance);
+	fprintf(fp, "nc_background_outlier_pixels = %" PRIu32 "\n", par->nc_background_outlier_pixels);
+	fprintf(fp, "#-------------------------------------------------------------------------------\n");
+
+	fprintf(fp, "smearing_mean = %" PRIu32 "\n", par->smearing_mean);
+	fprintf(fp, "smearing_variance_mean = %" PRIu32 "\n", par->smearing_variance_mean);
+	fprintf(fp, "smearing_outlier_pixels = %" PRIu32 "\n", par->smearing_outlier_pixels);
+	fprintf(fp, "#-------------------------------------------------------------------------------\n");
+
+	fprintf(fp, "fc_imagette = %" PRIu32 "\n", par->fc_imagette);
+	fprintf(fp, "fc_offset_mean = %" PRIu32 "\n", par->fc_offset_mean);
+	fprintf(fp, "fc_offset_variance = %" PRIu32 "\n", par->fc_offset_variance);
+	fprintf(fp, "fc_background_mean = %" PRIu32 "\n", par->fc_background_mean);
+	fprintf(fp, "fc_background_variance = %" PRIu32 "\n", par->fc_background_variance);
+	fprintf(fp, "fc_background_outlier_pixels = %" PRIu32 "\n", par->fc_background_outlier_pixels);
+	fprintf(fp, "#-------------------------------------------------------------------------------\n");
+}
+
+
+/**
+ * @brief print the chunk compression parameters to stdout
+ *
+ * @param par	pointer to the compression parameters struct to print; if NULL, a notice is printed instead
+ */
+
+void cmp_par_print(const struct cmp_par *par)
+{
+	write_cmp_par_internal(stdout, par);
+}
+
+
+/**
+ * @brief write the chunk compression parameters to the file <output_prefix>.par
+ *
+ * @param par		pointer to a compression parameters struct
+ * @param output_prefix	prefix of the written file (.par is added to the prefix)
+ * @param verbose	if not zero, the parameters are also printed to stdout
+ *
+ * @returns 0 on success, -1 if the file could not be opened
+ */
+
+int cmp_par_fo_file(const struct cmp_par *par, const char *output_prefix,
+		    int verbose)
+{
+	FILE *fp = open_file(output_prefix, ".par");
+
+	if (fp == NULL) { /* report the file name that was actually tried */
+		fprintf(stderr, "%s: %s%s: %s\n", PROGRAM_NAME, output_prefix,
+			".par", strerror(errno));
+		return -1;
+	}
+
+	write_cmp_par_internal(fp, par);
+
+	fclose(fp);
+
+	if (verbose)
+		cmp_par_print(par);
+
+	return 0;
+}
diff --git a/programs/cmp_io.h b/programs/cmp_io.h
index 34dcff0..86d0cb5 100644
--- a/programs/cmp_io.h
+++ b/programs/cmp_io.h
@@ -68,12 +68,16 @@ int cmp_cfg_fo_file(const struct rdcu_cfg *rcfg, const char *output_prefix,
 		    int verbose, int add_ap_pars);
 int cmp_info_to_file(const struct cmp_info *info, const char *output_prefix,
 		     int add_ap_pars);
+int cmp_par_fo_file(const struct cmp_par *par, const char *output_prefix,
+		    int verbose);
 void cmp_cfg_print(const struct rdcu_cfg *rcfg, int add_ap_pars);
+void cmp_par_print(const struct cmp_par *par);
 
 int atoui32(const char *dep_str, const char *val_str, uint32_t *red_val);
 int cmp_mode_parse(const char *cmp_mode_str, enum cmp_mode *cmp_mode);
 
 enum cmp_data_type string2data_type(const char *data_type_str);
 const char *data_type2string(enum cmp_data_type data_type);
+int case_insensitive_compare(const char* s1, const char* s2);
 
 #endif /* CMP_IO_H */
diff --git a/programs/cmp_tool.c b/programs/cmp_tool.c
index 3cd3c8a..5a9f5de 100644
--- a/programs/cmp_tool.c
+++ b/programs/cmp_tool.c
@@ -19,31 +19,56 @@
  * @see Data Compression User Manual PLATO-UVIE-PL-UM-0001
  */
 
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
+#include <string.h>
+#include <errno.h>
 #include <getopt.h>
 
+#include "cmp_support.h"
 #include "cmp_tool-config.h"
 #include "cmp_io.h"
 #include "cmp_icu.h"
 #include "cmp_chunk.h"
-#include "cmp_rdcu.h"
+#include "cmp_rdcu_cfg.h"
 #include "decmp.h"
 #include "cmp_guess.h"
 #include "cmp_entity.h"
 #include "rdcu_pkt_to_file.h"
-#include "cmp_data_types.h"
 
 
 #define BUFFER_LENGTH_DEF_FAKTOR 2
 
 #define DEFAULT_MODEL_ID 53264  /* random default id */
 
+/**
+ * @brief checks if an optional argument is present
+ *
+ * If optarg is NULL and argv[optind] is a valid, non-empty string that does not
+ * start with '-', the macro assigns it to optarg and increments optind, so that
+ * the next getopt_long() call skips it. Otherwise optarg is left unchanged.
+ * The macro uses argc, argv, optarg and optind from the scope it is expanded in.
+ *
+ * @return non-zero if optarg is not NULL after the evaluation, zero otherwise
+ * @see https://stackoverflow.com/a/69177115
+ */
+
+#define OPTIONAL_ARGUMENT_IS_PRESENT \
+	((optarg == NULL \
+	&& optind < argc /* make sure optind is valid */ \
+	&& NULL != argv[optind] /* make sure it's not a null string */ \
+	&& '\0' != argv[optind][0] /* ... or an empty string */ \
+	&& '-' != argv[optind][0]) /* ... or another option */ \
+	? ((optarg = argv[optind++]) != NULL) /* update optind so the next getopt_long invocation skips argv[optind] */ \
+	: (optarg != NULL))
+
 
 /* find a good set of compression parameters for a given dataset */
-static int guess_cmp_pars(struct rdcu_cfg *rcfg, const char *guess_cmp_mode,
-			  int guess_level);
+static int guess_cmp_pars(struct rdcu_cfg *rcfg, struct cmp_par *chunk_par,
+			  uint32_t input_size, const char *guess_cmp_mode, const
+			  char *guess_level_str);
 
 /* compress chunk data and write the results to files */
 static int compression_of_chunk(const void *chunk, uint32_t size, void *model,
@@ -77,13 +102,13 @@ enum {
 
 static const struct option long_options[] = {
 	{"rdcu_par", no_argument, NULL, 'a'},
-	{"model_cfg", optional_argument, NULL, 'n'},
+	{"model_cfg", no_argument, NULL, 'n'},
 	{"help", no_argument, NULL, 'h'},
 	{"verbose", no_argument, NULL, 'v'},
 	{"version", no_argument, NULL, 'V'},
 	{"rdcu_pkt", no_argument, NULL, RDCU_PKT_OPTION},
-	{"diff_cfg", optional_argument, NULL, DIFF_CFG_OPTION},
-	{"guess", required_argument, NULL, GUESS_OPTION},
+	{"diff_cfg", no_argument, NULL, DIFF_CFG_OPTION},
+	{"guess", optional_argument, NULL, GUESS_OPTION},
 	{"guess_level", required_argument, NULL, GUESS_LEVEL},
 	{"last_info", required_argument, NULL, LAST_INFO},
 	{"no_header", no_argument, NULL, NO_HEADER},
@@ -139,13 +164,13 @@ int main(int argc, char **argv)
 	const char *info_file_name = NULL;
 	const char *data_file_name = NULL;
 	const char *model_file_name = NULL;
-	const char *guess_cmp_mode = NULL;
+	char *guess_option = NULL;
+	const char *guess_level_str = NULL;
 	const char *program_name = argv[0];
 
 	int cmp_operation = 0;
 	int print_model_cfg = 0;
 	int guess_operation = 0;
-	int guess_level = DEFAULT_GUESS_LEVEL;
 	int print_diff_cfg = 0;
 
 	/* buffer containing all read in compressed data for decompression */
@@ -213,10 +238,11 @@ int main(int argc, char **argv)
 			break;
 		case GUESS_OPTION:
 			guess_operation = 1;
-			guess_cmp_mode = optarg;
+			if (OPTIONAL_ARGUMENT_IS_PRESENT)
+				guess_option = optarg;
 			break;
 		case GUESS_LEVEL:
-			guess_level = atoi(optarg);
+			guess_level_str = optarg;
 			break;
 		case LAST_INFO:
 			last_info_file_name = optarg;
@@ -338,9 +364,13 @@ int main(int argc, char **argv)
 			if (cmp_type == CMP_TYPE_ERROR)
 				goto fail;
 			printf("DONE\n");
-		} else {
+		} else { /* guess_operation */
 			printf("## Search for a good set of compression parameters ##\n");
-			cmp_type = CMP_TYPE_RDCU; /* guess_cmp_pars only works for RDCU like compression */
+
+			if (guess_option == NULL || !case_insensitive_compare(guess_option, "chunk"))
+				cmp_type = CMP_TYPE_CHUNK;
+			else
+				cmp_type = CMP_TYPE_RDCU;
 		}
 
 		printf("Importing data file %s ... ", data_file_name);
@@ -488,7 +518,8 @@ int main(int argc, char **argv)
 	}
 
 	if (guess_operation) {
-		error = guess_cmp_pars(&rcfg, guess_cmp_mode, guess_level);
+		error = guess_cmp_pars(&rcfg, &chunk_par, input_size,
+				       guess_option, guess_level_str);
 	} else if (cmp_operation) {
 		if (cmp_type == CMP_TYPE_CHUNK)
 			error = compression_of_chunk(rcfg.input_buf, input_size,
@@ -542,16 +573,32 @@ fail:
  * @brief find a good set of compression parameters for a given dataset
  */
 
-static int guess_cmp_pars(struct rdcu_cfg *rcfg, const char *guess_cmp_mode,
-			  int guess_level)
+static int guess_cmp_pars(struct rdcu_cfg *rcfg, struct cmp_par *chunk_par,
+			  uint32_t input_size, const char *guess_option,
+			  const char *guess_level_str)
 {
 	int error;
 	uint32_t cmp_size_bit;
 	double cr;
 	enum cmp_data_type data_type;
+	char *endptr;
+	int guess_level;
+
+	if (guess_level_str) {
+		long number = strtol(guess_level_str, &endptr, 10);
+
+		if (errno != 0 || *endptr != '\0' || number < INT_MIN || number > INT_MAX) {
+			printf("Invalid guess level number: %s\n", guess_level_str);
+			return -1;
+		}
+		guess_level = (int)number;
+	} else {
+		guess_level = DEFAULT_GUESS_LEVEL;
+	}
 
 	printf("Search for a good set of compression parameters (level: %d) ... ", guess_level);
-	if (!strcmp(guess_cmp_mode, "RDCU")) {
+	fflush(stdout);
+	if (!case_insensitive_compare(guess_option, "rdcu")) {
 		if (add_rdcu_pars)
 			data_type = DATA_TYPE_IMAGETTE_ADAPTIVE;
 		else
@@ -560,11 +607,13 @@ static int guess_cmp_pars(struct rdcu_cfg *rcfg, const char *guess_cmp_mode,
 			rcfg->cmp_mode = CMP_GUESS_DEF_MODE_MODEL;
 		else
 			rcfg->cmp_mode = CMP_GUESS_DEF_MODE_DIFF;
+	} else if (!case_insensitive_compare(guess_option, "chunk")) {
+		data_type = DATA_TYPE_CHUNK;
 	} else {
 		data_type = DATA_TYPE_IMAGETTE;
-		error = cmp_mode_parse(guess_cmp_mode, &rcfg->cmp_mode);
+		error = cmp_mode_parse(guess_option, &rcfg->cmp_mode);
 		if (error) {
-			fprintf(stderr, "%s: Error: unknown compression mode: %s\n", PROGRAM_NAME, guess_cmp_mode);
+			fprintf(stderr, "%s: Error: unknown guess option: %s\n", PROGRAM_NAME, guess_option);
 			return -1;
 		}
 	}
@@ -573,23 +622,39 @@ static int guess_cmp_pars(struct rdcu_cfg *rcfg, const char *guess_cmp_mode,
 		return -1;
 	}
 
-	cmp_size_bit = cmp_guess(rcfg, guess_level);
-	if (!cmp_size_bit)
-		return -1;
+	if (data_type == DATA_TYPE_CHUNK) {
+		uint32_t result = cmp_guess_chunk(rcfg->input_buf, input_size,
+					rcfg->model_buf, chunk_par, guess_level);
 
-	if (include_cmp_header)
-		cmp_size_bit = CHAR_BIT * (cmp_bit_to_byte(cmp_size_bit) +
-			cmp_ent_cal_hdr_size(data_type, rcfg->cmp_mode == CMP_MODE_RAW));
+		if (cmp_is_error(result))
+			return -1;
+		else
+			cmp_size_bit = 8 * result;
+		printf("DONE\n");
 
-	printf("DONE\n");
+		printf("Write the guessed compression chunk parameters to file %s.par ... ", output_prefix);
+		error = cmp_par_fo_file(chunk_par, output_prefix, io_flags & CMP_IO_VERBOSE);
+		if (error)
+			return -1;
+	} else {
+		input_size = rcfg->samples * sizeof(uint16_t);
+		cmp_size_bit = cmp_guess(rcfg, guess_level);
+		if (!cmp_size_bit)
+			return -1;
+		if (include_cmp_header)
+			cmp_size_bit = CHAR_BIT * (cmp_bit_to_byte(cmp_size_bit) +
+				cmp_ent_cal_hdr_size(data_type, rcfg->cmp_mode == CMP_MODE_RAW));
+		printf("DONE\n");
+
+		printf("Write the guessed compression configuration to file %s.cfg ... ", output_prefix);
+		error = cmp_cfg_fo_file(rcfg, output_prefix, io_flags & CMP_IO_VERBOSE, add_rdcu_pars);
+		if (error)
+			return -1;
+	}
 
-	printf("Write the guessed compression configuration to file %s.cfg ... ", output_prefix);
-	error = cmp_cfg_fo_file(rcfg, output_prefix, io_flags & CMP_IO_VERBOSE, add_rdcu_pars);
-	if (error)
-		return -1;
 	printf("DONE\n");
 
-	cr = (8.0 * rcfg->samples * sizeof(uint16_t))/cmp_size_bit;
+	cr = (8.0 * input_size)/cmp_size_bit;
 	printf("Guessed parameters can compress the data with a CR of %.2f.\n", cr);
 
 	return 0;
diff --git a/test/cmp_tool/cmp_tool_integration_test.py b/test/cmp_tool/cmp_tool_integration_test.py
index 4b872f6..c4856c4 100755
--- a/test/cmp_tool/cmp_tool_integration_test.py
+++ b/test/cmp_tool/cmp_tool_integration_test.py
@@ -835,7 +835,7 @@ def test_guess_option():
                        "Search for a good set of compression parameters (level: 2) ... DONE\n" +
                        "Write the guessed compression configuration to file not_exist/guess.cfg ... FAILED\n")
             elif sub_test == 'guess_level_not_supported':
-                assert(stderr == "cmp_tool: guess level not supported!\n")
+                assert(stderr == "cmp_tool: guess level not supported for RDCU guess mode!\n")
                 assert(returncode == EXIT_FAILURE)
                 assert(stdout == CMP_START_STR_GUESS +
                        "Importing data file %s ... \n" % (data_file_name) +
@@ -844,7 +844,7 @@ def test_guess_option():
                        "Search for a good set of compression parameters (level: 10) ... FAILED\n")
             elif sub_test == 'guess_unknown_mode':
                 assert(
-                    stderr == "cmp_tool: Error: unknown compression mode: MODE_UNKNOWN\n")
+                    stderr == "cmp_tool: Error: unknown guess option: MODE_UNKNOWN\n")
                 assert(returncode == EXIT_FAILURE)
                 assert(stdout == CMP_START_STR_GUESS +
                        "Importing data file %s ... \n" % (data_file_name) +
-- 
GitLab