cats_csv.c

// SPDX-License-Identifier: GPL-3.0-or-later
//
// cats_csv.c
//
// Copyright (C) 2011-2024, University of Vienna and Vienna Institute for Nature Conservation & Analyses, Andreas Gattringer.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//

#include "cats_global.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "cats_csv.h"
#include <logging/logging.h>
#include <memory/cats_memory.h>


// Create a new csv table without data rows from a comma-separated header line.
//
// header_line:     text of the header row, split on ","
// expected_fields: number of columns the header has to contain
//
// Returns a newly allocated struct cats_csv whose column_count and headers are filled in
// (each header name is an strdup() copy), with data_row_count == 0 and data == NULL.
// The result is released with csv_free().
//
// Logs an error and terminates the program if the number of fields differs from
// expected_fields or if a header name cannot be copied.
struct cats_csv *csv_new(char *header_line, int32_t expected_fields)
{
        int32_t fields = count_fields(header_line, ",");

        if (fields != expected_fields) {
                log_message(LOG_ERROR, "%s: disparity in field count: got %d expected %d\n", __func__, fields,
                            expected_fields);
                exit(EXIT_FAILURE);
        }

        struct string_array *headers = get_all_tokens(header_line, ",");

        // second check: the tokenizer has to agree with the field counter
        if (headers->count != expected_fields) {
                log_message(LOG_ERROR, "%s: disparity in field count: got %d expected %d\n", __func__, headers->count,
                            expected_fields);
                exit(EXIT_FAILURE);
        }

        struct cats_csv *result = malloc_or_die_trace(sizeof(struct cats_csv), __func__);

        result->column_count = headers->count;
        result->data_row_count = 0;
        result->data = NULL;
        result->headers = malloc_or_die_trace(result->column_count * sizeof(char *), __func__);

        for (int32_t i = 0; i < headers->count; i++) {
                result->headers[i] = strdup(headers->string[i]);

                // a NULL header would crash the strcmp() in csv_get_field_idx later on
                if (result->headers[i] == NULL) {
                        log_message(LOG_ERROR, "%s: failed to allocate memory for header %d\n", __func__, i);
                        exit(EXIT_FAILURE);
                }
        }

        // the tokens were copied above, the temporary array is no longer needed
        free_string_array(&headers);
        return result;
}


// Release a csv table together with all header and cell strings it holds,
// then set the caller's pointer to NULL.
//
// csv: address of the table pointer; if *csv is NULL a warning is logged and nothing is freed
void csv_free(struct cats_csv **csv)
{
        struct cats_csv *table = *csv;

        if (table == NULL) {
                log_message(LOG_WARNING, "%s received empty struct - cannot free", __func__);
                return;
        }

        // header names first, then the array holding them
        if (table->headers != NULL) {
                int32_t col = 0;
                while (col < table->column_count) {
                        free(table->headers[col]);
                        col++;
                }

                free(table->headers);
        }

        if (table->data != NULL) {
                for (int32_t r = 0; r < table->data_row_count; r++) {
                        // rows that were never allocated are skipped
                        if (table->data[r] != NULL) {
                                for (int32_t c = 0; c < table->column_count; c++) {
                                        free(table->data[r][c]);
                                        table->data[r][c] = NULL;
                                }

                                free(table->data[r]);
                                table->data[r] = NULL;
                        }
                }

                free(table->data);
        }

        free(table);
        *csv = NULL;
}


// Look up the column index of the header that matches field_name exactly.
//
// Returns the index of the first matching header. If no header matches,
// an error is logged and the program terminates.
int32_t csv_get_field_idx(struct cats_csv *csv, const char *field_name)
{
        assert(csv);
        assert(field_name);

        // linear scan over the header names, the first match wins
        for (int32_t col = 0; col < csv->column_count; col++) {
                if (strcmp(field_name, csv->headers[col]) == 0) {
                        return col;
                }
        }

        log_message(LOG_ERROR, "%s: could not find field '%s' in csv\n", __func__, field_name);
        exit(EXIT_FAILURE);
}


// Return the string stored in the given data row for the column named field_name.
//
// csv:        table to read from
// row:        zero-based data row, valid values are 0 .. data_row_count - 1
// field_name: header name of the column; an unknown name terminates the program
//             (see csv_get_field_idx)
//
// The returned pointer refers to the string held by the table, it is not a copy.
// An out-of-range row is logged as an error and terminates the program.
char *csv_get_value_field_name(struct cats_csv *csv, int32_t row, const char *field_name) // untested indirect
{
        assert(csv);
        assert(row >= 0);
        assert(row < csv->data_row_count);

        int32_t field_idx = csv_get_field_idx(csv, field_name);

        // row == data_row_count is already one past the last row
        if (row < 0 || row >= csv->data_row_count) {
                log_message(LOG_ERROR, "%s: invalid row number %d, csv has %d rows\n", __func__, row,
                            csv->data_row_count);
                exit(EXIT_FAILURE);
        }

        return csv->data[row][field_idx];
}


// Return the string stored at (row, field_idx).
//
// csv:       table to read from
// row:       zero-based data row, valid values are 0 .. data_row_count - 1
// field_idx: zero-based column, valid values are 0 .. column_count - 1
//
// The returned pointer refers to the string held by the table, it is not a copy.
// An out-of-range row or column is logged as an error and terminates the program.
char *csv_get_value_field_idx(struct cats_csv *csv, int32_t row, int32_t field_idx) // untested indirect
{
        assert(csv);
        assert(row >= 0);
        assert(field_idx >= 0);

        // field_idx == column_count is already one past the last column
        if (field_idx < 0 || field_idx >= csv->column_count) {
                log_message(LOG_ERROR, "%s: invalid field index %d, csv has %d fields\n", __func__, field_idx,
                            csv->column_count);
                exit(EXIT_FAILURE);
        }

        // row == data_row_count is already one past the last row
        if (row < 0 || row >= csv->data_row_count) {
                log_message(LOG_ERROR, "%s: invalid row index %d, csv has %d rows\n", __func__, row,
                            csv->data_row_count);
                exit(EXIT_FAILURE);
        }

        return csv->data[row][field_idx];
}


double csv_get_double_field_name(struct cats_csv *csv, int32_t row, const char *field_name) // untested indirect
{
        char *string = csv_get_value_field_name(csv, row, field_name);
        double value = NAN;
        bool success = string_to_double(string, &value);
        if (!success) {
                log_message(LOG_ERROR, "error converting %s to floating point number!", string);
                exit(EXIT_FAILURE);
        }

        return value;
}


double csv_get_double_field_idx(struct cats_csv *csv, int32_t row, int32_t field_idx) // untested indirect
{
        char *string = csv_get_value_field_idx(csv, row, field_idx);
        double value = NAN;

        bool success = string_to_double(string, &value);

        if (!success) {
                log_message(LOG_ERROR, "error converting %s to floating point number!", string);
                exit(EXIT_FAILURE);
        }
        return value;
}


int32_t csv_get_int32_field_idx(struct cats_csv *csv, int32_t row, int32_t field_idx) // untested indirect
{
        char *string = csv_get_value_field_idx(csv, row, field_idx);
        int32_t value = INT32_MIN;
        bool success = string_to_integer(string, &value);

        if (!success) {
                log_message(LOG_ERROR, "error converting %s to integer!", string);
                exit(EXIT_FAILURE);
        }
        return value;
}

// Split a comma-separated line and append it to the table as a new data row.
//
// csv:  table to append to
// line: text of one row; empty lines and lines starting with '#' are ignored
//
// Every cell is stored as an strdup() copy of its token. The row count is only
// increased once the new row is completely built.
//
// Logs an error and terminates the program if the number of fields differs from
// csv->column_count or if memory for the row cannot be allocated.
void csv_add_row(struct cats_csv *csv, char *line)
{
        assert(csv);
        assert(line);

        if (strlen(line) == 0) return;  // ignore empty line
        if (line[0] == '#') return;    // ignore comment line

        int32_t fields = count_fields(line, ",");

        if (csv->column_count != fields) {
                log_message(LOG_ERROR, "%s: disparity in field count: got %d expected %d\n", __func__, fields,
                            csv->column_count);
                exit(EXIT_FAILURE);
        }

        struct string_array *data = get_all_tokens(line, ",");

        // second check: the tokenizer has to agree with the field counter
        if (csv->column_count != data->count) {
                log_message(LOG_ERROR, "%s: disparity in field count: got %d expected %d\n", __func__, data->count,
                            csv->column_count);
                exit(EXIT_FAILURE);
        }

        int32_t row = csv->data_row_count;

        // grow the row array by one; keep the result in a temporary so that a failed
        // realloc() is detected before csv->data is overwritten and dereferenced
        void *grown = realloc(csv->data, ((size_t) row + 1) * sizeof(char **));

        if (grown == NULL) {
                log_message(LOG_ERROR, "%s: failed to allocate memory for row %d\n", __func__, row);
                exit(EXIT_FAILURE);
        }

        csv->data = grown;
        csv->data[row] = malloc_or_die_trace(fields * sizeof(char *), __func__);

        for (int32_t i = 0; i < csv->column_count; i++) {
                csv->data[row][i] = strdup(data->string[i]);

                if (csv->data[row][i] == NULL) {
                        log_message(LOG_ERROR, "%s: failed to allocate memory for field %d in row %d\n", __func__,
                                    i, row);
                        exit(EXIT_FAILURE);
                }
        }

        csv->data_row_count = row + 1;

        // the tokens were copied above, the temporary array is no longer needed
        free_string_array(&data);
}


struct cats_csv *csv_from_filename(const char *filename, int expected_fields)
{
        FILE *f = fopen(filename, "r");
        ENSURE_FILE_OPENED(f, filename)

        struct cats_csv *csv = csv_read_file(f, expected_fields);

        fclose(f);
        return csv;
}


struct cats_csv *csv_read_file(FILE *input, int expected_fields) // tested
{
        char *header_line = read_single_line(input);

        struct cats_csv *csv = csv_new(header_line, expected_fields);

        free(header_line);

        char *line = NULL;
        line = read_single_line(input);

        while (line) {
                csv_add_row(csv, line);
                free(line);
                line = read_single_line(input);
        }
        return csv;
}