Skip to content
Snippets Groups Projects
Commit 45ec0955 authored by Andreas Hellerschmied's avatar Andreas Hellerschmied
Browse files

Calculate sample statistics. Changed names of CSV output files.

parent 05189eee
No related branches found
No related tags found
No related merge requests found
......@@ -7,11 +7,11 @@ Data analysis tools for lifescale with GUI.
The program *ls2csv* reads the content of the xlsm files written by lifescale units, parses the data and writes them to three csv
files (where `[run-name]` is the name from the settings sheet):
* `Masses_Vibrio_[run-name].csv`: Data series from the sheet AcquisitionIntervals.
* `SampleMetadata_[run-name].csv`: Data from the sheet PanelData.
* `Summary_[run-name].csv`: Data from the sheet IntervalAnalysis plus sample related data from AcquisitionIntervals.
* `Metadata_[run-name].csv`: Data from the sheet PanelData.
* `SamplesSummary_[run-name].csv`: Data from the sheet IntervalAnalysis plus sample-related data from AcquisitionIntervals.
### Usage:
* Conversion: `ls2csv -i [path and name of xlsm file] -o [output directory]`
* Conversion: `ls2csv -i [path and name of xlsm file] -o [output directory] [-s] [-nv]`
* Help: `ls2csv -h`
......
......@@ -36,12 +36,18 @@ def ls2csv():
parser.add_argument("-o", "--out-dir", type=is_dir, required=True, help="Output directory for the CSV files.")
parser.add_argument("-nv", "--not-verbose", required=False, help="Disable command line status messages.",
action='store_true')
parser.add_argument("-s", "--sample-stats", required=False, help="Calculate sample statistics (mean, median, IQR, "
"StDev.) and add it to the output CSV files.",
action='store_true')
# parser.add_argument("--out-dir", type=is_dir, required=False,
# help="path to output directory", default=OUT_PATH)
args = parser.parse_args()
verbose = not args.not_verbose
return ls2csv_main(xlsm_filename=args.input_xlsm, output_dir=args.out_dir, verbose=verbose)
return ls2csv_main(xlsm_filename=args.input_xlsm,
output_dir=args.out_dir,
sample_stats=args.sample_stats,
verbose=verbose)
if __name__ == '__main__':
......
......@@ -386,6 +386,8 @@ class LSData:
zip(id_list, well_list, interval_num_list, time_list, masses_list, volume_list, total_num_particles_list,
transit_time_list, pressure_drop_list)),
columns=df_masses_columns)
df_masses['Id'] = df_masses['Id'].astype(int)
df_masses['IntervalNumber'] = df_masses['IntervalNumber'].astype(int)
# Sensor:
# sensor_dict = {}
......@@ -426,7 +428,7 @@ class LSData:
filename_ending = f'{self.run_name}.csv'
# Write PanelData:
filename = os.path.join(output_filepath, f'SampleMetadata_{filename_ending}')
filename = os.path.join(output_filepath, f'Metadata_{filename_ending}')
if verbose:
print(f'Write PanelData to: {filename}')
self.df_panel_data.to_csv(filename, index=False)
......@@ -444,7 +446,7 @@ class LSData:
self.df_masses.to_csv(filename, index=False)
# Write IntervalAnalysis:
filename = os.path.join(output_filepath, f'Summary_{filename_ending}')
filename = os.path.join(output_filepath, f'SamplesSummary_{filename_ending}')
if verbose:
print(f'Write IntervalAnalysis to: {filename}')
self.df_interval_analysis.to_csv(filename, index=False)
......@@ -453,6 +455,25 @@ class LSData:
# TODO: Select columns for output (settable as parameter + default settings for each csv file)
# TODO: Optionally order data series by time (parameter)?!
def calc_sample_statistics(self, verbose=True):
    """Calculate per-sample mass statistics and store them in self.df_interval_analysis.

    A sample is identified by the pair (Id, IntervalNumber). For every row of
    self.df_interval_analysis the matching 'Mass' values are selected from
    self.df_masses and the following columns are written in place:

    * Mass_median: median of the sample's masses
    * Mass_std: standard deviation of the sample's masses
    * Mass_q25 / Mass_q75: 25 % and 75 % quantiles
    * Mass_iqr: interquartile range (Mass_q75 - Mass_q25)

    Samples without any matching mass values end up with NaN in these columns.

    Args:
        verbose: If True, print a status message to the command line.
    """
    if verbose:
        print('Calculate sample statistics.')

    masses = self.df_masses
    samples = self.df_interval_analysis  # Same object, so the .loc writes below are in place.

    for _, sample in samples.iterrows():
        # Select the mass series of this sample once instead of re-filtering for each statistic.
        sample_masses = masses.loc[
            (masses['Id'] == sample.Id) & (masses['IntervalNumber'] == sample.IntervalNumber),
            'Mass',
        ]
        # All rows of the summary table that belong to this sample.
        target_rows = (samples['Id'] == sample.Id) & (samples['IntervalNumber'] == sample.IntervalNumber)

        q25 = sample_masses.quantile(0.25)
        q75 = sample_masses.quantile(0.75)

        samples.loc[target_rows, 'Mass_median'] = sample_masses.median()
        samples.loc[target_rows, 'Mass_std'] = sample_masses.std()
        samples.loc[target_rows, 'Mass_q25'] = q25
        samples.loc[target_rows, 'Mass_q75'] = q75
        samples.loc[target_rows, 'Mass_iqr'] = q75 - q25
@property
def get_number_of_observations(self):
"""Return the number of observations (items in the data series)."""
......
......@@ -5,7 +5,9 @@ Copyright (C) 2022 Andreas Hellerschmied <heller182@gmx.at>"""
from lifescale.models.ls_data import LSData
def ls2csv(xlsm_filename, output_dir, verbose=True):
def ls2csv(xlsm_filename, output_dir, sample_stats=True, verbose=True):
    """Load a lifescale xlsm output file and export its content as CSV files.

    The file is parsed via LSData.from_xlsm_file. When `sample_stats` is set,
    the sample statistics are calculated before the CSV files are written to
    `output_dir`. `verbose` is forwarded to every processing step.
    """
    data = LSData.from_xlsm_file(input_xlsm_filename=xlsm_filename, verbose=verbose)

    if sample_stats:
        data.calc_sample_statistics(verbose=verbose)

    data.export_csv_files(output_dir, verbose=verbose)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment