Skip to content
Snippets Groups Projects
Commit 83e5b2f7 authored by Andreas Hellerschmied's avatar Andreas Hellerschmied
Browse files

Merge branch 'feature_statistics' into 'develop'

Feature statistics

See merge request hellerdev/lifescale_utils!3
parents 05189eee 25d09ee3
No related branches found
No related tags found
No related merge requests found
......@@ -7,12 +7,30 @@ Data analysis tools for lifescale with GUI.
The program *ls2csv* reads the content of the xlsm files written by lifescale units, parses the data and writes them to three csv
files (where `[run-name]` is the name from the settings sheet):
* `Masses_Vibrio_[run-name].csv`: Data series from the sheet AcquisitionIntervals.
* `SampleMetadata_[run-name].csv`: Data from the sheet PanelData.
* `Summary_[run-name].csv`: Data from the sheet IntervalAnalysis plus sample related data from AcquisitionIntervals.
* `Metadata_[run-name].csv`: Data from the sheet PanelData.
* `SampleSummary_[run-name].csv`: Data from the sheet IntervalAnalysis plus sample related data from AcquisitionIntervals.
### Usage:
* Conversion: `ls2csv -i [path and name of xlsm file] -o [output directory]`
* Help: `ls2csv -h`
```
ls2csv -i [path and name of xlsm file] -o [output directory] [-s] [-nv] [-t]
options:
-h, --help show this help message and exit
-i INPUT_XLSM, --input-xlsm INPUT_XLSM
Path and name of the input xlsm file created by
lifescale. (default: None)
-o OUT_DIR, --out-dir OUT_DIR
Output directory for the CSV files. (default: None)
-nv, --not-verbose Disable command line status messages. (default: False)
-s, --sample-stats Calculate sample statistics of masses (median, std.
deviation, quartiles, interquartile range) and add
them to the SampleSummary output CSV file (columns:
Mass_median, Mass_std, Mass_q25, Mass_q75,Mass_iqr).
(default: False)
-t, --sort-masses-by-time
Sort data in the Masses CSV file by acquisition time.
(default: False)
```
# License and copyright
......
"""LifeScale utils command line interface module.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
:Authors:
Andreas Hellerschmied (heller182@gmx.at)
"""
\ No newline at end of file
"""Command line interface of lifescale utils.
Copyright (C) 2022 Andreas Hellerschmied <heller182@gmx.at>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
from lifescale.scripts.ls2csv import ls2csv as ls2csv_main
......@@ -36,13 +49,27 @@ def ls2csv():
parser.add_argument("-o", "--out-dir", type=is_dir, required=True, help="Output directory for the CSV files.")
parser.add_argument("-nv", "--not-verbose", required=False, help="Disable command line status messages.",
action='store_true')
# parser.add_argument("--out-dir", type=is_dir, required=False,
# help="path to output directory", default=OUT_PATH)
parser.add_argument("-s", "--sample-stats", required=False, help="Calculate sample statistics of masses (median, "
"std. deviation, quartiles, interquartile range) "
"and add them to the "
"SampleSummary output CSV file (columns: "
"Mass_median, Mass_std, Mass_q25, Mass_q75,"
"Mass_iqr).",
action='store_true')
parser.add_argument("-t", "--sort-masses-by-time", required=False, help="Sort data in the Masses CSV file by "
"acquisition time.",
action='store_true')
args = parser.parse_args()
verbose = not args.not_verbose
return ls2csv_main(xlsm_filename=args.input_xlsm, output_dir=args.out_dir, verbose=verbose)
return ls2csv_main(xlsm_filename=args.input_xlsm,
output_dir=args.out_dir,
sample_stats=args.sample_stats,
sort_by_time=args.sort_masses_by_time,
verbose=verbose)
if __name__ == '__main__':
    # Entry point for debugging and testing: run the command line interface directly.
    ls2csv()
"""LifeScale utils objects module.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
:Authors:
Andreas Hellerschmied (heller182@gmx.at)
"""
\ No newline at end of file
......@@ -132,7 +132,7 @@ class LSData:
input_xlsm_filename : str
Filename and path of the xlsm file written by LifeScale.
verbose : bool, optional (default = `True`)
If `True`. status messages are written to the command line.
If `True`, status messages are written to the command line.
Returns
-------
......@@ -140,7 +140,7 @@ class LSData:
Contains all LS output data loaded from the given xlsm file.
"""
REQUIRED_XLSM_SHEET_NAMES = [
REQUIRED_XLSM_SHEET_NAMES = [ # Raise an exception if they are not present in the input xlsm file.
'AcquisitionIntervals',
'IntervalAnalysis',
'PanelData',
......@@ -386,6 +386,8 @@ class LSData:
zip(id_list, well_list, interval_num_list, time_list, masses_list, volume_list, total_num_particles_list,
transit_time_list, pressure_drop_list)),
columns=df_masses_columns)
df_masses['Id'] = df_masses['Id'].astype(int)
df_masses['IntervalNumber'] = df_masses['IntervalNumber'].astype(int)
# Sensor:
# sensor_dict = {}
......@@ -410,8 +412,24 @@ class LSData:
df_masses=df_masses,
)
def export_csv_files(self, output_filepath, verbose=True, sort_by_time=False):
"""Write CSV files to output directory"""
def export_csv_files(self, output_filepath, sort_by_time=False, verbose=True):
"""Write CSV files to output directory
Parameters
----------
output_filepath : str
Path to the output directory.
sort_by_time : bool, optional (default=`False`)
Sort data in the Masses CSV file by the observation time.
verbose : bool, optional (default = `True`)
If `True`, status messages are written to the command line.
Returns
-------
:py:obj:`.LSData`
Contains all LS output data loaded from the given xlsm file.
"""
if verbose:
print('Write output')
......@@ -426,7 +444,7 @@ class LSData:
filename_ending = f'{self.run_name}.csv'
# Write PanelData:
filename = os.path.join(output_filepath, f'SampleMetadata_{filename_ending}')
filename = os.path.join(output_filepath, f'Metadata_{filename_ending}')
if verbose:
print(f'Write PanelData to: {filename}')
self.df_panel_data.to_csv(filename, index=False)
......@@ -444,14 +462,32 @@ class LSData:
self.df_masses.to_csv(filename, index=False)
# Write IntervalAnalysis:
filename = os.path.join(output_filepath, f'Summary_{filename_ending}')
filename = os.path.join(output_filepath, f'SamplesSummary_{filename_ending}')
if verbose:
print(f'Write IntervalAnalysis to: {filename}')
self.df_interval_analysis.to_csv(filename, index=False)
# TODO: Output format (number of digits)
# TODO: Select columns for output (settable as parameter + default settings for each csv file)
# TODO: Optionally order data series by time (parameter)?!
def calc_sample_statistics(self, verbose=True):
    """Calculate mass statistics per sample and store them in `self.df_interval_analysis`.

    A sample is identified by the combination of the columns `Id` and `IntervalNumber`. For each row of
    `self.df_interval_analysis` the matching `Mass` values are taken from `self.df_masses` and the following
    columns are written to all rows of `self.df_interval_analysis` that belong to the same sample:
    `Mass_median`, `Mass_std`, `Mass_q25`, `Mass_q75` and `Mass_iqr` (= `Mass_q75` - `Mass_q25`).

    Parameters
    ----------
    verbose : bool, optional (default = `True`)
        If `True`, status messages are written to the command line.
    """
    if verbose:
        print('Calculate sample statistics.')

    masses = self.df_masses
    summary = self.df_interval_analysis

    for _, sample in summary.iterrows():
        # All mass observations that belong to the current sample.
        sample_masses = masses.loc[
            (masses['Id'] == sample.Id) & (masses['IntervalNumber'] == sample.IntervalNumber),
            'Mass',
        ]
        # Rows of the summary table that describe the current sample.
        sample_rows = (summary['Id'] == sample.Id) & (summary['IntervalNumber'] == sample.IntervalNumber)

        lower_quartile = sample_masses.quantile(0.25)
        upper_quartile = sample_masses.quantile(0.75)

        # The assignment order defines the order of the newly created columns.
        summary.loc[sample_rows, 'Mass_median'] = sample_masses.median()
        summary.loc[sample_rows, 'Mass_std'] = sample_masses.std()
        summary.loc[sample_rows, 'Mass_q25'] = lower_quartile
        summary.loc[sample_rows, 'Mass_q75'] = upper_quartile
        summary.loc[sample_rows, 'Mass_iqr'] = upper_quartile - lower_quartile
@property
def get_number_of_observations(self):
......@@ -513,4 +549,4 @@ if __name__ == '__main__':
ls_data = LSData.from_xlsm_file(input_xlsm_filename=xlsm_filename)
ls_data.export_csv_files(output_directory)
print('test')
print('End')
"""LifeScale utils scripts module.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
:Authors:
Andreas Hellerschmied (heller182@gmx.at)
"""
\ No newline at end of file
"""Conversion program from xlsm to csv.
Copyright (C) 2022 Andreas Hellerschmied <heller182@gmx.at>"""
Copyright (C) 2022 Andreas Hellerschmied <heller182@gmx.at>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
from lifescale.models.ls_data import LSData
def ls2csv(xlsm_filename, output_dir, sample_stats=True, sort_by_time=False, verbose=True):
    """Convert lifescale output file (xlsm) to csv files.

    Parameters
    ----------
    xlsm_filename : str
        Filename and path of the xlsm file written by LifeScale.
    output_dir : str
        Path to the output directory for the CSV files.
    sample_stats : bool, optional (default = `True`)
        If `True`, the sample statistics of the masses are calculated before the CSV files are written.
    sort_by_time : bool, optional (default = `False`)
        Passed on to the CSV export as `sort_by_time`.
    verbose : bool, optional (default = `True`)
        If `True`, status messages are written to the command line.
    """
    # NOTE(review): the command line flag `--sample-stats` is a `store_true` option (default False), whereas
    # this function defaults to `sample_stats=True` - confirm which default is intended.
    ls_data = LSData.from_xlsm_file(input_xlsm_filename=xlsm_filename, verbose=verbose)
    if sample_stats:
        # Must run before the export, so that the statistics columns are part of the written data.
        ls_data.calc_sample_statistics(verbose=verbose)
    ls_data.export_csv_files(output_dir, sort_by_time=sort_by_time, verbose=verbose)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment