diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..dc4c575fe1343ac222be57a9d056e0b766d01183
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+exclude lifescale/models/ls_run.py lifescale/scripts/run_gui.py
\ No newline at end of file
diff --git a/README.md b/README.md
index 321815f75c666edd17729d5ffeb67096a1801c7b..4a9f49948138a93a80905cd523c7337386fe5365 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,14 @@
 # lifescale_utils
 Data analysis tools for lifescale with GUI.
 
-# Command line programms:
+# Command line programs:
 
 ## ls2csv
-The program *ls2csv* reads the content of the xlsm files written by lifescale units, parses the data and writes thems to three csv
-fieles:
- * Masses_Vibrio_[run-name].csv: Contains the data series from the sheet AcquisitionIntervals.
- * SampleMetadata_[run-name].csv: Data from the sheet PanelData.
- * Summary_[run-name].csv: Contains the data from the sheet IntervalAnalysis.
+The program *ls2csv* reads the content of the xlsm files written by lifescale units, parses the data and writes them to three csv
+files (where `[run-name]` is the name from the settings sheet):
+ * `Masses_Vibrio_[run-name].csv`: Data series from the sheet AcquisitionIntervals.
+ * `SampleMetadata_[run-name].csv`: Data from the sheet PanelData.
+ * `Summary_[run-name].csv`: Data from the sheet IntervalAnalysis plus sample related data from AcquisitionIntervals.
 
 ### Usage:
 * Conversion: `ls2csv -i [path and nale of xlsm file] -o [outpur directory]`
diff --git a/lifescale/__init__.py b/lifescale/__init__.py
index 9e0448d387540c298fffc572385958227863965d..af1d517ca89f1f94b3c97b63573cf5f95052ea4e 100644
--- a/lifescale/__init__.py
+++ b/lifescale/__init__.py
@@ -1,4 +1,4 @@
-"""LifeSclae utils is a utility program for handling data output.
+"""LifeScale utils is a utility program for handling data output.
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -17,8 +17,8 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
 Andreas Hellerschmied (heller182@gmx.at)
 """
 
-__version__ = '0.0.2'
+__version__ = '0.0.3'
 __author__ = 'Andreas Hellerschmied'
 __git_repo__ = 'tba'
 __email__ = 'heller182@gmx.at'
-__copyright__ = '(c) 2022 Andreas Hellerschmied'
\ No newline at end of file
+__copyright__ = '(c) 2022 Andreas Hellerschmied'
diff --git a/lifescale/command_line/command_line.py b/lifescale/command_line/command_line.py
index 212762d19b1a61cca49eb7db660d71fe6afd0f21..e5c45880ad9d5ce122ab8b9ccf1d125507d2fd1b 100644
--- a/lifescale/command_line/command_line.py
+++ b/lifescale/command_line/command_line.py
@@ -1,4 +1,4 @@
-""" Command line interface of lifescale utils
+"""Command line interface of lifescale utils.
 
 Copyright (C) 2022 Andreas Hellerschmied <heller182@gmx.at>
 """
@@ -25,13 +25,14 @@ def is_dir(pathname):
 def ls2csv():
     """Command line interface including argument parser for the lifescale2csv converter."""
     parser = argparse.ArgumentParser(prog="ls2csv",
-                                     description="Covnersion from lifescale xlsm output to csv files",
-                                     epilog="The ls2csv converter loads and parses xslm files created by the lifescale "
+                                     description="Conversion from lifescale xlsm output to csv files",
+                                     epilog="The ls2csv converter loads and parses xlsm files created by the lifescale "
                                             "unit. It writes several csv files to the output directory that contain "
-                                            "extraced data from the input xlsm file in an easily readable way.",
+                                            "extracted data from the input xlsm file in an easily readable way.",
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument("-i", "--input-xlsm", type=is_file, required=True, help="Path and name of the input xlsm file created by "
-                                                                                "lifescale.")
+    parser.add_argument("-i", "--input-xlsm", type=is_file, required=True, help="Path and name of the input xlsm file "
+                                                                                "created by "
+                                                                                "lifescale.")
     parser.add_argument("-o", "--out-dir", type=is_dir, required=True, help="Output directory for the CSV files.")
     parser.add_argument("-nv", "--not-verbose", required=False, help="Disable command line status messages.",
                         action='store_true')
@@ -40,10 +41,8 @@ def ls2csv():
     args = parser.parse_args()
     verbose = not args.not_verbose
 
-    return ls2csv_main(xlsm_filename=args.input_xlsm, oputput_dir=args.out_dir, verbose=verbose)
+    return ls2csv_main(xlsm_filename=args.input_xlsm, output_dir=args.out_dir, verbose=verbose)
 
 
 if __name__ == '__main__':
     ls2csv()
-
-
diff --git a/lifescale/models/ls_data.py b/lifescale/models/ls_data.py
index 44c70f2a1a9194b0f2755dd284fe1805221a816c..f14a9cdba775f63bd22061a11d6ff6a419dc3299 100644
--- a/lifescale/models/ls_data.py
+++ b/lifescale/models/ls_data.py
@@ -39,18 +39,18 @@ class LSData:
         End time of the run.
     settings_dict : dict
         Contains all settings from the Settings sheet of the input xlsm file. If more than one attributes are provides
-        for a parameter (dictionary key), the dictionary item is a list. If no attribute is provided, the item is `None`.
+        for a parameter (dictionary key), the dictionary item is a list. If no attribute is provided, the item is `None`
     df_panel_data : pandas dataframe
         Pandas dataframe that holds the data of the PanelData sheet of the input xlsm file.
     df_interval_analysis : pandas dataframe
         Pandas dataframe that holds the data of the IntervalAnalysis sheet plus additional data of the input xlsm file.
     df_masses : pandas dataframe
-        Pandas dataframe that holds the data derived from the AcquisiotionIntervals sheet of the input xlsm file.
+        Pandas dataframe that holds the data derived from the AcquisitionIntervals sheet of the input xlsm file.
     """
 
     def __init__(self,
                  run_name='',
-                 accession_number='',
+                 guid='',
                  input_xlsm_filename='',
                  output_dir_path='',
                  start_time_dt=None,
@@ -69,10 +69,10 @@ class LSData:
             self.run_name = run_name
         else:
             raise TypeError('"run_name" needs to be a string')
-        if isinstance(accession_number, str):
-            self.accession_number = accession_number
+        if isinstance(guid, str):
+            self.guid = guid
         else:
-            raise TypeError('"accession_number" needs to be a string')
+            raise TypeError('"guid" needs to be a string')
 
         # output_dir_path:
         if isinstance(output_dir_path, str):
@@ -123,10 +123,6 @@ class LSData:
         # Initialize additional attributes:
         pass
 
-
-
-
-
     @classmethod
     def from_xlsm_file(cls, input_xlsm_filename, verbose=True):
         """Constructor that generates and populates the LSData object from an xlsm LS output file.
@@ -170,12 +166,12 @@ class LSData:
 
         # Check, if all required sheets are available:
         if set(REQUIRED_XLSM_SHEET_NAMES) - set(sheet_names):
-            missings_steets = list(set(REQUIRED_XLSM_SHEET_NAMES) - set(sheet_names))
-            raise AssertionError(f'The following sheets are missing the file {input_xlsm_filename}: {missings_steets}')
+            missing_sheets = list(set(REQUIRED_XLSM_SHEET_NAMES) - set(sheet_names))
+            raise AssertionError(f'The following sheets are missing the file {input_xlsm_filename}: {missing_sheets}')
 
         # PanelData:
         if verbose:
-            print(f' - Parse PanalData')
+            print(f' - Parse PanelData')
         df_panel_data = xl_file.parse('PanelData')
         df_panel_data = remove_space_from_column_names(df_panel_data)
         df_panel_data['NumberOfIntervals'] = None
@@ -193,9 +189,11 @@ class LSData:
         df_interval_analysis['MeasuredVolume'] = None  # From AcquisitionIntervals
         df_interval_analysis['ResonantFrequency'] = None  # From AcquisitionIntervals
         if not (df_interval_analysis[['Id', 'IntervalNumber']].value_counts().count() == len(df_interval_analysis)):
-            raise AssertionError(f'The combination if the values in the columns "Id" and "IntervalNumber" in IntervalAnalysis is not unique!')
+            raise AssertionError(
+                f'The combination if the values in the columns "Id" and "IntervalNumber" in IntervalAnalysis is not '
+                f'unique!')
 
-        # Settings: 
+        # Settings:
         if verbose:
             print(f' - Parse Settings')
         settings_dict = {}
@@ -213,12 +211,12 @@ class LSData:
             else:
                 settings_dict[row[0]] = tmp_list
         run_name = settings_dict['Name']
-        if settings_dict['AccessionNumber'] is None:
-            accession_number = ''
+        if settings_dict['Guid'] is None:
+            guid = ''
         else:
-            accession_number = str(settings_dict['AccessionNumber'])
+            guid = str(settings_dict['Guid'])
         start_time_dt = settings_dict['StartTime']
-        end_time_dt =start_time_dt + dt.timedelta(settings_dict['ElapsedTime']/(24*60))
+        end_time_dt = start_time_dt + dt.timedelta(settings_dict['ElapsedTime'] / (24 * 60))
 
         # # Settings (dataframe):
         # df_settings = xl_file.parse('Settings', header=None).transpose()
@@ -226,7 +224,6 @@ class LSData:
         # df_settings = df_settings[1:]
         # df_settings.reset_index(drop=True, inplace=True)
 
-
         # Masses (from sheet AcquisitionIntervals):
         if verbose:
             print(f' - Parse Masses')
@@ -240,11 +237,11 @@ class LSData:
         transit_time_list = []
         pressure_drop_list = []
         time_list_length_old = 0
-        masses_list_length_old = 0
-        volume_list_length_old = 0
-        total_num_particles_list_length_old = 0
-        transit_time_list_length_old = 0
-        pressure_drop_list_length_old = 0
+        # masses_list_length_old = 0
+        # volume_list_length_old = 0
+        # total_num_particles_list_length_old = 0
+        # transit_time_list_length_old = 0
+        # pressure_drop_list_length_old = 0
 
         current_id = None
         current_well = None
@@ -256,14 +253,14 @@ class LSData:
             if row[0] == 'Id':
                 current_id = row[1]
                 if ~(df_interval_analysis['Id'] == current_id).any():
-                    raise AssertionError(f'"ID="{current_id} is not available in InteralAnalysis!')
+                    raise AssertionError(f'"ID="{current_id} is not available in IntervalAnalysis!')
                 if ~(df_panel_data['Id'] == current_id).any():
                     raise AssertionError(f'"ID="{current_id} is not available in PanelData!')
                 continue
             if row[0] == 'Well':
                 current_well = row[1]
                 if ~(df_interval_analysis['Well'] == current_well).any():
-                    raise AssertionError(f'"Well="{current_well} is not available in InteralAnalysis!')
+                    raise AssertionError(f'"Well="{current_well} is not available in IntervalAnalysis!')
                 if ~(df_panel_data['Well'] == current_well).any():
                     raise AssertionError(f'"Well="{current_well} is not available in PanelData!')
                 continue
@@ -278,7 +275,7 @@ class LSData:
                 current_interval_num = row[1]
                 if ~(df_interval_analysis['IntervalNumber'] == current_interval_num).any():
                     raise AssertionError(
-                        f'"IntervalNumber="{current_interval_num} is not available in InteralAnalysis!')
+                        f'"IntervalNumber="{current_interval_num} is not available in IntervalAnalysis!')
                 continue
             if row[0] == 'StartTime':
                 continue
@@ -287,37 +284,48 @@ class LSData:
             if row[0] == 'DilutionFactor':
                 continue
             if row[0] == 'Status':
-                tmp_filter = (df_interval_analysis['Id'] == current_id) & (df_interval_analysis['IntervalNumber'] == current_interval_num)
+                tmp_filter = (df_interval_analysis['Id'] == current_id) & (
+                        df_interval_analysis['IntervalNumber'] == current_interval_num)
                 if len(tmp_filter[tmp_filter]) != 1:
-                    raise AssertionError(f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" in IntervalAnalysis: {len(tmp_filter[tmp_filter])}')
+                    raise AssertionError(
+                        f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" '
+                        f'in IntervalAnalysis: {len(tmp_filter[tmp_filter])}')
                 df_interval_analysis.loc[tmp_filter, 'Status'] = row[1]
                 continue
             if row[0] == 'DetectedParticles':
                 tmp_filter = (df_interval_analysis['Id'] == current_id) & (
-                    df_interval_analysis['IntervalNumber'] == current_interval_num)
+                        df_interval_analysis['IntervalNumber'] == current_interval_num)
                 if len(tmp_filter[tmp_filter]) != 1:
-                    raise AssertionError(f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" in IntervalAnalysis: {len(tmp_filter[tmp_filter])}')
+                    raise AssertionError(
+                        f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" '
+                        f'in IntervalAnalysis: {len(tmp_filter[tmp_filter])}')
                 df_interval_analysis.loc[tmp_filter, 'DetectedParticles'] = row[1]
                 current_detected_particles = row[1]  # For cross-checks
                 continue
             if row[0] == 'MeasuredVolume':
                 tmp_filter = (df_interval_analysis['Id'] == current_id) & (
-                    df_interval_analysis['IntervalNumber'] == current_interval_num)
+                        df_interval_analysis['IntervalNumber'] == current_interval_num)
                 if len(tmp_filter[tmp_filter]) != 1:
-                    raise AssertionError(f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" in IntervalAnalysis: {len(tmp_filter[tmp_filter])}')
+                    raise AssertionError(
+                        f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" '
+                        f'in IntervalAnalysis: {len(tmp_filter[tmp_filter])}')
                 df_interval_analysis.loc[tmp_filter, 'MeasuredVolume'] = row[1]
                 continue
             if row[0] == 'ResonantFrequency':
                 tmp_filter = (df_interval_analysis['Id'] == current_id) & (
                         df_interval_analysis['IntervalNumber'] == current_interval_num)
                 if len(tmp_filter[tmp_filter]) != 1:
-                    raise AssertionError(f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" in IntervalAnalysis: {len(tmp_filter[tmp_filter])}')
+                    raise AssertionError(
+                        f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" '
+                        f'in IntervalAnalysis: {len(tmp_filter[tmp_filter])}')
                 df_interval_analysis.loc[tmp_filter, 'ResonantFrequency'] = row[1]
                 continue
             if row[0] == 'Time':
                 tmp_list = row_to_list(row[1:])
                 if (len(tmp_list) == 0) and (current_detected_particles != 0):
-                    raise AssertionError(f'Number of "DetectedParticles={current_detected_particles}" does not match length of "Time" series (= {len(tmp_list)}) ')
+                    raise AssertionError(
+                        f'Number of "DetectedParticles={current_detected_particles}" does not match length of "Time" '
+                        f'series (= {len(tmp_list)}) ')
                 time_list += tmp_list
                 continue
             if row[0] == 'Mass':
@@ -345,7 +353,8 @@ class LSData:
                 if not (len(time_list) == len(masses_list) == len(volume_list) == len(total_num_particles_list) == len(
                         transit_time_list) == len(pressure_drop_list)):
                     raise AssertionError(
-                        f'The lengths of the data series in AcquisitionIntervals of "Well={current_well}" and "IntervalNumber={current_interval_num}" do not match!')
+                        f'The lengths of the data series in AcquisitionIntervals of "Well={current_well}" and '
+                        f'"IntervalNumber={current_interval_num}" do not match!')
                 # Set up lists for well, id and interval number:
                 num_additional_items_in_data_series = len(time_list) - time_list_length_old
                 tmp_list = [current_id] * num_additional_items_in_data_series
@@ -356,23 +365,27 @@ class LSData:
                 interval_num_list += tmp_list
                 # Reset counters:
                 time_list_length_old = len(time_list)
-                masses_list_length_old = len(masses_list)
-                volume_list_length_old = len(volume_list)
-                total_num_particles_list_length_old = len(total_num_particles_list)
-                transit_time_list_length_old = len(transit_time_list)
-                pressure_drop_list_length_old = len(pressure_drop_list)
+                # masses_list_length_old = len(masses_list)
+                # volume_list_length_old = len(volume_list)
+                # total_num_particles_list_length_old = len(total_num_particles_list)
+                # transit_time_list_length_old = len(transit_time_list)
+                # pressure_drop_list_length_old = len(pressure_drop_list)
                 continue
 
         # Check if the length of all data series lists match:
         if not (len(time_list) == len(masses_list) == len(volume_list) == len(total_num_particles_list) == len(
-                transit_time_list) == len(pressure_drop_list) == len(id_list) == len(well_list) == len(interval_num_list)):
+                transit_time_list) == len(pressure_drop_list) == len(id_list) == len(well_list) == len(
+                interval_num_list)):
             raise AssertionError(
                 f'The lengths of the data series in AcquisitionIntervals do not match!')
 
         # Create dataframe:
-        df_masses_columns = ['Id', 'Well', 'IntervalNumber', 'Time', 'Mass', 'Volume', 'TotalNumberOfParticlesThroughSensor', 'TransitTime', 'PressureDrop']
-        df_masses = pd.DataFrame(list(zip(id_list, well_list, interval_num_list, time_list, masses_list, volume_list, total_num_particles_list, transit_time_list, pressure_drop_list)),
-                                 columns = df_masses_columns)
+        df_masses_columns = ['Id', 'Well', 'IntervalNumber', 'Time', 'Mass', 'Volume',
+                             'TotalNumberOfParticlesThroughSensor', 'TransitTime', 'PressureDrop']
+        df_masses = pd.DataFrame(list(
+            zip(id_list, well_list, interval_num_list, time_list, masses_list, volume_list, total_num_particles_list,
+                transit_time_list, pressure_drop_list)),
+            columns=df_masses_columns)
 
         # Sensor:
         # sensor_dict = {}
@@ -387,15 +400,15 @@ class LSData:
             print(f'...finished loading and parsing data!')
 
         return cls(run_name=run_name,
-                       accession_number=accession_number,
-                       input_xlsm_filename=input_xlsm_filename,
-                       start_time_dt=start_time_dt,
-                       end_time_dt=end_time_dt,
-                       settings_dict=settings_dict,
-                       df_panel_data=df_panel_data,
-                       df_interval_analysis=df_interval_analysis,
-                       df_masses=df_masses,
-                       )
+                   guid=guid,
+                   input_xlsm_filename=input_xlsm_filename,
+                   start_time_dt=start_time_dt,
+                   end_time_dt=end_time_dt,
+                   settings_dict=settings_dict,
+                   df_panel_data=df_panel_data,
+                   df_interval_analysis=df_interval_analysis,
+                   df_masses=df_masses,
+                   )
 
     def export_csv_files(self, output_filepath, verbose=True, sort_by_time=False):
         """Write CSV files to output directory"""
@@ -407,8 +420,8 @@ class LSData:
             raise AssertionError(f'The output path does not exist: {output_filepath}')
         self.output_dir_path = output_filepath
 
-        if self.accession_number:
-            filename_ending = f'{self.run_name}_{self.accession_number}.csv'
+        if self.guid:
+            filename_ending = f'{self.run_name}_{self.guid}.csv'
         else:
             filename_ending = f'{self.run_name}.csv'
 
@@ -437,7 +450,7 @@ class LSData:
         self.df_interval_analysis.to_csv(filename, index=False)
 
         # TODO: Output format (number of digits)
-        # TODO: Select columns for output (setable as parameter + default settings for each csv file)
+        # TODO: Select columns for output (settable as parameter + default settings for each csv file)
         # TODO: Optionally order data series by time (parameter)?!
 
     @property
@@ -466,7 +479,8 @@ class LSData:
 
     def __str__(self):
        if self.run_name is not None:
-            return f'Run "{self.run_name}" with {self.get_number_of_observations} observations in {self.get_number_of_intervals} intervals and {self.get_number_of_wells} wells.'
+            return f'Run "{self.run_name}" with {self.get_number_of_observations} observations in ' \
+                   f'{self.get_number_of_intervals} intervals and {self.get_number_of_wells} wells. '
         else:
             return f'Not data available yet.'
 
@@ -494,11 +508,9 @@ def row_to_list(row) -> list:
 
 
 if __name__ == '__main__':
     """Main function, primarily for debugging and testing."""
     xlsm_filename = '../../data/Example_several_wells/Vibrio_starvation_24.11.22_221125_163425.xlsm'
-    oputput_directory = '../../output/'
+    output_directory = '../../output/'
     ls_data = LSData.from_xlsm_file(input_xlsm_filename=xlsm_filename)
-    ls_data.export_csv_files(oputput_directory)
+    ls_data.export_csv_files(output_directory)
     print('test')
-
-
diff --git a/lifescale/scripts/ls2csv.py b/lifescale/scripts/ls2csv.py
index f13a56795bb5a1e1a748701fa3ef79c42062024c..7903228e2a9711e56e5c173725dfc94016acb436 100644
--- a/lifescale/scripts/ls2csv.py
+++ b/lifescale/scripts/ls2csv.py
@@ -1,11 +1,11 @@
-"""Converstion program from xlsm to csv
+"""Conversion program from xlsm to csv.
 
 Copyright (C) 2022 Andreas Hellerschmied <heller182@gmx.at>"""
 
 from lifescale.models.ls_data import LSData
 
 
-def ls2csv(xlsm_filename, oputput_dir, verbose=True):
+def ls2csv(xlsm_filename, output_dir, verbose=True):
     """Convert lifescale output file (xlsm) to csv files."""
     ls_data = LSData.from_xlsm_file(input_xlsm_filename=xlsm_filename, verbose=verbose)
-    ls_data.export_csv_files(oputput_dir, verbose=verbose)
+    ls_data.export_csv_files(output_dir, verbose=verbose)
diff --git a/lifescale/scripts/run_gui.py b/lifescale/scripts/run_gui.py
index 6d8f018a56a3fae620af93a36f8301e5cc217b24..c7b7fb62509c887fc4a315e287d932242937777f 100644
--- a/lifescale/scripts/run_gui.py
+++ b/lifescale/scripts/run_gui.py
@@ -12,4 +12,4 @@ def run_gui():
 
 if __name__ == "__main__":
     """Main Program."""
-    run_gui()
\ No newline at end of file
+    run_gui()
diff --git a/makefile b/makefile
index 54c3aa9b332bea0dfe517994b7cdf84a86f6c8d1..50604b59aac78b45ba9a97620a3214e50df003c5 100644
--- a/makefile
+++ b/makefile
@@ -5,7 +5,7 @@ test:
 	$(info Test-run for this makefile!)
 	$(info Yeah!!)
 
-# Project initaialization
+# Project initialization
 init:
 	pip install -r requirements.txt
 
@@ -23,6 +23,7 @@ test_pack_uninstall:
 
 # Build package with setuptools (new version):
 build:
+	rm -rf lifescale_utils.egg-info/
 	python -m build
 
 # Upload package to pypi.org
diff --git a/setup.cfg b/setup.cfg
index a82a87c774f2e43f9dbe360234184b299259c59b..5cb308073936bf00b7166d0d1f735ff6878304e2 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -9,6 +9,7 @@ long_description = file: README.md
 long_description_content_type = text/markdown
 keywords = Lifescale
 license = GNU GPLv3
+license_files = LICENSE
 classifiers =
     License :: OSI Approved :: GNU General Public License (GPL)
     Programming Language :: Python :: 3
@@ -26,3 +27,9 @@ install_requires =
 [options.entry_points]
 console_scripts =
     ls2csv = lifescale.command_line.command_line:ls2csv
+
+[options.packages.find]
+exclude =
+    lifescale.gui*
+    lifescale.scripts.run_gui.py
+    lifescale.models.ls_run.py
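
For reference, the converter touched by this patch can be exercised either through the console script (`ls2csv -i <input.xlsm> -o <output-dir>`) or directly via the Python API. The sketch below is illustrative only, not part of the patch: the file paths are made-up placeholders, but the calls are limited to what `lifescale/models/ls_data.py` defines above (`LSData.from_xlsm_file()` and `LSData.export_csv_files()`).

# Minimal usage sketch (assumes the package is installed and 'run.xlsm' / 'output/' exist):
from lifescale.models.ls_data import LSData

# Parse the xlsm file written by the lifescale unit into an LSData object:
ls_data = LSData.from_xlsm_file(input_xlsm_filename='run.xlsm', verbose=True)

# Write the Masses_Vibrio_, SampleMetadata_ and Summary_ csv files to an existing directory:
ls_data.export_csv_files('output/', verbose=True)

print(ls_data)  # e.g.: Run "<run-name>" with N observations in M intervals and K wells.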