Commit f0dbb52d authored by Andreas Hellerschmied
Cleaned and refactored code and set up a clean distribution (excluded unfinished gui module and other files that are not required). Set up MANIFEST.in.
parent 2cbc9405
exclude lifescale/models/ls_run.py lifescale/scripts/run_gui.py
# lifescale_utils
Data analysis tools for lifescale with GUI.
# Command line programs:
## ls2csv
The program *ls2csv* reads the content of the xlsm files written by lifescale units, parses the data and writes them to three csv
files (where `[run-name]` is the name from the settings sheet):
* `Masses_Vibrio_[run-name].csv`: Data series from the sheet AcquisitionIntervals.
* `SampleMetadata_[run-name].csv`: Data from the sheet PanelData.
* `Summary_[run-name].csv`: Data from the sheet IntervalAnalysis plus sample related data from AcquisitionIntervals.
### Usage:
* Conversion: `ls2csv -i [path and name of the xlsm file] -o [output directory]`
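For reference, the output file names described above can be sketched with a small helper (hypothetical, not part of the package; the optional GUID suffix mirrors the converter's behaviour of appending it to the run name when a GUID is present in the settings sheet):

```python
# Hypothetical helper illustrating the ls2csv output naming scheme.
def ls2csv_output_names(run_name, guid=""):
    """Return the three CSV file names ls2csv writes for one run."""
    ending = f"{run_name}_{guid}.csv" if guid else f"{run_name}.csv"
    return [f"Masses_Vibrio_{ending}",
            f"SampleMetadata_{ending}",
            f"Summary_{ending}"]

print(ls2csv_output_names("my_run"))
# ['Masses_Vibrio_my_run.csv', 'SampleMetadata_my_run.csv', 'Summary_my_run.csv']
```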
"""LifeSclae utils is a utility program for handling data output.
"""LifeScale utils is a utility program for handling data output.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -17,8 +17,8 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
Andreas Hellerschmied (heller182@gmx.at)
"""
__version__ = '0.0.3'
__author__ = 'Andreas Hellerschmied'
__git_repo__ = 'tba'
__email__ = 'heller182@gmx.at'
__copyright__ = '(c) 2022 Andreas Hellerschmied'
""" Command line interface of lifescale utils
"""Command line interface of lifescale utils.
Copyright (C) 2022 Andreas Hellerschmied <heller182@gmx.at>
"""
......@@ -25,13 +25,14 @@ def is_dir(pathname):
def ls2csv():
"""Command line interface including argument parser for the lifescale2csv converter."""
parser = argparse.ArgumentParser(prog="ls2csv",
description="Covnersion from lifescale xlsm output to csv files",
epilog="The ls2csv converter loads and parses xslm files created by the lifescale "
description="Conversion from lifescale xlsm output to csv files",
epilog="The ls2csv converter loads and parses xlsm files created by the lifescale "
"unit. It writes several csv files to the output directory that contain "
"extraced data from the input xlsm file in an easily readable way.",
"extracted data from the input xlsm file in an easily readable way.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("-i", "--input-xlsm", type=is_file, required=True, help="Path and name of the input xlsm file created by "
"lifescale.")
parser.add_argument("-i", "--input-xlsm", type=is_file, required=True, help="Path and name of the input xlsm file "
"created by "
"lifescale.")
parser.add_argument("-o", "--out-dir", type=is_dir, required=True, help="Output directory for the CSV files.")
parser.add_argument("-nv", "--not-verbose", required=False, help="Disable command line status messages.",
action='store_true')
......@@ -40,10 +41,8 @@ def ls2csv():
args = parser.parse_args()
verbose = not args.not_verbose
return ls2csv_main(xlsm_filename=args.input_xlsm, output_dir=args.out_dir, verbose=verbose)
if __name__ == '__main__':
ls2csv()
......@@ -39,18 +39,18 @@ class LSData:
End time of the run.
settings_dict : dict
Contains all settings from the Settings sheet of the input xlsm file. If more than one attribute is provided
for a parameter (dictionary key), the dictionary item is a list. If no attribute is provided, the item is `None`.
df_panel_data : pandas dataframe
Pandas dataframe that holds the data of the PanelData sheet of the input xlsm file.
df_interval_analysis : pandas dataframe
Pandas dataframe that holds the data of the IntervalAnalysis sheet plus additional data of the input xlsm file.
df_masses : pandas dataframe
Pandas dataframe that holds the data derived from the AcquisitionIntervals sheet of the input xlsm file.
"""
def __init__(self,
run_name='',
accession_number='',
guid='',
input_xlsm_filename='',
output_dir_path='',
start_time_dt=None,
......@@ -69,10 +69,10 @@ class LSData:
self.run_name = run_name
else:
raise TypeError('"run_name" needs to be a string')
if isinstance(guid, str):
self.guid = guid
else:
raise TypeError('"guid" needs to be a string')
# output_dir_path:
if isinstance(output_dir_path, str):
......@@ -123,10 +123,6 @@ class LSData:
# Initialize additional attributes:
pass
@classmethod
def from_xlsm_file(cls, input_xlsm_filename, verbose=True):
"""Constructor that generates and populates the LSData object from an xlsm LS output file.
......@@ -170,12 +166,12 @@ class LSData:
# Check, if all required sheets are available:
if set(REQUIRED_XLSM_SHEET_NAMES) - set(sheet_names):
missing_sheets = list(set(REQUIRED_XLSM_SHEET_NAMES) - set(sheet_names))
raise AssertionError(f'The following sheets are missing in the file {input_xlsm_filename}: {missing_sheets}')
# PanelData:
if verbose:
print(f' - Parse PanelData')
df_panel_data = xl_file.parse('PanelData')
df_panel_data = remove_space_from_column_names(df_panel_data)
df_panel_data['NumberOfIntervals'] = None
......@@ -193,9 +189,11 @@ class LSData:
df_interval_analysis['MeasuredVolume'] = None # From AcquisitionIntervals
df_interval_analysis['ResonantFrequency'] = None # From AcquisitionIntervals
if not (df_interval_analysis[['Id', 'IntervalNumber']].value_counts().count() == len(df_interval_analysis)):
raise AssertionError(
f'The combination of the values in the columns "Id" and "IntervalNumber" in IntervalAnalysis is not '
f'unique!')
# Settings:
if verbose:
print(f' - Parse Settings')
settings_dict = {}
......@@ -213,12 +211,12 @@ class LSData:
else:
settings_dict[row[0]] = tmp_list
run_name = settings_dict['Name']
if settings_dict['Guid'] is None:
guid = ''
else:
guid = str(settings_dict['Guid'])
start_time_dt = settings_dict['StartTime']
end_time_dt = start_time_dt + dt.timedelta(settings_dict['ElapsedTime'] / (24 * 60))
# # Settings (dataframe):
# df_settings = xl_file.parse('Settings', header=None).transpose()
......@@ -226,7 +224,6 @@ class LSData:
# df_settings = df_settings[1:]
# df_settings.reset_index(drop=True, inplace=True)
# Masses (from sheet AcquisitionIntervals):
if verbose:
print(f' - Parse Masses')
......@@ -240,11 +237,11 @@ class LSData:
transit_time_list = []
pressure_drop_list = []
time_list_length_old = 0
# masses_list_length_old = 0
# volume_list_length_old = 0
# total_num_particles_list_length_old = 0
# transit_time_list_length_old = 0
# pressure_drop_list_length_old = 0
current_id = None
current_well = None
......@@ -256,14 +253,14 @@ class LSData:
if row[0] == 'Id':
current_id = row[1]
if ~(df_interval_analysis['Id'] == current_id).any():
raise AssertionError(f'"ID="{current_id} is not available in IntervalAnalysis!')
if ~(df_panel_data['Id'] == current_id).any():
raise AssertionError(f'"ID="{current_id} is not available in PanelData!')
continue
if row[0] == 'Well':
current_well = row[1]
if ~(df_interval_analysis['Well'] == current_well).any():
raise AssertionError(f'"Well="{current_well} is not available in IntervalAnalysis!')
if ~(df_panel_data['Well'] == current_well).any():
raise AssertionError(f'"Well="{current_well} is not available in PanelData!')
continue
......@@ -278,7 +275,7 @@ class LSData:
current_interval_num = row[1]
if ~(df_interval_analysis['IntervalNumber'] == current_interval_num).any():
raise AssertionError(
f'"IntervalNumber="{current_interval_num} is not available in IntervalAnalysis!')
continue
if row[0] == 'StartTime':
continue
......@@ -287,37 +284,48 @@ class LSData:
if row[0] == 'DilutionFactor':
continue
if row[0] == 'Status':
tmp_filter = (df_interval_analysis['Id'] == current_id) & (
df_interval_analysis['IntervalNumber'] == current_interval_num)
if len(tmp_filter[tmp_filter]) != 1:
raise AssertionError(
f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" '
f'in IntervalAnalysis: {len(tmp_filter[tmp_filter])}')
df_interval_analysis.loc[tmp_filter, 'Status'] = row[1]
continue
if row[0] == 'DetectedParticles':
tmp_filter = (df_interval_analysis['Id'] == current_id) & (
df_interval_analysis['IntervalNumber'] == current_interval_num)
if len(tmp_filter[tmp_filter]) != 1:
raise AssertionError(
f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" '
f'in IntervalAnalysis: {len(tmp_filter[tmp_filter])}')
df_interval_analysis.loc[tmp_filter, 'DetectedParticles'] = row[1]
current_detected_particles = row[1] # For cross-checks
continue
if row[0] == 'MeasuredVolume':
tmp_filter = (df_interval_analysis['Id'] == current_id) & (
df_interval_analysis['IntervalNumber'] == current_interval_num)
if len(tmp_filter[tmp_filter]) != 1:
raise AssertionError(
f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" '
f'in IntervalAnalysis: {len(tmp_filter[tmp_filter])}')
df_interval_analysis.loc[tmp_filter, 'MeasuredVolume'] = row[1]
continue
if row[0] == 'ResonantFrequency':
tmp_filter = (df_interval_analysis['Id'] == current_id) & (
df_interval_analysis['IntervalNumber'] == current_interval_num)
if len(tmp_filter[tmp_filter]) != 1:
raise AssertionError(
f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" '
f'in IntervalAnalysis: {len(tmp_filter[tmp_filter])}')
df_interval_analysis.loc[tmp_filter, 'ResonantFrequency'] = row[1]
continue
if row[0] == 'Time':
tmp_list = row_to_list(row[1:])
if (len(tmp_list) == 0) and (current_detected_particles != 0):
raise AssertionError(
f'Number of "DetectedParticles={current_detected_particles}" does not match length of "Time" '
f'series (= {len(tmp_list)}) ')
time_list += tmp_list
continue
if row[0] == 'Mass':
......@@ -345,7 +353,8 @@ class LSData:
if not (len(time_list) == len(masses_list) == len(volume_list) == len(total_num_particles_list) == len(
transit_time_list) == len(pressure_drop_list)):
raise AssertionError(
f'The lengths of the data series in AcquisitionIntervals of "Well={current_well}" and '
f'"IntervalNumber={current_interval_num}" do not match!')
# Set up lists for well, id and interval number:
num_additional_items_in_data_series = len(time_list) - time_list_length_old
tmp_list = [current_id] * num_additional_items_in_data_series
......@@ -356,23 +365,27 @@ class LSData:
interval_num_list += tmp_list
# Reset counters:
time_list_length_old = len(time_list)
# masses_list_length_old = len(masses_list)
# volume_list_length_old = len(volume_list)
# total_num_particles_list_length_old = len(total_num_particles_list)
# transit_time_list_length_old = len(transit_time_list)
# pressure_drop_list_length_old = len(pressure_drop_list)
continue
# Check if the length of all data series lists match:
if not (len(time_list) == len(masses_list) == len(volume_list) == len(total_num_particles_list) == len(
transit_time_list) == len(pressure_drop_list) == len(id_list) == len(well_list) == len(
interval_num_list)):
raise AssertionError(
f'The lengths of the data series in AcquisitionIntervals do not match!')
# Create dataframe:
df_masses_columns = ['Id', 'Well', 'IntervalNumber', 'Time', 'Mass', 'Volume',
'TotalNumberOfParticlesThroughSensor', 'TransitTime', 'PressureDrop']
df_masses = pd.DataFrame(list(
zip(id_list, well_list, interval_num_list, time_list, masses_list, volume_list, total_num_particles_list,
transit_time_list, pressure_drop_list)),
columns=df_masses_columns)
# Sensor:
# sensor_dict = {}
......@@ -387,15 +400,15 @@ class LSData:
print(f'...finished loading and parsing data!')
return cls(run_name=run_name,
guid=guid,
input_xlsm_filename=input_xlsm_filename,
start_time_dt=start_time_dt,
end_time_dt=end_time_dt,
settings_dict=settings_dict,
df_panel_data=df_panel_data,
df_interval_analysis=df_interval_analysis,
df_masses=df_masses,
)
def export_csv_files(self, output_filepath, verbose=True, sort_by_time=False):
"""Write CSV files to output directory"""
......@@ -407,8 +420,8 @@ class LSData:
raise AssertionError(f'The output path does not exist: {output_filepath}')
self.output_dir_path = output_filepath
if self.guid:
filename_ending = f'{self.run_name}_{self.guid}.csv'
else:
filename_ending = f'{self.run_name}.csv'
......@@ -437,7 +450,7 @@ class LSData:
self.df_interval_analysis.to_csv(filename, index=False)
# TODO: Output format (number of digits)
# TODO: Select columns for output (settable as parameter + default settings for each csv file)
# TODO: Optionally order data series by time (parameter)?!
@property
......@@ -466,7 +479,8 @@ class LSData:
def __str__(self):
if self.run_name is not None:
return f'Run "{self.run_name}" with {self.get_number_of_observations} observations in ' \
f'{self.get_number_of_intervals} intervals and {self.get_number_of_wells} wells. '
else:
return 'No data available yet.'
......@@ -494,11 +508,9 @@ def row_to_list(row) -> list:
if __name__ == '__main__':
"""Main function, primarily for debugging and testing."""
xlsm_filename = '../../data/Example_several_wells/Vibrio_starvation_24.11.22_221125_163425.xlsm'
output_directory = '../../output/'
ls_data = LSData.from_xlsm_file(input_xlsm_filename=xlsm_filename)
ls_data.export_csv_files(output_directory)
print('test')
"""Converstion program from xlsm to csv
"""Conversion program from xlsm to csv.
Copyright (C) 2022 Andreas Hellerschmied <heller182@gmx.at>"""
from lifescale.models.ls_data import LSData
def ls2csv(xlsm_filename, output_dir, verbose=True):
"""Convert lifescale output file (xlsm) to csv files."""
ls_data = LSData.from_xlsm_file(input_xlsm_filename=xlsm_filename, verbose=verbose)
ls_data.export_csv_files(output_dir, verbose=verbose)
......@@ -12,4 +12,4 @@ def run_gui():
if __name__ == "__main__":
"""Main Program."""
run_gui()
......@@ -5,7 +5,7 @@ test:
$(info Test-run for this makefile!)
$(info Yeah!!)
# Project initialization
init:
pip install -r requirements.txt
......@@ -23,6 +23,7 @@ test_pack_uninstall:
# Build package with setuptools (new version):
build:
rm -rf lifescale_utils.egg-info/
python -m build
# Upload package to pypi.org
......@@ -9,6 +9,7 @@ long_description = file: README.md
long_description_content_type = text/markdown
keywords = Lifescale
license = GNU GPLv3
license_files = LICENSE
classifiers =
License :: OSI Approved :: GNU General Public License (GPL)
Programming Language :: Python :: 3
......@@ -26,3 +27,9 @@ install_requires =
[options.entry_points]
console_scripts =
ls2csv = lifescale.command_line.command_line:ls2csv
[options.packages.find]
exclude =
lifescale.gui*
lifescale.scripts.run_gui.py
lifescale.models.ls_run.py