diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..dc4c575fe1343ac222be57a9d056e0b766d01183
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+exclude lifescale/models/ls_run.py lifescale/scripts/run_gui.py
\ No newline at end of file
diff --git a/README.md b/README.md
index 49c7c8b3c85f91445e50bf9b241dd809f4d2c571..4a9f49948138a93a80905cd523c7337386fe5365 100644
--- a/README.md
+++ b/README.md
@@ -1,94 +1,34 @@
-# lifescale_gui
+# lifescale_utils
 Data analysis tools for lifescale with GUI.
 
-# Installation and setup
-* **1. Create virtual environment (venv)**
-  * `python3 -m venv env`
-* **2. Activate virtual environment**
-  * `source env/bin/activate`
-* **3. Clone git repository to local machine**
-  * `git clone git@gitlab.com:hellerdev/lifescale_gui.git`
-  * `cd lifescale_gui`
-* **4. Install required python packages using pip**
-  * `pip install -r requirements.txt`
+# Command line programs:
 
-
-## Installation issues on Ubuntu (20.04):
-After just installing PyQt5 with pip3 the following error occurred when trying to actually run a PyQt GUI: qt.qpa.plugin: Could not load the Qt platform plugin "xcb" in "" even though it was found.
-This issue was resolved by installing the QT dev tools (Designer, etc.):
-sudo apt-get install qttools5-dev-tools
+## ls2csv
+The program *ls2csv* reads the content of the xlsm files written by lifescale units, parses the data and writes it to three csv
+files (where `[run-name]` is the name from the settings sheet):
+ * `Masses_[run-name].csv`: Data series from the sheet AcquisitionIntervals.
+ * `SampleMetadata_[run-name].csv`: Data from the sheet PanelData.
+ * `Summary_[run-name].csv`: Data from the sheet IntervalAnalysis plus sample-related data from AcquisitionIntervals.
 
-# Test installation with setuptools
-With command line interface.
+### Usage:
+ * Conversion: `ls2csv -i [path and name of xlsm file] -o [output directory]`
+ * Help: `ls2csv -h`
 
-* **1. Configure setup.py**
-  * Define entry points (*console_scripts*)
-* **2. Activate virtual environment**
-  * e.g. `source env/bin/activate`
-* **3. Run setup.py**
-  * `python3 setup.py develop`
 
-## Using make
-`python3 setup.py develop`
+# License and copyright
 
-# Run application on Windows and create a stand-alone Windows executable file:
-TODO
-
-# Comments on requirements.txt file:
-* Two entries can be deleted:
-  * -e git+git@gitlab.com:Heller182/grav.git@fe528c0769502e84a06be67a742032cacfd386df#egg=gravtools
-  * pkg-resources==0.0.0 (created due a bug when using Linux, see: https://stackoverflow.com/questions/39577984/what-is-pkg-resources-0-0-0-in-output-of-pip-freeze-command)
+Copyright (C) 2022 Andreas Hellerschmied (<heller182@gmx.at>)
 
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
 
-# Create HTML documentation with sphinx:
-Run make in the gravtools/doc directory:
-* `>>>make html_doc`
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
 
-# Guidelines and conventions
-
-## Code style:
-* Respect the PEP conventions on python coding! 
- * PEP 8 -- Style Guide for Python Code: https://www.python.org/dev/peps/pep-0008/ -* The maximum line length is 120 characters -* Use **type hints**: https://www.python.org/dev/peps/pep-0484/ -* Use docstrings according to the numpy standard: https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard - * They are useful to generate the documentation automatically - * Example: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html -* Comment code, if necessary! -* Use English language for the code, docstrings and comments - * German is allowed for user interfaces (GUI, command line), although English is preferred - -## Documentation and docstring style -* The API reference is created with sphinx (https://www.sphinx-doc.org/). -* Docstrings have to follow the numpy standard, see: https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard - * Examples: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html -* Package documentation via docstring in __ini__.py files -* Module documentation via docstring at first lines of py-file -* Documentation of classes, class methods and functions via docstrings - -## Command line interface and executable scripts -* The command line interface is realized via entry points (console_scripts) in setuptools (python packaging tool) - * Input arguments are handled with argparse - * The code is located in the command_line module (gravtools/command_line.py) -* Executable scripts are located in gravtools/scripts - -## Dependancies -* Required python packages are listed in requirements.txt - * created with `>>>pip freeze > requirements.txt` - -## Version control with GIT -* Gitlab repository: https://gitlab.com/Heller182/grav -* Branching model: - * **master** branch: Current release version - * **develop** branch: Current working version. - * All team members merge their feature branches into develop (merge request via gitlab) - * Make sure that the develop branch contains a fully functional version of the code! - * **feature** branches: Branches of develop for the implementation of new features and other changes. - * Code changes only in feature branches! - * Naming convention: feature_<description of change/feature>, e.g. feature_new_tide_model -* Use gitignore files to prevent any data files (except example files), IDE control files, compiled python code, etc. from being stored in the GIT repository - * Generally rule: Ignore everything in a directory and define explicit exceptions! - -## Packaging and distribution -* With setuptools +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. diff --git a/lifescale/__init__.py b/lifescale/__init__.py index d0606497d4371e2c9983b4bc823072ad7ebc7069..af1d517ca89f1f94b3c97b63573cf5f95052ea4e 100644 --- a/lifescale/__init__.py +++ b/lifescale/__init__.py @@ -1,4 +1,4 @@ -"""LifeSclae GUI is a utility program for handling data output. +"""LifeScale utils is a utility program for handling data output. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. 
 Andreas Hellerschmied (heller182@gmx.at)
 """
-__version__ = '0.0.1'
+__version__ = '0.0.3'
 __author__ = 'Andreas Hellerschmied'
 __git_repo__ = 'tba'
 __email__ = 'heller182@gmx.at'
-__copyright__ = '(c) 2022 Andreas Hellerschmied'
\ No newline at end of file
+__copyright__ = '(c) 2022 Andreas Hellerschmied'
diff --git a/lifescale/mass_peak_caller/__init__.py b/lifescale/command_line/__init__.py
similarity index 100%
rename from lifescale/mass_peak_caller/__init__.py
rename to lifescale/command_line/__init__.py
diff --git a/lifescale/command_line/command_line.py b/lifescale/command_line/command_line.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5c45880ad9d5ce122ab8b9ccf1d125507d2fd1b
--- /dev/null
+++ b/lifescale/command_line/command_line.py
@@ -0,0 +1,48 @@
+"""Command line interface of lifescale utils.
+
+Copyright (C) 2022 Andreas Hellerschmied <heller182@gmx.at>
+"""
+
+from lifescale.scripts.ls2csv import ls2csv as ls2csv_main
+import argparse
+import os
+
+
+def is_file(filename):
+    """Check whether the input string is the path to an existing file."""
+    if os.path.isfile(filename):
+        return filename
+    raise argparse.ArgumentTypeError("'{}' is not a valid file.".format(filename))
+
+
+def is_dir(pathname):
+    """Check whether the input string is the path to an existing directory."""
+    if os.path.isdir(pathname):
+        return pathname
+    raise argparse.ArgumentTypeError("'{}' is not a valid directory.".format(pathname))
+
+
+def ls2csv():
+    """Command line interface including argument parser for the ls2csv converter."""
+    parser = argparse.ArgumentParser(prog="ls2csv",
+                                     description="Conversion from lifescale xlsm output to csv files",
+                                     epilog="The ls2csv converter loads and parses xlsm files created by the lifescale "
+                                            "unit. 
It writes several csv files to the output directory that contain " + "extracted data from the input xlsm file in an easily readable way.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("-i", "--input-xlsm", type=is_file, required=True, help="Path and name of the input xlsm file " + "created by " + "lifescale.") + parser.add_argument("-o", "--out-dir", type=is_dir, required=True, help="Output directory for the CSV files.") + parser.add_argument("-nv", "--not-verbose", required=False, help="Disable command line status messages.", + action='store_true') + # parser.add_argument("--out-dir", type=is_dir, required=False, + # help="path to output directory", default=OUT_PATH) + args = parser.parse_args() + verbose = not args.not_verbose + + return ls2csv_main(xlsm_filename=args.input_xlsm, output_dir=args.out_dir, verbose=verbose) + + +if __name__ == '__main__': + ls2csv() diff --git a/lifescale/mass_peak_caller/configure_peakcaller.py b/lifescale/mass_peak_caller/configure_peakcaller.py deleted file mode 100644 index 5268d8c30172e67109ba4ec77dda84fab8bed97e..0000000000000000000000000000000000000000 --- a/lifescale/mass_peak_caller/configure_peakcaller.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -import platform -import json - -DEFAULT_CONFIG = { - "mass_transformation": 0.00574, - "mass_cutoff": 20, - "peak_width_cutoff": 5, - "peak_distance_cutoff": 5, - "raw_data_folder": "~/research/lifescale_raw_data_test/development_raw_data_folder" -} - -LINUX_PATH = "./dev_config.json" -WINDOWS_PATH = r"C:\Users\LifeScale\Documents\peak_caller_config\peak_caller_config.json" - -def load_config(): - if platform.system() == "Linux": - try: - with open(LINUX_PATH, "r") as f: - config = json.load(f) - return config, None - except FileNotFoundError as e: - config = DEFAULT_CONFIG - with open(LINUX_PATH, "w") as f: - json.dump(config, f) - return config, LINUX_PATH - elif platform.system() == "Windows": - try: - with open(WINDOWS_PATH, "r") as f: - config = json.load(f) - return config, None - except FileNotFoundError as e: - config = DEFAULT_CONFIG - with open(WINDOWS_PATH, "w") as f: - json.dump(config, f) - return config, WINDOWS_PATH - -def configure_peakcaller(raw_data_folder, mass_transformation, mass_cutoff, peak_width_cutoff, peak_distance_cutoff, config, command): - print(locals()) - new_config = {k:v for k,v in locals().items() if k != "config" and k != "command" and v is not None} - print(new_config) - old_config = locals()["config"] - merged_config = {k:new_config[k] if k in new_config else old_config[k] for k in old_config} - if platform.system() == "Linux": - with open(LINUX_PATH, "w") as f: - json.dump(merged_config, f) - elif platform.system() == "Windows": - with open(WINDOWS_PATH, "w") as f: - json.dump(merged_config, f) - return merged_config diff --git a/lifescale/mass_peak_caller/dev_config.json b/lifescale/mass_peak_caller/dev_config.json deleted file mode 100644 index 129888342d7db45b2f7919d3acdb0d7697625476..0000000000000000000000000000000000000000 --- a/lifescale/mass_peak_caller/dev_config.json +++ /dev/null @@ -1 +0,0 @@ -{"mass_transformation": 0.00574, "mass_cutoff": 20.0, "peak_width_cutoff": 5.0, "peak_distance_cutoff": 5.0, "raw_data_folder": "/home/heller/pyProjects/gooey_lifescale/LSdata/raw_data"} \ No newline at end of file diff --git a/lifescale/mass_peak_caller/naive_peaks.py b/lifescale/mass_peak_caller/naive_peaks.py deleted file mode 100644 index fd51974faf6ab8bd5bd33082fb7d3c7b21cebbd3..0000000000000000000000000000000000000000 --- 
a/lifescale/mass_peak_caller/naive_peaks.py +++ /dev/null @@ -1,92 +0,0 @@ -""" GUI application for processing LifeScale data. - -copyright 2019 Joseph Elsherbini -all rights reserved -""" - -import os -import struct -import json -import re -import datetime -from itertools import chain -from operator import itemgetter -import numpy as np -import pandas as pd -import scipy.signal - -NOW = datetime.datetime.now().strftime("%Y%m%d%H%M%S") - -def list_experiments(config): - raw_data_files = [f for f in os.listdir(os.path.expanduser(config["raw_data_folder"])) if re.search(r"(.+)_(\d{6})_(\d{6})", f) and os.path.splitext(f)[1] == ""] - unique_experiments = sorted(sorted(list(set([re.search(r"(.+)_(\d{6})_(\d{6})", f).groups() for f in raw_data_files])), - key=itemgetter(2), reverse=True), key=itemgetter(1), reverse=True) - return (["{} {}".format(e[0], get_date_time(e[1], e[2])) for e in unique_experiments], ["_".join(e) for e in unique_experiments]) - -def get_date_time(date, time): - fmt_string = "%m/%d/%Y %H:%M:%S" - return datetime.datetime(2000+int(date[0:2]), int(date[2:4]), int(date[4:6]), int(time[0:2]), int(time[2:4]), int(time[4:6])).strftime(fmt_string) - - -def call_peaks(experiment, output_folder, metadata_file, config, command): - update_now() - all_experiments= list_experiments(config) - exp_name = [e[1] for e in zip(all_experiments[0], all_experiments[1]) if e[0] == experiment][0] - exp_files = [os.path.join(os.path.expanduser(config["raw_data_folder"]), f) for f in os.listdir(os.path.expanduser(config["raw_data_folder"])) if exp_name in f and os.path.splitext(f)[1] == ""] - print(exp_name, exp_files) - peaks = write_peaks(exp_name, exp_files, output_folder, metadata_file, config) - write_summary(exp_name, peaks, output_folder) - # TODO write_plots(exp_name, peaks, output_folder, config) - write_config(exp_name, output_folder, config) - return config - -def update_now(): - global NOW - NOW = datetime.datetime.now().strftime("%Y%m%d%H%M%S") - -def parse_metadata(metadata_file): - return pd.read_csv(metadata_file)[["Id", "Well"]] - -def load_raw_data(exp_name, exp_files): - for f_path in exp_files: - m = re.search(r"(.+)_(\d{6})_(\d{6})_c(\d+)_v(\d+)", f_path) - exp_date, exp_time, exp_cycle, exp_measurement = m.group(2,3,4,5) - print(exp_name, exp_date, exp_time, exp_cycle, exp_measurement) - n_datapoints = int(os.path.getsize(f_path) / 8) - with open(f_path, "rb") as f: - content = f.read() - a = np.array(struct.unpack("d"*n_datapoints, content))[10:] - yield dict(zip(["exp_name", "exp_date", "exp_time", "exp_cycle", "exp_measurement", "data_array"], - [exp_name, exp_date, exp_time, exp_cycle, exp_measurement, a])) - -def generate_peaks(measurement, config): - filtered_signal = scipy.signal.savgol_filter(measurement["data_array"], window_length=5, polyorder=3) - peaks, _ = scipy.signal.find_peaks(-filtered_signal, width=config["peak_width_cutoff"], prominence=config["mass_cutoff"]*config["mass_transformation"], distance=config["peak_distance_cutoff"]) - masses = scipy.signal.peak_prominences(-filtered_signal, peaks)[0]*(1/config["mass_transformation"]) - for peak, mass in zip(peaks, masses): - yield dict(zip(["exp_name", "exp_date", "exp_time", "exp_cycle", "exp_measurement", "event_index","event_mass"], - [measurement["exp_name"], measurement["exp_date"],measurement["exp_time"],measurement["exp_cycle"],measurement["exp_measurement"], peak, mass])) - -def write_peaks(exp_name, exp_files, output_folder, metadata_file, config): - peaks = 
pd.DataFrame(chain.from_iterable([generate_peaks(measurement, config) for measurement in load_raw_data(exp_name, exp_files)])) - if metadata_file: - metadata = parse_metadata(metadata_file) - peaks = peaks.astype({'exp_measurement':'int32'}).merge(metadata.astype({'Id':'int32'}), how='left', left_on='exp_measurement', right_on='Id') - peaks["Well"] = ["".join([w[0],w[1:].zfill(2)]) for w in peaks["Well"]] - out_path = os.path.join(os.path.expanduser(output_folder), "{}_{}_peaks.csv".format(NOW, exp_name)) - peaks.to_csv(out_path, index=False) - return peaks - -def write_summary(exp_name, peaks, output_folder): - print(peaks.columns) - if "Well" in peaks.columns: - summary = peaks.groupby(["Well", "exp_cycle"])["event_mass"].describe() - else: - summary = peaks.groupby(["exp_measurement", "exp_cycle"])["event_mass"].describe() - out_path = os.path.join(os.path.expanduser(output_folder), "{}_{}_summary.csv".format(NOW, exp_name)) - summary.to_csv(out_path) - -def write_config(exp_name, output_folder, config): - output_path = os.path.join(os.path.expanduser(output_folder), "{}_{}_config.json".format(NOW, exp_name)) - with open(output_path, "w") as f: - json.dump(config, f) diff --git a/lifescale/mass_peak_caller/peak_caller_gui.py b/lifescale/mass_peak_caller/peak_caller_gui.py deleted file mode 100644 index d618f3514a25bb44291079c708006335cb7cd049..0000000000000000000000000000000000000000 --- a/lifescale/mass_peak_caller/peak_caller_gui.py +++ /dev/null @@ -1,60 +0,0 @@ -import os -import re -from datetime import datetime -from functools import partial -from operator import itemgetter -from gooey import Gooey, GooeyParser -import naive_peaks -import configure_peakcaller - -DISPATCHER = { - "call_peaks": naive_peaks.call_peaks, - "config": configure_peakcaller.configure_peakcaller -} - -def show_error_modal(error_msg): - """ Spawns a modal with error_msg""" - # wx imported locally so as not to interfere with Gooey - import wx - app = wx.App() - dlg = wx.MessageDialog(None, error_msg, 'Error', wx.ICON_ERROR) - dlg.ShowModal() - dlg.Destroy() - -def add_call_peak_gui(subs, config): - p = subs.add_parser('call_peaks', prog='Call Mass Peaks', help='Get Mass Peaks from Raw Lifescale Data') - p.add_argument( - 'experiment', - metavar='Choose an Experiment', - help='Choose the name of an experiment', - widget='Dropdown', - choices=naive_peaks.list_experiments(config)[0]) - p.add_argument('output_folder', widget="DirChooser") - p.add_argument('--metadata_file', '-f', widget="FileChooser", help="If provided, convert vial ids to sample names. 
Should be the exported csv file called PanelData.csv.") - -def add_config_gui(subs, config): - p = subs.add_parser('config', prog="Configure Program", help="Options to change where this program looks for data, and the calibration used for frequency to mass conversion.") - p.add_argument('--raw_data_folder', widget="DirChooser", help="currently {}".format(config["raw_data_folder"])) - p.add_argument('--mass_transformation', type=float, help='currently {} Hz/fg'.format(config["mass_transformation"])) - p.add_argument('--mass_cutoff', '-m', type=float, default=20, help='currently {} fg - minimum mass of the peak (minimum 5fg recommended)'.format(config["mass_cutoff"])) - p.add_argument('--peak_width_cutoff', '-w', type=float, default=5, help='currently {} - width cutoff for peaks - minimum datapoints looking larger than noise'.format(config["peak_width_cutoff"])) - p.add_argument('--peak_distance_cutoff', '-d', type=float, default=5, help='currently {} - distance cutoff for peaks - minimum datapoints between peaks'.format(config["peak_distance_cutoff"])) - -@Gooey(program_name='Mass Peak Caller', image_dir='./images', required_cols=1) -def main(): - current_config, file_not_found = configure_peakcaller.load_config() - if file_not_found: - show_error_modal("No configuration file found at {}.\nWrote default configuration to that location.\nContinuing with default config.".format(file_not_found)) - - parser = GooeyParser(description='Get Mass Peaks from Raw Lifescale Data') - subs = parser.add_subparsers(help='commands', dest='command') - add_call_peak_gui(subs, current_config) - add_config_gui(subs, current_config) - - args = parser.parse_args() - opts = vars(args) - func = partial(DISPATCHER[args.command], config=current_config) - current_config = func(**opts) - -if __name__ == '__main__': - main() diff --git a/lifescale/models/ls_data.py b/lifescale/models/ls_data.py index 0eee038070eec24094aa9ab396aaf9e88873f685..f14a9cdba775f63bd22061a11d6ff6a419dc3299 100644 --- a/lifescale/models/ls_data.py +++ b/lifescale/models/ls_data.py @@ -39,18 +39,18 @@ class LSData: End time of the run. settings_dict : dict Contains all settings from the Settings sheet of the input xlsm file. If more than one attributes are provides - for a parameter (dictionary key), the dictionary item is a list. If no attribute is provided, the item is `None`. + for a parameter (dictionary key), the dictionary item is a list. If no attribute is provided, the item is `None` df_panel_data : pandas dataframe Pandas dataframe that holds the data of the PanelData sheet of the input xlsm file. df_interval_analysis : pandas dataframe Pandas dataframe that holds the data of the IntervalAnalysis sheet plus additional data of the input xlsm file. df_masses : pandas dataframe - Pandas dataframe that holds the data derived from the AcquisiotionIntervals sheet of the input xlsm file. + Pandas dataframe that holds the data derived from the AcquisitionIntervals sheet of the input xlsm file. 
""" def __init__(self, run_name='', - accession_number='', + guid='', input_xlsm_filename='', output_dir_path='', start_time_dt=None, @@ -69,10 +69,10 @@ class LSData: self.run_name = run_name else: raise TypeError('"run_name" needs to be a string') - if isinstance(accession_number, str): - self.accession_number = accession_number + if isinstance(guid, str): + self.guid = guid else: - raise TypeError('"accession_number" needs to be a string') + raise TypeError('"guid" needs to be a string') # output_dir_path: if isinstance(output_dir_path, str): @@ -123,10 +123,6 @@ class LSData: # Initialize additional attributes: pass - - - - @classmethod def from_xlsm_file(cls, input_xlsm_filename, verbose=True): """Constructor that generates and populates the LSData object from an xlsm LS output file. @@ -170,12 +166,12 @@ class LSData: # Check, if all required sheets are available: if set(REQUIRED_XLSM_SHEET_NAMES) - set(sheet_names): - missings_steets = list(set(REQUIRED_XLSM_SHEET_NAMES) - set(sheet_names)) - raise AssertionError(f'The following sheets are missing the file {input_xlsm_filename}: {missings_steets}') + missing_sheets = list(set(REQUIRED_XLSM_SHEET_NAMES) - set(sheet_names)) + raise AssertionError(f'The following sheets are missing the file {input_xlsm_filename}: {missing_sheets}') # PanelData: if verbose: - print(f' - Parse PanalData') + print(f' - Parse PanelData') df_panel_data = xl_file.parse('PanelData') df_panel_data = remove_space_from_column_names(df_panel_data) df_panel_data['NumberOfIntervals'] = None @@ -193,9 +189,11 @@ class LSData: df_interval_analysis['MeasuredVolume'] = None # From AcquisitionIntervals df_interval_analysis['ResonantFrequency'] = None # From AcquisitionIntervals if not (df_interval_analysis[['Id', 'IntervalNumber']].value_counts().count() == len(df_interval_analysis)): - raise AssertionError(f'The combination if the values in the columns "Id" and "IntervalNumber" in IntervalAnalysis is not unique!') + raise AssertionError( + f'The combination if the values in the columns "Id" and "IntervalNumber" in IntervalAnalysis is not ' + f'unique!') - # Settings: + # Settings: if verbose: print(f' - Parse Settings') settings_dict = {} @@ -206,19 +204,19 @@ class LSData: if item_not_nan_max_idx is np.nan: # No items that are not NaN! 
settings_dict[row[0]] = None else: - tmp_list = short_row[:item_not_nan_max_idx].to_list() + tmp_list = short_row.loc[:item_not_nan_max_idx].to_list() num_items = len(tmp_list) if num_items == 1: settings_dict[row[0]] = tmp_list[0] else: settings_dict[row[0]] = tmp_list run_name = settings_dict['Name'] - if settings_dict['AccessionNumber'] is None: - accession_number = '' + if settings_dict['Guid'] is None: + guid = '' else: - accession_number = str(settings_dict['AccessionNumber']) + guid = str(settings_dict['Guid']) start_time_dt = settings_dict['StartTime'] - end_time_dt =start_time_dt + dt.timedelta(settings_dict['ElapsedTime']/(24*60)) + end_time_dt = start_time_dt + dt.timedelta(settings_dict['ElapsedTime'] / (24 * 60)) # # Settings (dataframe): # df_settings = xl_file.parse('Settings', header=None).transpose() @@ -226,7 +224,6 @@ class LSData: # df_settings = df_settings[1:] # df_settings.reset_index(drop=True, inplace=True) - # Masses (from sheet AcquisitionIntervals): if verbose: print(f' - Parse Masses') @@ -240,11 +237,11 @@ class LSData: transit_time_list = [] pressure_drop_list = [] time_list_length_old = 0 - masses_list_length_old = 0 - volume_list_length_old = 0 - total_num_particles_list_length_old = 0 - transit_time_list_length_old = 0 - pressure_drop_list_length_old = 0 + # masses_list_length_old = 0 + # volume_list_length_old = 0 + # total_num_particles_list_length_old = 0 + # transit_time_list_length_old = 0 + # pressure_drop_list_length_old = 0 current_id = None current_well = None @@ -256,14 +253,14 @@ class LSData: if row[0] == 'Id': current_id = row[1] if ~(df_interval_analysis['Id'] == current_id).any(): - raise AssertionError(f'"ID="{current_id} is not available in InteralAnalysis!') + raise AssertionError(f'"ID="{current_id} is not available in IntervalAnalysis!') if ~(df_panel_data['Id'] == current_id).any(): raise AssertionError(f'"ID="{current_id} is not available in PanelData!') continue if row[0] == 'Well': current_well = row[1] if ~(df_interval_analysis['Well'] == current_well).any(): - raise AssertionError(f'"Well="{current_well} is not available in InteralAnalysis!') + raise AssertionError(f'"Well="{current_well} is not available in IntervalAnalysis!') if ~(df_panel_data['Well'] == current_well).any(): raise AssertionError(f'"Well="{current_well} is not available in PanelData!') continue @@ -278,7 +275,7 @@ class LSData: current_interval_num = row[1] if ~(df_interval_analysis['IntervalNumber'] == current_interval_num).any(): raise AssertionError( - f'"IntervalNumber="{current_interval_num} is not available in InteralAnalysis!') + f'"IntervalNumber="{current_interval_num} is not available in IntervalAnalysis!') continue if row[0] == 'StartTime': continue @@ -287,37 +284,48 @@ class LSData: if row[0] == 'DilutionFactor': continue if row[0] == 'Status': - tmp_filter = (df_interval_analysis['Id'] == current_id) & (df_interval_analysis['IntervalNumber'] == current_interval_num) + tmp_filter = (df_interval_analysis['Id'] == current_id) & ( + df_interval_analysis['IntervalNumber'] == current_interval_num) if len(tmp_filter[tmp_filter]) != 1: - raise AssertionError(f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" in IntervalAnalysis: {len(tmp_filter[tmp_filter])}') + raise AssertionError( + f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" ' + f'in IntervalAnalysis: {len(tmp_filter[tmp_filter])}') df_interval_analysis.loc[tmp_filter, 'Status'] = row[1] continue if row[0] == 
'DetectedParticles': tmp_filter = (df_interval_analysis['Id'] == current_id) & ( - df_interval_analysis['IntervalNumber'] == current_interval_num) + df_interval_analysis['IntervalNumber'] == current_interval_num) if len(tmp_filter[tmp_filter]) != 1: - raise AssertionError(f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" in IntervalAnalysis: {len(tmp_filter[tmp_filter])}') + raise AssertionError( + f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" ' + f'in IntervalAnalysis: {len(tmp_filter[tmp_filter])}') df_interval_analysis.loc[tmp_filter, 'DetectedParticles'] = row[1] current_detected_particles = row[1] # For cross-checks continue if row[0] == 'MeasuredVolume': tmp_filter = (df_interval_analysis['Id'] == current_id) & ( - df_interval_analysis['IntervalNumber'] == current_interval_num) + df_interval_analysis['IntervalNumber'] == current_interval_num) if len(tmp_filter[tmp_filter]) != 1: - raise AssertionError(f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" in IntervalAnalysis: {len(tmp_filter[tmp_filter])}') + raise AssertionError( + f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" ' + f'in IntervalAnalysis: {len(tmp_filter[tmp_filter])}') df_interval_analysis.loc[tmp_filter, 'MeasuredVolume'] = row[1] continue if row[0] == 'ResonantFrequency': tmp_filter = (df_interval_analysis['Id'] == current_id) & ( df_interval_analysis['IntervalNumber'] == current_interval_num) if len(tmp_filter[tmp_filter]) != 1: - raise AssertionError(f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" in IntervalAnalysis: {len(tmp_filter[tmp_filter])}') + raise AssertionError( + f'Invalid number of matches of "Id={current_id}" and "IntervalNumber={current_interval_num}" ' + f'in IntervalAnalysis: {len(tmp_filter[tmp_filter])}') df_interval_analysis.loc[tmp_filter, 'ResonantFrequency'] = row[1] continue if row[0] == 'Time': tmp_list = row_to_list(row[1:]) if (len(tmp_list) == 0) and (current_detected_particles != 0): - raise AssertionError(f'Number of "DetectedParticles={current_detected_particles}" does not match length of "Time" series (= {len(tmp_list)}) ') + raise AssertionError( + f'Number of "DetectedParticles={current_detected_particles}" does not match length of "Time" ' + f'series (= {len(tmp_list)}) ') time_list += tmp_list continue if row[0] == 'Mass': @@ -345,7 +353,8 @@ class LSData: if not (len(time_list) == len(masses_list) == len(volume_list) == len(total_num_particles_list) == len( transit_time_list) == len(pressure_drop_list)): raise AssertionError( - f'The lengths of the data series in AcquisitionIntervals of "Well={current_well}" and "IntervalNumber={current_interval_num}" do not match!') + f'The lengths of the data series in AcquisitionIntervals of "Well={current_well}" and ' + f'"IntervalNumber={current_interval_num}" do not match!') # Set up lists for well, id and interval number: num_additional_items_in_data_series = len(time_list) - time_list_length_old tmp_list = [current_id] * num_additional_items_in_data_series @@ -356,23 +365,27 @@ class LSData: interval_num_list += tmp_list # Reset counters: time_list_length_old = len(time_list) - masses_list_length_old = len(masses_list) - volume_list_length_old = len(volume_list) - total_num_particles_list_length_old = len(total_num_particles_list) - transit_time_list_length_old = len(transit_time_list) - pressure_drop_list_length_old = 
len(pressure_drop_list) + # masses_list_length_old = len(masses_list) + # volume_list_length_old = len(volume_list) + # total_num_particles_list_length_old = len(total_num_particles_list) + # transit_time_list_length_old = len(transit_time_list) + # pressure_drop_list_length_old = len(pressure_drop_list) continue # Check if the length of all data series lists match: if not (len(time_list) == len(masses_list) == len(volume_list) == len(total_num_particles_list) == len( - transit_time_list) == len(pressure_drop_list) == len(id_list) == len(well_list) == len(interval_num_list)): + transit_time_list) == len(pressure_drop_list) == len(id_list) == len(well_list) == len( + interval_num_list)): raise AssertionError( f'The lengths of the data series in AcquisitionIntervals do not match!') # Create dataframe: - df_masses_columns = ['Id', 'Well', 'IntervalNumber', 'Time', 'Mass', 'Volume', 'TotalNumberOfParticlesThroughSensor', 'TransitTime', 'PressureDrop'] - df_masses = pd.DataFrame(list(zip(id_list, well_list, interval_num_list, time_list, masses_list, volume_list, total_num_particles_list, transit_time_list, pressure_drop_list)), - columns = df_masses_columns) + df_masses_columns = ['Id', 'Well', 'IntervalNumber', 'Time', 'Mass', 'Volume', + 'TotalNumberOfParticlesThroughSensor', 'TransitTime', 'PressureDrop'] + df_masses = pd.DataFrame(list( + zip(id_list, well_list, interval_num_list, time_list, masses_list, volume_list, total_num_particles_list, + transit_time_list, pressure_drop_list)), + columns=df_masses_columns) # Sensor: # sensor_dict = {} @@ -387,26 +400,28 @@ class LSData: print(f'...finished loading and parsing data!') return cls(run_name=run_name, - accession_number=accession_number, - input_xlsm_filename=input_xlsm_filename, - start_time_dt=start_time_dt, - end_time_dt=end_time_dt, - settings_dict=settings_dict, - df_panel_data=df_panel_data, - df_interval_analysis=df_interval_analysis, - df_masses=df_masses, - ) + guid=guid, + input_xlsm_filename=input_xlsm_filename, + start_time_dt=start_time_dt, + end_time_dt=end_time_dt, + settings_dict=settings_dict, + df_panel_data=df_panel_data, + df_interval_analysis=df_interval_analysis, + df_masses=df_masses, + ) def export_csv_files(self, output_filepath, verbose=True, sort_by_time=False): """Write CSV files to output directory""" - print('Write output') + if verbose: + print('Write output') + # Checks: if not os.path.exists(output_filepath): raise AssertionError(f'The output path does not exist: {output_filepath}') self.output_dir_path = output_filepath - if self.accession_number: - filename_ending = f'{self.run_name}_{self.accession_number}.csv' + if self.guid: + filename_ending = f'{self.run_name}_{self.guid}.csv' else: filename_ending = f'{self.run_name}.csv' @@ -435,7 +450,7 @@ class LSData: self.df_interval_analysis.to_csv(filename, index=False) # TODO: Output format (number of digits) - # TODO: Select columns for output (setable as parameter + default settings for each csv file) + # TODO: Select columns for output (settable as parameter + default settings for each csv file) # TODO: Optionally order data series by time (parameter)?! @property @@ -464,10 +479,12 @@ class LSData: def __str__(self): if self.run_name is not None: - return f'Run "{self.run_name}" with {self.get_number_of_observations} observations in {self.get_number_of_intervals} intervals and {self.get_number_of_wells} wells.' 
+ return f'Run "{self.run_name}" with {self.get_number_of_observations} observations in ' \ + f'{self.get_number_of_intervals} intervals and {self.get_number_of_wells} wells. ' else: return f'Not data available yet.' + def remove_space_from_column_names(df): """Removes white space from column names of input dataframe.""" col_names = df.columns @@ -477,24 +494,23 @@ def remove_space_from_column_names(df): df.columns = col_names_corrected return df + def row_to_list(row) -> list: """Convert dataframe row to list and remove all trailing NaN values.""" item_not_nan_max_idx = row.loc[~row.isna()].index.max() if item_not_nan_max_idx is np.nan: # No items that are not NaN! out_list = [] else: - out_list = row[:item_not_nan_max_idx].to_list() + out_list = row.loc[:item_not_nan_max_idx].to_list() return out_list if __name__ == '__main__': """Main function, primarily for debugging and testing.""" xlsm_filename = '../../data/Example_several_wells/Vibrio_starvation_24.11.22_221125_163425.xlsm' - oputput_directory = '../../output/' + output_directory = '../../output/' ls_data = LSData.from_xlsm_file(input_xlsm_filename=xlsm_filename) - ls_data.export_csv_files(oputput_directory) + ls_data.export_csv_files(output_directory) print('test') - - diff --git a/lifescale/scripts/ls2csv.py b/lifescale/scripts/ls2csv.py new file mode 100644 index 0000000000000000000000000000000000000000..7903228e2a9711e56e5c173725dfc94016acb436 --- /dev/null +++ b/lifescale/scripts/ls2csv.py @@ -0,0 +1,11 @@ +"""Conversion program from xlsm to csv. + +Copyright (C) 2022 Andreas Hellerschmied <heller182@gmx.at>""" + +from lifescale.models.ls_data import LSData + + +def ls2csv(xlsm_filename, output_dir, verbose=True): + """Convert lifescale output file (xlsm) to csv files.""" + ls_data = LSData.from_xlsm_file(input_xlsm_filename=xlsm_filename, verbose=verbose) + ls_data.export_csv_files(output_dir, verbose=verbose) diff --git a/lifescale/scripts/run_gui.py b/lifescale/scripts/run_gui.py index 6cbf1022c1da200e77c81e4cbd1bf513f8712b74..c7b7fb62509c887fc4a315e287d932242937777f 100644 --- a/lifescale/scripts/run_gui.py +++ b/lifescale/scripts/run_gui.py @@ -1,4 +1,7 @@ -"""Start the lifescale GUI from here!""" +"""Start the lifescale GUI from here! + +Copyright (C) 2022 Andreas Hellerschmied <heller182@gmx.at> +""" from lifescale.gui.gui_main import main @@ -9,4 +12,4 @@ def run_gui(): if __name__ == "__main__": """Main Program.""" - run_gui() \ No newline at end of file + run_gui() diff --git a/makefile b/makefile index d2ff4f1f5b70aba7ad29f4c775902cfd41c1975c..50604b59aac78b45ba9a97620a3214e50df003c5 100644 --- a/makefile +++ b/makefile @@ -5,10 +5,27 @@ test: $(info Test-run for this makefile!) $(info Yeah!!) -# Project initaialization +# Project initialization init: pip install -r requirements.txt # Convert *.ui files from Qt Designer to Python files: py_gui: pyuic6 -o lifescale/gui/MainWindow.py lifescale/gui/MainWindow.ui + +# Package test (install in current virtual environment, editable install with pip) +test_pack: + pip install -e . 
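+# Hypothetical smoke test (assumes an active venv and example data; after the
+# editable install, the console script defined in setup.cfg is on the PATH):
+#   make test_pack
+#   ls2csv -i ./data/example_run.xlsm -o ./output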
+
+# Uninstall test package
+test_pack_uninstall:
+	pip uninstall lifescale-utils
+
+# Build package with setuptools (new version):
+build:
+	rm -rf lifescale_utils.egg-info/
+	python -m build
+
+# Upload package to pypi.org
+pypi_push:
+	twine upload --verbose dist/*
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..07de284aa5c45f56b69ca6f605edf72a14785b99
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..5cb308073936bf00b7166d0d1f735ff6878304e2
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,35 @@
+[metadata]
+name = lifescale-utils
+version = attr: lifescale.__version__
+author = Andreas Hellerschmied
+author_email = heller182@gmx.at
+url = https://gitlab.com/hellerdev/lifescale_utils
+description = Lifescale utility software.
+long_description = file: README.md
+long_description_content_type = text/markdown
+keywords = Lifescale
+license = GNU GPLv3
+license_files = LICENSE
+classifiers =
+    License :: OSI Approved :: GNU General Public License (GPL)
+    Programming Language :: Python :: 3
+
+[options]
+python_requires = >=3.6, <4
+packages = find:
+zip_safe = True
+include_package_data = True
+install_requires =
+    numpy
+    pandas
+    openpyxl
+
+[options.entry_points]
+console_scripts =
+    ls2csv = lifescale.command_line.command_line:ls2csv
+
+[options.packages.find]
+exclude =
+    lifescale.gui*
+    lifescale.scripts.run_gui.py
+    lifescale.models.ls_run.py
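
Beyond the console script, the new `lifescale/scripts/ls2csv.py` module exposes the conversion as a plain Python function, so the same workflow can be scripted without going through argument parsing. A minimal sketch, assuming a hypothetical input file path and an existing output directory:

```python
from lifescale.scripts.ls2csv import ls2csv

# Hypothetical paths: any xlsm file written by a lifescale unit will do; the
# output directory must already exist (export_csv_files checks for it and
# raises an AssertionError otherwise).
ls2csv(xlsm_filename='data/example_run_221125_163425.xlsm',
       output_dir='output/',
       verbose=True)  # verbose=False suppresses the command line status messages
```

This mirrors exactly what the `ls2csv` console entry point in `lifescale/command_line/command_line.py` does after validating its `-i` and `-o` arguments.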