Added benchmarking routines

ca3925fd · Aiko Voigt · ad40b2bd · ca3925fd · ca3925fd · ca3925fd
Commit ca3925fd authored Sep 16, 2021 by Aiko Voigt
--- a/benchmarks/benchmark_2d.py
+++ b/benchmarks/benchmark_2d.py
+# Perform benchmarking of 2d cloud data given a previously computed cubulation
+#
+# Usage: python benchmark_2d.py <resolution> <start cell> <search radius>
+#   e.g. python benchmark_2d.py R80000m 5738 102
+# Start cell and search radius are only used to select the saved cubulation file.
+
+# parse command line parameters
+import sys
+resol      = sys.argv[1]
+startcell  = int(sys.argv[2])
+searchrad  = int(sys.argv[3])
+
+# gridfile including path (not referenced further in this script)
+gridfile = '/work/bb1018/b380459/NAWDEX/grids/icon-grid_nawdex_78w40e23n80n_'+resol+'.nc'
+
+# load other needed packages
+sys.path.append('/pf/b/b380459/connected-components-3d/')
+sys.path.append('/pf/b/b380459/tricco/')
+import tricco
+import datetime
+
+print('                                         ')
+print('-----------------------------------------')
+print('Working on resolution of', resol)
+
+# load previously computed cubulation
+import numpy as np
+cubulpath = '/scratch/b/b380459/tricco_output/'
+cubulfile = cubulpath+'/icon-grid_nawdex_78w40e23n80n_'+resol+'_cubulation_start'+str(startcell)+'_radius'+str(searchrad)+'.npy'
+# NOTE: allow_pickle=True unpickles arbitrary objects; only load cubulation files you created yourself
+cubulation = np.load(cubulfile, allow_pickle=True)
+
+# read in cloud data
+
+# cloud file depends on resolution
+datapath={'R80000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-80km-mis-0001/',
+          'R40000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-40km-mis-0001/',
+          'R20000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-20km-mis-0001/',
+          'R10000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-10km-mis-0001/'}
+datafile={'R80000m': 'nawdexnwp-80km-mis-0001_2016092200_2d_30min_DOM01_ML_',
+          'R40000m': 'nawdexnwp-40km-mis-0001_2016092200_2d_30min_DOM01_ML_',
+          'R20000m': 'nawdexnwp-20km-mis-0001_2016092200_2d_30min_DOM01_ML_',
+          'R10000m': 'nawdexnwp-10km-mis-0001_2016092200_2d_30min_DOM01_ML_'}
+
+# per-time-step wall-clock durations (datetime.timedelta objects)
+dtime_dat = list()
+dtime_ver = list()
+dtime_edg = list()
+# loop over the 30-min output files 0010 ... 0058; range(10,59) yields 49 time steps
+for time in range(10,59):
+    # read in data
+    begin_time = datetime.datetime.now()
+    field, field_cube = tricco.prepare_field(model='ICON', path=datapath[resol],
+                            file=datafile[resol]+'00'+str(time)+'.nc',
+                            var='clct', threshold=85.0, cubulation=cubulation)
+    end_time = datetime.datetime.now()
+    dtime_dat.append(end_time-begin_time)
+    # perform connected component analysis for vertex connectivity
+    begin_time = datetime.datetime.now()
+    _ = tricco.compute_connected_components_2d(cubulation, field_cube, connectivity = 'vertex')
+    end_time = datetime.datetime.now()
+    dtime_ver.append(end_time-begin_time)
+    # perform connected component analysis for edge connectivity
+    begin_time = datetime.datetime.now()
+    _ = tricco.compute_connected_components_2d(cubulation, field_cube, connectivity = 'edge')
+    end_time = datetime.datetime.now()
+    dtime_edg.append(end_time-begin_time)
+
+# report the number of time steps that were actually timed instead of a hard-coded count
+nsteps = len(dtime_dat)
+
+print(' ')
+print('Time to read in 1 time step of data, including moving it onto the 3d cubulated grid; done for', nsteps, 'time steps')
+print('Mean:', np.mean(dtime_dat), 'Min:', np.min(dtime_dat), 'Max:', np.max(dtime_dat))
+print(' ')
+print('Time to do 2d connected component labeling with vertex connectivity for 1 time step; done for', nsteps, 'time steps')
+print('Mean:', np.mean(dtime_ver), 'Min:', np.min(dtime_ver), 'Max:', np.max(dtime_ver))
+print(' ')
+print('Time to do 2d connected component labeling with edge connectivity for 1 time step: done for', nsteps, 'time steps')
+print('Mean:', np.mean(dtime_edg), 'Min:', np.min(dtime_edg), 'Max:', np.max(dtime_edg))
+
+print('-----------------------------------------')
+print('                                         ')
--- a/benchmarks/benchmark_2d.run
+++ b/benchmarks/benchmark_2d.run
+#!/bin/bash
+#=============================================================================
+# mistral batch job parameters
+#-----------------------------------------------------------------------------
+#SBATCH --account=bb1152
+#SBATCH --job-name=benchmark_2d.run
+#SBATCH --partition=compute
+#SBATCH --nodes=1
+#SBATCH --threads-per-core=1
+#SBATCH --output=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_2d.run.%j.o
+#SBATCH --error=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_2d.run.%j.o
+#SBATCH --exclusive
+#SBATCH --time=08:00:00
+
+# Runs benchmark_2d.py ten times for each grid resolution.
+# Arguments passed to benchmark_2d.py: <resolution> <start cell> <search radius>
+
+# stop right away if the benchmark directory is missing instead of running from the wrong place
+cd /pf/b/b380459/BigDataClouds/tricco/benchmarks || exit 1
+
+MYPYTHON="/pf/b/b380459/conda-envs/Nawdex-Hackathon/bin/python3.8"
+echo "Working with ${MYPYTHON}"
+
+for i in {1..10}; do
+   "$MYPYTHON" benchmark_2d.py R80000m 5738 102
+done
+
+for i in {1..10}; do
+   "$MYPYTHON" benchmark_2d.py R40000m 18538 230
+done
+
+for i in {1..10}; do
+   "$MYPYTHON" benchmark_2d.py R20000m 69309 460
+done
+
+for i in {1..10}; do
+   "$MYPYTHON" benchmark_2d.py R10000m 264792 2000
+done
--- a/benchmarks/benchmark_3d.py
+++ b/benchmarks/benchmark_3d.py
+# Perform benchmarking of 3d cloud data given a previously computed cubulation
+#
+# Usage: python benchmark_3d.py <resolution> <start cell> <search radius>
+#   e.g. python benchmark_3d.py R10000m 264792 2000
+# Start cell and search radius are only used to select the saved cubulation file.
+
+# parse command line parameters
+import sys
+resol      = sys.argv[1]
+startcell  = int(sys.argv[2])
+searchrad  = int(sys.argv[3])
+
+# gridfile including path (not referenced further in this script)
+gridfile = '/work/bb1018/b380459/NAWDEX/grids/icon-grid_nawdex_78w40e23n80n_'+resol+'.nc'
+
+# load other needed packages
+sys.path.append('/pf/b/b380459/connected-components-3d/')
+sys.path.append('/pf/b/b380459/tricco/')
+import tricco
+import datetime
+
+print('                                         ')
+print('-----------------------------------------')
+print('Working on resolution of', resol)
+
+# load previously computed cubulation
+import numpy as np
+cubulpath = '/scratch/b/b380459/tricco_output/'
+cubulfile = cubulpath+'/icon-grid_nawdex_78w40e23n80n_'+resol+'_cubulation_start'+str(startcell)+'_radius'+str(searchrad)+'.npy'
+# NOTE: allow_pickle=True unpickles arbitrary objects; only load cubulation files you created yourself
+cubulation = np.load(cubulfile, allow_pickle=True)
+
+# read in cloud data
+
+# cloud file depends on resolution
+datapath={'R80000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-80km-mis-0001/',
+          'R40000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-40km-mis-0001/',
+          'R20000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-20km-mis-0001/',
+          'R10000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-10km-mis-0001/'}
+datafile={'R80000m': 'nawdexnwp-80km-mis-0001_2016092200_3dcloud_DOM01_ML_',
+          'R40000m': 'nawdexnwp-40km-mis-0001_2016092200_3dcloud_DOM01_ML_',
+          'R20000m': 'nawdexnwp-20km-mis-0001_2016092200_3dcloud_DOM01_ML_',
+          'R10000m': 'nawdexnwp-10km-mis-0001_2016092200_3dcloud_DOM01_ML_'}
+
+# per-time-step wall-clock durations (datetime.timedelta objects)
+dtime_dat = list()
+dtime_ver = list()
+dtime_edg = list()
+# loop over the output files 0010 ... 0058; range(10,59) yields 49 time steps
+for time in range(10,59):
+    # read in data
+    begin_time = datetime.datetime.now()
+    field, field_cube = tricco.prepare_field_lev(model='ICON', path=datapath[resol],
+                            file=datafile[resol]+'00'+str(time)+'.nc',
+                            var='clc', threshold=85.0, cubulation=cubulation)
+    end_time = datetime.datetime.now()
+    dtime_dat.append(end_time-begin_time)
+    # perform connected component analysis for vertex connectivity
+    begin_time = datetime.datetime.now()
+    _ = tricco.compute_connected_components_3d(cubulation, field_cube, connectivity = 'vertex')
+    end_time = datetime.datetime.now()
+    dtime_ver.append(end_time-begin_time)
+    # perform connected component analysis for edge connectivity
+    begin_time = datetime.datetime.now()
+    _ = tricco.compute_connected_components_3d(cubulation, field_cube, connectivity = 'edge')
+    end_time = datetime.datetime.now()
+    dtime_edg.append(end_time-begin_time)
+
+# report the number of time steps that were actually timed instead of a hard-coded count
+nsteps = len(dtime_dat)
+
+print(' ')
+print('Time to read in 1 time step of data, including moving it onto the 3d x lev cubulated grid; done for', nsteps, 'time steps')
+print('Mean:', np.mean(dtime_dat), 'Min:', np.min(dtime_dat), 'Max:', np.max(dtime_dat))
+print(' ')
+print('Time to do 3d connected component labeling with vertex connectivity for 1 time step; done for', nsteps, 'time steps')
+print('Mean:', np.mean(dtime_ver), 'Min:', np.min(dtime_ver), 'Max:', np.max(dtime_ver))
+print(' ')
+print('Time to do 3d connected component labeling with edge connectivity for 1 time step; done for', nsteps, 'time steps')
+print('Mean:', np.mean(dtime_edg), 'Min:', np.min(dtime_edg), 'Max:', np.max(dtime_edg))
+
+print('-----------------------------------------')
+print('                                         ')
--- a/benchmarks/benchmark_3d.run
+++ b/benchmarks/benchmark_3d.run
+#!/bin/bash
+#=============================================================================
+# mistral batch job parameters
+#-----------------------------------------------------------------------------
+#SBATCH --account=bb1152
+#SBATCH --job-name=benchmark_3d.run
+#SBATCH --partition=compute
+#SBATCH --nodes=1
+#SBATCH --threads-per-core=1
+#SBATCH --output=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_3d.run.%j.o
+#SBATCH --error=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_3d.run.%j.o
+#SBATCH --exclusive
+#SBATCH --time=08:00:00
+
+# Runs benchmark_3d.py ten times per enabled grid resolution.
+# Arguments passed to benchmark_3d.py: <resolution> <start cell> <search radius>
+# Only the R10000m loop is currently enabled; uncomment the others to run them as well.
+
+# stop right away if the benchmark directory is missing instead of running from the wrong place
+cd /pf/b/b380459/BigDataClouds/tricco/benchmarks || exit 1
+
+MYPYTHON="/pf/b/b380459/conda-envs/Nawdex-Hackathon/bin/python3.8"
+echo "Working with ${MYPYTHON}"
+
+#for i in {1..10}; do
+#   "$MYPYTHON" benchmark_3d.py R80000m 5738 102
+#done
+
+#for i in {1..10}; do
+#   "$MYPYTHON" benchmark_3d.py R40000m 18538 230
+#done
+
+#for i in {1..10}; do
+#   "$MYPYTHON" benchmark_3d.py R20000m 69309 460
+#done
+
+for i in {1..10}; do
+   "$MYPYTHON" benchmark_3d.py R10000m 264792 2000
+done
--- a/benchmarks/benchmark_cubulation.ipynb
+++ b/benchmarks/benchmark_cubulation.ipynb
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Notebook for benchmarking runtime of cubulation\n",
-    "\n",
-    "Tests are performed on an exclusive compute node of the DKRZ supercomputer Mistral in Hamburg, Germany.\n",
-    "\n",
-    "A compute node has the following specs (https://www.dkrz.de/up/systems/mistral/configuration):\n",
-    " * 2x 12-core Intel Xeon E5-2680 v3 (Haswell) @ 2.5GHz\n",
-    " * 24 cores (48 logical CPUs)\n",
-    " * 64 GB main memory\n",
-    " \n",
-    "As for the grids we consider limited-area ICON grids that cover a large part of the North Atlantic. They were for example used for the NAWDEX simulations described Senf, F., A. Voigt et al, 2020: Increasing Resolution and Resolving Convection Improve the Simulation of Cloud‐Radiative Effects Over the North Atlantic, JGR Atmospheres. https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2020JD032667."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Load required packages. Note: adding cc3d to system path is needed because it is required by tricco."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import timeit\n",
-    "import sys\n",
-    "sys.path.append('/pf/b/b380459/connected-components-3d/')\n",
-    "sys.path.append('/pf/b/b380459/BigDataClouds/tricco/')\n",
-    "import tricco"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Define the start triangle and radius of outward search as a function of grid resolution."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dict_start  = {'80000m': 5783, '40000m': 18538, '20000m': 69309, '10000m': 264792}\n",
-    "dict_radius = {'80000m': 102 , '40000m': 230  , '20000m': 460  , '10000m': 2000  }"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Working on resolution of 80000m\n",
-      "Time to read the grid:\n",
-      "310 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n",
-      "Time to compute cubulation:\n",
-      "11.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n",
-      "Working on resolution of 40000m\n",
-      "Time to read the grid:\n",
-      "163 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n",
-      "Time to compute cubulation:\n",
-      "1min 13s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n",
-      "Working on resolution of 20000m\n",
-      "Time to read the grid:\n",
-      "144 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n",
-      "Time to compute cubulation:\n",
-      "11min 34s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n",
-      "Working on resolution of 10000m\n",
-      "Time to read the grid:\n",
-      "312 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n",
-      "Time to compute cubulation:\n"
-     ]
-    }
-   ],
-   "source": [
-    "# path to grid files\n",
-    "gridpath = '/work/bb1018/b380459/NAWDEX/grids/'\n",
-    "\n",
-    "# model resolutions under investigation\n",
-    "for res in ['80000m', '40000m', '20000m', '10000m']:\n",
-    "    \n",
-    "    print('Working on resolution of', res)\n",
-    "        \n",
-    "    gridfile = 'icon-grid_nawdex_78w40e23n80n_R'+res+'.nc'\n",
-    "    print('Time to read the grid:')\n",
-    "    %timeit -r 1 -n 1 tricco.grid_functions.grid = tricco.prepare_grid(model='ICON',path=gridpath, file=gridfile)\n",
-    "    \n",
-    "    print('Time to compute cubulation:')\n",
-    "    %timeit -r 1 -n 1 tricco.compute_cubulation(start_triangle=dict_start[res], radius=dict_radius[res], print_progress=False)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Nawdex-Hackathon",
-   "language": "python",
-   "name": "nawdex-hackathon"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
-%% Cell type:markdown id: tags:
-
-# Notebook for benchmarking runtime of cubulation
-
-Tests are performed on an exclusive compute node of the DKRZ supercomputer Mistral in Hamburg, Germany.
-
-A compute node has the following specs (https://www.dkrz.de/up/systems/mistral/configuration):
- * 2x 12-core Intel Xeon E5-2680 v3 (Haswell) @ 2.5GHz
- * 24 cores (48 logical CPUs)
- * 64 GB main memory
-
-As for the grids we consider limited-area ICON grids that cover a large part of the North Atlantic. They were for example used for the NAWDEX simulations described Senf, F., A. Voigt et al, 2020: Increasing Resolution and Resolving Convection Improve the Simulation of Cloud‐Radiative Effects Over the North Atlantic, JGR Atmospheres. https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2020JD032667.
-
-%% Cell type:markdown id: tags:
-
-Load required packages. Note: adding cc3d to system path is needed because it is required by tricco.
-
-%% Cell type:code id: tags:
-
-``` python
-import timeit
-import sys
-sys.path.append('/pf/b/b380459/connected-components-3d/')
-sys.path.append('/pf/b/b380459/BigDataClouds/tricco/')
-import tricco
-```
-
-%% Cell type:markdown id: tags:
-
-Define the start triangle and radius of outward search as a function of grid resolution.
-
-%% Cell type:code id: tags:
-
-``` python
-dict_start  = {'80000m': 5783, '40000m': 18538, '20000m': 69309, '10000m': 264792}
-dict_radius = {'80000m': 102 , '40000m': 230  , '20000m': 460  , '10000m': 2000  }
-```
-
-%% Cell type:code id: tags:
-
-``` python
-# path to grid files
-gridpath = '/work/bb1018/b380459/NAWDEX/grids/'
-
-# model resolutions under investigation
-for res in ['80000m', '40000m', '20000m', '10000m']:
-
-    print('Working on resolution of', res)
-
-    gridfile = 'icon-grid_nawdex_78w40e23n80n_R'+res+'.nc'
-    print('Time to read the grid:')
-    %timeit -r 1 -n 1 tricco.grid_functions.grid = tricco.prepare_grid(model='ICON',path=gridpath, file=gridfile)
-
-    print('Time to compute cubulation:')
-    %timeit -r 1 -n 1 tricco.compute_cubulation(start_triangle=dict_start[res], radius=dict_radius[res], print_progress=False)
-```
-
-%% Output
-
-    Working on resolution of 80000m
-    Time to read the grid:
-    310 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
-    Time to compute cubulation:
-    11.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
-    Working on resolution of 40000m
-    Time to read the grid:
-    163 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
-    Time to compute cubulation:
-    1min 13s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
-    Working on resolution of 20000m
-    Time to read the grid:
-    144 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
-    Time to compute cubulation:
-    11min 34s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
-    Working on resolution of 10000m
-    Time to read the grid:
-    312 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
-    Time to compute cubulation:
--- a/benchmarks/benchmark_cubulation.py
+++ b/benchmarks/benchmark_cubulation.py
+# Benchmark the grid read-in and the cubulation routine of tricco on DKRZ Mistral
+#
+# Command line: <resolution> <start cell> <search radius> <save flag>
+# A save flag of 1 stores the computed cubulation on disk.
+
+# command line parameters
+import sys
+resol      = sys.argv[1]
+startcell  = int(sys.argv[2])
+searchrad  = int(sys.argv[3])
+savecubul  = int(sys.argv[4])
+
+# full path of the grid file
+gridfile = '/work/bb1018/b380459/NAWDEX/grids/icon-grid_nawdex_78w40e23n80n_'+resol+'.nc'
+
+# make cc3d and tricco importable, then load them
+sys.path.append('/pf/b/b380459/connected-components-3d/')
+sys.path.append('/pf/b/b380459/tricco/')
+import tricco
+import datetime
+
+# shorthand for taking a wall-clock time stamp
+now = datetime.datetime.now
+
+print('                                         ')
+print('-----------------------------------------')
+print('Working on resolution of', resol)
+print('Start cell:', startcell, 'Search radius:', searchrad, 'Save cubulation:', savecubul)
+
+# time the grid read-in; presumably path='/' because gridfile already holds the full path
+t_start = now()
+tricco.grid_functions.grid = tricco.prepare_grid(model='ICON',path='/', file=gridfile)
+elapsed = now() - t_start
+print('Time to read the grid:', elapsed)
+
+# time the cubulation itself
+t_start = now()
+cubulation = tricco.compute_cubulation(start_triangle=startcell, radius=searchrad, print_progress=False)
+elapsed = now() - t_start
+print('Time to compute cubulation:', elapsed)
+
+# optionally store the cubulation; np.save appends the .npy extension to the file name
+if savecubul == 1:
+    import numpy as np
+    outfile = ('/scratch/b/b380459/tricco_output/icon-grid_nawdex_78w40e23n80n_'
+               + resol + '_cubulation_start' + str(startcell) + '_radius' + str(searchrad))
+    np.save(outfile, cubulation)
+
+print('-----------------------------------------')
+print('                                         ')
--- a/benchmarks/benchmark_cubulation.run
+++ b/benchmarks/benchmark_cubulation.run
+#!/bin/bash
+#=============================================================================
+# mistral batch job parameters
+#-----------------------------------------------------------------------------
+#SBATCH --account=bb1152
+#SBATCH --job-name=benchmark_cubulation.run
+#SBATCH --partition=compute
+#SBATCH --nodes=1
+#SBATCH --threads-per-core=1
+#SBATCH --output=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_cubulation.run.%j.o
+#SBATCH --error=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_cubulation.run.%j.o
+#SBATCH --exclusive
+#SBATCH --time=08:00:00
+
+# Runs benchmark_cubulation.py ten times per enabled grid resolution.
+# Arguments passed to benchmark_cubulation.py:
+#   <resolution> <start cell> <search radius> <save flag (1 = save cubulation)>
+# Commented-out lines are alternative start cell / radius settings and disabled resolutions.
+
+# stop right away if the benchmark directory is missing instead of running from the wrong place
+cd /pf/b/b380459/BigDataClouds/tricco/benchmarks || exit 1
+
+MYPYTHON="/pf/b/b380459/conda-envs/Nawdex-Hackathon/bin/python3.8"
+echo "Working with ${MYPYTHON}"
+
+for i in {1..10}; do
+#   "$MYPYTHON" benchmark_cubulation.py R80000m 5738 102 1
+   "$MYPYTHON" benchmark_cubulation.py R80000m 5568  200 1
+done
+
+for i in {1..10}; do
+#   "$MYPYTHON" benchmark_cubulation.py R40000m 18538 230 1
+   "$MYPYTHON" benchmark_cubulation.py R40000m 18493 400 1
+done
+
+#for i in {1..10}; do
+#   "$MYPYTHON" benchmark_cubulation.py R20000m 69309 460 1
+#done
+
+#for i in {1..10}; do
+#   "$MYPYTHON" benchmark_cubulation.py R10000m 264792 2000 1
+#done
--- a/benchmarks/benchmark_cubulation.txt
+++ b/benchmarks/benchmark_cubulation.txt
+Benchmarking runtime of cubulation
+
+Tests are performed on an exclusive compute node of the DKRZ supercomputer Mistral in Hamburg, Germany.
+
+A compute node has the following specs (https://www.dkrz.de/up/systems/mistral/configuration):
+  * 2x12-core Intel Xeon E5-2680 v3 (Haswell) @ 2.5GHz,
+  * 24 cores (48 logical CPUs),
+  * 64 GB main memory.
+ 
+As for the grids, we consider limited-area ICON grids that cover a large part of the North Atlantic. They were, for example, used for the NAWDEX simulations described in Senf, F., A. Voigt et al, 2020: Increasing Resolution and Resolving Convection Improve the Simulation of Cloud‐Radiative Effects Over the North Atlantic, JGR Atmospheres. https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2020JD032667.
+
+Usage:
+
+benchmark_cubulation.run is a batch job that is submitted to an exclusive compute node via sbatch and that calls benchmark_cubulation.py. The grid resolution, start triangle and search radius for the cubulation are handed over to benchmark_cubulation.py, which reads in the grid file and does the cubulation.
--- a/examples/find_radius.py
+++ b/examples/find_radius.py
+# Purpose: For a given start cell, find the smallest radius that allows one to cover all grid cells.
+# This is helpful as a too large radius artificially inflates the size of the cubulated grid.
+#
+# Usage: python find_radius.py <resolution> <start cell> <search radius>
+# The cubulation is run with print_progress=True so that its progress output can be inspected.
+
+# Written for the limited-area grids of ICON used in the Tricco introduction paper.
+
+# parse command line parameters
+import sys
+resol  = sys.argv[1]
+start  = int(sys.argv[2])
+radius = int(sys.argv[3])
+
+print('-----------------------------------------')
+print('Working on ICON grid with resolution', resol)
+print('Start cell   :', start)
+print('Search radius:', radius)
+
+# make cc3d and tricco importable (sys is already imported above)
+sys.path.append('/pf/b/b380459/connected-components-3d/')
+sys.path.append('/pf/b/b380459/BigDataClouds/tricco/')
+import tricco
+
+# read the grid from ./data/ relative to the current working directory
+tricco.grid_functions.grid = tricco.prepare_grid(model='ICON', path='./data/',
+                                                 file='icon-grid_nawdex_78w40e23n80n_'+resol+'.nc')
+
+cubulation = tricco.compute_cubulation(start_triangle=start, radius=radius, print_progress=True)
--- a/examples/find_startcell.py
+++ b/examples/find_startcell.py
+# Purpose: Find the cell closest to a given latitude-longitude position.
+# This is helpful to set the start cell of the cubulation routine.
+#
+# Usage: python find_startcell.py <resolution> <lat in deg> <lon in deg>
+
+# Written for the limited-area grids of ICON used in the Tricco introduction paper.
+
+import numpy as np
+
+# parse command line parameters
+import sys
+resol      = sys.argv[1]
+lat        = float(sys.argv[2])
+lon        = float(sys.argv[3])
+
+print('-----------------------------------------')
+print('Working on ICON grid with resolution', resol)
+print('Searching for cell closest to lat', lat, 'and lon', lon)
+
+# gridfile including path
+gridfile = '/work/bb1018/b380459/NAWDEX/grids/icon-grid_nawdex_78w40e23n80n_'+resol+'.nc'
+
+# load lat-lon info of grid; clat/clon are taken to be in radians, as in the original
+# rad-to-deg conversion, so the target position is converted to radians instead
+import xarray as xr
+ds_grid  = xr.load_dataset(gridfile)
+clat = ds_grid['clat'].values
+clon = ds_grid['clon'].values
+lat_rad = np.deg2rad(lat)
+lon_rad = np.deg2rad(lon)
+
+# great-circle (haversine) distance on the unit sphere between the target and all cell centers;
+# unlike a plain lat-lon difference this accounts for meridian convergence and lon wrap-around
+hav = np.sin(0.5*(clat-lat_rad))**2 + np.cos(lat_rad)*np.cos(clat)*np.sin(0.5*(clon-lon_rad))**2
+dist = 2.0*np.arcsin(np.sqrt(np.clip(hav, 0.0, 1.0)))
+
+# NOTE(review): np.argmin returns a 0-based position in the clat/clon arrays; confirm that
+# the "cell index - 1" advice printed below is what tricco expects for this value.
+print('Closest cell has index', np.argmin(dist))
+print('Note: The startcell for tricco is the cell index - 1.')
+print('      This is because the found cell index is on the ICON grid')
+print('      and the ICON indexing starts with 1.')
+print('-----------------------------------------')
--- a/setup.py
+++ b/setup.py
 from setuptools import setup, find_packages

-VERSION = '0.0.4' 
+VERSION = '0.0.1' 
 DESCRIPTION = 'TriCCo'
 LONG_DESCRIPTION = 'TriCCo: a python package for connected component labeling on triangular grids'