Skip to content
Snippets Groups Projects
Commit ca3925fd authored by Aiko Voigt's avatar Aiko Voigt
Browse files

Added benchmarking routines

parent ad40b2bd
Branches
Tags
No related merge requests found
# Perform benchmarking of 2d cloud data given a previously computed cubulation.
#
# Usage: python benchmark_2d.py <resol> <startcell> <searchrad>
#   resol     : resolution key of the grid, e.g. R80000m
#   startcell : start cell used when the stored cubulation was computed
#   searchrad : search radius used when the stored cubulation was computed
# startcell and searchrad only serve to build the name of the cubulation file.
import datetime
import sys

import numpy as np

# read in cloud data
# cloud file depends on resolution
datapath = {'R80000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-80km-mis-0001/',
            'R40000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-40km-mis-0001/',
            'R20000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-20km-mis-0001/',
            'R10000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-10km-mis-0001/'}
datafile = {'R80000m': 'nawdexnwp-80km-mis-0001_2016092200_2d_30min_DOM01_ML_',
            'R40000m': 'nawdexnwp-40km-mis-0001_2016092200_2d_30min_DOM01_ML_',
            'R20000m': 'nawdexnwp-20km-mis-0001_2016092200_2d_30min_DOM01_ML_',
            'R10000m': 'nawdexnwp-10km-mis-0001_2016092200_2d_30min_DOM01_ML_'}


def summarize_timings(durations):
    """Return (mean, min, max) of a non-empty sequence of datetime.timedelta."""
    total = sum(durations, datetime.timedelta())
    return total / len(durations), min(durations), max(durations)


def report_timings(description, durations):
    """Print the description, the number of timed steps and their mean/min/max."""
    mean, shortest, longest = summarize_timings(durations)
    print(' ')
    # the step count is taken from the data so that it cannot disagree with the loop
    print(description, 'done for', len(durations), 'time steps')
    print('Mean:', mean, 'Min:', shortest, 'Max:', longest)


def main():
    """Time data loading and 2d connected component labeling for one resolution."""
    # parse command line parameters
    resol = sys.argv[1]
    startcell = int(sys.argv[2])
    searchrad = int(sys.argv[3])

    # load other needed packages
    sys.path.append('/pf/b/b380459/connected-components-3d/')
    sys.path.append('/pf/b/b380459/tricco/')
    import tricco

    print(' ')
    print('-----------------------------------------')
    print('Working on resolution of', resol)

    # load previously computed cubulation
    # NOTE: allow_pickle=True must only be used on trusted, self-generated files
    cubulpath = '/scratch/b/b380459/tricco_output/'
    cubulfile = (cubulpath + 'icon-grid_nawdex_78w40e23n80n_' + resol
                 + '_cubulation_start' + str(startcell)
                 + '_radius' + str(searchrad) + '.npy')
    cubulation = np.load(cubulfile, allow_pickle=True)

    dtime_dat = list()
    dtime_ver = list()
    dtime_edg = list()

    # loop over the output files with indices 0010 ... 0058 (49 files)
    for time in range(10, 59):
        # read in data
        begin_time = datetime.datetime.now()
        _field, field_cube = tricco.prepare_field(
            model='ICON', path=datapath[resol],
            file=datafile[resol]+'00'+str(time)+'.nc',
            var='clct', threshold=85.0, cubulation=cubulation)
        end_time = datetime.datetime.now()
        dtime_dat.append(end_time-begin_time)

        # perform connected component analysis for vertex connectivity
        begin_time = datetime.datetime.now()
        _ = tricco.compute_connected_components_2d(cubulation, field_cube, connectivity='vertex')
        end_time = datetime.datetime.now()
        dtime_ver.append(end_time-begin_time)

        # perform connected component analysis for edge connectivity
        begin_time = datetime.datetime.now()
        _ = tricco.compute_connected_components_2d(cubulation, field_cube, connectivity='edge')
        end_time = datetime.datetime.now()
        dtime_edg.append(end_time-begin_time)

    report_timings('Time to read in 1 time step of data, including moving it '
                   'onto the 3d cubulated grid;', dtime_dat)
    report_timings('Time to do 2d connected component labeling with vertex '
                   'connectivity for 1 time step;', dtime_ver)
    report_timings('Time to do 2d connected component labeling with edge '
                   'connectivity for 1 time step:', dtime_edg)
    print('-----------------------------------------')
    print(' ')


if __name__ == '__main__':
    main()
#!/bin/bash
#=============================================================================
# mistral batch job parameters
#-----------------------------------------------------------------------------
#SBATCH --account=bb1152
#SBATCH --job-name=benchmark_2d.run
#SBATCH --partition=compute
#SBATCH --nodes=1
#SBATCH --threads-per-core=1
#SBATCH --output=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_2d.run.%j.o
#SBATCH --error=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_2d.run.%j.o
#SBATCH --exclusive
#SBATCH --time=08:00:00

# Batch job that runs benchmark_2d.py ten times for each grid resolution.
cd /pf/b/b380459/BigDataClouds/tricco/benchmarks

MYPYTHON="/pf/b/b380459/conda-envs/Nawdex-Hackathon/bin/python3.8"
echo "Working with" ${MYPYTHON}

# Run benchmark_2d.py ten times in a row.
# Arguments (handed on unchanged): grid resolution, start cell, search radius.
repeat_benchmark () {
    local run
    for run in {1..10}; do
        $MYPYTHON benchmark_2d.py "$@"
    done
}

repeat_benchmark R80000m 5738 102
repeat_benchmark R40000m 18538 230
repeat_benchmark R20000m 69309 460
repeat_benchmark R10000m 264792 2000
# Perform benchmarking of 3d cloud data given a previously computed cubulation.
#
# Usage: python benchmark_3d.py <resol> <startcell> <searchrad>
#   resol     : resolution key of the grid, e.g. R80000m
#   startcell : start cell used when the stored cubulation was computed
#   searchrad : search radius used when the stored cubulation was computed
# startcell and searchrad only serve to build the name of the cubulation file.
import datetime
import sys

import numpy as np

# read in cloud data
# cloud file depends on resolution
datapath = {'R80000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-80km-mis-0001/',
            'R40000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-40km-mis-0001/',
            'R20000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-20km-mis-0001/',
            'R10000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-10km-mis-0001/'}
datafile = {'R80000m': 'nawdexnwp-80km-mis-0001_2016092200_3dcloud_DOM01_ML_',
            'R40000m': 'nawdexnwp-40km-mis-0001_2016092200_3dcloud_DOM01_ML_',
            'R20000m': 'nawdexnwp-20km-mis-0001_2016092200_3dcloud_DOM01_ML_',
            'R10000m': 'nawdexnwp-10km-mis-0001_2016092200_3dcloud_DOM01_ML_'}


def summarize_timings(durations):
    """Return (mean, min, max) of a non-empty sequence of datetime.timedelta."""
    total = sum(durations, datetime.timedelta())
    return total / len(durations), min(durations), max(durations)


def report_timings(description, durations):
    """Print the description, the number of timed steps and their mean/min/max."""
    mean, shortest, longest = summarize_timings(durations)
    print(' ')
    # the step count is taken from the data so that it cannot disagree with the loop
    print(description, 'done for', len(durations), 'time steps')
    print('Mean:', mean, 'Min:', shortest, 'Max:', longest)


def main():
    """Time data loading and 3d connected component labeling for one resolution."""
    # parse command line parameters
    resol = sys.argv[1]
    startcell = int(sys.argv[2])
    searchrad = int(sys.argv[3])

    # load other needed packages
    sys.path.append('/pf/b/b380459/connected-components-3d/')
    sys.path.append('/pf/b/b380459/tricco/')
    import tricco

    print(' ')
    print('-----------------------------------------')
    print('Working on resolution of', resol)

    # load previously computed cubulation
    # NOTE: allow_pickle=True must only be used on trusted, self-generated files
    cubulpath = '/scratch/b/b380459/tricco_output/'
    cubulfile = (cubulpath + 'icon-grid_nawdex_78w40e23n80n_' + resol
                 + '_cubulation_start' + str(startcell)
                 + '_radius' + str(searchrad) + '.npy')
    cubulation = np.load(cubulfile, allow_pickle=True)

    dtime_dat = list()
    dtime_ver = list()
    dtime_edg = list()

    # loop over the output files with indices 0010 ... 0058 (49 files)
    for time in range(10, 59):
        # read in data
        begin_time = datetime.datetime.now()
        _field, field_cube = tricco.prepare_field_lev(
            model='ICON', path=datapath[resol],
            file=datafile[resol]+'00'+str(time)+'.nc',
            var='clc', threshold=85.0, cubulation=cubulation)
        end_time = datetime.datetime.now()
        dtime_dat.append(end_time-begin_time)

        # perform connected component analysis for vertex connectivity
        begin_time = datetime.datetime.now()
        _ = tricco.compute_connected_components_3d(cubulation, field_cube, connectivity='vertex')
        end_time = datetime.datetime.now()
        dtime_ver.append(end_time-begin_time)

        # perform connected component analysis for edge connectivity
        begin_time = datetime.datetime.now()
        _ = tricco.compute_connected_components_3d(cubulation, field_cube, connectivity='edge')
        end_time = datetime.datetime.now()
        dtime_edg.append(end_time-begin_time)

    report_timings('Time to read in 1 time step of data, including moving it '
                   'onto the 3d x lev cubulated grid;', dtime_dat)
    report_timings('Time to do 3d connected component labeling with vertex '
                   'connectivity for 1 time step;', dtime_ver)
    report_timings('Time to do 3d connected component labeling with edge '
                   'connectivity for 1 time step;', dtime_edg)
    print('-----------------------------------------')
    print(' ')


if __name__ == '__main__':
    main()
#!/bin/bash
#=============================================================================
# mistral batch job parameters
#-----------------------------------------------------------------------------
#SBATCH --account=bb1152
#SBATCH --job-name=benchmark_3d.run
#SBATCH --partition=compute
#SBATCH --nodes=1
#SBATCH --threads-per-core=1
#SBATCH --output=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_3d.run.%j.o
#SBATCH --error=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_3d.run.%j.o
#SBATCH --exclusive
#SBATCH --time=08:00:00

# Batch job that runs benchmark_3d.py ten times per enabled grid resolution.
cd /pf/b/b380459/BigDataClouds/tricco/benchmarks

MYPYTHON="/pf/b/b380459/conda-envs/Nawdex-Hackathon/bin/python3.8"
echo "Working with" ${MYPYTHON}

# Run benchmark_3d.py ten times in a row.
# Arguments (handed on unchanged): grid resolution, start cell, search radius.
repeat_benchmark () {
    local run
    for run in {1..10}; do
        $MYPYTHON benchmark_3d.py "$@"
    done
}

# The coarser resolutions are disabled; uncomment a line to run it again.
#repeat_benchmark R80000m 5738 102
#repeat_benchmark R40000m 18538 230
#repeat_benchmark R20000m 69309 460
repeat_benchmark R10000m 264792 2000
%% Cell type:markdown id: tags:
# Notebook for benchmarking runtime of cubulation
Tests are performed on an exclusive compute node of the DKRZ supercomputer Mistral in Hamburg, Germany.
A compute node has the following specs (https://www.dkrz.de/up/systems/mistral/configuration):
* 2x 12-core Intel Xeon E5-2680 v3 (Haswell) @ 2.5GHz
* 24 cores (48 logical CPUs)
* 64 GB main memory
As for the grids, we consider limited-area ICON grids that cover a large part of the North Atlantic. They were, for example, used for the NAWDEX simulations described in Senf, F., A. Voigt et al., 2020: Increasing Resolution and Resolving Convection Improve the Simulation of Cloud‐Radiative Effects Over the North Atlantic, JGR Atmospheres. https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2020JD032667.
%% Cell type:markdown id: tags:
Load required packages. Note: adding cc3d to system path is needed because it is required by tricco.
%% Cell type:code id: tags:
``` python
import timeit
import sys
sys.path.append('/pf/b/b380459/connected-components-3d/')
sys.path.append('/pf/b/b380459/BigDataClouds/tricco/')
import tricco
```
%% Cell type:markdown id: tags:
Define the start triangle and radius of outward search as a function of grid resolution.
%% Cell type:code id: tags:
``` python
# Start triangle and radius of the outward search for the cubulation,
# keyed by grid resolution.
# The 80000m start triangle is 5738, the value the benchmark_*.run batch jobs
# pass for R80000m (this cell previously held 5783, apparently transposed digits).
dict_start  = {'80000m': 5738, '40000m': 18538, '20000m': 69309, '10000m': 264792}
dict_radius = {'80000m': 102 , '40000m': 230  , '20000m': 460  , '10000m': 2000  }
```
%% Cell type:code id: tags:
``` python
# path to grid files
gridpath = '/work/bb1018/b380459/NAWDEX/grids/'
# model resolutions under investigation
for res in ['80000m', '40000m', '20000m', '10000m']:
print('Working on resolution of', res)
gridfile = 'icon-grid_nawdex_78w40e23n80n_R'+res+'.nc'
print('Time to read the grid:')
%timeit -r 1 -n 1 tricco.grid_functions.grid = tricco.prepare_grid(model='ICON',path=gridpath, file=gridfile)
print('Time to compute cubulation:')
%timeit -r 1 -n 1 tricco.compute_cubulation(start_triangle=dict_start[res], radius=dict_radius[res], print_progress=False)
```
%% Output
Working on resolution of 80000m
Time to read the grid:
310 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Time to compute cubulation:
11.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Working on resolution of 40000m
Time to read the grid:
163 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Time to compute cubulation:
1min 13s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Working on resolution of 20000m
Time to read the grid:
144 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Time to compute cubulation:
11min 34s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Working on resolution of 10000m
Time to read the grid:
312 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Time to compute cubulation:
# Benchmark the cubulation routine on DKRZ Mistral.
#
# Usage: python benchmark_cubulation.py <resol> <startcell> <searchrad> <savecubul>
# The cubulation is written to disk only if <savecubul> equals 1.
import sys

# command line parameters
args = sys.argv
resol = args[1]
startcell = int(args[2])
searchrad = int(args[3])
savecubul = int(args[4])

# full path of the grid file
gridfile = '/work/bb1018/b380459/NAWDEX/grids/icon-grid_nawdex_78w40e23n80n_{}.nc'.format(resol)

# make tricco and the cc3d package importable before loading tricco
for extra_path in ('/pf/b/b380459/connected-components-3d/',
                   '/pf/b/b380459/tricco/'):
    sys.path.append(extra_path)
import tricco
import datetime

now = datetime.datetime.now

print(' ')
print('-----------------------------------------')
print('Working on resolution of', resol)
print('Start cell:', startcell, 'Search radius:', searchrad, 'Save cubulation:', savecubul)

# time reading of the grid; gridfile already holds the full path, hence path='/'
tic = now()
tricco.grid_functions.grid = tricco.prepare_grid(model='ICON', path='/', file=gridfile)
toc = now()
print('Time to read the grid:', toc - tic)

# time the cubulation itself
tic = now()
cubulation = tricco.compute_cubulation(start_triangle=startcell, radius=searchrad,
                                       print_progress=False)
toc = now()
print('Time to compute cubulation:', toc - tic)

# optionally store the cubulation; the file name encodes resolution, start cell and radius
if savecubul == 1:
    import numpy as np
    outfile = ('/scratch/b/b380459/tricco_output/icon-grid_nawdex_78w40e23n80n_'
               + resol + '_cubulation_start' + str(startcell)
               + '_radius' + str(searchrad))
    np.save(outfile, cubulation)

print('-----------------------------------------')
print(' ')
#!/bin/bash
#=============================================================================
# mistral batch job parameters
#-----------------------------------------------------------------------------
#SBATCH --account=bb1152
#SBATCH --job-name=benchmark_cubulation.run
#SBATCH --partition=compute
#SBATCH --nodes=1
#SBATCH --threads-per-core=1
#SBATCH --output=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_cubulation.run.%j.o
#SBATCH --error=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_cubulation.run.%j.o
#SBATCH --exclusive
#SBATCH --time=08:00:00

# Batch job that runs benchmark_cubulation.py ten times per enabled configuration.
cd /pf/b/b380459/BigDataClouds/tricco/benchmarks

MYPYTHON="/pf/b/b380459/conda-envs/Nawdex-Hackathon/bin/python3.8"
echo "Working with" ${MYPYTHON}

# Run benchmark_cubulation.py ten times in a row.
# Arguments (handed on unchanged): grid resolution, start cell, search radius,
# save flag (1 = store the cubulation).
repeat_benchmark () {
    local run
    for run in {1..10}; do
        $MYPYTHON benchmark_cubulation.py "$@"
    done
}

# Disabled configurations are kept as comments next to the active ones.
#repeat_benchmark R80000m 5738 102 1
repeat_benchmark R80000m 5568 200 1

#repeat_benchmark R40000m 18538 230 1
repeat_benchmark R40000m 18493 400 1

#repeat_benchmark R20000m 69309 460 1
#repeat_benchmark R10000m 264792 2000 1
Benchmarking runtime of cubulation
Tests are performed on an exclusive compute node of the DKRZ supercomputer Mistral in Hamburg, Germany.
A compute node has the following specs (https://www.dkrz.de/up/systems/mistral/configuration):
* 2x12-core Intel Xeon E5-2680 v3 (Haswell) @ 2.5GHz,
* 24 cores (48 logical CPUs),
* 64 GB main memory.
As for the grids, we consider limited-area ICON grids that cover a large part of the North Atlantic. They were, for example, used for the NAWDEX simulations described in Senf, F., A. Voigt et al., 2020: Increasing Resolution and Resolving Convection Improve the Simulation of Cloud‐Radiative Effects Over the North Atlantic, JGR Atmospheres. https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2020JD032667.
Usage:
benchmark_cubulation.run is a batch job that is submitted to an exclusive compute node via sbatch and that calls benchmark_cubulation.py. The grid resolution, start triangle and search radius for the cubulation are handed over to benchmark_cubulation.py, which reads in the grid file and does the cubulation.
# Purpose: Help to find, for a given start cell, the smallest radius that still
#          covers all grid cells. A radius that is too large artificially
#          inflates the size of the cubulated grid.
# The script runs the cubulation once, with progress output, for the start cell
# and radius given on the command line.
# Written for the limited-area grids of ICON used in the Tricco introduction paper.
import sys

# command line parameters: resolution, start cell, search radius
resol, start, radius = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])

for line in ('-----------------------------------------',):
    print(line)
print('Working on ICON grid with resolution', resol)
print('Start cell :', start)
print('Search radius:', radius)

# make tricco and the cc3d package importable before loading tricco
for extra_path in ('/pf/b/b380459/connected-components-3d/',
                   '/pf/b/b380459/BigDataClouds/tricco/'):
    sys.path.append(extra_path)
import tricco

# read the grid from the local data directory and compute the cubulation
gridname = 'icon-grid_nawdex_78w40e23n80n_{}.nc'.format(resol)
tricco.grid_functions.grid = tricco.prepare_grid(model='ICON', path='./data/',
                                                 file=gridname)
cubulation = tricco.compute_cubulation(start_triangle=start, radius=radius,
                                       print_progress=True)
# Purpose: Find the cell closest to a given latitude-longitude position.
#          This is helpful to set the start cell of the cubulation routine.
# Written for the limited-area grids of ICON used in the Tricco introduction paper.
#
# Usage: python <this script> <resol> <lat in deg> <lon in deg>
import sys

import numpy as np

# convert rad to deg
rad2deg = 180.0/np.pi


def find_closest_cell(clat, clon, lat, lon):
    """Return the 0-based array index of the cell center closest to (lat, lon).

    Closeness is measured along the great circle (haversine formula), so the
    shrinking of longitude circles towards the pole and the wrap-around of
    longitude are taken into account. A plain squared difference in degrees
    overweights longitude differences at high latitudes.

    Parameters
    ----------
    clat, clon : array-like
        Latitudes and longitudes of the cell centers in degrees.
    lat, lon : float
        Target position in degrees.

    Returns
    -------
    int
        Position of the closest cell center within clat/clon.
    """
    phi = np.deg2rad(np.asarray(clat, dtype=float))
    lam = np.deg2rad(np.asarray(clon, dtype=float))
    phi0 = np.deg2rad(lat)
    lam0 = np.deg2rad(lon)
    # the haversine term grows monotonically with great-circle distance,
    # so its minimum marks the closest cell without computing the distance
    hav = (np.sin(0.5*(phi-phi0))**2
           + np.cos(phi)*np.cos(phi0)*np.sin(0.5*(lam-lam0))**2)
    return int(np.argmin(hav))


def main():
    """Read the grid for the requested resolution and print the closest cell."""
    # parse command line parameters
    resol = sys.argv[1]
    lat = float(sys.argv[2])
    lon = float(sys.argv[3])
    print('-----------------------------------------')
    print('Working on ICON grid with resolution', resol)
    print('Searching for cell closest to lat', lat, 'and lon', lon)

    # gridfile including path
    gridfile = '/work/bb1018/b380459/NAWDEX/grids/icon-grid_nawdex_78w40e23n80n_'+resol+'.nc'

    # load lat-lon info of grid and convert to deg
    import xarray as xr
    ds_grid = xr.load_dataset(gridfile)
    clat = rad2deg*ds_grid['clat'].values
    clon = rad2deg*ds_grid['clon'].values

    print('Closest cell has index', find_closest_cell(clat, clon, lat, lon))
    # NOTE(review): the printed index is a 0-based position in the clat/clon
    # arrays (np.argmin); confirm that subtracting 1, as advised below, is
    # really what tricco expects.
    print('Note: The startcell for tricco is the cell index - 1.')
    print(' This is because the found cell index is on the ICON grid')
    print(' and the ICON indexing starts with 1.')
    print('-----------------------------------------')


if __name__ == '__main__':
    main()
from setuptools import setup, find_packages
VERSION = '0.0.4'
VERSION = '0.0.1'
DESCRIPTION = 'TriCCo'
LONG_DESCRIPTION = 'TriCCo: a python package for connected component labeling on triangular grids'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment