Skip to content
Snippets Groups Projects
Commit ca3925fd authored by Aiko Voigt's avatar Aiko Voigt
Browse files

Added benchmarking routines

parent ad40b2bd
No related branches found
No related tags found
No related merge requests found
"""Benchmark 2d connected component labeling given a previously computed cubulation.

Usage:
    python benchmark_2d.py RESOL STARTCELL SEARCHRAD

RESOL selects the data set (a key of DATAPATH, e.g. ``R80000m``). STARTCELL
and SEARCHRAD are only used to build the name of the cubulation file that is
loaded from CUBULPATH.
"""
import datetime
import sys
from time import perf_counter

import numpy as np

# make tricco importable; the connected-components-3d checkout is added as well
sys.path.append('/pf/b/b380459/connected-components-3d/')
sys.path.append('/pf/b/b380459/tricco/')

# directory holding the previously computed cubulations
CUBULPATH = '/scratch/b/b380459/tricco_output/'

# cloud data directory and file-name stem depend on the resolution
DATAPATH = {
    'R80000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-80km-mis-0001/',
    'R40000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-40km-mis-0001/',
    'R20000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-20km-mis-0001/',
    'R10000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-10km-mis-0001/',
}
DATAFILE = {
    'R80000m': 'nawdexnwp-80km-mis-0001_2016092200_2d_30min_DOM01_ML_',
    'R40000m': 'nawdexnwp-40km-mis-0001_2016092200_2d_30min_DOM01_ML_',
    'R20000m': 'nawdexnwp-20km-mis-0001_2016092200_2d_30min_DOM01_ML_',
    'R10000m': 'nawdexnwp-10km-mis-0001_2016092200_2d_30min_DOM01_ML_',
}

# Output files 0010 ... 0058 of the 30-min output are analysed.
# NOTE(review): this range covers 49 files although the original comment
# spoke of 48 time steps -- confirm whether the first or last file belongs
# to the benchmarked day. The report below prints the actual sample count.
TIMESTEPS = range(10, 59)


def parse_args(argv):
    """Return ``(resol, startcell, searchrad)`` parsed from ``argv``.

    Exits with a usage message when arguments are missing. Extra arguments
    are ignored.
    """
    if len(argv) < 4:
        sys.exit('Usage: benchmark_2d.py RESOL STARTCELL SEARCHRAD')
    return argv[1], int(argv[2]), int(argv[3])


def data_filename(resol, step):
    """Return the name of the cloud data file of output step ``step``."""
    # zero-padded 4-digit file counter, e.g. 10 -> '0010'
    return DATAFILE[resol] + format(step, '04d') + '.nc'


def timed(func, *args, **kwargs):
    """Call ``func`` and return ``(result, elapsed)``.

    ``elapsed`` is a ``datetime.timedelta`` measured with the monotonic
    ``perf_counter`` clock, so it is not affected by system clock changes.
    """
    begin = perf_counter()
    result = func(*args, **kwargs)
    elapsed = perf_counter() - begin
    return result, datetime.timedelta(seconds=elapsed)


def report(message, samples):
    """Print ``message`` with the sample count, then mean, min and max."""
    print(' ')
    print(message, len(samples), 'time steps')
    mean = sum(samples, datetime.timedelta()) / len(samples)
    print('Mean:', mean, 'Min:', min(samples), 'Max:', max(samples))


def main(argv):
    """Run the 2d benchmark for the resolution given on the command line."""
    resol, startcell, searchrad = parse_args(argv)
    if resol not in DATAPATH:
        sys.exit('Unknown resolution ' + resol
                 + '; expected one of ' + ', '.join(DATAPATH))

    # imported here so the helpers above stay usable without tricco
    import tricco

    print(' ')
    print('-----------------------------------------')
    print('Working on resolution of', resol)

    # load previously computed cubulation
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load
    # cubulation files that you created yourself.
    cubulfile = (CUBULPATH + '/icon-grid_nawdex_78w40e23n80n_' + resol
                 + '_cubulation_start' + str(startcell)
                 + '_radius' + str(searchrad) + '.npy')
    cubulation = np.load(cubulfile, allow_pickle=True)

    dtime_dat = []
    dtime_ver = []
    dtime_edg = []
    for step in TIMESTEPS:
        # read in data and move it onto the cubulated grid
        (_, field_cube), elapsed = timed(
            tricco.prepare_field, model='ICON', path=DATAPATH[resol],
            file=data_filename(resol, step),
            var='clct', threshold=85.0, cubulation=cubulation)
        dtime_dat.append(elapsed)

        # perform connected component analysis for vertex connectivity
        _, elapsed = timed(tricco.compute_connected_components_2d,
                           cubulation, field_cube, connectivity='vertex')
        dtime_ver.append(elapsed)

        # perform connected component analysis for edge connectivity
        _, elapsed = timed(tricco.compute_connected_components_2d,
                           cubulation, field_cube, connectivity='edge')
        dtime_edg.append(elapsed)

    report('Time to read in 1 time step of data, including moving it onto '
           'the 3d cubulated grid; done for', dtime_dat)
    report('Time to do 2d connected component labeling with vertex '
           'connectivity for 1 time step; done for', dtime_ver)
    report('Time to do 2d connected component labeling with edge '
           'connectivity for 1 time step: done for', dtime_edg)
    print('-----------------------------------------')
    print(' ')


if __name__ == '__main__':
    main(sys.argv)
#!/bin/bash
#=============================================================================
# mistral batch job parameters
#-----------------------------------------------------------------------------
#SBATCH --account=bb1152
#SBATCH --job-name=benchmark_2d.run
#SBATCH --partition=compute
#SBATCH --nodes=1
#SBATCH --threads-per-core=1
#SBATCH --output=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_2d.run.%j.o
#SBATCH --error=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_2d.run.%j.o
#SBATCH --exclusive
#SBATCH --time=08:00:00

# Runs benchmark_2d.py ten times per grid resolution.
# Arguments of benchmark_2d.py: <resolution> <start cell> <search radius>

# stop right away if the benchmark directory is missing; otherwise every
# run below would be started from the wrong directory
cd /pf/b/b380459/BigDataClouds/tricco/benchmarks || exit 1

MYPYTHON="/pf/b/b380459/conda-envs/Nawdex-Hackathon/bin/python3.8"
echo "Working with" "${MYPYTHON}"

for i in {1..10}; do
    "${MYPYTHON}" benchmark_2d.py R80000m 5738 102
done

for i in {1..10}; do
    "${MYPYTHON}" benchmark_2d.py R40000m 18538 230
done

for i in {1..10}; do
    "${MYPYTHON}" benchmark_2d.py R20000m 69309 460
done

for i in {1..10}; do
    "${MYPYTHON}" benchmark_2d.py R10000m 264792 2000
done
"""Benchmark 3d connected component labeling given a previously computed cubulation.

Usage:
    python benchmark_3d.py RESOL STARTCELL SEARCHRAD

RESOL selects the data set (a key of DATAPATH, e.g. ``R80000m``). STARTCELL
and SEARCHRAD are only used to build the name of the cubulation file that is
loaded from CUBULPATH.
"""
import datetime
import sys
from time import perf_counter

import numpy as np

# make tricco importable; the connected-components-3d checkout is added as well
sys.path.append('/pf/b/b380459/connected-components-3d/')
sys.path.append('/pf/b/b380459/tricco/')

# directory holding the previously computed cubulations
CUBULPATH = '/scratch/b/b380459/tricco_output/'

# cloud data directory and file-name stem depend on the resolution
DATAPATH = {
    'R80000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-80km-mis-0001/',
    'R40000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-40km-mis-0001/',
    'R20000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-20km-mis-0001/',
    'R10000m': '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/nawdexnwp-10km-mis-0001/',
}
DATAFILE = {
    'R80000m': 'nawdexnwp-80km-mis-0001_2016092200_3dcloud_DOM01_ML_',
    'R40000m': 'nawdexnwp-40km-mis-0001_2016092200_3dcloud_DOM01_ML_',
    'R20000m': 'nawdexnwp-20km-mis-0001_2016092200_3dcloud_DOM01_ML_',
    'R10000m': 'nawdexnwp-10km-mis-0001_2016092200_3dcloud_DOM01_ML_',
}

# Output files 0010 ... 0058 are analysed.
# NOTE(review): this range covers 49 files although the original comment
# spoke of 48 time steps -- confirm whether the first or last file belongs
# to the benchmarked day. The report below prints the actual sample count.
TIMESTEPS = range(10, 59)


def parse_args(argv):
    """Return ``(resol, startcell, searchrad)`` parsed from ``argv``.

    Exits with a usage message when arguments are missing. Extra arguments
    are ignored.
    """
    if len(argv) < 4:
        sys.exit('Usage: benchmark_3d.py RESOL STARTCELL SEARCHRAD')
    return argv[1], int(argv[2]), int(argv[3])


def data_filename(resol, step):
    """Return the name of the cloud data file of output step ``step``."""
    # zero-padded 4-digit file counter, e.g. 10 -> '0010'
    return DATAFILE[resol] + format(step, '04d') + '.nc'


def timed(func, *args, **kwargs):
    """Call ``func`` and return ``(result, elapsed)``.

    ``elapsed`` is a ``datetime.timedelta`` measured with the monotonic
    ``perf_counter`` clock, so it is not affected by system clock changes.
    """
    begin = perf_counter()
    result = func(*args, **kwargs)
    elapsed = perf_counter() - begin
    return result, datetime.timedelta(seconds=elapsed)


def report(message, samples):
    """Print ``message`` with the sample count, then mean, min and max."""
    print(' ')
    print(message, len(samples), 'time steps')
    mean = sum(samples, datetime.timedelta()) / len(samples)
    print('Mean:', mean, 'Min:', min(samples), 'Max:', max(samples))


def main(argv):
    """Run the 3d benchmark for the resolution given on the command line."""
    resol, startcell, searchrad = parse_args(argv)
    if resol not in DATAPATH:
        sys.exit('Unknown resolution ' + resol
                 + '; expected one of ' + ', '.join(DATAPATH))

    # imported here so the helpers above stay usable without tricco
    import tricco

    print(' ')
    print('-----------------------------------------')
    print('Working on resolution of', resol)

    # load previously computed cubulation
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load
    # cubulation files that you created yourself.
    cubulfile = (CUBULPATH + '/icon-grid_nawdex_78w40e23n80n_' + resol
                 + '_cubulation_start' + str(startcell)
                 + '_radius' + str(searchrad) + '.npy')
    cubulation = np.load(cubulfile, allow_pickle=True)

    dtime_dat = []
    dtime_ver = []
    dtime_edg = []
    for step in TIMESTEPS:
        # read in data and move it onto the cubulated grid
        (_, field_cube), elapsed = timed(
            tricco.prepare_field_lev, model='ICON', path=DATAPATH[resol],
            file=data_filename(resol, step),
            var='clc', threshold=85.0, cubulation=cubulation)
        dtime_dat.append(elapsed)

        # perform connected component analysis for vertex connectivity
        _, elapsed = timed(tricco.compute_connected_components_3d,
                           cubulation, field_cube, connectivity='vertex')
        dtime_ver.append(elapsed)

        # perform connected component analysis for edge connectivity
        _, elapsed = timed(tricco.compute_connected_components_3d,
                           cubulation, field_cube, connectivity='edge')
        dtime_edg.append(elapsed)

    report('Time to read in 1 time step of data, including moving it onto '
           'the 3d x lev cubulated grid; done for', dtime_dat)
    report('Time to do 3d connected component labeling with vertex '
           'connectivity for 1 time step; done for', dtime_ver)
    report('Time to do 3d connected component labeling with edge '
           'connectivity for 1 time step; done for', dtime_edg)
    print('-----------------------------------------')
    print(' ')


if __name__ == '__main__':
    main(sys.argv)
#!/bin/bash
#=============================================================================
# mistral batch job parameters
#-----------------------------------------------------------------------------
#SBATCH --account=bb1152
#SBATCH --job-name=benchmark_3d.run
#SBATCH --partition=compute
#SBATCH --nodes=1
#SBATCH --threads-per-core=1
#SBATCH --output=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_3d.run.%j.o
#SBATCH --error=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_3d.run.%j.o
#SBATCH --exclusive
#SBATCH --time=08:00:00

# Runs benchmark_3d.py ten times per enabled grid resolution.
# Arguments of benchmark_3d.py: <resolution> <start cell> <search radius>

# stop right away if the benchmark directory is missing; otherwise every
# run below would be started from the wrong directory
cd /pf/b/b380459/BigDataClouds/tricco/benchmarks || exit 1

MYPYTHON="/pf/b/b380459/conda-envs/Nawdex-Hackathon/bin/python3.8"
echo "Working with" "${MYPYTHON}"

# The runs for the coarser resolutions are currently disabled; only the
# R10000m benchmark at the bottom is executed.
#for i in {1..10}; do
#    "${MYPYTHON}" benchmark_3d.py R80000m 5738 102
#done

#for i in {1..10}; do
#    "${MYPYTHON}" benchmark_3d.py R40000m 18538 230
#done

#for i in {1..10}; do
#    "${MYPYTHON}" benchmark_3d.py R20000m 69309 460
#done

for i in {1..10}; do
    "${MYPYTHON}" benchmark_3d.py R10000m 264792 2000
done
%% Cell type:markdown id: tags:
# Notebook for benchmarking runtime of cubulation
Tests are performed on an exclusive compute node of the DKRZ supercomputer Mistral in Hamburg, Germany.
A compute node has the following specs (https://www.dkrz.de/up/systems/mistral/configuration):
* 2x 12-core Intel Xeon E5-2680 v3 (Haswell) @ 2.5GHz
* 24 cores (48 logical CPUs)
* 64 GB main memory
As for the grids, we consider limited-area ICON grids that cover a large part of the North Atlantic. They were, for example, used for the NAWDEX simulations described in Senf, F., A. Voigt et al., 2020: Increasing Resolution and Resolving Convection Improve the Simulation of Cloud‐Radiative Effects Over the North Atlantic, JGR Atmospheres. https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2020JD032667.
%% Cell type:markdown id: tags:
Load required packages. Note: adding cc3d to system path is needed because it is required by tricco.
%% Cell type:code id: tags:
``` python
import timeit
import sys
sys.path.append('/pf/b/b380459/connected-components-3d/')
sys.path.append('/pf/b/b380459/BigDataClouds/tricco/')
import tricco
```
%% Cell type:markdown id: tags:
Define the start triangle and radius of outward search as a function of grid resolution.
%% Cell type:code id: tags:
``` python
# start triangle passed to tricco.compute_cubulation, keyed by grid resolution
# NOTE(review): the benchmark_*.run batch scripts use 5738 as start cell for
# the 80 km grid, whereas 5783 is used here -- confirm which value is intended.
dict_start = {'80000m': 5783, '40000m': 18538, '20000m': 69309, '10000m': 264792}
# radius of the outward search passed to tricco.compute_cubulation, keyed by grid resolution
dict_radius = {'80000m': 102 , '40000m': 230 , '20000m': 460 , '10000m': 2000 }
```
%% Cell type:code id: tags:
``` python
# Time reading the grid and computing the cubulation for each resolution.
# This cell relies on the IPython %timeit line magic and only runs in a
# notebook / IPython session.
# path to grid files
gridpath = '/work/bb1018/b380459/NAWDEX/grids/'
# model resolutions under investigation
for res in ['80000m', '40000m', '20000m', '10000m']:
print('Working on resolution of', res)
gridfile = 'icon-grid_nawdex_78w40e23n80n_R'+res+'.nc'
print('Time to read the grid:')
# -r 1 -n 1: the statement is executed exactly once (1 run of 1 loop).
# The grid is stored on tricco.grid_functions -- presumably this is where
# compute_cubulation reads it from; confirm against tricco.
%timeit -r 1 -n 1 tricco.grid_functions.grid = tricco.prepare_grid(model='ICON',path=gridpath, file=gridfile)
print('Time to compute cubulation:')
# the returned cubulation is discarded; only the runtime is of interest here
%timeit -r 1 -n 1 tricco.compute_cubulation(start_triangle=dict_start[res], radius=dict_radius[res], print_progress=False)
```
%% Output
Working on resolution of 80000m
Time to read the grid:
310 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Time to compute cubulation:
11.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Working on resolution of 40000m
Time to read the grid:
163 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Time to compute cubulation:
1min 13s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Working on resolution of 20000m
Time to read the grid:
144 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Time to compute cubulation:
11min 34s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Working on resolution of 10000m
Time to read the grid:
312 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Time to compute cubulation:
"""Benchmark the cubulation routine on DKRZ Mistral.

Usage:
    python benchmark_cubulation.py RESOL STARTCELL SEARCHRAD SAVECUBUL

RESOL is the grid tag (e.g. ``R80000m``), STARTCELL and SEARCHRAD are the
start triangle and search radius handed to ``tricco.compute_cubulation``.
The cubulation is saved to disk when SAVECUBUL equals 1.
"""
import datetime
import sys
from time import perf_counter

# make tricco importable; the connected-components-3d checkout is added as well
sys.path.append('/pf/b/b380459/connected-components-3d/')
sys.path.append('/pf/b/b380459/tricco/')


def parse_args(argv):
    """Return ``(resol, startcell, searchrad, savecubul)`` parsed from ``argv``.

    Exits with a usage message when arguments are missing. Extra arguments
    are ignored.
    """
    if len(argv) < 5:
        sys.exit('Usage: benchmark_cubulation.py RESOL STARTCELL SEARCHRAD SAVECUBUL')
    return argv[1], int(argv[2]), int(argv[3]), int(argv[4])


def timed(func, *args, **kwargs):
    """Call ``func`` and return ``(result, elapsed)``.

    ``elapsed`` is a ``datetime.timedelta`` measured with the monotonic
    ``perf_counter`` clock, so it is not affected by system clock changes.
    """
    begin = perf_counter()
    result = func(*args, **kwargs)
    elapsed = perf_counter() - begin
    return result, datetime.timedelta(seconds=elapsed)


def main(argv):
    """Time reading the grid and computing the cubulation; optionally save it."""
    resol, startcell, searchrad, savecubul = parse_args(argv)

    # gridfile including path
    gridfile = '/work/bb1018/b380459/NAWDEX/grids/icon-grid_nawdex_78w40e23n80n_'+resol+'.nc'

    # imported here so the helpers above stay usable without tricco
    import tricco

    print(' ')
    print('-----------------------------------------')
    print('Working on resolution of', resol)
    print('Start cell:', startcell, 'Search radius:', searchrad, 'Save cubulation:', savecubul)

    # gridfile already holds the full path, hence path='/'
    grid, elapsed = timed(tricco.prepare_grid, model='ICON', path='/', file=gridfile)
    tricco.grid_functions.grid = grid
    print('Time to read the grid:', elapsed)

    cubulation, elapsed = timed(tricco.compute_cubulation, start_triangle=startcell,
                                radius=searchrad, print_progress=False)
    print('Time to compute cubulation:', elapsed)

    # optional: save cubulation (np.save appends the '.npy' extension)
    if savecubul == 1:
        import numpy as np
        np.save('/scratch/b/b380459/tricco_output/icon-grid_nawdex_78w40e23n80n_'
                +resol+'_cubulation_start'+str(startcell)+'_radius'+str(searchrad), cubulation)

    print('-----------------------------------------')
    print(' ')


if __name__ == '__main__':
    main(sys.argv)
#!/bin/bash
#=============================================================================
# mistral batch job parameters
#-----------------------------------------------------------------------------
#SBATCH --account=bb1152
#SBATCH --job-name=benchmark_cubulation.run
#SBATCH --partition=compute
#SBATCH --nodes=1
#SBATCH --threads-per-core=1
#SBATCH --output=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_cubulation.run.%j.o
#SBATCH --error=/pf/b/b380459/BigDataClouds/tricco/benchmarks/LOG.benchmark_cubulation.run.%j.o
#SBATCH --exclusive
#SBATCH --time=08:00:00

# Runs benchmark_cubulation.py ten times per enabled grid resolution.
# Arguments of benchmark_cubulation.py:
#   <resolution> <start cell> <search radius> <save cubulation: 1 = yes>

# stop right away if the benchmark directory is missing; otherwise every
# run below would be started from the wrong directory
cd /pf/b/b380459/BigDataClouds/tricco/benchmarks || exit 1

MYPYTHON="/pf/b/b380459/conda-envs/Nawdex-Hackathon/bin/python3.8"
echo "Working with" "${MYPYTHON}"

# For the 80 km and 40 km grids an alternative start cell / radius pair is
# active; the previous pair is kept as a comment inside each loop.
for i in {1..10}; do
#    "${MYPYTHON}" benchmark_cubulation.py R80000m 5738 102 1
    "${MYPYTHON}" benchmark_cubulation.py R80000m 5568 200 1
done

for i in {1..10}; do
#    "${MYPYTHON}" benchmark_cubulation.py R40000m 18538 230 1
    "${MYPYTHON}" benchmark_cubulation.py R40000m 18493 400 1
done

# The 20 km and 10 km runs are currently disabled.
#for i in {1..10}; do
#    "${MYPYTHON}" benchmark_cubulation.py R20000m 69309 460 1
#done

#for i in {1..10}; do
#    "${MYPYTHON}" benchmark_cubulation.py R10000m 264792 2000 1
#done
Benchmarking runtime of cubulation
Tests are performed on an exclusive compute node of the DKRZ supercomputer Mistral in Hamburg, Germany.
A compute node has the following specs (https://www.dkrz.de/up/systems/mistral/configuration):
* 2x12-core Intel Xeon E5-2680 v3 (Haswell) @ 2.5GHz,
* 24 cores (48 logical CPUs),
* 64 GB main memory.
As for the grids, we consider limited-area ICON grids that cover a large part of the North Atlantic. They were, for example, used for the NAWDEX simulations described in Senf, F., A. Voigt et al., 2020: Increasing Resolution and Resolving Convection Improve the Simulation of Cloud‐Radiative Effects Over the North Atlantic, JGR Atmospheres. https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2020JD032667.
Usage:
benchmark_cubulation.run is a batch job that is submitted to an exclusive compute node via sbatch and that calls benchmark_cubulation.py. The grid resolution, start triangle and search radius for the cubulation are handed over to benchmark_cubulation.py, which reads in the grid file and does the cubulation.
"""Run the cubulation for a given start cell and search radius.

Purpose: For a given start cell find the smallest radius that allows one to
cover all grid cells. This is helpful as a too large radius artificially
inflates the size of the cubulated grid.

Written for the limited-area grids of ICON used in the Tricco introduction
paper.

Usage:
    python <this script> RESOL START RADIUS
"""
import sys

# make tricco importable; the connected-components-3d checkout is added as well
sys.path.append('/pf/b/b380459/connected-components-3d/')
sys.path.append('/pf/b/b380459/BigDataClouds/tricco/')


def parse_args(argv):
    """Return ``(resol, start, radius)`` parsed from ``argv``.

    Exits with a usage message when arguments are missing. Extra arguments
    are ignored.
    """
    if len(argv) < 4:
        sys.exit('Usage: ' + (argv[0] if argv else 'script') + ' RESOL START RADIUS')
    return argv[1], int(argv[2]), int(argv[3])


def main(argv):
    """Read the grid and compute the cubulation with progress output."""
    resol, start, radius = parse_args(argv)

    print('-----------------------------------------')
    print('Working on ICON grid with resolution', resol)
    print('Start cell   :', start)
    print('Search radius:', radius)

    # imported here so that parse_args stays usable without tricco
    import tricco

    # the grid file is looked up relative to the current working directory
    tricco.grid_functions.grid = tricco.prepare_grid(model='ICON', path='./data/',
                                                     file='icon-grid_nawdex_78w40e23n80n_'+resol+'.nc')
    # the progress output is the purpose of this script; the returned
    # cubulation itself is not used any further
    tricco.compute_cubulation(start_triangle=start, radius=radius, print_progress=True)


if __name__ == '__main__':
    main(sys.argv)
"""Find the grid cell closest to a given latitude-longitude position.

This is helpful to set the start cell of the cubulation routine.
Written for the limited-area grids of ICON used in the Tricco introduction
paper.

Usage:
    python <this script> RESOL LAT LON

LAT and LON are given in degrees.
"""
import sys

import numpy as np


def parse_args(argv):
    """Return ``(resol, lat, lon)`` parsed from ``argv``.

    Exits with a usage message when arguments are missing. Extra arguments
    are ignored.
    """
    if len(argv) < 4:
        sys.exit('Usage: ' + (argv[0] if argv else 'script') + ' RESOL LAT LON')
    return argv[1], float(argv[2]), float(argv[3])


def closest_cell(clat, clon, lat, lon):
    """Return the 0-based index of the cell centre closest to (lat, lon).

    Parameters
    ----------
    clat, clon : array-like
        Latitude and longitude of the cell centres in radians.
    lat, lon : float
        Target position in degrees.

    Closeness is measured along the great circle (haversine term) rather
    than as a Euclidean distance in degree space. A plain lat/lon distance
    over-weights longitude differences at high latitudes and fails across
    the +-180 degree longitude seam.
    """
    clat = np.asarray(clat, dtype=float)
    clon = np.asarray(clon, dtype=float)
    lat_rad = np.deg2rad(lat)
    lon_rad = np.deg2rad(lon)
    # haversine of the central angle; it grows monotonically with the
    # great-circle distance, so its argmin is the closest cell
    hav = (np.sin(0.5 * (clat - lat_rad)) ** 2
           + np.cos(clat) * np.cos(lat_rad) * np.sin(0.5 * (clon - lon_rad)) ** 2)
    return int(np.argmin(hav))


def main(argv):
    """Load the grid of the requested resolution and report the closest cell."""
    resol, lat, lon = parse_args(argv)

    print('-----------------------------------------')
    print('Working on ICON grid with resolution', resol)
    print('Searching for cell closest to lat', lat, 'and lon', lon)

    # gridfile including path
    gridfile = '/work/bb1018/b380459/NAWDEX/grids/icon-grid_nawdex_78w40e23n80n_'+resol+'.nc'

    # load lat-lon info of grid; the original script converted clat/clon
    # from rad to deg, so they are taken to be stored in radians
    import xarray as xr
    ds_grid = xr.load_dataset(gridfile)
    clat = ds_grid['clat'].values
    clon = ds_grid['clon'].values

    print('Closest cell has index', closest_cell(clat, clon, lat, lon))
    # NOTE(review): np.argmin returns a 0-based array position, so the
    # printed index may already be the tricco start cell; confirm whether
    # the "- 1" advice below is still correct.
    print('Note: The startcell for tricco is the cell index - 1.')
    print('      This is because the found cell index is on the ICON grid')
    print('      and the ICON indexing starts with 1.')
    print('-----------------------------------------')


if __name__ == '__main__':
    main(sys.argv)
from setuptools import setup, find_packages
VERSION = '0.0.4'
VERSION = '0.0.1'
DESCRIPTION = 'TriCCo'
LONG_DESCRIPTION = 'TriCCo: a python package for connected component labeling on triangular grids'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment