Skip to content
Snippets Groups Projects
Commit 575ccac8 authored by Aiko Voigt's avatar Aiko Voigt
Browse files

Adds dask distributed client for PV pot generation, also core functions added

parent 01a2b8c7
No related branches found
No related tags found
No related merge requests found
import numpy as np
import xarray as xr
import time
def windspeed(_ds):
    """Return the wind speed derived from the ``u10`` and ``v10`` entries of ``_ds``.

    The speed is the element-wise Euclidean norm sqrt(u10**2 + v10**2).
    ``_ds`` only needs to support item access by those two keys
    (presumably an xarray Dataset holding the wind components -- confirm
    against callers).
    """
    u_squared = np.power(_ds["u10"], 2)
    v_squared = np.power(_ds["v10"], 2)
    return np.sqrt(u_squared + v_squared)
def windspeed2(a, b):
    """Return sqrt(a**2 + b**2) element-wise via ``xr.apply_ufunc``.

    The computation is handed to xarray with ``dask="parallelized"``, so
    dask-backed inputs are processed without being loaded eagerly.
    """

    def _magnitude(x, y):
        # Element-wise Euclidean norm of the two components.
        return np.sqrt(x**2 + y**2)

    return xr.apply_ufunc(_magnitude, a, b, dask="parallelized")
def pv_pot(_ds):
    """Return the photovoltaic potential computed from ``_ds``.

    Reads the attributes ``t2m``, ``ssrd`` and ``wspd`` of ``_ds``:

    1. A cell temperature is built as a linear combination of
       ``t2m - 273.15``, ``ssrd / 3600`` and ``wspd``.
    2. A performance ratio follows as ``1 - 0.005 * (cell temperature - 25)``.
    3. The result is ``performance ratio * ssrd / 3600 / 1000``.

    ``ssrd`` is divided by the number of seconds per hour; presumably it is
    an hourly accumulated flux that this turns into W m-2, and ``t2m`` is
    presumably given in Kelvin -- confirm against the input data.
    """
    seconds_per_hour = 3600
    kelvin_offset = 273.15
    reference_temperature = 25
    temperature_sensitivity = -0.005
    # Coefficients of the linear cell-temperature model:
    # constant, air temperature, radiation, wind speed.
    offset, k_temp, k_rad, k_wind = 4.3, 0.943, 0.028, -1.528

    # cell temperature
    air_term = k_temp * (_ds.t2m - kelvin_offset)
    radiation_term = k_rad * _ds.ssrd / seconds_per_hour
    wind_term = k_wind * _ds.wspd
    cell_temperature = offset + air_term + radiation_term + wind_term

    # performance ratio
    performance_ratio = 1 + temperature_sensitivity * (
        cell_temperature - reference_temperature
    )

    # pv potential
    return performance_ratio * _ds.ssrd / seconds_per_hour / 1000
def measure_performance(code_to_run):
    """Run ``code_to_run`` once, print how long it took, and return its result.

    Parameters
    ----------
    code_to_run : callable
        Called with no arguments.

    Returns
    -------
    object
        Whatever ``code_to_run()`` returned (``None`` if it returns nothing),
        so that the timed computation's output is not lost.
    """
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump when the system clock is
    # adjusted.
    start_time = time.perf_counter()
    result = code_to_run()
    execution_time = time.perf_counter() - start_time
    print(f"Execution time: {execution_time:.5f} seconds")
    return result
\ No newline at end of file
%% Cell type:markdown id:6b0f6097-a216-43cf-be0b-2e23a325510d tags:
# Using a dask distributed cluster to speed up the computation of PV potential by a factor of 10
%% Cell type:code id:9355f147-7603-413d-8989-69201a37653f tags:
``` python
import xarray as xr
import numpy as np
import time
from dask.distributed import Client
import core as core
import warnings
warnings.filterwarnings("ignore")
```
%% Cell type:code id:56840920-bb48-440f-b983-f9a71471b57f tags:
``` python
# location of era5 data on teachinghub
path="/home/voigta80/LEHRE/msc-intro-comp-met-ex-w2024/data/era5/"
```
%% Cell type:markdown id:a8507960-3056-4721-8ec5-d167c3d3f955 tags:
Start a dask cluster with 20 processes (workers), each with 5 threads.
%% Cell type:code id:61415c08-e2ed-4649-9371-0501c4cea567 tags:
``` python
client = Client(n_workers=20, threads_per_worker=5)
```
%% Cell type:code id:41ee4b25-2070-4ab0-936e-bf94c0d5a8c3 tags:
``` python
client
```
%% Output
<Client: 'tcp://127.0.0.1:38787' processes=1 threads=5, memory=753.83 GiB>
%% Cell type:code id:7bea8c4c-2b36-4d07-9689-d69c07b1af53 tags:
``` python
# Time the whole pipeline: open all files of the year 2000, derive the wind
# speed, and compute the monthly-mean PV potential. The work is only carried
# out by the final .compute() call.
# perf_counter() is a monotonic clock meant for measuring intervals.
start_time = time.perf_counter()
ds = xr.open_mfdataset(
    path + "era5-2000-*.nc",
    engine="netcdf4",
    chunks={"valid_time": 100_000},  # chunk sizes are integers
)
ds["wspd"] = core.windspeed(ds)
pvpot = core.pv_pot(ds).groupby(ds.valid_time.dt.month).mean("valid_time").compute()
end_time = time.perf_counter()
execution_time = end_time - start_time
```
%% Cell type:code id:f637f592-f58b-46ad-885a-80670c8ba292 tags:
``` python
print(f"Execution time: {execution_time:.5f} seconds")
```
%% Cell type:code id:70964d66-66a4-44bd-bf54-e64ef6876e67 tags:
``` python
client.shutdown()
```
%% Cell type:code id:a375ca41-8af0-46e6-aa25-80241b9ab8f1 tags:
``` python
pvpot
```
%% Cell type:markdown id:7d41920d-ccc6-4657-bacc-19006b87d7b8 tags: %% Cell type:markdown id:7d41920d-ccc6-4657-bacc-19006b87d7b8 tags:
# Example calculation of PV potential for ERA5 data # Example calculation of PV potential for ERA5 data
%% Cell type:code id:99875287-2b84-460f-9c90-2e2542ff4e9b tags: %% Cell type:code id:99875287-2b84-460f-9c90-2e2542ff4e9b tags:
``` python ``` python
import xarray as xr import xarray as xr
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
``` ```
%% Cell type:code id:1f4067af-2ee7-43f5-b43d-94739e229f5c tags: %% Cell type:code id:1f4067af-2ee7-43f5-b43d-94739e229f5c tags:
``` python ``` python
# location of era5 data on teachinghub # location of era5 data on teachinghub
path="/home/voigta80/LEHRE/msc-intro-comp-met-ex-w2024/data/era5/" path="/home/voigta80/LEHRE/msc-intro-comp-met-ex-w2024/data/era5/"
``` ```
%% Cell type:markdown id:a2c87dd8-340b-4487-a4a0-e3d4c6dbc9aa tags: %% Cell type:markdown id:a2c87dd8-340b-4487-a4a0-e3d4c6dbc9aa tags:
Load data for Jan and Feb 1979. Load data for Jan and Feb 1979.
%% Cell type:code id:daf3daff-d39d-4a49-8c13-3ffba150a8b4 tags: %% Cell type:code id:daf3daff-d39d-4a49-8c13-3ffba150a8b4 tags:
``` python ``` python
ds=xr.open_mfdataset([path+"era5-1979-01.nc", path+"era5-1979-02.nc"]) ds=xr.open_mfdataset([path+"era5-1979-01.nc", path+"era5-1979-02.nc"])
``` ```
%% Cell type:markdown id:8af1d101-3f38-42b9-9f9d-f47c37b660f3 tags: %% Cell type:markdown id:8af1d101-3f38-42b9-9f9d-f47c37b660f3 tags:
Calculate wind speed. Calculate wind speed.
%% Cell type:code id:07374d0b-6a20-4027-9b78-65e59b26b959 tags: %% Cell type:code id:07374d0b-6a20-4027-9b78-65e59b26b959 tags:
``` python ``` python
ds["wspd"] = np.sqrt(np.power(ds["u10"],2)+np.power(ds["v10"],2)) ds["wspd"] = np.sqrt(np.power(ds["u10"],2)+np.power(ds["v10"],2))
``` ```
%% Cell type:markdown id:c484d2f8-130c-45b8-a741-e118f58726c1 tags: %% Cell type:markdown id:c484d2f8-130c-45b8-a741-e118f58726c1 tags:
Calculate PV potential following Jerez et al. 2015, https://www.nature.com/articles/ncomms10014. Calculate PV potential following Jerez et al. 2015, https://www.nature.com/articles/ncomms10014.
**Note:** radiative fluxes are accumulated over 1 hour, so we need to divide by seconds per hour to obtain fluxes in Wm-2. **Note:** radiative fluxes are accumulated over 1 hour, so we need to divide by seconds per hour to obtain fluxes in Wm-2.
%% Cell type:code id:447d7b3b-957c-44a2-abae-72e6ef3a1ec7 tags: %% Cell type:code id:447d7b3b-957c-44a2-abae-72e6ef3a1ec7 tags:
``` python ``` python
sechour=3600 # secondd per hour sechour=3600 # seconds per hour
c1 = 4.3 c1 = 4.3
c2 = 0.943 c2 = 0.943
c3 = 0.028 c3 = 0.028
c4 = -1.528 c4 = -1.528
# cell temperature # cell temperature
T_cell = c1 + c2 * (ds.t2m - 273.15) + c3 * ds.ssrd/sechour + c4 * ds.wspd T_cell = c1 + c2 * (ds.t2m - 273.15) + c3 * ds.ssrd/sechour + c4 * ds.wspd
# performance ratio # performance ratio
beta = -0.005 beta = -0.005
p_r = 1 + beta*(T_cell-25) p_r = 1 + beta*(T_cell-25)
# pv potential # pv potential
pv_pot = p_r * ds.ssrd/(sechour) * 1/1000 pv_pot = p_r * ds.ssrd/(sechour) * 1/1000
``` ```
%% Cell type:markdown id:036de419-2716-4ead-bd94-1d551b03b29c tags: %% Cell type:markdown id:036de419-2716-4ead-bd94-1d551b03b29c tags:
Calculate time-mean PV potential and plot as a map. Calculate time-mean PV potential and plot as a map.
%% Cell type:code id:3bc58d12-7d2d-45ae-996b-e18764af4889 tags: %% Cell type:code id:3bc58d12-7d2d-45ae-996b-e18764af4889 tags:
``` python ``` python
pv_pot_tmean = pv_pot.mean("valid_time").compute() pv_pot_tmean = pv_pot.mean("valid_time").compute()
``` ```
%% Cell type:code id:65dc280c-78e0-483f-833e-9b32df81c728 tags: %% Cell type:code id:65dc280c-78e0-483f-833e-9b32df81c728 tags:
``` python ``` python
plt.contourf(ds.longitude, ds.latitude, pv_pot_tmean) plt.contourf(ds.longitude, ds.latitude, pv_pot_tmean)
plt.colorbar() plt.colorbar()
``` ```
%% Output %% Output
<matplotlib.colorbar.Colorbar at 0x7f841d6a3130> <matplotlib.colorbar.Colorbar at 0x7f841d6a3130>
%% Cell type:code id:70cc5656-f511-437c-bddf-73ea620d37ca tags: %% Cell type:code id:70cc5656-f511-437c-bddf-73ea620d37ca tags:
``` python ``` python
``` ```
......
%% Cell type:markdown id:7d41920d-ccc6-4657-bacc-19006b87d7b8 tags:
# Example calculation of PV potential for ERA5 data
I calculate PV potential for one year and derive monthly-mean values. I compare two ways of parallelizing the work:
1. multiprocessing with 1 process per file, hence 12 processes; each process uses dask-xarray under the hood and can hence use >100% CPU
2. dask-xarray processing of 1 dataset with all 12 months
%% Cell type:code id:99875287-2b84-460f-9c90-2e2542ff4e9b tags:
``` python
import xarray as xr
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from multiprocessing import Process, Queue
import core as core
import warnings
warnings.filterwarnings("ignore")
```
%% Cell type:code id:1f4067af-2ee7-43f5-b43d-94739e229f5c tags:
``` python
# location of era5 data on teachinghub
path="/home/voigta80/LEHRE/msc-intro-comp-met-ex-w2024/data/era5/"
```
%% Cell type:markdown id:e447b6c3-da76-4bf2-a417-2d957389ab73 tags:
## 1. Multiprocessing
%% Cell type:code id:973862f1-8afc-417c-a009-0864322bb95b tags:
``` python
# generate list of era5 files for a given year
def get_filelists(year: str):
    """Return the sorted list of ERA5 files for ``year`` found below ``path``.

    The module-level ``path`` is searched recursively for files named
    ``era5-<year>-*.nc``.

    Parameters
    ----------
    year : str
        Year as it appears in the file names, e.g. ``"2000"``.

    Returns
    -------
    list of pathlib.Path
        Matching files sorted by path; empty if nothing matches.
    """
    # rglob yields entries in filesystem order, which is arbitrary; sorting
    # makes the list deterministic.
    return sorted(Path(path).rglob("era5-" + year + "-*.nc"))
```
%% Cell type:code id:83ae56d9-2577-4fee-9f2c-5ef80da798ec tags:
``` python
# function to compute time-mean pv potential, will be called by multiprocessing
def batchcompute_pvpot(file, queue):
    """Compute the time-mean PV potential of one file and put it on ``queue``.

    Parameters
    ----------
    file : path-like
        File to open with xarray's netcdf4 engine.
    queue : multiprocessing.Queue
        Receives the computed time mean; this is how the result is handed
        back to the parent process.

    Returns
    -------
    None
    """
    # The context manager closes the file once the result has been computed.
    with xr.open_dataset(
        file,
        engine="netcdf4",
        chunks={"valid_time": 100_000},  # chunk sizes are integers
    ) as ds:
        ds["wspd"] = core.windspeed(ds)
        # .compute() must run while the file is still open.
        time_mean = core.pv_pot(ds).mean("valid_time").compute()
    queue.put(time_mean)
```
%% Cell type:code id:8456f917-6cad-4fd7-940b-d083bf15ac78 tags:
``` python
# Grid size of the fields returned by batchcompute_pvpot
# (presumably the ERA5 0.25 degree grid -- confirm against the data).
nlat = 721
nlon = 1440


def multi_processing():
    """Compute the time-mean PV potential of every file of 2000 in parallel.

    One process is started per file returned by ``get_filelists``; each
    process runs ``batchcompute_pvpot`` on its file.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(number of files, nlat, nlon)``; row ``i`` holds the
        result for the ``i``-th file in sorted path order.
    """
    year = "2000"
    # Sort so that row i of the output always refers to the same file.
    flist = sorted(get_filelists(year))
    # use 1 process per monthly file
    nprocs = len(flist)
    # output from each process
    pvpot = np.zeros((nprocs, nlat, nlon))
    # One queue per process: with a single shared queue the results arrive in
    # completion order and can no longer be matched to their files.
    queues = [Queue() for _ in flist]
    processes = [
        Process(target=batchcompute_pvpot, args=(file, queue))
        for file, queue in zip(flist, queues)
    ]
    for process in processes:  # start all processes
        process.start()
    # Collect the results before joining: a process that has put data on a
    # queue does not terminate until that data has been consumed.
    for i, queue in enumerate(queues):
        pvpot[i] = queue.get()
    for process in processes:  # wait for all processes to complete
        process.join()
    return pvpot
```
%% Cell type:code id:a2b43143-a8d2-4691-a7c9-aea6673cc231 tags:
``` python
core.measure_performance(multi_processing)
```
%% Output
Execution time: 78.37880 seconds
%% Cell type:markdown id:ee433aec-223e-445c-9040-70e86d1118e1 tags:
## 2. Dask-array on merged file
%% Cell type:code id:daf3daff-d39d-4a49-8c13-3ffba150a8b4 tags:
``` python
def dask_xarray():
    """Compute monthly-mean PV potential for 2000 from one multi-file dataset.

    All files matching ``era5-2000-*.nc`` below the module-level ``path`` are
    opened as a single dataset; the wind speed is derived, the PV potential
    is grouped by calendar month of ``valid_time`` and averaged.

    Returns
    -------
    The computed monthly means, so that the timed result is not discarded.
    """
    # The context manager closes the underlying files when the work is done.
    with xr.open_mfdataset(
        path + "era5-2000-*.nc",
        chunks={"valid_time": 100_000},  # chunk sizes are integers
    ) as ds2:
        ds2["wspd"] = core.windspeed(ds2)
        monthly_mean = (
            core.pv_pot(ds2).groupby(ds2.valid_time.dt.month).mean("valid_time")
        )
        # Compute while the files are still open.
        return monthly_mean.compute()
```
%% Cell type:code id:0be1428c-aeca-4678-82df-81666b2df90e tags:
``` python
core.measure_performance(dask_xarray)
```
%% Output
Execution time: 679.63075 seconds
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment