diff --git a/make_binned_heating.py b/make_binned_heating.py
new file mode 100644
index 0000000000000000000000000000000000000000..bcb2779ef8f80f1e0e084d392b7737fedd21e017
--- /dev/null
+++ b/make_binned_heating.py
@@ -0,0 +1,115 @@
+# Generate a binned version of the heating rates for each year and save it as a netcdf file.
+# Binning is done into 5-day pentads and 2.5-deg lat and lon bins.
+#
+# on IMG JET call with /jetfs/home/avoigt/micromamba/envs/cloudsat-calipso/bin/python3.12 make_binned_heating.py
+
+import xarray as xr
+import numpy as np
+import pandas as pd
+from pathlib import Path
+from multiprocessing import Process, Queue
+
+import warnings
+warnings.filterwarnings("ignore")
+
+import sys
+sys.path.append("/jetfs/home/avoigt/cloudsat-calipso-heating-rates/core/")
+import core as core
+
+# define the binning into pentads x lats x lons
+nlats = 72; dlats = 2.5
+lats = np.linspace(-90+dlats/2, 90-dlats/2, nlats)
+nlons = 144; dlons = 2.5
+lons = np.linspace(-180+dlons/2, 180-dlons/2, nlons)
+ddays = 5; npents = np.int64(365/ddays)  # number of pentads for the pentad means
+pents = np.arange(0, npents)
+
+# generate lists of 2b-flxhr-lidar and ecmwf-aux files for a given year
+def get_filelists(year: str):
+    flist_cs = list()  # 2b-flxhr-lidar
+    flist_ec = list()  # ecmwf-aux
+    for file_cs in Path("/jetfs/shared-data/CLOUDSAT/Data/2B-FLXHR-LIDAR.P2_R05/"+year+"/").rglob(year+"*.hdf"):
+        flist_cs.append(file_cs)
+        file_ec = Path(str(file_cs).replace("/2B-FLXHR-LIDAR.P2_R05/", "/ECMWF-AUX.P1_R05/").replace("_CS_2B-FLXHR-LIDAR_GRANULE_P2_", "_CS_ECMWF-AUX_GRANULE_P1_"))
+        flist_ec.append(file_ec)
+    return flist_cs, flist_ec
+
+# compute binned heating rates for a list of granules; called via multiprocessing
+def batchcompute_heatingrates(flist_cs, flist_ec, queue):
+    # "climatology" arrays
+    hr_allsky = np.zeros((npents, 2, 125, nlats, nlons))
+    hr_clrsky = np.zeros((npents, 2, 125, nlats, nlons))
+    # count the number of rays that entered the summation for allsky and clrsky;
+    # this is needed to compute the mean over the bin
+    nbr_allsky = np.zeros((npents, 2, 125, nlats, nlons))
+    nbr_clrsky = np.zeros((npents, 2, 125, nlats, nlons))
+
+    for ifile in range(len(flist_cs)):
+        file_cs = flist_cs[ifile]
+        file_ec = flist_ec[ifile]
+        data = xr.merge([core.read_fluxes(str(file_cs)), core.read_toaswin(str(file_cs)),
+                         core.read_plevthickness(str(file_ec)), core.read_lon_lat(str(file_cs)),
+                         core.read_time(str(file_cs))])
+        hr = core.compute_heatingrates(data)
+
+        # derive indices for the binned data
+        ind_pents = np.int64(np.floor((pd.to_datetime(data.TIME).dayofyear-1)/ddays))
+        # leap years could lead to a day with pentad index npents; we catch this manually
+        # and set the index to npents-1 for this special case
+        ind_pents = np.where(ind_pents>npents-1, npents-1, ind_pents)
+        ind_lats = np.int64(np.round((data.LAT+(90-dlats/2))/dlats))
+        # a latitude of exactly 90 would lead to ind_lats=nlats; clamp it analogously to the
+        # longitudes below (purely defensive, CloudSat does not sample the poles)
+        ind_lats = np.where(ind_lats>nlats-1, nlats-1, ind_lats)
+        ind_lons = np.int64(np.round((data.LON+(180-dlons/2))/dlons))
+        # a longitude of exactly 180 leads to ind_lons=nlons; we catch this manually here
+        # and set the index to nlons-1 for this special case
+        ind_lons = np.where(ind_lons>nlons-1, nlons-1, ind_lons)
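+
+        # Worked example of the index arithmetic above (illustrative values, not taken
+        # from the data): a ray at lat=47.0, lon=16.4 on day-of-year 152 falls into
+        #   pentad index np.floor((152-1)/5) = 30, i.e. days 151-155,
+        #   lat index np.round((47.0+88.75)/2.5) = 54, i.e. the bin centered at lats[54] = 46.25,
+        #   lon index np.round((16.4+178.75)/2.5) = 78, i.e. the bin centered at lons[78] = 16.25.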
+
+        # loop over rays and accumulate the heating rates
+        # for this, set all nan values to zero and keep track of the number of accumulated
+        # non-nan values so that the bin mean can be computed later
+        for iy in np.arange(hr.y.size):
+            hr_allsky[ind_pents[iy],:,:,ind_lats[iy],ind_lons[iy]] += np.nan_to_num(hr["HR"].values[:,iy,:], nan=0.0)
+            hr_clrsky[ind_pents[iy],:,:,ind_lats[iy],ind_lons[iy]] += np.nan_to_num(hr["HR_NC"].values[:,iy,:], nan=0.0)
+            nbr_allsky[ind_pents[iy],:,:,ind_lats[iy],ind_lons[iy]] += np.int64(1-np.isnan(hr["HR"].values[:,iy,:]))
+            nbr_clrsky[ind_pents[iy],:,:,ind_lats[iy],ind_lons[iy]] += np.int64(1-np.isnan(hr["HR_NC"].values[:,iy,:]))
+    queue.put([hr_allsky, hr_clrsky, nbr_allsky, nbr_clrsky])
+    return None
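+
+# Note on the collection pattern used in the year loop below: queue.get() is called once
+# per process *before* join(). The order matters because a child process that has put a
+# large array on the queue cannot exit until the data has been read from the underlying
+# pipe, so joining first can deadlock. A minimal sketch of the pattern (hypothetical
+# worker, not part of this script):
+#
+#   def _worker(q):
+#       q.put(np.zeros((npents, 2, 125, nlats, nlons)))  # ~1.5 GB result
+#
+#   q = Queue()
+#   procs = [Process(target=_worker, args=(q,)) for _ in range(4)]
+#   for p in procs: p.start()
+#   results = [q.get() for _ in procs]  # drain the queue first ...
+#   for p in procs: p.join()            # ... then join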
+
+# loop over years
+for year in ["2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017"]:
+
+    print("Working on year", year)
+
+    flist_cs, flist_ec = get_filelists(year)
+
+    # break the file lists into smaller chunks that can be processed in parallel
+    nprocs = 40  # number of parallel processes
+    nfiles = len(flist_cs)  # number of 2b-flxhr-lidar files to process; there are equally many ecmwf-aux files
+    chk_size = np.int64(nfiles/nprocs)+1  # number of files per process; add 1 to make sure all files are included
+    flist_cs_chk = [flist_cs[i*chk_size:(i+1)*chk_size] for i in range(0, nprocs)]
+    flist_ec_chk = [flist_ec[i*chk_size:(i+1)*chk_size] for i in range(0, nprocs)]
+
+    # arrays with chunks of binned data: nprocs x pentads x bands x height x lat x lon
+    hr_allsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
+    hr_clrsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
+    nbr_allsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
+    nbr_clrsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
+
+    # store the results from each process
+    queue = Queue()
+    processes = [Process(target=batchcompute_heatingrates,
+                         args=(flist_cs_chk[i], flist_ec_chk[i], queue)) for i in range(0, nprocs)]
+    for process in processes: process.start()  # start all processes
+    for i in range(nprocs):  # collect the results from the processes
+        [hr_allsky_chk[i], hr_clrsky_chk[i], nbr_allsky_chk[i], nbr_clrsky_chk[i]] = queue.get()
+    for process in processes: process.join()  # wait for all processes to complete
+
+    # combine the results from the processes: bin mean = accumulated sum / number of accumulated rays
+    hr_allsky = np.nansum(hr_allsky_chk, axis=0)/np.nansum(nbr_allsky_chk, axis=0)
+    hr_clrsky = np.nansum(hr_clrsky_chk, axis=0)/np.nansum(nbr_clrsky_chk, axis=0)
+
+    # save to netcdf via xarray
+    da_hr_allsky_sw = xr.DataArray(hr_allsky[:,0], name="HR_allsky_sw", dims=("pentad", "height", "lat", "lon"), coords={"pentad": pents, "height": np.arange(0, 125), "lat": lats, "lon": lons})
+    da_hr_allsky_lw = xr.DataArray(hr_allsky[:,1], name="HR_allsky_lw", dims=("pentad", "height", "lat", "lon"), coords={"pentad": pents, "height": np.arange(0, 125), "lat": lats, "lon": lons})
+    da_hr_clrsky_sw = xr.DataArray(hr_clrsky[:,0], name="HR_clrsky_sw", dims=("pentad", "height", "lat", "lon"), coords={"pentad": pents, "height": np.arange(0, 125), "lat": lats, "lon": lons})
+    da_hr_clrsky_lw = xr.DataArray(hr_clrsky[:,1], name="HR_clrsky_lw", dims=("pentad", "height", "lat", "lon"), coords={"pentad": pents, "height": np.arange(0, 125), "lat": lats, "lon": lons})
+    xr.merge([da_hr_allsky_sw, da_hr_allsky_lw, da_hr_clrsky_sw, da_hr_clrsky_lw]).to_netcdf("2B-FLXHR-LIDAR.P2_R05.heatingrates_binned."+year+".nc")
diff --git a/postprocess_binned_heating.py b/postprocess_binned_heating.py
new file mode 100644
index 0000000000000000000000000000000000000000..18076c0aea655c529cd7a5972515dc34ab6b785b
--- /dev/null
+++ b/postprocess_binned_heating.py
@@ -0,0 +1,42 @@
+# Take the yearly output of make_binned_heating.py and postprocess it: set proper time and
+# height coordinates, set metadata, and save all years into one netcdf file.
+#
+# on IMG JET call with /jetfs/home/avoigt/micromamba/envs/cloudsat-calipso/bin/python3.12 postprocess_binned_heating.py
+
+import xarray as xr
+import numpy as np
+
+import sys
+sys.path.append("/jetfs/home/avoigt/cloudsat-calipso-heating-rates/core/")
+import core as core
+
+import warnings
+warnings.filterwarnings("ignore")
+
+
+# derive a characteristic height as the mean height along a sample granule
+file = "/jetfs/shared-data/CLOUDSAT/Data/2B-FLXHR-LIDAR.P2_R05/2010/150/2010150002219_21737_CS_2B-FLXHR-LIDAR_GRANULE_P2_R05_E03_F00.hdf"
+height = core.read_height(file)["HGT"].mean("y")
+
+
+ds_list = list()
+for year in ["2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017"]:
+    ds = xr.open_dataset("/jetfs/home/avoigt/cloudsat-calipso-heating-rates/2B-FLXHR-LIDAR.P2_R05.heatingrates_binned."+year+".nc")
+    # pentad midpoints (see the note at the end of this script)
+    time = np.arange(year+"-01-03", year+"-12-31", 5, dtype='datetime64[D]')
+    ds = ds.rename({"pentad": "time"}).assign_coords({"time": time})
+    ds = ds.assign_coords({"height": height})
+    ds = ds.assign_attrs({"Data": "Atmospheric radiative heating rates based on 2B-FLXHR-LIDAR.P2_R05"})
+    ds = ds.assign_attrs({"Data producer": "Aiko Voigt, Dept of Meteorology and Geophysics, University of Vienna"})
+    ds = ds.assign_attrs({"Time": "Data generated on April 5, 2024"})
+    ds = ds.assign_attrs({"Software": "https://gitlab.phaidra.org/climate/cloudsat-calipso-heating-rates"})
+    text = "Geographical variations in the actual height of the height bins are neglected for simplicity. " \
+           "Data on the same height bins are averaged together. " \
+           "The height of the height bins is defined as the mean height along a sample granule."
+    ds = ds.assign_attrs({"Note on height coordinate": text})
+    ds = ds.drop_vars("xfull")  # drop the coordinate left over from the sample-granule height
+    ds_list.append(ds)
+    del ds
+
+# merge into one dataset and save to netcdf
+ds_merged = xr.merge(ds_list)
+ds_merged.to_netcdf("2B-FLXHR-LIDAR.P2_R05.heatingrates_binned.2006-2017.nc")
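+
+# Note on the time axis constructed above (reference values for the non-leap year 2006):
+#   np.arange("2006-01-03", "2006-12-31", 5, dtype="datetime64[D]")
+# yields 73 dates, one per pentad, from 2006-01-03 (midpoint of the first pentad,
+# days 1-5) to 2006-12-29 (midpoint of the last pentad, days 361-365). In leap years
+# the last pentad also absorbs day 366, as handled in make_binned_heating.py.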