diff --git a/core/core.py b/core/core.py
index a76210123ad9f011129fdd43c729361ad2671a0a..9e3a17315bf3edd89a4016a9a760dc1f2ef10d11 100644
--- a/core/core.py
+++ b/core/core.py
@@ -122,6 +122,8 @@ def read_toaswin(fname: str):
     """
     Read TOA incoming solar radiation.
 
+    Note: works for the R05 release of 2B-FLXHR-LIDAR but not for R04.
+
     Input:
     - fname: full path of hdf file (string)
     Output:
@@ -146,6 +148,36 @@ def read_toaswin(fname: str):
     return da.to_dataset()
 
 
+def read_toaswin_r04(fname: str):
+    """
+    Read TOA incoming solar radiation for release R04 of 2B-FLXHR-LIDAR.
+
+    Note: FD_TOA_IncomingSolar does not exist for the R04 release, so TOA incoming solar is reconstructed from FD.
+
+    Input:
+    - fname: full path of hdf file (string)
+    Output:
+    - toaswin: xarray dataset with toaswin
+    """
+    import rioxarray as rxr
+    import xarray as xr
+    import numpy as np
+
+    data = rxr.open_rasterio(fname)
+
+    # all-sky downward flux; mask the missing value and apply the scale factor of 0.1
+    FD = data[0]["FD"]
+    missval = FD.min().values
+    FD = 0.1*xr.where(FD==missval, np.nan, FD)
+
+    # TOA incoming shortwave is the uppermost level of the shortwave component of FD
+    toaswin = FD.isel(band=0).isel(x=0)
+    # construct an xarray dataset following the dimensions and coordinates of 2B-FLXHR-LIDAR
+    y = np.arange(0,np.size(toaswin))+0.5
+    da = xr.DataArray(toaswin, name="TOASWIN", dims=("y",), coords={"y": y})
+    return da.to_dataset()
+
+
 def read_fluxes(fname: str):
     """
     Reads radiative fluxes from 2B-FLXHR-LIDAR file.
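
For orientation, a minimal usage sketch of the new reader (not part of the diff); it assumes only what the diff defines, and the sample granule path is the one referenced in postprocess_binned_heating_R04.py below:

import sys
sys.path.append("/jetfs/home/avoigt/cloudsat-calipso-heating-rates/core/")
import core

# sample R04 granule (same file used in postprocess_binned_heating_R04.py)
fname = "/jetfs/shared-data/CLOUDSAT/Data/2B-FLXHR-LIDAR.P2_R04/2010/149/2010149011757_21723_CS_2B-FLXHR-LIDAR_GRANULE_P2_R04_E03.hdf"
toaswin = core.read_toaswin_r04(fname)
print(toaswin["TOASWIN"])  # TOA incoming shortwave per ray, dimension "y"
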
diff --git a/make_binned_heating_R04.py b/make_binned_heating_R04.py
new file mode 100644
index 0000000000000000000000000000000000000000..603b1b4046693160179ed0e012b355a9cb4e3f71
--- /dev/null
+++ b/make_binned_heating_R04.py
@@ -0,0 +1,126 @@
+# Generate a binned version of the heating rates for each year and save it as a netcdf file
+# Binning is done into 5-day pentads and 2.5 deg lat and lon bins
+#
+# same as make_binned_heating.py but for release R04
+#
+# on IMG JET call with /jetfs/home/avoigt/micromamba/envs/cloudsat-calipso/bin/python3.12 make_binned_heating_R04.py
+
+import xarray as xr
+import numpy as np
+import pandas as pd
+from pathlib import Path
+from multiprocessing import Process, Queue
+import os
+
+import warnings
+warnings.filterwarnings("ignore")
+
+import sys
+sys.path.append("/jetfs/home/avoigt/cloudsat-calipso-heating-rates/core/")
+import core as core
+
+
+# define the binning into pentads x lats x lons
+nlats=72; dlats=2.5;
+lats=np.linspace(-90+dlats/2, 90-dlats/2, nlats)
+nlons=144; dlons=2.5;
+lons=np.linspace(-180+dlons/2, 180.0-dlons/2, nlons)
+ddays=5; npents=np.int64(365/5); # 73 pentads per year, over which means are taken
+pents=np.arange(0,npents)
+
+
+# generate lists of matching 2b-flxhr-lidar and ecmwf-aux files for a given year
+def get_filelists(year: str):
+    flist_cs=list() # 2b-flxhr-lidar
+    flist_ec=list() # ecmwf-aux
+    for file_cs in Path("/jetfs/shared-data/CLOUDSAT/Data/2B-FLXHR-LIDAR.P2_R04/"+year+"/").rglob(year+"*.hdf"):
+        file_ec = Path(str(file_cs).replace("/2B-FLXHR-LIDAR.P2_R04/","/ECMWF-AUX.P1_R05/").replace("_CS_2B-FLXHR-LIDAR_GRANULE_P2_R04","_CS_ECMWF-AUX_GRANULE_P1_R05").replace(".hdf", "_F00.hdf"))
+        # only keep those combinations of file_cs, file_ec for which the ecmwf-aux file is available
+        if os.path.isfile(file_cs) and os.path.isfile(file_ec):
+            flist_cs.append(file_cs)
+            flist_ec.append(file_ec)
+    return flist_cs, flist_ec
+
+
+# function to compute heating rates, will be called by multiprocessing
+def batchcompute_heatingrates(flist_cs, flist_ec, queue):
+    # "climatology" arrays
+    hr_allsky = np.zeros((npents, 2, 125, nlats, nlons))
+    hr_clrsky = np.zeros((npents, 2, 125, nlats, nlons))
+    # count the number of rays that entered the summation for allsky and clrsky;
+    # this is needed to compute the mean over the bin
+    nbr_allsky = np.zeros((npents, 2, 125, nlats, nlons))
+    nbr_clrsky = np.zeros((npents, 2, 125, nlats, nlons))
+
+    for ifile in range(len(flist_cs)):
+        file_cs = flist_cs[ifile];
+        file_ec = flist_ec[ifile];
+        data = xr.merge([core.read_fluxes(str(file_cs)), core.read_toaswin_r04(str(file_cs)),
+                         core.read_plevthickness(str(file_ec)), core.read_lon_lat(str(file_cs)),
+                         core.read_time(str(file_cs))])
+        hr = core.compute_heatingrates(data)
+
+        # derive indices for the binned data
+        ind_pents = np.int64(np.floor((pd.to_datetime(data.TIME).dayofyear-1)/ddays))
+        # leap years could lead to a day with pentad index npents; we catch this manually
+        # and set the index to npents-1 for this special case
+        ind_pents = np.where(ind_pents>npents-1, npents-1, ind_pents)
+        ind_lats = np.int64(np.round((data.LAT+(90-dlats/2))/dlats))
+        ind_lons = np.int64(np.round((data.LON+(180-dlons/2))/dlons))
+        # a longitude of 180 leads to ind_lons=nlons; we catch this manually here
+        # and set the index to nlons-1 for this special case
+        ind_lons = np.where(ind_lons>nlons-1, nlons-1, ind_lons)
+
+        # loop over rays and accumulate heating rates; for this, set all nan values to zero
+        # and keep track of the number of accumulated not-nan values to later compute the mean
+        for iy in np.arange(hr.y.size):
+            # manual fix: some rays have data.TIME="NaT", which leads to invalid ind_pents values such as -9223372036854775808;
+            # we ignore these rays here
+            if ind_pents[iy]>=0 and ind_pents[iy]<npents:
+                hr_allsky[ind_pents[iy],:,:,ind_lats[iy],ind_lons[iy]] += np.nan_to_num(hr["HR"].values[:,iy,:], nan=0.0)
+                hr_clrsky[ind_pents[iy],:,:,ind_lats[iy],ind_lons[iy]] += np.nan_to_num(hr["HR_NC"].values[:,iy,:], nan=0.0)
+                nbr_allsky[ind_pents[iy],:,:,ind_lats[iy],ind_lons[iy]] += np.int64(1-np.isnan(hr["HR"].values[:,iy,:]))
+                nbr_clrsky[ind_pents[iy],:,:,ind_lats[iy],ind_lons[iy]] += np.int64(1-np.isnan(hr["HR_NC"].values[:,iy,:]))
+    queue.put([hr_allsky, hr_clrsky, nbr_allsky, nbr_clrsky])
+    return None
+
+
+# loop over years
+for year in ["2006", "2007", "2008", "2009", "2010", "2011"]:
+
+    print("Working on year", year)
+
+    flist_cs, flist_ec = get_filelists(year)
+
+    # break the file lists into smaller chunks that can be processed in parallel
+    nprocs = 40 # number of parallel processes
+    nfiles = len(flist_cs) # number of 2b-flxhr-lidar files to process; there are equally many ecmwf-aux files
+    chk_size = np.int64(nfiles/nprocs)+1 # number of files per process; add 1 to make sure all files are included
+    flist_cs_chk = [flist_cs[i*chk_size:(i+1)*chk_size] for i in range(0, nprocs)]
+    flist_ec_chk = [flist_ec[i*chk_size:(i+1)*chk_size] for i in range(0, nprocs)]
+
+    # arrays with chunks of binned data: nprocs x pentads x bands x height x lat x lon
+    hr_allsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
+    hr_clrsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
+    nbr_allsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
+    nbr_clrsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
+
+    # store the results from each process; collect from the queue before joining to avoid blocking on the large arrays
+    queue = Queue()
+    processes = [Process(target=batchcompute_heatingrates,
+                         args=(flist_cs_chk[i], flist_ec_chk[i], queue)) for i in range(0, nprocs)]
+    for process in processes: process.start() # start all processes
+    for i in range(nprocs): # collect results from processes; arrival order does not matter since chunks are summed below
+        [hr_allsky_chk[i], hr_clrsky_chk[i], nbr_allsky_chk[i], nbr_clrsky_chk[i]] = queue.get()
+    for process in processes: process.join() # wait for all processes to complete
+
+    # combine the results from the processes; bins without any rays yield nan
+    hr_allsky = np.nansum(hr_allsky_chk, axis=0)/np.nansum(nbr_allsky_chk, axis=0)
+    hr_clrsky = np.nansum(hr_clrsky_chk, axis=0)/np.nansum(nbr_clrsky_chk, axis=0)
+
+    # save to netcdf via xarray
+    da_hr_allsky_sw = xr.DataArray(hr_allsky[:,0], name="HR_allsky_sw", dims=("pentad", "height", "lat", "lon",), coords={"pentad": pents, "height": np.arange(0,125), "lat": lats, "lon": lons})
+    da_hr_allsky_lw = xr.DataArray(hr_allsky[:,1], name="HR_allsky_lw", dims=("pentad", "height", "lat", "lon",), coords={"pentad": pents, "height": np.arange(0,125), "lat": lats, "lon": lons})
+    da_hr_clrsky_sw = xr.DataArray(hr_clrsky[:,0], name="HR_clrsky_sw", dims=("pentad", "height", "lat", "lon",), coords={"pentad": pents, "height": np.arange(0,125), "lat": lats, "lon": lons})
+    da_hr_clrsky_lw = xr.DataArray(hr_clrsky[:,1], name="HR_clrsky_lw", dims=("pentad", "height", "lat", "lon",), coords={"pentad": pents, "height": np.arange(0,125), "lat": lats, "lon": lons})
+    xr.merge([da_hr_allsky_sw, da_hr_allsky_lw, da_hr_clrsky_sw, da_hr_clrsky_lw]).to_netcdf("/jetfs/scratch/avoigt/CLOUDSAT/2B-FLXHR-LIDAR.P2_R04.heatingrates_binned."+year+".nc")
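
A small, self-contained illustration of the bin-index arithmetic used in batchcompute_heatingrates above (not part of the diff; the sample latitudes and days of year are hypothetical):

import numpy as np

dlats = 2.5; ddays = 5; npents = 73
# latitude bins are centered at -88.75, -86.25, ..., 88.75
lat = np.array([-90.0, 0.0, 47.3])
ind_lats = np.int64(np.round((lat + (90 - dlats/2))/dlats))
print(ind_lats)  # [ 0 36 54]

# day 366 of a leap year would give pentad index 73, which is clipped to npents-1 = 72
doy = np.array([1, 5, 6, 366])
ind_pents = np.int64(np.floor((doy - 1)/ddays))
ind_pents = np.where(ind_pents > npents-1, npents-1, ind_pents)
print(ind_pents)  # [ 0  0  1 72]
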
diff --git a/postprocess_binned_heating_R04.py b/postprocess_binned_heating_R04.py
new file mode 100644
index 0000000000000000000000000000000000000000..f291e49c279eb5499821b7ad5ccd87edfc80a0a6
--- /dev/null
+++ b/postprocess_binned_heating_R04.py
@@ -0,0 +1,43 @@
+# Take the yearly output of make_binned_heating_R04.py and postprocess: proper time and height coordinates, set metadata, save into one netcdf file with all years
+#
+# same as postprocess_binned_heating.py but for release R04
+#
+# on IMG JET call with /jetfs/home/avoigt/micromamba/envs/cloudsat-calipso/bin/python3.12 postprocess_binned_heating_R04.py
+
+import xarray as xr
+import numpy as np
+
+import sys
+sys.path.append("/jetfs/home/avoigt/cloudsat-calipso-heating-rates/core/")
+import core as core
+
+import warnings
+warnings.filterwarnings("ignore")
+
+
+# derive a characteristic height coordinate as the mean height along a sample granule
+file="/jetfs/shared-data/CLOUDSAT/Data/2B-FLXHR-LIDAR.P2_R04/2010/149/2010149011757_21723_CS_2B-FLXHR-LIDAR_GRANULE_P2_R04_E03.hdf"
+height = core.read_height(file)["HGT"].mean("y")
+
+
+ds_list = list()
+for year in ["2006", "2007", "2008", "2009", "2010", "2011"]:
+    ds = xr.open_dataset("/jetfs/scratch/avoigt/CLOUDSAT/2B-FLXHR-LIDAR.P2_R04.heatingrates_binned."+year+".nc", chunks="auto")
+    time = np.arange(year+"-01-03", year+"-12-31", 5, dtype='datetime64[D]') # 73 pentad midpoints: Jan 3, Jan 8, ..., Dec 29
+    ds = ds.rename({"pentad": "time"}).assign_coords({"time": time})
+    ds = ds.assign_coords({"height": height.values})
+    ds = ds.assign_attrs({"Data": "Atmospheric radiative heating rates based on 2B-FLXHR-LIDAR.P2_R04"})
+    ds = ds.assign_attrs({"Data producer": "Aiko Voigt, Dept of Meteorology and Geophysics, University of Vienna"})
+    ds = ds.assign_attrs({"Time": "Data generated on May 13, 2024"})
+    ds = ds.assign_attrs({"Software": "https://gitlab.phaidra.org/climate/cloudsat-calipso-heating-rates"})
+    text = "Geographical variations in the actual height of the height bins are neglected for simplicity. " \
+           "Data on the same height bins are averaged together. " \
+           "The height of the height bins is defined as the mean height along a sample granule."
+    ds = ds.assign_attrs({"Note on height coordinate": text})
+    ds_list.append(ds)
+    del ds
+
+# concatenate all years into one dataset and save to netcdf
+ds_merged = xr.concat(ds_list, "time")
+ds_merged.to_netcdf("/jetfs/scratch/avoigt/CLOUDSAT/2B-FLXHR-LIDAR.P2_R04.heatingrates_binned.2006-2011.nc")
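
A quick sanity check on the pentad time axis built in the loop above; np.arange with datetime64[D] yields exactly 73 midpoints per year, matching the npents = 73 pentads of the binning:

import numpy as np

time = np.arange("2006-01-03", "2006-12-31", 5, dtype="datetime64[D]")
print(time.size, time[0], time[-1])  # 73 2006-01-03 2006-12-29
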
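Finally, a minimal sketch of opening the merged product written above (path as in the script; the expected sizes follow from 6 years x 73 pentads and the 125 x 72 x 144 binning):

import xarray as xr

ds = xr.open_dataset("/jetfs/scratch/avoigt/CLOUDSAT/2B-FLXHR-LIDAR.P2_R04.heatingrates_binned.2006-2011.nc")
# SW/LW all-sky and clear-sky heating rates on (time, height, lat, lon)
print(ds["HR_allsky_sw"].sizes)  # expected: {'time': 438, 'height': 125, 'lat': 72, 'lon': 144}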