Adds analysis for R04

d179db75 · Aiko Voigt · b2c0159e · d179db75 · d179db75 · d179db75
Commit d179db75 authored 11 months ago by Aiko Voigt
--- a/core/core.py
+++ b/core/core.py
@@ -122,6 +122,8 @@ def read_toaswin(fname: str):
    """
    Read TOA incoming solar radiation.

+    Note: works for R05 but not R04 release of 2b-flxhr-lidar
+   
    Input:
    - fname: full path of hdf file (string)
    Output:
@@ -146,6 +148,36 @@ def read_toaswin(fname: str):
    return da.to_dataset()


+def read_toaswin_r04(fname: str):
+    """
+    Read TOA incoming solar radiation for release R04 of 2b-flxhr-lidar.
+
+    Note: FD_TOA_IncomingSolar does not exist for R04 release.
+
+    Input:
+    - fname: full path of hdf file (string)
+    Output:
+    - toaswin: xarray dataset with toaswin
+    """
+    import rioxarray as rxr
+    import xarray as xr
+    import numpy as np
+
+    data = rxr.open_rasterio(fname)
+
+    # all-sky downward flux
+    FD = data[0]["FD"]
+    missval = FD.min().values
+    FD = 0.1*xr.where(FD==missval, np.nan, FD)
+    
+    # toa sw in is the upper most level of the shortwave component of FD
+    toaswin = FD.isel(band=0).isel(x=0)
+    # construct xarray dataset following the dimensions and coordinates of 2B-FLXHR-LIDAR
+    y = np.arange(0,np.size(toaswin))+0.5
+    da = xr.DataArray(toaswin, name="TOASWIN", dims=("y"), coords={"y": y})
+    return da.to_dataset()
+
+
 def read_fluxes(fname: str):
    """
    Reads radiative fluxes from 2B-FLXHR-LIDAR file.

--- a/make_binned_heating_R04.py
+++ b/make_binned_heating_R04.py
+# Generate binned version of heating rates for each year and save as netcdf file
+# Binning is done for 5-day pentads and 2.5 deg lat and lon bins
+#
+# same as make_binned_heating.py but for revision R04
+#
+# on IMG JET call with /jetfs/home/avoigt/micromamba/envs/cloudsat-calipso/bin/python3.12  make_binned_heating_R04.py
+
+import xarray as xr
+import numpy as np
+import pandas as pd
+from pathlib import Path
+from multiprocessing import Process, Queue
+import os
+
+import warnings
+warnings.filterwarnings("ignore")
+
+import sys
+sys.path.append("/jetfs/home/avoigt/cloudsat-calipso-heating-rates/core/")
+import core as core
+
+
+# define the binning into pentads x lats x lons
+nlats=72; dlats=2.5;
+lats=np.linspace(-90+dlats/2, 90-dlats/2, nlats)
+nlons=144; dlons=2.5;
+lons=np.linspace(-180+dlons/2, 180.0-dlons/2, nlons)
+ddays=5; npents=np.int64(365/5); # pentad means
+pents=np.arange(0,npents) 
+
+
+# generate list of 2b-flxhr-lidar and ecmwf-aux files for a given year
+def get_filelists(year: str):
+    flist_cs=list() # 2b-flxhr-lidar
+    flist_ec=list() # ecmwf-aux
+    for file_cs in Path("/jetfs/shared-data/CLOUDSAT/Data/2B-FLXHR-LIDAR.P2_R04/"+year+"/").rglob(year+"*.hdf"):
+        file_ec = Path(str(file_cs).replace("/2B-FLXHR-LIDAR.P2_R04/","/ECMWF-AUX.P1_R05/").replace("_CS_2B-FLXHR-LIDAR_GRANULE_P2_R04","_CS_ECMWF-AUX_GRANULE_P1_R05").replace(".hdf", "_F00.hdf"))
+        # only keep those combinations of file_cs, file_ec for which the ecmwf-aux file is available
+        if os.path.isfile(file_cs) and os.path.isfile(file_ec):
+            flist_cs.append(file_cs)
+            flist_ec.append(file_ec)
+    return flist_cs, flist_ec
+
+
+# function to compute heating rates, will be called by multiprocessing
+def batchcompute_heatingrates(flist_cs, flist_ec, queue):
+    # "climatology" arrays
+    hr_allsky = np.zeros((npents, 2, 125, nlats, nlons))
+    hr_clrsky = np.zeros((npents, 2, 125, nlats, nlons))
+    # count the number of rays that entered the summation for allsky and clrsky
+    # this is needed to compute the mean over the bin
+    nbr_allsky = np.zeros((npents, 2, 125, nlats, nlons))
+    nbr_clrsky = np.zeros((npents, 2, 125, nlats, nlons))
+
+    for ifile in range(len(flist_cs)):
+        file_cs = flist_cs[ifile];
+        file_ec = flist_ec[ifile];
+        data = xr.merge([core.read_fluxes(str(file_cs)), core.read_toaswin_r04(str(file_cs)), 
+                         core.read_plevthickness(str(file_ec)), core.read_lon_lat(str(file_cs)),
+                         core.read_time(str(file_cs))])
+        hr = core.compute_heatingrates(data)
+
+        # derive indices for binned data
+        ind_pents= np.int64(np.floor(pd.to_datetime(data.TIME).dayofyear-1)/ddays)
+        # leap years could lead to a day with pentad index npents, we catch this manually
+        # and set the index to npents-1 for this special case
+        ind_pents = np.where(ind_pents>npents-1, npents-1, ind_pents)
+        ind_lats = np.int64(np.round((data.LAT+(90-dlats/2))/dlats))
+        ind_lons = np.int64(np.round((data.LON+(180-dlons/2))/dlons))
+        # longitude of 180 leads to an ind_lons=nlons, we catch this manually here
+        # and set the index to nlons-1 for this special case
+        ind_lons = np.where(ind_lons>nlons-1, nlons-1, ind_lons)
+
+        # loop over rays and accumulate heating rates
+        # for this, set all nan values to zero and keep track of the number of accumulated not-nan values to later compute the mean
+        for iy in np.arange(hr.y.size):
+            # manual fix: some rays have data.TIME="NaT", which leads to invalid ind_pents values such as -9223372036854775808
+            # we ignore these rays here
+            if ind_pents[iy]>=0 and ind_pents[iy]<npents:
+                hr_allsky[ind_pents[iy],:,:,ind_lats[iy], ind_lons[iy]] += np.nan_to_num(hr["HR"].values[:,iy,:], nan=0.0)
+                hr_clrsky[ind_pents[iy],:,:,ind_lats[iy], ind_lons[iy]] += np.nan_to_num(hr["HR_NC"].values[:,iy,:], nan=0.0)
+                nbr_allsky[ind_pents[iy],:,:,ind_lats[iy], ind_lons[iy]] += np.int64(1-np.isnan(hr["HR"].values[:,iy,:]))
+                nbr_clrsky[ind_pents[iy],:,:,ind_lats[iy], ind_lons[iy]] += np.int64(1-np.isnan(hr["HR_NC"].values[:,iy,:]))
+    queue.put([hr_allsky, hr_clrsky, nbr_allsky, nbr_clrsky])
+    return None
+
+
+# loop over years
+for year in ["2006", "2007", "2008", "2009", "2010", "2011"]:
+    
+    print("Working on year", year)
+
+    flist_cs, flist_ec = get_filelists(year)
+
+    # break file lists into smaller chunks that can be processed in parallel
+    nprocs = 40 # number of parallel processes
+    nfiles = len(flist_cs) # number of 2b-flxr-lidar files to process; there are equally many ecmwf-aux files
+    chk_size = np.int64(nfiles/nprocs)+1 # number of files per process, add 1 to make sure include all files
+    flist_cs_chk = [flist_cs[i*chk_size:(i+1)*chk_size] for i in range(0, nprocs)]
+    flist_ec_chk = [flist_ec[i*chk_size:(i+1)*chk_size] for i in range(0, nprocs)]
+
+    # arrays with chunks of binned data: nprocs x months x bands x height x lat x lon
+    hr_allsky_chk  = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
+    hr_clrsky_chk  = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
+    nbr_allsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
+    nbr_clrsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
+
+    # store the results from each process
+    queue = Queue()
+    processes = [Process(target=batchcompute_heatingrates, 
+                         args=(flist_cs_chk[i], flist_ec_chk[i], queue)) for i in range(0, nprocs)]
+    for process in processes: process.start() # start all processes
+    for i in range(nprocs): # collect results from processes
+        [hr_allsky_chk[i], hr_clrsky_chk[i], nbr_allsky_chk[i], nbr_clrsky_chk[i]] = queue.get()
+    for process in processes: process.join()  # wait for all processes to complete
+
+    # combine results from processes
+    hr_allsky = np.nansum(hr_allsky_chk, axis=0)/np.nansum(nbr_allsky_chk, axis=0)
+    hr_clrsky = np.nansum(hr_clrsky_chk, axis=0)/np.nansum(nbr_clrsky_chk, axis=0)
+
+    # save to netcdf via xarray
+    da_hr_allsky_sw = xr.DataArray(hr_allsky[:,0], name="HR_allsky_sw", dims=("pentad", "height", "lat", "lon",), coords={"pentad": pents, "height": np.arange(0,125), "lat": lats, "lon": lons})
+    da_hr_allsky_lw = xr.DataArray(hr_allsky[:,1], name="HR_allsky_lw", dims=("pentad", "height", "lat", "lon",), coords={"pentad": pents, "height": np.arange(0,125), "lat": lats, "lon": lons})
+    da_hr_clrsky_sw = xr.DataArray(hr_clrsky[:,0], name="HR_clrsky_sw", dims=("pentad", "height", "lat", "lon",), coords={"pentad": pents, "height": np.arange(0,125), "lat": lats, "lon": lons})
+    da_hr_clrsky_lw = xr.DataArray(hr_clrsky[:,1], name="HR_clrsky_lw", dims=("pentad", "height", "lat", "lon",), coords={"pentad": pents, "height": np.arange(0,125), "lat": lats, "lon": lons})
+    xr.merge([da_hr_allsky_sw, da_hr_allsky_lw, da_hr_clrsky_sw, da_hr_clrsky_lw]).to_netcdf("/jetfs/scratch/avoigt/CLOUDSAT/2B-FLXHR-LIDAR.P2_R04.heatingrates_binned."+year+".nc")
--- a/postprocess_binned_heating_R04.py
+++ b/postprocess_binned_heating_R04.py
+# Take the yearly output of make_binned_heating.py and postprocess: proper time and height coordinates, set metadata, save into one netcdf file with all years
+#
+# same as postprocess_binned_heating.py but for R04 revision
+# 
+# on IMG JET call with /jetfs/home/avoigt/micromamba/envs/cloudsat-calipso/bin/python3.12  postprocess_binned_heating_R04.py
+
+import xarray as xr
+import numpy as np
+
+import sys
+sys.path.append("/jetfs/home/avoigt/cloudsat-calipso-heating-rates/core/")
+import core as core
+
+import warnings
+warnings.filterwarnings("ignore")
+
+
+# derive characteristic height as the mean height from a sample granule
+file="/jetfs/shared-data/CLOUDSAT/Data/2B-FLXHR-LIDAR.P2_R04/2010/149/2010149011757_21723_CS_2B-FLXHR-LIDAR_GRANULE_P2_R04_E03.hdf"
+height = core.read_height(file)["HGT"].mean("y")
+
+
+ds_list = list()
+for year in ["2006", "2007", "2008", "2009", "2010", "2011"]:
+    ds = xr.open_dataset("/jetfs/scratch/avoigt/CLOUDSAT/2B-FLXHR-LIDAR.P2_R04.heatingrates_binned."+year+".nc", chunks="auto")
+    time = np.arange(year+"-01-03", year+"-12-31", 5, dtype='datetime64[D]')
+    ds = ds.rename({"pentad": "time"}).assign_coords({"time": time})
+    ds = ds.assign_coords({"height": height.values})
+    ds = ds.assign_attrs({"Data": "Atmospheric radiative heating rates based on 2B-FLXHR-LIDAR.P2_R04"})
+    ds = ds.assign_attrs({"Data producer": "Aiko Voigt, Dept of Meteorology and Geophysics, University of Vienna"})
+    ds = ds.assign_attrs({"Time": "Data generated on May 13, 2024"})
+    ds = ds.assign_attrs({"Software": "https://gitlab.phaidra.org/climate/cloudsat-calipso-heating-rates"})
+    text = "Geographical variations in the actual height of the height bins are neglected for simplicity. " \
+         "Data on the same height bins are averaged together. "\
+         "The height of the heights bins is defined as the mean height along a sample granule."
+    ds = ds.assign_attrs({"Note on height coordinate": text})
+    ds_list.append(ds)
+    del ds
+
+# merge into one dataset and save to netcdf
+ds_merged = xr.concat(ds_list, "time")
+ds_merged.to_netcdf("/jetfs/scratch/avoigt/CLOUDSAT/2B-FLXHR-LIDAR.P2_R04.heatingrates_binned.2006-2011.nc")
+