Skip to content
Snippets Groups Projects
Commit d179db75 authored by Aiko Voigt's avatar Aiko Voigt
Browse files

Adds analysis for R04

parent b2c0159e
Branches
No related tags found
No related merge requests found
......@@ -122,6 +122,8 @@ def read_toaswin(fname: str):
"""
Read TOA incoming solar radiation.
Note: works for R05 but not R04 release of 2b-flxhr-lidar
Input:
- fname: full path of hdf file (string)
Output:
......@@ -146,6 +148,36 @@ def read_toaswin(fname: str):
return da.to_dataset()
def read_toaswin_r04(fname: str):
"""
Read TOA incoming solar radiation for release R04 of 2b-flxhr-lidar.
Note: FD_TOA_IncomingSolar does not exist for R04 release.
Input:
- fname: full path of hdf file (string)
Output:
- toaswin: xarray dataset with toaswin
"""
import rioxarray as rxr
import xarray as xr
import numpy as np
data = rxr.open_rasterio(fname)
# all-sky downward flux
FD = data[0]["FD"]
missval = FD.min().values
FD = 0.1*xr.where(FD==missval, np.nan, FD)
# toa sw in is the upper most level of the shortwave component of FD
toaswin = FD.isel(band=0).isel(x=0)
# construct xarray dataset following the dimensions and coordinates of 2B-FLXHR-LIDAR
y = np.arange(0,np.size(toaswin))+0.5
da = xr.DataArray(toaswin, name="TOASWIN", dims=("y"), coords={"y": y})
return da.to_dataset()
def read_fluxes(fname: str):
"""
Reads radiative fluxes from 2B-FLXHR-LIDAR file.
......
# Generate binned version of heating rates for each year and save as netcdf file
# Binning is done for 5-day pentads and 2.5 deg lat and lon bins
#
# same as make_binned_heating.py but for revision R04
#
# on IMG JET call with /jetfs/home/avoigt/micromamba/envs/cloudsat-calipso/bin/python3.12 make_binned_heating_R04.py
import xarray as xr
import numpy as np
import pandas as pd
from pathlib import Path
from multiprocessing import Process, Queue
import os
import warnings
warnings.filterwarnings("ignore")
import sys
sys.path.append("/jetfs/home/avoigt/cloudsat-calipso-heating-rates/core/")
import core as core
# define the binning into pentads x lats x lons
nlats=72; dlats=2.5;
lats=np.linspace(-90+dlats/2, 90-dlats/2, nlats)
nlons=144; dlons=2.5;
lons=np.linspace(-180+dlons/2, 180.0-dlons/2, nlons)
ddays=5; npents=np.int64(365/5); # pentad means
pents=np.arange(0,npents)
# generate list of 2b-flxhr-lidar and ecmwf-aux files for a given year
def get_filelists(year: str):
flist_cs=list() # 2b-flxhr-lidar
flist_ec=list() # ecmwf-aux
for file_cs in Path("/jetfs/shared-data/CLOUDSAT/Data/2B-FLXHR-LIDAR.P2_R04/"+year+"/").rglob(year+"*.hdf"):
file_ec = Path(str(file_cs).replace("/2B-FLXHR-LIDAR.P2_R04/","/ECMWF-AUX.P1_R05/").replace("_CS_2B-FLXHR-LIDAR_GRANULE_P2_R04","_CS_ECMWF-AUX_GRANULE_P1_R05").replace(".hdf", "_F00.hdf"))
# only keep those combinations of file_cs, file_ec for which the ecmwf-aux file is available
if os.path.isfile(file_cs) and os.path.isfile(file_ec):
flist_cs.append(file_cs)
flist_ec.append(file_ec)
return flist_cs, flist_ec
# function to compute heating rates, will be called by multiprocessing
def batchcompute_heatingrates(flist_cs, flist_ec, queue):
# "climatology" arrays
hr_allsky = np.zeros((npents, 2, 125, nlats, nlons))
hr_clrsky = np.zeros((npents, 2, 125, nlats, nlons))
# count the number of rays that entered the summation for allsky and clrsky
# this is needed to compute the mean over the bin
nbr_allsky = np.zeros((npents, 2, 125, nlats, nlons))
nbr_clrsky = np.zeros((npents, 2, 125, nlats, nlons))
for ifile in range(len(flist_cs)):
file_cs = flist_cs[ifile];
file_ec = flist_ec[ifile];
data = xr.merge([core.read_fluxes(str(file_cs)), core.read_toaswin_r04(str(file_cs)),
core.read_plevthickness(str(file_ec)), core.read_lon_lat(str(file_cs)),
core.read_time(str(file_cs))])
hr = core.compute_heatingrates(data)
# derive indices for binned data
ind_pents= np.int64(np.floor(pd.to_datetime(data.TIME).dayofyear-1)/ddays)
# leap years could lead to a day with pentad index npents, we catch this manually
# and set the index to npents-1 for this special case
ind_pents = np.where(ind_pents>npents-1, npents-1, ind_pents)
ind_lats = np.int64(np.round((data.LAT+(90-dlats/2))/dlats))
ind_lons = np.int64(np.round((data.LON+(180-dlons/2))/dlons))
# longitude of 180 leads to an ind_lons=nlons, we catch this manually here
# and set the index to nlons-1 for this special case
ind_lons = np.where(ind_lons>nlons-1, nlons-1, ind_lons)
# loop over rays and accumulate heating rates
# for this, set all nan values to zero and keep track of the number of accumulated not-nan values to later compute the mean
for iy in np.arange(hr.y.size):
# manual fix: some rays have data.TIME="NaT", which leads to invalid ind_pents values such as -9223372036854775808
# we ignore these rays here
if ind_pents[iy]>=0 and ind_pents[iy]<npents:
hr_allsky[ind_pents[iy],:,:,ind_lats[iy], ind_lons[iy]] += np.nan_to_num(hr["HR"].values[:,iy,:], nan=0.0)
hr_clrsky[ind_pents[iy],:,:,ind_lats[iy], ind_lons[iy]] += np.nan_to_num(hr["HR_NC"].values[:,iy,:], nan=0.0)
nbr_allsky[ind_pents[iy],:,:,ind_lats[iy], ind_lons[iy]] += np.int64(1-np.isnan(hr["HR"].values[:,iy,:]))
nbr_clrsky[ind_pents[iy],:,:,ind_lats[iy], ind_lons[iy]] += np.int64(1-np.isnan(hr["HR_NC"].values[:,iy,:]))
queue.put([hr_allsky, hr_clrsky, nbr_allsky, nbr_clrsky])
return None
# loop over years
for year in ["2006", "2007", "2008", "2009", "2010", "2011"]:
print("Working on year", year)
flist_cs, flist_ec = get_filelists(year)
# break file lists into smaller chunks that can be processed in parallel
nprocs = 40 # number of parallel processes
nfiles = len(flist_cs) # number of 2b-flxr-lidar files to process; there are equally many ecmwf-aux files
chk_size = np.int64(nfiles/nprocs)+1 # number of files per process, add 1 to make sure include all files
flist_cs_chk = [flist_cs[i*chk_size:(i+1)*chk_size] for i in range(0, nprocs)]
flist_ec_chk = [flist_ec[i*chk_size:(i+1)*chk_size] for i in range(0, nprocs)]
# arrays with chunks of binned data: nprocs x months x bands x height x lat x lon
hr_allsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
hr_clrsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
nbr_allsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
nbr_clrsky_chk = np.zeros((nprocs, npents, 2, 125, nlats, nlons))
# store the results from each process
queue = Queue()
processes = [Process(target=batchcompute_heatingrates,
args=(flist_cs_chk[i], flist_ec_chk[i], queue)) for i in range(0, nprocs)]
for process in processes: process.start() # start all processes
for i in range(nprocs): # collect results from processes
[hr_allsky_chk[i], hr_clrsky_chk[i], nbr_allsky_chk[i], nbr_clrsky_chk[i]] = queue.get()
for process in processes: process.join() # wait for all processes to complete
# combine results from processes
hr_allsky = np.nansum(hr_allsky_chk, axis=0)/np.nansum(nbr_allsky_chk, axis=0)
hr_clrsky = np.nansum(hr_clrsky_chk, axis=0)/np.nansum(nbr_clrsky_chk, axis=0)
# save to netcdf via xarray
da_hr_allsky_sw = xr.DataArray(hr_allsky[:,0], name="HR_allsky_sw", dims=("pentad", "height", "lat", "lon",), coords={"pentad": pents, "height": np.arange(0,125), "lat": lats, "lon": lons})
da_hr_allsky_lw = xr.DataArray(hr_allsky[:,1], name="HR_allsky_lw", dims=("pentad", "height", "lat", "lon",), coords={"pentad": pents, "height": np.arange(0,125), "lat": lats, "lon": lons})
da_hr_clrsky_sw = xr.DataArray(hr_clrsky[:,0], name="HR_clrsky_sw", dims=("pentad", "height", "lat", "lon",), coords={"pentad": pents, "height": np.arange(0,125), "lat": lats, "lon": lons})
da_hr_clrsky_lw = xr.DataArray(hr_clrsky[:,1], name="HR_clrsky_lw", dims=("pentad", "height", "lat", "lon",), coords={"pentad": pents, "height": np.arange(0,125), "lat": lats, "lon": lons})
xr.merge([da_hr_allsky_sw, da_hr_allsky_lw, da_hr_clrsky_sw, da_hr_clrsky_lw]).to_netcdf("/jetfs/scratch/avoigt/CLOUDSAT/2B-FLXHR-LIDAR.P2_R04.heatingrates_binned."+year+".nc")
# Take the yearly output of make_binned_heating.py and postprocess: proper time and height coordinates, set metadata, save into one netcdf file with all years
#
# same as postprocess_binned_heating.py but for R04 revision
#
# on IMG JET call with /jetfs/home/avoigt/micromamba/envs/cloudsat-calipso/bin/python3.12 postprocess_binned_heating_R04.py
import xarray as xr
import numpy as np
import sys
sys.path.append("/jetfs/home/avoigt/cloudsat-calipso-heating-rates/core/")
import core as core
import warnings
warnings.filterwarnings("ignore")
# derive characteristic height as the mean height from a sample granule
file="/jetfs/shared-data/CLOUDSAT/Data/2B-FLXHR-LIDAR.P2_R04/2010/149/2010149011757_21723_CS_2B-FLXHR-LIDAR_GRANULE_P2_R04_E03.hdf"
height = core.read_height(file)["HGT"].mean("y")
ds_list = list()
for year in ["2006", "2007", "2008", "2009", "2010", "2011"]:
ds = xr.open_dataset("/jetfs/scratch/avoigt/CLOUDSAT/2B-FLXHR-LIDAR.P2_R04.heatingrates_binned."+year+".nc", chunks="auto")
time = np.arange(year+"-01-03", year+"-12-31", 5, dtype='datetime64[D]')
ds = ds.rename({"pentad": "time"}).assign_coords({"time": time})
ds = ds.assign_coords({"height": height.values})
ds = ds.assign_attrs({"Data": "Atmospheric radiative heating rates based on 2B-FLXHR-LIDAR.P2_R04"})
ds = ds.assign_attrs({"Data producer": "Aiko Voigt, Dept of Meteorology and Geophysics, University of Vienna"})
ds = ds.assign_attrs({"Time": "Data generated on May 13, 2024"})
ds = ds.assign_attrs({"Software": "https://gitlab.phaidra.org/climate/cloudsat-calipso-heating-rates"})
text = "Geographical variations in the actual height of the height bins are neglected for simplicity. " \
"Data on the same height bins are averaged together. "\
"The height of the heights bins is defined as the mean height along a sample granule."
ds = ds.assign_attrs({"Note on height coordinate": text})
ds_list.append(ds)
del ds
# merge into one dataset and save to netcdf
ds_merged = xr.concat(ds_list, "time")
ds_merged.to_netcdf("/jetfs/scratch/avoigt/CLOUDSAT/2B-FLXHR-LIDAR.P2_R04.heatingrates_binned.2006-2011.nc")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment