diff --git a/config/jet.py b/config/jet.py
index 54df3835c982d055e9825b0f4162dcd68b94a0dc..7b70f86bbdd754b1072666a88ee71e0a3732d261 100755
--- a/config/jet.py
+++ b/config/jet.py
@@ -4,17 +4,17 @@ from dartwrf.exp_config import exp
 cluster = utils.ClusterConfig(exp)
 cluster.name = 'jet'
-cluster.max_nproc = 12
+cluster.max_nproc = 20
 cluster.use_slurm = True
 cluster.size_WRF_jobarray = exp.n_ens
-cluster.np_WRF = 12
+cluster.np_WRF = 16

 # binaries
-cluster.python = '/jetfs/home/lkugler/miniconda3/envs/DART/bin/python'
+cluster.python = '/jetfs/home/lkugler/miniforge3/envs/verif/bin/python'
 cluster.ncks = '/jetfs/spack/opt/spack/linux-rhel8-skylake_avx512/intel-2021.7.1/nco-5.1.0-izrhxv24jqco5epjhf5ledsqwanojc5m/bin/ncks'
-cluster.ideal = '/jetfs/home/lkugler/bin/ideal-v4.3_v1.22.exe'
+cluster.ideal = '/jetfs/home/lkugler/data/compile/bin/ideal-v4.3-quarter_ss-20250204.exe'
 cluster.wrfexe = '/jetfs/home/lkugler/bin/wrf-v4.3_v1.22_ifort_20230413.exe'
-cluster.dart_modules = 'module purge; module load rttov/v13.2-gcc-8.5.0; '
+cluster.dart_modules = 'module purge; module load rttov/v13.2-gcc-8.5.0'
 cluster.wrf_modules = """module purge;
 module load intel-oneapi-compilers/2022.2.1-zkofgc5 hdf5/1.12.2-intel-2021.7.1-w5sw2dq netcdf-fortran/4.5.3-intel-2021.7.1-27ldrnt netcdf-c/4.7.4-intel-2021.7.1-lnfs5zz intel-oneapi-mpi/2021.7.1-intel-2021.7.1-pt3unoz
 export HDF5=/jetfs/spack/opt/spack/linux-rhel8-skylake_avx512/intel-2021.7.1/hdf5-1.12.2-w5sw2dqpcq2orlmeowleamoxr65dhhdc
 """
@@ -26,16 +26,31 @@ cluster.archive_base = '/jetfs/home/lkugler/data/sim_archive/'

 # paths used as input
 cluster.srcdir = '/jetfs/home/lkugler/data/compile/WRF-4.3/run'
-cluster.dart_srcdir = '/jetfs/home/lkugler/data/compile/DART/DART-10.8.3/models/wrf/work'
+cluster.dart_srcdir = '/jetfs/home/lkugler/data/compile/DART/DART-10.8.3_10pct/models/wrf/work/'
 cluster.rttov_srcdir = '/jetfs/home/lkugler/data/compile/RTTOV13/rtcoef_rttov13/'
 cluster.dartwrf_dir = '/jetfs/home/lkugler/DART-WRF/'

 # other inputs
-cluster.geo_em_for_WRF_ideal = '/jetfs/home/lkugler/data/geo_em.d01.nc'
+cluster.geo_em_nature = '/jetfs/home/lkugler/data/sim_archive/geo_em.d01.nc.250m_1600x1600'
+cluster.geo_em_forecast = '/jetfs/home/lkugler/data/sim_archive/geo_em.d01.nc.2km_200x200'
 cluster.obs_impact_filename = cluster.dartwrf_dir+'/templates/impactfactor_T.txt'
-cluster.namelist = cluster.dartwrf_dir+'/templates/namelist.input'
+cluster.namelist = cluster.dartwrf_dir+'/templates/namelist.input'  #_eta_nat'
 cluster.run_WRF = '/jetfs/home/lkugler/DART-WRF/dartwrf/run_ens.jet.sh'

-cluster.slurm_cfg = {"account": "lkugler", "partition": "compute",  #"nodelist": "jet07",
+cluster.slurm_cfg = {"account": "lkugler", "partition": "all",
                      "ntasks": "1", "ntasks-per-core": "1", "mem": "30G",
                      "mail-type": "FAIL", "mail-user": "lukas.kugler@univie.ac.at"}
+
+# WRF file format, will only change if WRF changes
+cluster.wrfout_format = '/wrfout_d01_%Y-%m-%d_%H:%M:%S'
+
+# pattern for the init_time folder in sim_archive
+cluster.pattern_init_time = "/%Y-%m-%d_%H:%M/"
+
+# how an obs_seq.out file is archived
+cluster.pattern_obs_seq_out = cluster.archivedir + \
+    "/diagnostics/%Y-%m-%d_%H:%M_obs_seq.out"
+
+# how an obs_seq.final file is archived
+cluster.pattern_obs_seq_final = cluster.archivedir + \
+    "/diagnostics/%Y-%m-%d_%H:%M_obs_seq.final"
diff --git a/cycled_exp.py b/cycled_exp.py
index b2173aaa6be30f0ff695e127f6c26822f727ad1e..42b71b51688e12b62a41a86590da62c6cc51932c 100755
--- a/cycled_exp.py
+++ b/cycled_exp.py
@@ -1,46 +1,58 @@
 #!/usr/bin/python3
-import 
os, sys, shutil, glob, warnings +import os +import sys import datetime as dt from dartwrf.workflows import WorkFlows +from dartwrf.utils import save_dict if __name__ == "__main__": """ Run a cycled OSSE with WRF and DART. """ - w = WorkFlows(exp_config='exp_template.py', server_config='jet.py') + w = WorkFlows(exp_config='exp_hires.py', server_config='jet.py') timedelta_integrate = dt.timedelta(minutes=15) timedelta_btw_assim = dt.timedelta(minutes=15) id = None - if True: # warm bubble + if False: # warm bubble prior_path_exp = '/jetfs/scratch/lkugler/data/sim_archive/exp_v1.19_P3_wbub7_noDA' init_time = dt.datetime(2008, 7, 30, 12) time = dt.datetime(2008, 7, 30, 12, 30) - last_assim_time = dt.datetime(2008, 7, 30, 13,30) + last_assim_time = dt.datetime(2008, 7, 30, 13, 30) forecast_until = dt.datetime(2008, 7, 30, 18) - + w.prepare_WRFrundir(init_time) # id = w.run_ideal(depends_on=id) - # id = w.wrfinput_insert_wbubble(depends_on=id) - - if False: # random - prior_path_exp = '/jetfs/scratch/lkugler/data/sim_archive/exp_v1.19_P2_noDA' + # id = w.wrfinput_insert_wbubble(depends_on=id) - init_time = dt.datetime(2008, 7, 30, 7) + prior_path_exp = '/jetfs/scratch/a11716773/master_thesis_2023/data2/sim_archive/free_ens/' + init_time = dt.datetime(2008, 7, 30, 11) time = dt.datetime(2008, 7, 30, 12) + last_assim_time = dt.datetime(2008, 7, 30, 13) + forecast_until = dt.datetime(2008, 7, 30, 17) + + prior_path_exp = '/jetfs/scratch/lkugler/data/sim_archive/exp_v1.19_P2_noDA+1/' + init_time = dt.datetime(2008, 7, 30, 12, 30) + time = dt.datetime(2008, 7, 30, 13) last_assim_time = dt.datetime(2008, 7, 30, 14) forecast_until = dt.datetime(2008, 7, 30, 18) + if True: # random + # exp_v1.19_P2_noDA+1/' + prior_path_exp = '/jetfs/scratch/lkugler/data/sim_archive/exp_nat250m_noDA_f/' + init_time = dt.datetime(2008, 7, 30, 11,45) + time = dt.datetime(2008, 7, 30, 12) + last_assim_time = dt.datetime(2008, 7, 30, 13) + forecast_until = dt.datetime(2008, 7, 30, 13,15) + w.prepare_WRFrundir(init_time) # id = w.run_ideal(depends_on=id) # prior_path_exp = w.cluster.archivedir - prior_init_time = init_time - prior_valid_time = time while time <= last_assim_time: @@ -49,10 +61,53 @@ if __name__ == "__main__": # i.e. 
13z as a prior to assimilate 12z observations prior_valid_time = time - id = w.assimilate(time, prior_init_time, prior_valid_time, prior_path_exp, depends_on=id) - + if False: + ACF_config = dict( + var='WV73', + # 192, 96, 48, 24, 12), #(256, 128, 64, 32, 16), # # + scales_km=(12,), #192, 96, 48, 24, 12), + observed_width_km=384, + dx_km_obs=1.0, + dx_km_forecast=2.0, + # ('value', 0.6), #('percentile', 30), #('value', 230), #('value', 0.6), #('value', 230), #('percentile', 90), # ('value', 0.6), # + threshold=('value', 230), + difference=False, + first_guess_pattern='/RT_wrfout_d01_%Y-%m-%d_%H:%M:%S.nc', + + # observed_data='/jetfs/scratch/a11716773/master_thesis_2023/data2/sim_archive/nature_dx=2000m/RT_wrfout_d01_%Y-%m-%d_%H:%M:%S.nc', + observed_data='/jetfs/home/lkugler/data/sim_archive/nat_250m_obs1km/2008-07-30_12:00/1/RT_wrfout_d01_%Y-%m-%d_%H_%M_%S.nc', + # observed_data='/jetfs/home/lkugler/data/sim_archive/exp_v1.18_P1_nature+1/2008-07-30_06:00/1/RT_wrfout_d01_%Y-%m-%d_%H_%M_%S.nc', + f_grid_obs='/jetfs/home/lkugler/data/sim_archive/geo_em.d01.nc.250m-1km_400x400', + + # obs_err_csv='/jetfs/home/lkugler/CloudfractionDA/data/hrNat_VIS/OE_VIS_CF_0.6.csv', + # obs_err_csv='/jetfs/home/lkugler/CloudfractionDA/data/hrNat_VIS/OE_VIS_CF_0.6_difference.csv', + # obs_err_csv='/jetfs/home/lkugler/CloudfractionDA/data/hrNat_VIS/OE_VIS_SO_difference.csv', + obs_err_csv='/jetfs/home/lkugler/CloudfractionDA/data/hrNat_IR/OE-WV73_CF_230.csv', + # obs_err_csv='/jetfs/home/lkugler/CloudfractionDA/data/hrNat_IR/OE-WV73_CF_230_difference.csv', + # obs_err_csv='/jetfs/home/lkugler/CloudfractionDA/data/hrNat_IR/OE-WV73_superobs.csv', + # obs_err_csv='/jetfs/home/lkugler/CloudfractionDA/data/hrNat_IR/OE-WV73_SO_difference.csv', + + # obs_err_csv='/jetfs/home/lkugler/CloudfractionDA/data/exp_v1.18_P1_nature+1/OE-STDEV_delta_192-6_12z-14z.csv', + # obs_err_csv='/jetfs/home/lkugler/CloudfractionDA/data/exp_v1.18_P1_nature+1/obs_error_stddev_delta_approach_12z-14z.csv' + # obs_err_csv='/jetfs/home/lkugler/CloudfractionDA/data/OE_WV73_SO_theoretical.csv', + # obs_err_csv='/jetfs/home/lkugler/CloudfractionDA/data/OE_VIS_SO_theoretical.csv', + # obs_err_csv='/jetfs/home/lkugler/CloudfractionDA/data/obs_error_stddev_andrea.csv', + inflation_OE_var=2.0, + ) + # if time.minute == 0: # first and last assimilation + # ACF_config['scales_km'] = (192, 96, 48, 24, 12) + # else: + # ACF_config['scales_km'] = (24, 12) + + save_dict(ACF_config, time.strftime( + w.cluster.scripts_rundir+'/ACF_config_%H%M.pkl')) + + id = w.assimilate(time, prior_init_time, + prior_valid_time, prior_path_exp, depends_on=id) + # 1) Set posterior = prior - id = w.prepare_IC_from_prior(prior_path_exp, prior_init_time, prior_valid_time, depends_on=id) + id = w.prepare_IC_from_prior( + prior_path_exp, prior_init_time, prior_valid_time, depends_on=id) # 2) Update posterior += updates from assimilation id = w.update_IC_from_DA(time, depends_on=id) @@ -60,41 +115,32 @@ if __name__ == "__main__": # How long shall we integrate? 
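# Illustrative sketch (not part of the patch): cycled_exp.py stores the
# time-dependent ACF_config with save_dict(); assimilate.get_obsseq_out()
# reads it back with load_dict() (see the dartwrf/assimilate.py hunk below).
# Minimal round trip, assuming dartwrf is importable and the two helpers are
# plain pickle wrappers taking (dict, path) and (path), as their use in this
# patch suggests; the /tmp path is hypothetical.
import datetime as dt
from dartwrf.utils import save_dict, load_dict

t = dt.datetime(2008, 7, 30, 12)
f_cfg = t.strftime('/tmp/ACF_config_%H%M.pkl')
save_dict({'var': 'WV73', 'scales_km': (12,)}, f_cfg)  # written per assimilation time
cfg = load_dict(f_cfg)                                 # read inside get_obsseq_out()
assert cfg['var'] == 'WV73'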
timedelta_integrate = timedelta_btw_assim output_restart_interval = timedelta_btw_assim.total_seconds()/60 - if time == last_assim_time: # this_forecast_init.minute in [0,]: # longer forecast every full hour - timedelta_integrate = forecast_until - last_assim_time # dt.timedelta(hours=4) + if time == last_assim_time: + timedelta_integrate = forecast_until - \ + last_assim_time # dt.timedelta(hours=4) output_restart_interval = 9999 # no restart file after last assim # 3) Run WRF ensemble id = w.run_ENS(begin=time, # start integration from here - end=time + timedelta_integrate, # integrate until here - output_restart_interval=output_restart_interval, - first_minutes=True, - depends_on=id) - + end=time + timedelta_integrate, # integrate until here + output_restart_interval=output_restart_interval, + first_second=False, # to get a +1 second forecast after each restart + depends_on=id) + + # evaluate the forecast at +1 second after the assimilation time + # _ = w.evaluate_obs_posterior_after_analysis(time, time+dt.timedelta(seconds=1), depends_on=id) + # as we have WRF output, we can use own exp path as prior - prior_path_exp = w.cluster.archivedir # prior_path_exp = '/jetfs/scratch/lkugler/data/sim_archive/exp_v1.19_P2_noDA/' + prior_path_exp = w.cluster.archivedir + + id = w.create_satimages(time, depends_on=id) - id_sat = w.create_satimages(time, depends_on=id) - # increment time time += timedelta_btw_assim # update time variables prior_init_time = time - timedelta_btw_assim - w.verify_sat(id_sat) + w.verify_sat(id) w.verify_wrf(id) - - -# assim_times = [dt.datetime(2008,7,30,12,30), ] -# time range from 12:30 to 13:30 every 15 minutes -assim_times = [dt.datetime(2008,7,30,12,30) + dt.timedelta(minutes=15*i) for i in range(5)] -tuples = [] -for init in assim_times: - for s in range(30,3*60+1,30): - tuples.append((init, init+dt.timedelta(seconds=s))) - - -# evaluate the forecast at +1 minute after the assimilation time -w.evaluate_obs_posterior_after_analysis(tuples, depends_on=id) diff --git a/dartwrf/assimilate.py b/dartwrf/assimilate.py index 6b669ce3ea402944dac9277eff1fe391ad1de0ba..4059257907127667f20f31482dc1506b6ba65f6d 100755 --- a/dartwrf/assimilate.py +++ b/dartwrf/assimilate.py @@ -1,78 +1,65 @@ -import os, sys, shutil, warnings, glob +import os +import sys +import shutil +import warnings +import glob import time as time_module import datetime as dt import numpy as np -from dartwrf.utils import symlink, copy, mkdir, try_remove, print, shell, write_txt +from dartwrf.utils import symlink, copy, mkdir, try_remove, print, shell, write_txt, load_dict from dartwrf import wrfout_add_geo from dartwrf.obs import error_models as err from dartwrf.obs import obsseq -from dartwrf.obs import create_obsseq_out as osq_out from dartwrf import dart_nml from dartwrf.exp_config import exp from dartwrf.server_config import cluster +def link_DART_exe(): + """Link the DART executables to the run_DART directory + """ + bins = ['perfect_model_obs', 'filter', 'obs_diag', 'obs_seq_to_netcdf'] + for b in bins: + symlink(os.path.join(cluster.dart_srcdir, b), + os.path.join(cluster.dart_rundir, b)) -wrfout_format = '/wrfout_d01_%Y-%m-%d_%H:%M:%S' # WRF file format, will only change if WRF changes -pattern_init_time = "/%Y-%m-%d_%H:%M/" # pattern for the init_time folder in sim_archive -pattern_obs_seq_out = cluster.archivedir + "/diagnostics/%Y-%m-%d_%H:%M_obs_seq.out" # how an obs_seq.out file is archived -pattern_obs_seq_final = cluster.archivedir + "/diagnostics/%Y-%m-%d_%H:%M_obs_seq.final" # how an 
obs_seq.final file is archived - -def _prepare_DART_grid_template(prior_path_exp=False): - # DART needs a wrfinput file as a template for the grid - # No data except grid info will be read from this file - # The grid information must match exactly with the nature file "wrfout_d01" + symlink(cluster.dart_srcdir+'/../../../assimilation_code/programs/gen_sampling_err_table/' + + 'work/sampling_error_correction_table.nc', + cluster.dart_rundir+'/sampling_error_correction_table.nc') - # find any model forecast and link it - if os.path.isfile(cluster.dart_rundir + "/wrfinput_d01"): - pass # file exists - else: - if prior_path_exp == False: - raise ValueError('If run_DART/wrfinput_d01 does not exist, prior_path_exp may not be empty') - else: - try: - f = glob.glob(prior_path_exp+'/*/1/wrfout_d01*')[0] - except IndexError: - raise IOError('Could not find any model forecast file to get the grid information using the pattern '+prior_path_exp+'/*/wrfout_d01*') - print('link', f) - copy(f, cluster.dart_rundir + "/wrfinput_d01") - - # add coordinates if not already present - if cluster.geo_em_for_WRF_ideal: - wrfout_add_geo.run(cluster.geo_em_for_WRF_ideal, cluster.dart_rundir + "/wrfinput_d01") - -def _find_nature(time): - """Find the path to the nature file for the given time +def link_RTTOV_files(): + """Link required files for running RTTOV to run_DART """ - glob_pattern = time.strftime(exp.nature_wrfout_pattern) # replace time in pattern - print('searching for nature in pattern:', glob_pattern) - try: - f_nat = glob.glob(glob_pattern)[0] # find the nature wrfout-file - except IndexError: - raise IOError("no nature found with pattern "+glob_pattern) + if cluster.rttov_srcdir != False: + rttov_files = ['rttov13pred54L/rtcoef_msg_4_seviri_o3.dat', + 'mfasis_lut/rttov_mfasis_cld_msg_4_seviri_deff.H5', + 'cldaer_visir/sccldcoef_msg_4_seviri.dat'] - # check user input - if not 'wrfout' in f_nat.split('/')[-1]: - warnings.warn(f_nat+" does not contain 'wrfout' in filename, are you sure this is a valid nature file?") - - if not os.path.exists(f_nat): - raise IOError(f_nat+" does not exist -> no nature found") - return f_nat + for f_src in rttov_files: + destname = os.path.basename(f_src) + if 'rtcoef' in f_src: + destname = 'rtcoef_msg_4_seviri.dat' + + symlink(cluster.rttov_srcdir + f_src, + cluster.dart_rundir+'/'+destname) + + symlink(cluster.dart_rundir+'/rttov_mfasis_cld_msg_4_seviri_deff.H5', + cluster.dart_rundir+'/rttov_mfasis_cld_msg_4_seviri.H5') # use deff, not OPAC + + symlink(cluster.dart_srcdir+'/../../../observations/forward_operators/rttov_sensor_db.csv', + cluster.dart_rundir+'/rttov_sensor_db.csv') -def prepare_nature_dart(time): - """Prepares DART nature (wrfout_d01) if available +def prepare_DART_grid_template(): + """Prepare DART grid template wrfinput_d01 file from a prior file - Args: - time (dt.datetime): Time at which observations will be made + DART needs a wrfinput file as a template for the grid information. + No data except grid info will be read from this file. 
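# Illustrative sketch (not part of the patch): prepare_prior_ensemble() below
# assembles each member's prior file path from the archive patterns defined in
# config/jet.py. Standalone version of that path construction; the experiment
# path and times are example values taken from cycled_exp.py.
import datetime as dt

prior_path_exp = '/jetfs/scratch/lkugler/data/sim_archive/exp_nat250m_noDA_f'
pattern_init_time = '/%Y-%m-%d_%H:%M/'
wrfout_format = '/wrfout_d01_%Y-%m-%d_%H:%M:%S'
prior_init_time = dt.datetime(2008, 7, 30, 11, 45)
prior_valid_time = dt.datetime(2008, 7, 30, 12)
iens = 1

wrfout_run = (prior_path_exp
              + prior_init_time.strftime(pattern_init_time)
              + str(iens)
              + prior_valid_time.strftime(wrfout_format))
# -> .../exp_nat250m_noDA_f/2008-07-30_11:45/1/wrfout_d01_2008-07-30_12:00:00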
+ The grid information must match exactly with the prior file "wrfout_d01" """ - print("prepare nature") - f_nat = _find_nature(time) - shutil.copy(f_nat, cluster.dart_rundir + "/wrfout_d01") # copy nature wrfout to DART directory - - # add coordinates if necessary - if cluster.geo_em_for_WRF_ideal: - wrfout_add_geo.run(cluster.geo_em_for_WRF_ideal, cluster.dart_rundir + "/wrfout_d01") + copy(cluster.dart_rundir+'/prior_ens1/wrfout_d01', cluster.dart_rundir + "/wrfinput_d01") + if cluster.geo_em_forecast: + wrfout_add_geo.run(cluster.geo_em_forecast, cluster.dart_rundir + "/wrfinput_d01") def prepare_prior_ensemble(assim_time, prior_init_time, prior_valid_time, prior_path_exp): """Prepares DART files for running filter @@ -89,31 +76,28 @@ def prepare_prior_ensemble(assim_time, prior_init_time, prior_valid_time, prior_ print("link wrfout file to DART background file") wrfout_run = ( prior_path_exp - + prior_init_time.strftime(pattern_init_time) + + prior_init_time.strftime(cluster.pattern_init_time) + str(iens) - + prior_valid_time.strftime(wrfout_format) + + prior_valid_time.strftime(cluster.wrfout_format) ) dart_ensdir = cluster.dart_rundir + "/prior_ens" + str(iens) wrfout_dart = dart_ensdir + "/wrfout_d01" # copy prior ens file from archived wrfout files (prior_path_exp) - os.makedirs(dart_ensdir, exist_ok=True) - print("copy", wrfout_run, "to", wrfout_dart) - copy(wrfout_run, wrfout_dart) - - # DART needs a grid file for each ensemble member (this is no real wrfinput file) - symlink(wrfout_dart, dart_ensdir + "/wrfinput_d01") + print("link", wrfout_run, "to", wrfout_dart) + symlink(wrfout_run, wrfout_dart) # ensure prior time matches assim time # can be intentionally different, e.g. by using a prior for a different time if assim_time != prior_valid_time: + copy(wrfout_run, wrfout_dart) print("overwriting time in prior from nature wrfout") - shell(cluster.ncks+ " -A -v XTIME,Times "+ - cluster.dart_rundir+"/wrfout_d01 "+ wrfout_dart) + shell(cluster.ncks + " -A -v XTIME,Times " + + cluster.dart_rundir+"/wrfout_d01 " + wrfout_dart) # this seems to be necessary (else wrong level selection) - if cluster.geo_em_for_WRF_ideal: - wrfout_add_geo.run(cluster.geo_em_for_WRF_ideal, wrfout_dart) + #if cluster.geo_em_forecast: + # wrfout_add_geo.run(cluster.geo_em_forecast, wrfout_dart) use_linked_files_as_prior() write_list_of_outputfiles() @@ -126,6 +110,7 @@ def prepare_prior_ensemble(assim_time, prior_init_time, prior_valid_time, prior_ os.system("rm -rf " + cluster.dart_rundir + "/perfect_output_*") os.system("rm -rf " + cluster.dart_rundir + "/obs_seq.fina*") + def use_linked_files_as_prior(): """Instruct DART to use the prior ensemble as input """ @@ -134,6 +119,7 @@ def use_linked_files_as_prior(): files.append("./prior_ens" + str(iens) + "/wrfout_d01") write_txt(files, cluster.dart_rundir+'/input_list.txt') + def use_filter_output_as_prior(): """Use the last posterior as input for DART, e.g. to evaluate the analysis in observation space """ @@ -144,11 +130,13 @@ def use_filter_output_as_prior(): os.remove(f_new) except: pass - os.rename(cluster.dart_rundir+'/filter_restart_d01.'+str(iens).zfill(4), f_new) + os.rename(cluster.dart_rundir+'/filter_restart_d01.' 
+ + str(iens).zfill(4), f_new) files.append(f_new) write_txt(files, cluster.dart_rundir+'/input_list.txt') + def write_list_of_outputfiles(): files = [] for iens in range(1, exp.n_ens+1): @@ -158,15 +146,16 @@ def write_list_of_outputfiles(): def filter(nproc=12): """Calls DART ./filter program - + Args: nproc (int): number of cores for use in ./filter call - + Returns: None (writes to file) """ - nproc = min(nproc, cluster.max_nproc) - + if hasattr(cluster, 'max_nproc'): + nproc = cluster.max_nproc + print("time now", dt.datetime.now()) print("running filter") os.chdir(cluster.dart_rundir) @@ -175,24 +164,23 @@ def filter(nproc=12): t = time_module.time() if nproc > 1: # -genv I_MPI_PIN_PROCESSOR_LIST=0-"+str(int(nproc) - 1) - shell(cluster.dart_modules+" mpirun -np "+str(int(nproc))+" ./filter > log.filter") + shell(cluster.dart_modules+"; mpirun -np " + + str(int(nproc))+" ./filter > log.filter") else: - shell(cluster.dart_modules+" ./filter > log.filter") + shell(cluster.dart_modules+"; ./filter > log.filter") print("./filter took", int(time_module.time() - t), "seconds") - + if not os.path.isfile(cluster.dart_rundir + "/obs_seq.final"): raise RuntimeError( "obs_seq.final does not exist in run_DART directory. ", "Check log file at " + cluster.dart_rundir + "/log.filter") -############### archiving - def archive_filteroutput(time): """Archive filter output files (filter_restart, preassim, postassim, output_mean, output_sd) """ # archive diagnostics - dir_out = cluster.archivedir + time.strftime(pattern_init_time) + dir_out = cluster.archivedir + time.strftime(cluster.pattern_init_time) os.makedirs(dir_out, exist_ok=True) # copy input.nml to archive @@ -206,18 +194,24 @@ def archive_filteroutput(time): ) # copy preassim/postassim files to archive (not necessary for next forecast run) - try: - for f in ["output_mean.nc", "output_sd.nc"]: # copy mean and sd to archive + for f in ["preassim_mean.nc", "preassim_sd.nc", + "postassim_mean.nc", "postassim_sd.nc", + "output_mean.nc", "output_sd.nc"]: + try: copy(cluster.dart_rundir + "/" + f, dir_out + "/" + f) + except: + warnings.warn(f+" not found") + + if False: # save disk space, dont do this by default + try: + ftypes = ['preassim', 'postassim'] + for ftype in ftypes: + for iens in range(1, exp.n_ens + 1): + fname = "/"+ftype+"_member_" + str(iens).zfill(4) + ".nc" + copy(cluster.dart_rundir + fname, dir_out + fname) + except Exception as e: + warnings.warn(str(e)) - ftypes = ['preassim', 'postassim'] - for ftype in ftypes: - for iens in range(1, exp.n_ens + 1): - fname = "/"+ftype+"_member_" + str(iens).zfill(4) + ".nc" - copy(cluster.dart_rundir + fname, dir_out + fname) - - except Exception as e: - warnings.warn(str(e)) def get_parametrized_error(obscfg, osf_prior): """Calculate the parametrized error for an ObsConfig (one obs type) @@ -242,12 +236,13 @@ def get_parametrized_error(obscfg, osf_prior): if channel == 6: return err.calc_obserr_WV('WV73', Hx_truth, Hx_prior) else: - NotImplementedError('sat_channel not implemented', obscfg.get("sat_channel")) - + NotImplementedError('sat_channel not implemented', + obscfg.get("sat_channel")) + def set_obserr_assimilate_in_obsseqout(oso, outfile="./obs_seq.out"): """"Overwrite existing variance values in obs_seq.out files - + Args: oso (ObsSeq): python representation of obs_seq.out file, will be modified and written to file @@ -266,33 +261,40 @@ def set_obserr_assimilate_in_obsseqout(oso, outfile="./obs_seq.out"): # modify observation error of each kind sequentially where_oso_iskind = 
oso.df.kind == kind - - if obscfg["error_assimilate"] == False: - f_osf = cluster.dart_rundir + "/obs_seq.final" - if not os.path.isfile(f_osf): - evaluate(time, f_out_pattern=f_osf) - - osf_prior = obsseq.ObsSeq(f_osf) # this file was generated by `evaluate()` - - where_osf_iskind = osf_prior.df.kind == kind - - assim_err = get_parametrized_error(obscfg, osf_prior.df[where_osf_iskind]) - oso.df.loc[where_oso_iskind, 'variance'] = assim_err**2 - #assert np.allclose(assim_err, oso.df['variance']**2) # check - else: - # overwrite with user-defined values - oso.df.loc[where_oso_iskind, 'variance'] = obscfg["error_assimilate"]**2 + + if hasattr(obscfg, "error_assimilate"): + if obscfg["error_assimilate"] == False: + # get a parametrized error for this observation type + + f_osf = cluster.dart_rundir + "/obs_seq.final" + if not os.path.isfile(f_osf): + evaluate(time, f_out_pattern=f_osf) + + # this file was generated by `evaluate()` + osf_prior = obsseq.ObsSeq(f_osf) + + where_osf_iskind = osf_prior.df.kind == kind + + assim_err = get_parametrized_error( + obscfg, osf_prior.df[where_osf_iskind]) + oso.df.loc[where_oso_iskind, 'variance'] = assim_err**2 + # assert np.allclose(assim_err, oso.df['variance']**2) # check + else: + # overwrite with user-defined values + oso.df.loc[where_oso_iskind, + 'variance'] = obscfg["error_assimilate"]**2 oso.to_dart(outfile) -def qc_obs(time, oso): + +def reject_small_FGD(time, oso): """Quality control of observations We assume that the prior values have been evaluated and are in `run_DART/obs_seq.final` - + Args: time (datetime): time of the assimilation oso (ObsSeq): python representation of obs_seq.out file, will be modified and written to file - + Returns: None (writes to file) The pre-existing obs_seq.out will be archived. @@ -301,9 +303,10 @@ def qc_obs(time, oso): osf_prior = obsseq.ObsSeq(cluster.dart_rundir + "/obs_seq.final") # obs should be superobbed already! - for i, obscfg in enumerate(exp.observations): + for i, obscfg in enumerate(exp.observations): if i > 0: - raise NotImplementedError('Multiple observation types -> might not work') + raise NotImplementedError( + 'Multiple observation types -> might not work') obs = oso.df.observations.values Hx_prior_mean = osf_prior.df['prior ensemble mean'] @@ -315,8 +318,8 @@ def qc_obs(time, oso): print('removing obs with abs(FGD) < 0.03') Hx_prior = osf_prior.df.get_prior_Hx().T Hx_prior_mean = np.mean(Hx_prior, axis=0) - #Hx_prior_spread = osf_prior.df['prior ensemble spread'].values - #Hx_prior_spread[Hx_prior_spread < 1e-9] = 1e-9 + # Hx_prior_spread = osf_prior.df['prior ensemble spread'].values + # Hx_prior_spread[Hx_prior_spread < 1e-9] = 1e-9 abs_FGD = abs(obs - Hx_prior_mean) # /Hx_prior_spread oso.df = oso.df[abs_FGD > 0.03] # Hx_prior_spread] @@ -328,8 +331,10 @@ def qc_obs(time, oso): else: # set obs to prior mean => no change in mean but spread is adjusted. 
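# Illustrative sketch (not part of the patch): the core of reject_small_FGD().
# Observations whose absolute first-guess departure (observation minus prior
# ensemble mean in observation space) is tiny carry little information and are
# dropped from obs_seq.out. Standalone example with made-up reflectance values:
import numpy as np
import pandas as pd

df = pd.DataFrame({'observations': [0.62, 0.21, 0.58]})  # observed values
Hx_prior_mean = np.array([0.30, 0.20, 0.57])             # prior ensemble mean in obs space
abs_FGD = np.abs(df['observations'].values - Hx_prior_mean)
df_kept = df[abs_FGD > 0.03]        # same threshold as used for VIS 0.6 here
# only the first observation survives; the other two nearly match the prior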
abs_FGD = abs(obs - Hx_prior_mean) # /Hx_prior_spread - Hx_prior_median = np.median(osf_prior.df.get_prior_Hx(), axis=1) - oso.df.loc[abs_FGD <= 0.05, 'observations'] = Hx_prior_median[abs_FGD <= 0.05] + Hx_prior_median = np.median( + osf_prior.df.get_prior_Hx(), axis=1) + oso.df.loc[abs_FGD <= 0.05, + 'observations'] = Hx_prior_median[abs_FGD <= 0.05] elif obscfg.get("sat_channel") == 6: # WV73 @@ -340,12 +345,13 @@ def qc_obs(time, oso): abs_FGD = abs(obs - Hx_prior_mean) # /Hx_prior_spread oso.df = oso.df[abs_FGD > 5] else: - raise NotImplementedError('no obs QC implemented for this obs type') - - print('QC removed', n_obs_orig-len(oso.df), 'observations') + raise NotImplementedError( + 'no obs QC implemented for this obs type') + + print('QC removed', n_obs_orig-len(oso.df), 'observations') # archive obs_seq.out before QC (contains all observations, including later removed ones) - f_out_archive = time.strftime(pattern_obs_seq_out)+"-beforeQC" + f_out_archive = time.strftime(cluster.pattern_obs_seq_out)+"-beforeQC" os.makedirs(os.path.dirname(f_out_archive), exist_ok=True) copy(cluster.dart_rundir + "/obs_seq.out", f_out_archive) @@ -358,8 +364,7 @@ def qc_obs(time, oso): def evaluate(assim_time, obs_seq_out=False, prior_is_filter_output=False, - f_out_pattern=pattern_obs_seq_final+"-evaluate", - nproc=12): + f_out_pattern=cluster.pattern_obs_seq_final+"-evaluate"): """Calculates either prior or posterior obs space values. Note: Depends on a prepared input_list.txt, which defines the ensemble (prior or posterior). @@ -376,13 +381,7 @@ def evaluate(assim_time, Returns None (writes file) """ - os.makedirs(cluster.dart_rundir, exist_ok=True) # create directory to run DART in - os.chdir(cluster.dart_rundir) - - _link_DART_binaries_and_RTTOV_files() - - # remove any existing observation files - os.system("rm -f input.nml obs_seq.final") + prepare_run_DART_folder() if prior_is_filter_output: print('using filter_restart files in run_DART as prior') @@ -392,81 +391,57 @@ def evaluate(assim_time, use_linked_files_as_prior() # the observations at which to evaluate the prior at - if obs_seq_out: - copy(obs_seq_out, cluster.dart_rundir+'/obs_seq.out') # user defined file + if obs_seq_out: + copy(obs_seq_out, cluster.dart_rundir + + '/obs_seq.out') # user defined file else: - # probably not necessary - # prepare_nature_dart(assim_time) # link WRF nature file to run_DART directory - # use existing obs_seq.out file currently present in the run_DART directory if not os.path.isfile(cluster.dart_rundir+'/obs_seq.out'): - raise RuntimeError(cluster.dart_rundir+'/obs_seq.out does not exist') + raise RuntimeError(cluster.dart_rundir + + '/obs_seq.out does not exist') dart_nml.write_namelist(just_prior_values=True) - filter(nproc=nproc) + filter(nproc=cluster.max_nproc) archive_filter_diagnostics(assim_time, f_out_pattern) def archive_filter_diagnostics(time, f_out_pattern): - f_archive = time.strftime(f_out_pattern) + """Copy the filter output txt to the archive + """ + f_archive = time.strftime(f_out_pattern) dir_out = os.path.dirname(f_archive) os.makedirs(dir_out, exist_ok=True) copy(cluster.dart_rundir + "/obs_seq.final", f_archive) print(f_archive, "saved.") -def txtlink_to_prior(time, prior_init_time, prior_path_exp): - """For documentation: Which prior was used? 
-> write into txt file""" - os.makedirs(cluster.archivedir + time.strftime('/%Y-%m-%d_%H:%M/'), exist_ok=True) - os.system('echo "'+prior_path_exp+'\n'+prior_init_time.strftime('/%Y-%m-%d_%H:%M/') - +'\n'+time.strftime('/wrfrst_d01_%Y-%m-%d_%H:%M:%S')+'" > ' - +cluster.archivedir + time.strftime('/%Y-%m-%d_%H:%M/')+'link_to_prior.txt') - -def get_obsseq_out(time): - """Prepares an obs_seq.out file in the run_DART folder - If `exp.use_existing_obsseq` points to an existing file, then this is used. - Otherwise, the observations are generated and an `obs_seq.out` file is created. - An `obs_seq.out` file is always archived. - - Args: - time (datetime): time of assimilation - Returns: - obsseq.ObsSeq +def txtlink_to_prior(time, prior_init_time, prior_path_exp): + """For reproducibility, write the path of the prior to a txt file """ - if exp.use_existing_obsseq != False and os.path.isfile(exp.use_existing_obsseq): - # use an existing obs_seq.out file - f_obsseq = time.strftime(exp.use_existing_obsseq) - copy(f_obsseq, cluster.dart_rundir+'/obs_seq.out') # copy to run_DART folder - print(f_obsseq, 'copied to', cluster.dart_rundir+'/obs_seq.out') - - oso = obsseq.ObsSeq(cluster.dart_rundir + "/obs_seq.out") # read the obs_seq.out file - else: - # do NOT use an existing obs_seq.out file - # but generate observations with new observation noise - prepare_nature_dart(time) - oso = osq_out.generate_obsseq_out(time) + os.makedirs(cluster.archivedir + + time.strftime('/%Y-%m-%d_%H:%M/'), exist_ok=True) + os.system('echo "'+prior_path_exp+'\n'+prior_init_time.strftime('/%Y-%m-%d_%H:%M/') + + '\n'+time.strftime('/wrfrst_d01_%Y-%m-%d_%H:%M:%S')+'" > ' + + cluster.archivedir + time.strftime('/%Y-%m-%d_%H:%M/')+'link_to_prior.txt') - # copy to sim_archive - f_obsseq_archive = time.strftime(pattern_obs_seq_out) - os.makedirs(os.path.dirname(f_obsseq_archive), exist_ok=True) - copy(cluster.dart_rundir+'/obs_seq.out', f_obsseq_archive) - return oso def prepare_inflation_2(time, prior_init_time): """Prepare inflation files (spatially varying) - + Recycles inflation files from previous assimilations or takes default files from archive. - + Args: time (datetime): time of assimilation prior_init_time (datetime): time of prior assimilation """ - dir_priorinf = cluster.archivedir + prior_init_time.strftime(pattern_init_time) + dir_priorinf = cluster.archivedir + \ + prior_init_time.strftime(cluster.pattern_init_time) f_default = cluster.archive_base + "/input_priorinf_mean.nc" - f_prior = dir_priorinf + time.strftime("/%Y-%m-%d_%H:%M_output_priorinf_mean.nc") + f_prior = dir_priorinf + \ + time.strftime("/%Y-%m-%d_%H:%M_output_priorinf_mean.nc") f_new = cluster.dart_rundir + '/input_priorinf_mean.nc' if os.path.isfile(f_prior): @@ -477,7 +452,8 @@ def prepare_inflation_2(time, prior_init_time): copy(f_default, f_new) f_default = cluster.archive_base + "/input_priorinf_sd.nc" - f_prior = dir_priorinf + time.strftime("/%Y-%m-%d_%H:%M_output_priorinf_sd.nc") + f_prior = dir_priorinf + \ + time.strftime("/%Y-%m-%d_%H:%M_output_priorinf_sd.nc") f_new = cluster.dart_rundir + '/input_priorinf_sd.nc' if os.path.isfile(f_prior): @@ -487,69 +463,112 @@ def prepare_inflation_2(time, prior_init_time): warnings.warn(f_prior + ' does not exist. 
Using default file instead.') copy(f_default, f_new) + def archive_inflation_2(time): - dir_output = cluster.archivedir + time.strftime(pattern_init_time) + dir_output = cluster.archivedir + time.strftime(cluster.pattern_init_time) os.makedirs(dir_output, exist_ok=True) f_output = cluster.dart_rundir + '/output_priorinf_sd.nc' - f_archive = dir_output + time.strftime("/%Y-%m-%d_%H:%M_output_priorinf_sd.nc") + f_archive = dir_output + \ + time.strftime("/%Y-%m-%d_%H:%M_output_priorinf_sd.nc") copy(f_output, f_archive) print(f_archive, 'saved') f_output = cluster.dart_rundir + '/output_priorinf_mean.nc' - f_archive = dir_output + time.strftime("/%Y-%m-%d_%H:%M_output_priorinf_mean.nc") + f_archive = dir_output + \ + time.strftime("/%Y-%m-%d_%H:%M_output_priorinf_mean.nc") copy(f_output, f_archive) print(f_archive, 'saved') -def _link_DART_binaries_and_RTTOV_files(): - joinp = os.path.join - # link DART binaries - bins = ['perfect_model_obs', 'filter', 'obs_diag', 'obs_seq_to_netcdf'] - for b in bins: - symlink(joinp(cluster.dart_srcdir, b), - joinp(cluster.dart_rundir, b)) +def prepare_run_DART_folder(prior_path_exp=False): + # create directory to run DART in + os.makedirs(cluster.dart_rundir, exist_ok=True) + + link_DART_exe() + + for obscfg in exp.observations: + if 'sat_channel' in obscfg: + link_RTTOV_files() + continue + + # remove any remains of a previous run + os.chdir(cluster.dart_rundir) + os.system("rm -f input.nml obs_seq.in obs_seq.out obs_seq.out-orig obs_seq.final") - symlink(cluster.dart_srcdir+'/../../../assimilation_code/programs/gen_sampling_err_table/' - +'work/sampling_error_correction_table.nc', - cluster.dart_rundir+'/sampling_error_correction_table.nc') - # link RTTOV files - if cluster.rttov_srcdir != False: - rttov_files = ['rttov13pred54L/rtcoef_msg_4_seviri_o3.dat', - 'mfasis_lut/rttov_mfasis_cld_msg_4_seviri_deff.H5', - 'cldaer_visir/sccldcoef_msg_4_seviri.dat'] +def get_obsseq_out(time, prior_path_exp, prior_init_time, prior_valid_time, lag=None): + """Prepares an obs_seq.out file in the run_DART folder - for f_src in rttov_files: - destname = os.path.basename(f_src) - if 'rtcoef' in f_src: - destname = 'rtcoef_msg_4_seviri.dat' + 3 Options: + 1) Use existing obs_seq.out file + 2) Use precomputed FO (for cloud fraction) + 3) Generate new observations with new observation noise - symlink(cluster.rttov_srcdir + f_src, - cluster.dart_rundir+'/'+destname) + Args: + time (datetime): time of assimilation + prior_path_exp (str): path to the experiment folder + prior_init_time (datetime): time of the prior forecast init + prior_valid_time (datetime): time of the prior - symlink(cluster.dart_rundir+'/rttov_mfasis_cld_msg_4_seviri_deff.H5', - cluster.dart_rundir+'/rttov_mfasis_cld_msg_4_seviri.H5') # use deff, not OPAC + Returns: + obsseq.ObsSeq + """ + # access time-dependent configuration + try: + ACF_config = load_dict( + time.strftime(cluster.scripts_rundir+'/ACF_config_%H%M.pkl')) + print('using ACF_config:', ACF_config) + exp.ACF = True + except: + pass - symlink(cluster.dart_srcdir+'/../../../observations/forward_operators/rttov_sensor_db.csv', - cluster.dart_rundir+'/rttov_sensor_db.csv') + oso = None + if isinstance(exp.use_existing_obsseq, str): + # assume that the user wants to use an existing obs_seq.out file - else: - # check if we need RTTOV, but rttov_srcdir is not set - for obscfg in exp.observations: - if 'sat_channel' in obscfg: - raise RuntimeError('cluster.rttov_srcdir not set, but satellite channel will be assimilated') + f_obsseq = 
time.strftime(exp.use_existing_obsseq) + if os.path.isfile(f_obsseq): + # copy to run_DART folder + copy(f_obsseq, cluster.dart_rundir+'/obs_seq.out') + print(f_obsseq, 'copied to', cluster.dart_rundir+'/obs_seq.out') + else: + # explain the error if the file does not exist + raise IOError('exp.use_existing_obsseq is not False. \n' + + 'In this case, use_existing_obsseq should be a file path (wildcards %H, %M allowed)!\n' + + 'But there is no file with this pattern: '+str(exp.use_existing_obsseq)) + + elif hasattr(exp, 'ACF'): + # prepare observations with precomputed FO + if lag != None: + prior_valid_time += lag + time += lag + pattern_prior = prior_path_exp + prior_init_time.strftime( + '/%Y-%m-%d_%H:%M/<iens>/') + prior_valid_time.strftime(ACF_config.pop('first_guess_pattern')) + + from CloudFractionDA import obsseqout as cfoso + cfoso.write_obsseq(time, pattern_prior, + time.strftime(ACF_config.pop('observed_data')), + ACF_config.pop('var'), + path_output=cluster.dart_rundir + "/obs_seq.out", + **ACF_config) -def prepare_run_DART_folder(prior_path_exp=False): - os.makedirs(cluster.dart_rundir, exist_ok=True) # create directory to run DART in - os.chdir(cluster.dart_rundir) + else: + # do NOT use an existing obs_seq.out file + # but generate observations with new observation noise + from dartwrf.obs import create_obsseq_out as osq_out + oso = osq_out.generate_new_obsseq_out(time) - _link_DART_binaries_and_RTTOV_files() - _prepare_DART_grid_template(prior_path_exp=prior_path_exp) + # copy to sim_archive + f_obsseq_archive = time.strftime(cluster.pattern_obs_seq_out) + os.makedirs(os.path.dirname(f_obsseq_archive), exist_ok=True) + copy(cluster.dart_rundir+'/obs_seq.out', f_obsseq_archive) - # remove any existing observation files - os.system("rm -f input.nml obs_seq.in obs_seq.out obs_seq.out-orig obs_seq.final") + # read so that we can return it + if oso is None: + oso = obsseq.ObsSeq(cluster.dart_rundir + "/obs_seq.out") + return oso def main(time, prior_init_time, prior_valid_time, prior_path_exp): @@ -564,35 +583,40 @@ def main(time, prior_init_time, prior_valid_time, prior_path_exp): optional: quality-control 4) Assimilate with assigned errors 5) Evaluate posterior (after DART clamping, e.g. 
setting negative cloud water to zero) - + Args: assim_time (dt.datetime): time of output prior_init_time (dt.datetime): forecast start of prior prior_valid_time (dt.datetime): valid time of prior (may be different to assim_time) prior_path_exp (str): path to prior experiment - + Returns: None """ - obscfg = exp.observations - do_QC = getattr(exp, "do_quality_control", False) # True: triggers additional evaluations of prior & posterior + # do_reject_smallFGD: triggers additional evaluations of prior & posterior + do_reject_smallFGD = getattr(exp, "do_reject_smallFGD", False) prepare_run_DART_folder(prior_path_exp) - prepare_prior_ensemble(time, prior_init_time, prior_valid_time, prior_path_exp) nml = dart_nml.write_namelist() - + print(" get observations with specified obs-error") - oso = get_obsseq_out(time) + #lag = dt.timedelta(minutes=15) + oso = get_obsseq_out(time, prior_path_exp, prior_init_time, prior_valid_time) + + # prepare for assimilation + prepare_prior_ensemble(time, prior_init_time, prior_valid_time, prior_path_exp) + prepare_DART_grid_template() - if do_QC: # additional evaluation of prior (in case some observations are rejected) + # additional evaluation of prior (in case some observations are rejected) + if do_reject_smallFGD: print(" evaluate prior for all observations (incl rejected) ") - evaluate(time, f_out_pattern=pattern_obs_seq_final+"-evaluate_prior") + evaluate(time, f_out_pattern=cluster.pattern_obs_seq_final+"-evaluate_prior") print(" assign observation-errors for assimilation ") set_obserr_assimilate_in_obsseqout(oso, outfile=cluster.dart_rundir + "/obs_seq.out") - if do_QC: + if do_reject_smallFGD: print(" reject observations? ") - qc_obs(time, oso) + reject_small_FGD(time, oso) prior_inflation_type = nml['&filter_nml']['inf_flavor'][0][0] if prior_inflation_type == '2': @@ -602,28 +626,30 @@ def main(time, prior_init_time, prior_valid_time, prior_path_exp): dart_nml.write_namelist() filter() archive_filteroutput(time) - archive_filter_diagnostics(time, pattern_obs_seq_final) + archive_filter_diagnostics(time, cluster.pattern_obs_seq_final) txtlink_to_prior(time, prior_init_time, prior_path_exp) if prior_inflation_type == '2': archive_inflation_2(time) - print(" evaluate posterior in observation-space") - f_oso = time.strftime(pattern_obs_seq_out) - if do_QC: - f_oso += '-beforeQC' # includes all observations (including rejected ones in qc_obs()) + if exp.evaluate_posterior_in_obs_space: + print(" evaluate posterior in observation-space") + f_oso = time.strftime(cluster.pattern_obs_seq_out) + if do_reject_smallFGD: + # includes all observations (including rejected ones in reject_small_FGD()) + f_oso += '-beforeQC' - # evaluate() separately after ./filter is crucial when assimilating cloud variables - # as the obs_seq.final returned by ./filter was produced using un-clamped cloud variables - evaluate(time, - obs_seq_out=f_oso, - prior_is_filter_output=True, - f_out_pattern=pattern_obs_seq_final+"-evaluate") + # evaluate() separately after ./filter is crucial when assimilating cloud variables + # as the obs_seq.final returned by ./filter was produced using un-clamped cloud variables + evaluate(time, + obs_seq_out=f_oso, + prior_is_filter_output=True, + f_out_pattern=cluster.pattern_obs_seq_final+"-evaluate") if __name__ == "__main__": """Assimilate observations - + Example: python assimilate.py 2008-08-07_13:00 2008-08_12:00 2008-08-07_13:00 /path/to/experiment/ """ diff --git a/dartwrf/calc_linear_posterior.py b/dartwrf/calc_linear_posterior.py index 
a1510fd2b626dd9ad1b9e38731db1ebfdfeee0b7..bf7b044f62c967e2b7dc4e901d58895cdb75a0c0 100755 --- a/dartwrf/calc_linear_posterior.py +++ b/dartwrf/calc_linear_posterior.py @@ -29,15 +29,14 @@ def calc_lin_posterior(assim_time): aso.prepare_prior_ensemble(assim_time, prior_init_time=prior_init_time, prior_valid_time=assim_time, prior_path_exp=prior_exp) #aso.prepare_nature_dart(assim_time) - - dart_nml.write_namelist() # does an observation exist at this time? - #f_oso = assim_time.strftime(aso.pattern_obs_seq_out) - f_oso = cluster.archivedir+assim_time.strftime("/diagnostics/%Y-%m-%d_%H:%M_obs_seq.out") + f_oso = assim_time.strftime(cluster.pattern_obs_seq_out) - if os.path.exists(f_oso): + if os.path.exists(f_oso): # use the existing file + os.chdir(cluster.dart_rundir) + os.system("rm -f input.nml obs_seq.in obs_seq.out obs_seq.final") # remove any existing observation files shutil.copy(f_oso, cluster.dart_rundir+'/obs_seq.out') else: raise NotImplementedError(f_oso+' does not exist!') @@ -48,6 +47,7 @@ def calc_lin_posterior(assim_time): symlink(path_linear_filter, os.path.join(cluster.dart_rundir, 'filter')) + dart_nml.write_namelist() aso.filter(nproc=12) aso.archive_filter_diagnostics(assim_time, pattern_osf_linear) @@ -60,10 +60,9 @@ if __name__ == "__main__": Usage: python3 evaluate_obs_space.py init1,valid1 init2,valid2 ... """ args = sys.argv[1:] - #arg_tuples = [a.split(',') for a in args] # we need an existing run_DART folder - aso.prepare_run_DART_folder() + os.makedirs(cluster.dart_rundir, exist_ok=True) for assim_time in args: #prior_init_time = dt.datetime.strptime(prior_init_time, "%Y-%m-%d_%H:%M") diff --git a/dartwrf/dart_nml.py b/dartwrf/dart_nml.py index 39a4951ca690166a97a25c6bce89ebecb74dea7c..a0474ee7b08efee5fad3c6ed330651a1c4ab0cb6 100644 --- a/dartwrf/dart_nml.py +++ b/dartwrf/dart_nml.py @@ -1,21 +1,23 @@ import warnings +import numpy as np from dartwrf.utils import append_file from dartwrf.exp_config import exp from dartwrf.server_config import cluster -earth_radius_km = 6370 +# 6370 for earth radius in km +radius_periodic_km = 6370 def read_namelist(filepath): """Read the DART namelist file into a dictionary. - + Args: filepath (str): Path to namelist file - + Returns: dict: namelist[section][parameter] = [[arg1, arg2,], [arg3, arg4]] """ - + d = dict() # read file into a list of strings with open(filepath, 'r') as f: @@ -36,7 +38,7 @@ def read_namelist(filepath): section = line d[section] = dict() continue - + if line == '/': continue # skip end of namelist section @@ -50,7 +52,7 @@ def read_namelist(filepath): param_data = [] except ValueError: - # If the above split failed, + # If the above split failed, # then there is additional data for the previous variable val = line # in this line, there is only param_data # param is still the one from previously @@ -61,7 +63,6 @@ def read_namelist(filepath): # if isinstance(val, list) and len(val) == 1: # val = [val] - # try: # # convert to float/int # val = [float(v) for v in val] @@ -72,7 +73,6 @@ def read_namelist(filepath): # it is not a numeric value => string val = [v.strip() for v in val] - param_data.append(val) # print('this iteration var, val ...', {param: param_data}) @@ -81,15 +81,32 @@ def read_namelist(filepath): d[section][param] = param_data return d + def write_namelist_from_dict(d, filepath): """Write a DART namelist dictionary to a file. - + Args: d (dict): keys are namelist sections, values are dictionaries. 
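# Illustrative sketch (not part of the patch): the nesting convention used
# throughout dart_nml.py. The outer list holds the lines of a namelist entry,
# the inner list the values on each line. The output path is hypothetical.
from dartwrf.dart_nml import write_namelist_from_dict

nml = {
    '&filter_nml': {
        'ens_size':   [['40']],        # one line, one value
        'inf_flavor': [['2', '0']],    # one line, two values (prior, posterior)
    },
}
write_namelist_from_dict(nml, '/tmp/input.nml')
# numeric values are written unquoted, e.g. "ens_size = 40," and "inf_flavor = 2, 0,"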
these dictionaries contain keys (=parameters) and values (list type) every item in values is a line (list type) every line contains possibly multiple entries filepath (str): Path to namelist file + + Note: + The namelist dictionary should have the following structure: + d = { + '§ion1': { + 'parameter1': [['arg1', 'arg2'], ['arg3', 'arg4']], + 'parameter2': [['arg1', 'arg2'], ['arg3', 'arg4']], + }, + '§ion2': { + 'parameter1': [['arg1', 'arg2'], ['arg3', 'arg4']], + 'parameter2': [['arg1', 'arg2'], ['arg3', 'arg4']], + }, + } + + Returns: + None (writes to file) """ with open(filepath, 'w') as f: for section in d: @@ -102,40 +119,67 @@ def write_namelist_from_dict(d, filepath): width = max_width_of_parameter_name + 1 except: width = None - + for parameter in parameters: lines = d[section][parameter] - # lines (list(list(str))): + # Note: one parameter can have multiple lines with values in the namelist + # lines : (list(list(str))) # outer list: one element per line in the text file # inner list: one element per value in that line - - # we should have a list here + # `lines` should be a list here # if we instead have a single value, then make a list # because we assume that lines consists of multiple lines assert isinstance(lines, list) for i, line in enumerate(lines): + # a line can contain of multiple entries + # therefore a line has to be a list assert isinstance(line, list) + + # in case there is no value, write empty string if line == []: line = ['',] - + # if there are multiple entries in line, + # then join them with commas + # but remove pre-existing quotes + + # do we have a numerical value? first_entry = line[0] - if isinstance(first_entry, str) and not first_entry.startswith('.'): - try: - float(first_entry) - line = ', '.join(str(v) for v in line) - except: - # contains strings - line = [entry.strip("'").strip('"') for entry in line] # remove pre-existing quotes - line = ', '.join('"'+v+'"' for v in line) - else: - # numerical values + try: + float(first_entry) line = ', '.join(str(v) for v in line) - + except: + + if isinstance(first_entry, str): + # remove pre-existing quotes + line = [entry.strip("'").strip('"') + for entry in line] + + # does it start with a dot? 
then it is boolean + if first_entry.startswith('.'): + line = ', '.join(str(v) for v in line) + else: + line = ', '.join('"'+str(v)+'"' for v in line) + else: + # it is not a string, nor a number + raise ValueError( + 'Value neither str nor numeric!', first_entry) + + # if isinstance(first_entry, str) and not first_entry.startswith('.'): + # try: + # float(first_entry) + # lines line = ', '.join(str(v) for v in line) + # except: + # # contains strings + # line = [entry.strip("'").strip('"') for entry in line] # remove pre-existing quotes + # line = ', '.join('"'+v+'"' for v in line) + # else: + # # numerical values + # line = ', '.join(str(v) for v in line) if i == 0: f.write(' '+parameter.ljust(width)+' = '+line+',\n') @@ -149,13 +193,13 @@ def _get_horiz_localization(): Args: exp (Experiment): Experiment object - + Returns: l_obstypes (list of str): entries for `special_vert_normalization_obs_types` l_loc_horiz_rad (list of str): entries for `special_localization_cutoffs` """ def to_radian_horizontal(cov_loc_horiz_km): - cov_loc_radian = cov_loc_horiz_km / earth_radius_km + cov_loc_radian = cov_loc_horiz_km / radius_periodic_km return cov_loc_radian l_obstypes_all = [] # list of all observation types @@ -167,7 +211,8 @@ def _get_horiz_localization(): # compute horizontal localization value loc_horiz_km = obscfg["loc_horiz_km"] if not loc_horiz_km >= 0: - raise ValueError('Invalid value for `loc_horiz_km`, set loc_horiz_km >= 0 !') + raise ValueError( + 'Invalid value for `loc_horiz_km`, set loc_horiz_km >= 0 !') loc_horiz_rad = to_radian_horizontal(loc_horiz_km) l_loc_horiz_rad.append(loc_horiz_rad) @@ -176,7 +221,7 @@ def _get_horiz_localization(): def _get_vertical_localization(): """Compile the list of vertical localizations for the DART namelist variables - + Vertical localization can be defined in section &location_nml of 'input.nml' using following namelist variables: special_vert_normalization_obs_types (list of str) @@ -190,7 +235,7 @@ def _get_vertical_localization(): Args: exp (Experiment): Experiment object - + Returns: l_obstypes_vert (list of str): entries for `special_vert_normalization_obs_types` vert_norm_heights (list of str): entries for `special_vert_normalization_heights` @@ -199,12 +244,13 @@ def _get_vertical_localization(): """ def to_vertical_normalization(cov_loc_vert_km, cov_loc_horiz_km): - vert_norm_rad = earth_radius_km * cov_loc_vert_km / cov_loc_horiz_km * 1000 + vert_norm_rad = radius_periodic_km * cov_loc_vert_km / cov_loc_horiz_km * 1000 return vert_norm_rad - + l_obstypes_vert = [] # list of observation types that have vertical localization vert_norm_heights = [] # list of respective vertical localization values - vert_norm_scale_heights = [] # list of respective vertical localization values (alternative to vert_norm_heights) + # list of respective vertical localization values (alternative to vert_norm_heights) + vert_norm_scale_heights = [] vert_norm_levels = [] vert_norm_pressures = [] @@ -215,12 +261,13 @@ def _get_vertical_localization(): continue # no vertical localization for this observation type, in all other cases we need it l_obstypes_vert.append(obscfg["kind"]) - + try: # do we localize by height? 
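# Illustrative sketch (not part of the patch): the numbers behind
# to_vertical_normalization(). DART's special_vert_normalization_heights is
# the vertical distance (in metres) treated as equivalent to one radian of
# horizontal distance; the localization radii below are hypothetical.
radius_periodic_km = 6370
loc_horiz_km = 20.0
loc_vert_km = 3.0

loc_horiz_rad = loc_horiz_km / radius_periodic_km                        # ~0.00314 rad cutoff
vert_norm_hgt = radius_periodic_km * loc_vert_km / loc_horiz_km * 1000   # 955500.0 m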
- loc_vert_km = obscfg["loc_vert_km"] + loc_vert_km = obscfg["loc_vert_km"] loc_horiz_km = obscfg["loc_horiz_km"] - vert_norm_hgt = to_vertical_normalization(loc_vert_km, loc_horiz_km) + vert_norm_hgt = to_vertical_normalization( + loc_vert_km, loc_horiz_km) vert_norm_heights.append(vert_norm_hgt) # set the other (unused) list to a dummy value @@ -251,15 +298,15 @@ def _get_vertical_localization(): vert_norm_levels.append(-1) vert_norm_pressures.append(-1) else: - raise ValueError('DART namelist requires vertical localization, but neither `loc_vert_km` nor `loc_vert_scaleheight` are defined in obscfg.') - - return l_obstypes_vert, vert_norm_heights, vert_norm_scale_heights, vert_norm_levels, vert_norm_pressures + raise ValueError( + 'DART namelist requires vertical localization, but neither `loc_vert_km` nor `loc_vert_scaleheight` are defined in obscfg.') + return l_obstypes_vert, vert_norm_heights, vert_norm_scale_heights, vert_norm_levels, vert_norm_pressures def write_namelist(just_prior_values=False): """Write a DART namelist file ('input.nml') - + 1. Uses the default namelist (from the DART source code) 2. Calculates localization parameters from the experiment configuration 3. Overwrites other parameters as defined in the experiment configuration @@ -278,29 +325,42 @@ def write_namelist(just_prior_values=False): None """ list_obstypes_all, list_loc_horiz_rad = _get_horiz_localization() - - vert_norm_obs_types, vert_norm_heights, vert_norm_scale_heights, vert_norm_levels, vert_norm_pressures = _get_vertical_localization() - nml = read_namelist(cluster.dart_srcdir + "/input.nml") # default compilation namelist + vert_norm_obs_types, vert_norm_heights, vert_norm_scale_heights, vert_norm_levels, vert_norm_pressures = _get_vertical_localization() + + # default compilation namelist + nml = read_namelist(cluster.dart_srcdir + "/input.nml") n_obstypes = len(list_obstypes_all) if n_obstypes > 0: # make sure that observations defined in `exp.observations` are assimilated nml['&obs_kind_nml']['assimilate_these_obs_types'] = [list_obstypes_all] - nml['&obs_kind_nml']['evaluate_these_obs_types'] = [[]] - - nml['&assim_tools_nml']['special_localization_obs_types'] = [list_obstypes_all] - nml['&assim_tools_nml']['special_localization_cutoffs'] = [list_loc_horiz_rad] - - if len(vert_norm_obs_types) > 0: # avoid to write lists like [""] - nml['&location_nml']['special_vert_normalization_obs_types'] = [vert_norm_obs_types] - nml['&location_nml']['special_vert_normalization_heights'] = [vert_norm_heights] - nml['&location_nml']['special_vert_normalization_scale_heights'] = [vert_norm_scale_heights] - nml['&location_nml']['special_vert_normalization_levels'] = [vert_norm_levels] - nml['&location_nml']['special_vert_normalization_pressures'] = [vert_norm_pressures] - + nml['&obs_kind_nml']['use_precomputed_FOs_these_obs_types'] = [ + [a for a in list_obstypes_all if a.startswith('CF')]] # only for cloud fraction + nml['&obs_kind_nml']['assimilate_these_obs_types'] = [list_obstypes_all] - # overwrite namelist parameters as defined in the experiment configuration + nml['&assim_tools_nml']['special_localization_obs_types'] = [ + list_obstypes_all] + nml['&assim_tools_nml']['special_localization_cutoffs'] = [ + list_loc_horiz_rad] + + # <-- to avoid writing empty lists like [""] + if len(vert_norm_obs_types) > 0: + nml['&location_nml']['special_vert_normalization_obs_types'] = [ + vert_norm_obs_types] + nml['&location_nml']['special_vert_normalization_heights'] = [ + vert_norm_heights] + 
nml['&location_nml']['special_vert_normalization_scale_heights'] = [ + vert_norm_scale_heights] + nml['&location_nml']['special_vert_normalization_levels'] = [ + vert_norm_levels] + nml['&location_nml']['special_vert_normalization_pressures'] = [ + vert_norm_pressures] + + # we start out with the default namelist from the DART source code + # then we read the configuration file of the experiment + # and overwrite the default values where necessary + # (to merge default and user-defined namelist settings) for section, sdata in exp.dart_nml.items(): # if section is not in namelist, add it @@ -309,25 +369,40 @@ def write_namelist(just_prior_values=False): for parameter, value in sdata.items(): - if isinstance(value, list) and len(value) > 1: # it is a list - - if isinstance(value[0], list): - pass # nothing to do, value is list(list()) + # enforce that value is double-nested list (list of lists) + if isinstance(value, list) and len(value) > 1: + # it is a list with multiple entries + if isinstance(value[0], list): # value was list(list()) + pass # nothing to do else: - value = [value] # value was a list of parameter values, but just one line + # value was a list of parameter values, but just one line + value = [value] + + elif isinstance(value, list) and len(value) == 1: + # value was a list of parameter values, but just one value + value = [value] else: value = [[value]] # value was a single entry + # make sure that there is no value which is a triple-nested list + # because this doesnt make sense + # outer list: lines in the namelist + # inner list: values in the line + if isinstance(value[0][0], list): + raise RuntimeError( + 'This should not happen. The value must not be a more than doubly-nested list', value) + # overwrite entry in each dictionary - nml[section][parameter] = value # every entry in this list is one line + # every entry in this list is one line + nml[section][parameter] = value # necessary options if we dont compute posterior but only evaluate prior - if just_prior_values: + if just_prior_values: nml['&obs_kind_nml']['assimilate_these_obs_types'] = [[]] nml['&obs_kind_nml']['evaluate_these_obs_types'] = [list_obstypes_all] nml['&filter_nml']['compute_posterior'] = [['.false.']] - + # inf_flavor posterior must be 0 if posterior is not computed # inf_flavor keyword exists, so we can just overwrite it # prior inf must be 0, because we only want to evaluate the prior, not assimilate anything @@ -341,7 +416,8 @@ def write_namelist(just_prior_values=False): if nml['&location_nml']['horiz_dist_only'][0] == '.false.': for obscfg in exp.observations: if 'sat_channel' in obscfg: - warnings.warn("Selected vertical localization, but observations contain satellite obs -> Bug in DART.") + warnings.warn( + "Selected vertical localization, but observations contain satellite obs -> Bug in DART.") # write to file write_namelist_from_dict(nml, cluster.dart_rundir + "/input.nml") @@ -352,3 +428,10 @@ def write_namelist(just_prior_values=False): # alternatively, we could do this in cfg.py or the template input.nml in DART's model/wrf/work folder return nml # in case we want to access namelist settings in python + + +if __name__ == "__main__": + # for testing + + nml = write_namelist() + print(nml) diff --git a/dartwrf/evaluate_obs_space.py b/dartwrf/evaluate_obs_space.py index 7c6a1eef951ea1d72d087e3ed14f64c6e399dae0..05d8b67536b066c2f351fd8e554e89d5b5c28a67 100755 --- a/dartwrf/evaluate_obs_space.py +++ b/dartwrf/evaluate_obs_space.py @@ -21,33 +21,38 @@ def evaluate_one_time(assim_time, 
valid_time, use_other_obsseq=False): aso.prepare_prior_ensemble(valid_time, prior_init_time=assim_time, prior_valid_time=valid_time, prior_path_exp=cluster.archivedir) dart_nml.write_namelist() - # does an observation exist at this time? - f_oso = valid_time.strftime(aso.pattern_obs_seq_out) - f_oso = cluster.archivedir+valid_time.strftime("/%Y-%m-%d_%H:%M_obs_seq.out") - - if os.path.exists(f_oso): - # use the existing file - shutil.copy(f_oso, cluster.dart_rundir+'/obs_seq.out') + if os.path.isfile(exp.use_existing_obsseq): + # use the existing obs_seq.out file + shutil.copy(exp.use_existing_obsseq, cluster.dart_rundir+'/obs_seq.out') else: - try: - # generate the observations for the specified valid_time - aso.prepare_nature_dart(valid_time) - osq_out.generate_obsseq_out(valid_time) - except: - print("failed to generate observations from a nature file") - print("-> trying to evaluate posterior with dummy observations") - # use an old obsseq file and overwrite obs/truth values with "missing value" - f_oso = cluster.archivedir+valid_time.strftime("/diagnostics/%Y-%m-%d_%H:%M_obs_seq.out") - if not os.path.isfile(f_oso): - raise RuntimeError(f_oso+' not found. Cannot create dummy observation.') - - from dartwrf.obs import obsseq - oso = obsseq.ObsSeq(f_oso) + # is there a pre-existing obs file from assimilation before? + f_oso = valid_time.strftime(cluster.pattern_obs_seq_out) + f_oso = cluster.archivedir+valid_time.strftime("/%Y-%m-%d_%H:%M_obs_seq.out") + + if os.path.isfile(f_oso): + # use the existing file + shutil.copy(f_oso, cluster.dart_rundir+'/obs_seq.out') + else: + print("Obs file does not exist:", f_oso) + try: + # generate the observations for the specified valid_time + print("Trying to generate observations from a nature file") + aso.prepare_nature_dart(valid_time) + osq_out.generate_obsseq_out(valid_time) + except: + print("Failed. Trying to evaluate posterior with dummy observations") + # use an old obsseq file and overwrite obs/truth values with "missing value" + f_oso = cluster.archivedir+valid_time.strftime("/diagnostics/%Y-%m-%d_%H:%M_obs_seq.out") + if not os.path.isfile(f_oso): + raise RuntimeError(f_oso+' not found. Cannot create dummy observation.') + + from dartwrf.obs import obsseq + oso = obsseq.ObsSeq(f_oso) - # overwrite obs/truth values with "missing value" - oso.df['observations'] = -888888.0 - oso.df['truth'] = -888888.0 - oso.to_dart(cluster.dart_rundir+'/obs_seq.out') + # overwrite obs/truth values with "missing value" + oso.df['observations'] = -888888.0 + oso.df['truth'] = -888888.0 + oso.to_dart(cluster.dart_rundir+'/obs_seq.out') aso.evaluate(valid_time, f_out_pattern=cluster.archivedir + "/diagnostics/%Y-%m-%d_%H:%M:%S_obs_seq.final-evaluate") @@ -69,4 +74,4 @@ if __name__ == "__main__": assim_time = dt.datetime.strptime(assim_time, "%Y-%m-%d_%H:%M") valid_time = dt.datetime.strptime(valid_time, "%Y-%m-%d_%H:%M:%S") - evaluate_one_time(assim_time, valid_time) \ No newline at end of file + evaluate_one_time(assim_time, valid_time) diff --git a/dartwrf/obs/calculate_obs_locations.py b/dartwrf/obs/calculate_obs_locations.py index 59647b976c863ef46c9276d6f6367252a49408e5..d86548b9619c31440cd2d32cdc69b5af5c53ce55 100755 --- a/dartwrf/obs/calculate_obs_locations.py +++ b/dartwrf/obs/calculate_obs_locations.py @@ -2,7 +2,9 @@ These are the template files defining obs locations and metadata according to which observations are generated and subsequently assimilated. 
""" -import os, sys, warnings +import os +import sys +import warnings import numpy as np import datetime as dt import xarray as xr @@ -19,6 +21,7 @@ lon0_center = 0. radius_earth_meters = 6.371*1E6 + def square_array_from_domaincenter(n_obs, distance_between_obs_km): """ Create equally spaced grid for satellite observations every 4 km @@ -40,54 +43,65 @@ def square_array_from_domaincenter(n_obs, distance_between_obs_km): for ix in range(int(-nx/2), int(nx/2+1)): lat = lat0_center + iy*dy_4km_in_degree - m_per_degree_x = 2*np.pi*radius_earth_meters*np.sin(lat/180*np.pi)/360 + m_per_degree_x = 2*np.pi*radius_earth_meters * \ + np.sin(lat/180*np.pi)/360 dx_4km_in_degree = distance_between_obs_meters/m_per_degree_x lon = lon0_center + ix*dx_4km_in_degree coords.append((lat, lon)) return coords -def evenly_on_grid(n_obs, omit_covloc_radius_on_boundary=True): + +def evenly_on_grid(km_between_obs, skip_border_km=0): """Observations spread evenly over domain - - omit_covloc_radius_on_boundary : leave out a distance to the border of the domain - so that the assimilation increments are zero on the boundary - distance to boundary = 50 km + + skip_border_km : no observations within this distance to the border Returns - tuple of (lat, lon) coordinates + tuple of (lat, lon) coordinates of observed gridpoints in degrees """ - fcoords = cluster.geo_em_for_WRF_ideal # TODO: in case of WRF real, need to find a file providing coordinates, e.g. nature wrfout + fcoords = cluster.geo_em_nature ds = xr.open_dataset(fcoords) lons = ds.XLONG_M.isel(Time=0).values lats = ds.XLAT_M.isel(Time=0).values - n_obs_x = int(np.sqrt(n_obs)) - nx = len(ds.south_north) # number of gridpoints in one direction - model_dx_km = exp.model_dx/1000 - print('assuming', model_dx_km, 'km model grid') - - - if omit_covloc_radius_on_boundary: # in order to avoid an increment step on the boundary - skip_km = 50 - skip_gridpoints = int(skip_km/model_dx_km) # skip this many gridpoints on each side - - gridpoints_left = nx - 2*skip_gridpoints - # now spread observations evenly across the space left - gridpoints_between_obs = int(gridpoints_left/(n_obs_x-1)) - else: - gridpoints_between_obs = int(nx/n_obs_x) # number of gridpoints / number of observations in one direction - skip_gridpoints = int(gridpoints_between_obs/2) # to center the observations in the domain + + domain_width = len(ds.south_north) # number of gridpoints in one direction + assert domain_width == len(ds.west_east), 'domain is not square' - km_between_obs = gridpoints_between_obs*model_dx_km - print('observation density: one observation every', km_between_obs, 'km /', - gridpoints_between_obs,'gridpoints \n', 'leaving a domain boundary on each side of', - skip_gridpoints, 'gridpoints or', skip_gridpoints*model_dx_km, 'km') - # skip_gridpoints = space we have to leave out on the boundary of the domain - # in order to have the observations centered in the domain + try: + grid_dx_km = float(ds.attrs['DX'])/1000 + except Exception as e: + raise KeyError('DX not found in file '+fcoords) from e + + # skip this many gridpoints to the border + skip_gridpoints = int(skip_border_km/grid_dx_km) + gridpoints_observed = domain_width - 2*skip_gridpoints + + # now spread observations evenly across the space left + gridpoints_between_obs = int(km_between_obs/grid_dx_km) + + # number of observations in one direction + # +1 because 100 gridpoints with dx=10 can fit 11 observations + n_obs_x = int(gridpoints_observed/gridpoints_between_obs) + 1 + + # for info + print('no observations within', 
skip_gridpoints, 'gridpoints or', skip_gridpoints*grid_dx_km, 'km to the border.') + print('grid resolution [km]=', grid_dx_km, ', shape of domain=', lons.shape, ', observed gridpoints:', gridpoints_observed) + print('gridpoints_between_obs=', gridpoints_between_obs, '=>', n_obs_x, 'observations in one direction') + print('total number of observations=', n_obs_x**2) coords = [] - for i in range(n_obs_x): - for j in range(n_obs_x): - coords.append((lats[skip_gridpoints+i*gridpoints_between_obs, skip_gridpoints+j*gridpoints_between_obs], - lons[skip_gridpoints+i*gridpoints_between_obs, skip_gridpoints+j*gridpoints_between_obs])) - return coords \ No newline at end of file + for i_obs in range(n_obs_x): + for j_obs in range(n_obs_x): + i_grid = skip_gridpoints+i_obs*gridpoints_between_obs + j_grid = skip_gridpoints+j_obs*gridpoints_between_obs + + coords.append((lats[i_grid, j_grid], lons[i_grid, j_grid])) + + print('first observation at gridpoint', skip_gridpoints,', last observation at gridpoint', i_grid) + return coords + + +if __name__ == '__main__': + + evenly_on_grid(12, skip_border_km=16) \ No newline at end of file diff --git a/dartwrf/obs/create_obsseq_in.py b/dartwrf/obs/create_obsseq_in.py index 6b64816e0ca76540abd7f9b23833962fee577041..769dc377a8d5de7f4ffd225a7eb63e1bccaf2c23 100755 --- a/dartwrf/obs/create_obsseq_in.py +++ b/dartwrf/obs/create_obsseq_in.py @@ -2,7 +2,9 @@ These are the template files defining obs locations and metadata according to which observations are generated and subsequently assimilated. """ -import os, sys, warnings +import os +import sys +import warnings import numpy as np import datetime as dt from pysolar.solar import get_altitude, get_azimuth @@ -10,7 +12,8 @@ from pysolar.solar import get_altitude, get_azimuth from dartwrf.server_config import cluster from dartwrf import utils from dartwrf.obs import calculate_obs_locations as col -from dartwrf.obs.obskind import obs_kind_nrs # dictionary string => DART internal indices +# dictionary string => DART internal indices +from dartwrf.obs.obskind import obs_kind_nrs # position on earth for RTTOV ray geometry lat0 = 45. @@ -22,10 +25,10 @@ sat_zen = "45.0" def degr_to_rad(degr): """Convert to DART convention (radians) 2*pi = 360 degrees - + Args: degr (float) : degrees east of Greenwich - + Returns float """ @@ -48,9 +51,9 @@ def _add_timezone_UTC(t): def get_dart_date(time_dt): """Convert datetime.datetime into DART time format - + Assumes that input is UTC! 
- + Returns str, str """ @@ -74,14 +77,14 @@ def _write_file(msg, output_path='./'): def _append_hgt_to_coords(coords, heights): """Adds vertical position to list of coordinates - + if heights is a scalar, then all obs have the same height if heights is a list, then you get multiple levels Args: coords (list of 2-tuples): (lat, lon) in degrees north/east heights (float or list of float): height in meters - + Returns: list of 3-tuples """ @@ -90,7 +93,7 @@ def _append_hgt_to_coords(coords, heights): len(heights) # fails with scalar except TypeError: heights = [heights, ] - + for i in range(len(coords)): for hgt_m in heights: coords2.append(coords[i] + (hgt_m,)) @@ -129,7 +132,7 @@ def _determine_vert_coords(sat_channel, kind, obscfg): else: vert_coord_sys = "-2" # undefined height vert_coords = ["-888888.0000000000", ] - + if 'height' in obscfg: # hypothetical height, only used for localization vert_coord_sys = "3" # meters AMSL @@ -181,7 +184,7 @@ def write_section(obs, last=False): line_link = " "+str(obs['i']-1)+" -1 -1" else: line_link = " -1 "+str(obs['i']+1)+" -1" - + return """ OBS """+str(obs['i'])+""" """+line_link+""" @@ -195,7 +198,7 @@ kind """+str(obs['obserr_var']) -def create_obs_seq_in(time_dt, list_obscfg, +def create_obs_seq_in(time_dt, list_obscfg, output_path=cluster.dart_rundir+'/obs_seq.in'): """Create obs_seq.in with multiple obs types in one file @@ -221,29 +224,36 @@ def create_obs_seq_in(time_dt, list_obscfg, txt = '' i_obs_total = 0 for i_cfg, obscfg in enumerate(list_obscfg): - n_obs = obscfg['n_obs'] - if obscfg['obs_locations'] == 'square_array_from_domaincenter': - coords = col.square_array_from_domaincenter(n_obs, - obscfg['distance_between_obs_km']) # <---- must have variable + # obstypes with precomputed FO do not need obs_seq.in + if hasattr(obscfg, 'precomputed_FO'): + if obscfg.precomputed_FO: + print('this obstype uses precomputed FOs, skipping...') + continue - elif obscfg['obs_locations'] == 'square_array_evenly_on_grid': - coords = col.evenly_on_grid(n_obs) + # how to compute locations? 
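Illustration (not part of the patch): with the new dispatch below, an entry of `exp.observations` no longer needs `n_obs`; it either relies on the default grid spacing or passes explicit coordinates. The key names are taken from this function and from the radar example at the end of the file; the values are made up:

config_example = dict(kind='RADAR_REFLECTIVITY',
                      km_between_obs=12, skip_border_km=16,   # used by the 'DEFAULT_GRID' branch
                      error_generate=2.5, error_assimilate=2.5,
                      heights=[2000, 4000],
                      loc_horiz_km=20, loc_vert_km=2.5)
# alternatively, pass explicit (lat, lon) pairs:
# config_example['obs_locations'] = [(45.0, 0.0), (45.1, 0.1)]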
+ obs_locations = obscfg.get('obs_locations', 'DEFAULT_GRID') - else: # obs_locations given by iterable + if obs_locations == 'DEFAULT_GRID': + # compute as a square_array_evenly_on_grid + coords = col.evenly_on_grid(obscfg['km_between_obs'], + obscfg['skip_border_km']) + else: + # obs_locations given by iterable coords = obscfg['obs_locations'] - assert len(coords) == n_obs, (len(coords), n_obs) # check if functions did what they supposed to + # check if lon/lat within bounds for lat, lon in coords: - assert (lat < 90) & (lat > -90) - assert (lon < 180) & (lon > -180) + assert (lat < 90) & (lat > -90), 'latitude out of bounds' + assert (lon < 180) & (lon > -180), 'longitude out of bounds' kind = obscfg['kind'] print('obstype', kind) sat_channel = obscfg.get('sat_channel', False) # add observation locations in the vertical at different levels - vert_coord_sys, vert_coords = _determine_vert_coords(sat_channel, kind, obscfg) + vert_coord_sys, vert_coords = _determine_vert_coords( + sat_channel, kind, obscfg) coords = _append_hgt_to_coords(coords, vert_coords) n_obs_3d_thistype = len(coords) @@ -263,17 +273,17 @@ def create_obs_seq_in(time_dt, list_obscfg, last = True txt += write_section(dict(i=i_obs_total, - kind_nr=obs_kind_nrs[kind], - dart_date_day=dart_date_day, - secs_thatday=secs_thatday, - lon=coords[i_obs][1], - lat=coords[i_obs][0], - vert_coord=coords[i_obs][2], - vert_coord_sys=vert_coord_sys, - obserr_var=obserr_std[i_obs]**2, - appendix=sat_info), - last=last) - + kind_nr=obs_kind_nrs[kind], + dart_date_day=dart_date_day, + secs_thatday=secs_thatday, + lon=coords[i_obs][1], + lat=coords[i_obs][0], + vert_coord=coords[i_obs][2], + vert_coord_sys=vert_coord_sys, + obserr_var=obserr_std[i_obs]**2, + appendix=sat_info), + last=last) + n_obs_total = i_obs_total list_kinds = [a['kind'] for a in list_obscfg] pretxt = preamble(n_obs_total, list_kinds) @@ -287,11 +297,11 @@ if __name__ == '__main__': time_dt = dt.datetime(2008, 7, 30, 13, 0) radar = dict(var_name='Radar reflectivity', unit='[dBz]', - kind='RADAR_REFLECTIVITY', - n_obs=5776, obs_locations='square_array_evenly_on_grid', - error_generate=2.5, error_assimilate=2.5, - heights=range(2000, 14001, 2000), - loc_horiz_km=20, loc_vert_km=2.5) + kind='RADAR_REFLECTIVITY', + n_obs=5776, obs_locations='square_array_evenly_on_grid', + error_generate=2.5, error_assimilate=2.5, + heights=range(2000, 14001, 2000), + loc_horiz_km=20, loc_vert_km=2.5) create_obs_seq_in(time_dt, [radar], output_path=utils.userhome+'/run_DART/obs_seq.in') @@ -299,6 +309,5 @@ if __name__ == '__main__': if False: error_assimilate = 5.*np.ones(n_obs*len(radar['heights'])) import assim_synth_obs as aso - aso.replace_errors_obsseqout(cluster.dart_rundir+'/obs_seq.out', error_assimilate) - - + aso.replace_errors_obsseqout( + cluster.dart_rundir+'/obs_seq.out', error_assimilate) diff --git a/dartwrf/obs/create_obsseq_out.py b/dartwrf/obs/create_obsseq_out.py index 2ec5040d243bd86fa91a6b9525bd7d4344ecfc61..4abd01d48b02833eb0fb59fe21709cd83f5ca8b9 100644 --- a/dartwrf/obs/create_obsseq_out.py +++ b/dartwrf/obs/create_obsseq_out.py @@ -1,105 +1,145 @@ -import os, shutil, warnings +import os +import shutil +import glob +import warnings -from dartwrf.utils import try_remove, print, shell, symlink +from dartwrf.utils import try_remove, print, shell, symlink, copy import dartwrf.obs.create_obsseq_in as osi from dartwrf.obs import obsseq from dartwrf import assimilate as aso +from dartwrf import wrfout_add_geo +from dartwrf.obs.obskind import obs_kind_nrs from 
dartwrf.exp_config import exp
from dartwrf.server_config import cluster


-def _prepare_DART_grid_template():
-    # DART needs a wrfinput file as a template for the grid
-    # No data will be read from this file, but the grid information must match exactly.
-
-    # error if the source does not exist
-    if not os.path.exists(cluster.dart_rundir + "/wrfout_d01"):
-        raise FileNotFoundError("wrfout_d01 not found in " + cluster.dart_rundir,
-                                "but necessary to create observations")
-    symlink(cluster.dart_rundir + "/wrfout_d01",
-            cluster.dart_rundir + "/wrfinput_d01")
-
-def generate_obsseq_out(time, nproc=12):
-    """Generate an obs_seq.out file from the current experiment
-    Expects an existing nature file in the cluster.dart_rundir
-
+def prepare_nature_dart(time):
+    """Prepares DART nature (wrfout_d01) if available
+
    Args:
-        time (datetime): time of the observations
-        nproc (int, optional): number of cores for call to ./perfect_model_obs
-
-    Returns:
-        obsseq.ObsSeq: obs_seq.out representation
+        time (dt.datetime): Time at which observations will be made
    """
-
-    def _ensure_physical_vis(oso):  # set reflectance < surface albedo to surface albedo
-        """ Set values smaller than surface albedo to surface albedo
-        Highly hacky. Your albedo might be different.
+    def _find_nature(time):
+        """Find the path to the nature file for the given time
        """
-        print(" removing obs below surface albedo ")
-        clearsky_albedo = 0.2928
-
-        if_vis_obs = oso.df['kind'].values == 262
-        if_obs_below_surface_albedo = oso.df['observations'].values < clearsky_albedo
-        oso.df.loc[if_vis_obs & if_obs_below_surface_albedo, ('observations')] = clearsky_albedo
-        oso.to_dart(f=cluster.dart_rundir + "/obs_seq.out")
-        return oso
-
-
-    def _apply_superobbing(oso):
+        glob_pattern = time.strftime(exp.nature_wrfout_pattern)
+        print('searching for nature in pattern:', glob_pattern)
        try:
-            f_oso = aso.pattern_obs_seq_out + '-before_superob'
-            shutil.copy(cluster.dart_rundir + "/obs_seq.out", f_oso)
-            print('saved', f_oso)
-        except Exception as e:
-            warnings.warn(str(e))
-
-        print(" superobbing to", exp.superob_km, "km")
-        oso.df = oso.df.superob(window_km=exp.superob_km)
-        oso.to_dart(f=cluster.dart_rundir + "/obs_seq.out")
-        return oso
-
-    osi.create_obs_seq_in(time, exp.observations)  # create obs definition file
-
-    _prepare_DART_grid_template()
-    run_perfect_model_obs(nproc=nproc)  # generate observation, draws from gaussian
-
-    oso = obsseq.ObsSeq(cluster.dart_rundir + "/obs_seq.out")
-
-    oso = _ensure_physical_vis(oso)
+            f_nat = glob.glob(glob_pattern)[0]  # find the nature wrfout-file
+        except IndexError:
+            raise IOError("no nature found with pattern "+glob_pattern)
+
+        # check user input
+        if 'wrfout' not in f_nat.split('/')[-1]:
+            warnings.warn(
+                f_nat+" does not contain 'wrfout' in filename, are you sure this is a valid nature file?")
+
+        if not os.path.exists(f_nat):
+            raise IOError(f_nat+" does not exist -> no nature found")
+
+        print('using nature:', f_nat)
+        return f_nat
+
+    print("prepare nature")
+    f_nat = _find_nature(time)
+    # copy nature wrfout into the DART run directory
+    copy(f_nat, cluster.dart_rundir + "/wrfout_d01")
+
+    # add coordinates if necessary
+    if cluster.geo_em_nature:
+        wrfout_add_geo.run(cluster.geo_em_nature, cluster.dart_rundir + "/wrfout_d01")
+
+    # link it once more as the grid template (wrfinput_d01)
+    symlink(cluster.dart_rundir + "/wrfout_d01", cluster.dart_rundir + "/wrfinput_d01")
+
-    if getattr(exp, "superob_km", False):
-        oso = _apply_superobbing(oso)
+def force_obs_in_physical_range(oso):
+    """ Set values smaller than surface albedo to surface albedo
+    Highly hacky. Your albedo might be different.
+    """
+    print(" removing obs below surface albedo ")
+    clearsky_albedo = 0.2928  # custom value
+
+    if_vis_obs = oso.df['kind'].values == obs_kind_nrs['MSG_4_SEVIRI_BDRF']
+    if_obs_below_surface_albedo = oso.df['observations'].values < clearsky_albedo
+    oso.df.loc[if_vis_obs & if_obs_below_surface_albedo,
+               ('observations')] = clearsky_albedo
+    oso.to_dart(f=cluster.dart_rundir + "/obs_seq.out")
    return oso

+
 def run_perfect_model_obs(nproc=12):
    """Run the ./perfect_model_obs program

    Args:
        nproc (int): number of processors to use
-
+
    Returns:
        None, creates obs_seq.out
    """
    print("running ./perfect_model_obs")
    os.chdir(cluster.dart_rundir)
-    nproc = min(nproc, cluster.max_nproc)
+
+    if hasattr(cluster, 'max_nproc'):
+        # the cluster-wide setting takes precedence over the nproc argument
+        nproc = cluster.max_nproc

    try_remove(cluster.dart_rundir + "/obs_seq.out")
    if not os.path.exists(cluster.dart_rundir + "/obs_seq.in"):
-        raise RuntimeError("obs_seq.in does not exist in " + cluster.dart_rundir)
-    shell(cluster.dart_modules+' mpirun -np '+str(nproc)+" ./perfect_model_obs > log.perfect_model_obs")
+        raise RuntimeError(
+            "obs_seq.in does not exist in " + cluster.dart_rundir)
+    shell(cluster.dart_modules+'; mpirun -np '+str(nproc) +
+          " ./perfect_model_obs > log.perfect_model_obs")

    if not os.path.exists(cluster.dart_rundir + "/obs_seq.out"):
        raise RuntimeError(
            "obs_seq.out does not exist in " + cluster.dart_rundir,
            "\n see "+cluster.dart_rundir + "/log.perfect_model_obs")
+
+
+def generate_new_obsseq_out(time):
+    """Generate an obs_seq.out file from obs_seq.in
+    Expects an existing nature file in the cluster.dart_rundir.
+
+    Note:
+        Combining precomputed FO with regular observations is not supported!
+
+    Args:
+        time (datetime): time of the observations
+
+    Returns:
+        obsseq.ObsSeq: obs_seq.out representation
+    """
+    def _provide_obs_seq_in(time):
+        if hasattr(exp, 'use_this_obs_seq_in'):
+            # custom definition of an obs_seq.in file
+            print("using obs_seq.in:", exp.use_this_obs_seq_in)
+            copy(exp.use_this_obs_seq_in, cluster.dart_rundir+'/obs_seq.in')
+        else:
+            # create file from scratch
+            osi.create_obs_seq_in(time, exp.observations)
+
+    _provide_obs_seq_in(time)
+    prepare_nature_dart(time)
+
+    run_perfect_model_obs(nproc=cluster.max_nproc)
+
+    oso = obsseq.ObsSeq(cluster.dart_rundir + "/obs_seq.out")
+    oso = force_obs_in_physical_range(oso)
+    f_dest = time.strftime(cluster.pattern_obs_seq_out)
+    os.makedirs(os.path.dirname(f_dest), exist_ok=True)
+    shutil.copy(cluster.dart_rundir+'/obs_seq.out', f_dest)
+    return oso
+
+
 if __name__ == '__main__':
-    """Generate obs_seq.out files from an experiment
-
+    """Generate obs_seq.out files from any wrfout files
+    Make sure that the nature & obs info is complete in the config file.
+ Usage: - python3 create_obsseq_out.py 2008-07-30_12:00,2008-07-30_12:01 - + python create_obsseq_out.py exp_hires.py jet.py 2008-07-30_12:00,2008-07-30_12:01 /jetfs/home/lkugler/data/HiResNature_obs + Args: times (str): comma-separated list of times of the observations @@ -108,12 +148,13 @@ if __name__ == '__main__': """ import argparse import datetime as dt - parser = argparse.ArgumentParser(description='Generate obs_seq.out files from an experiment') - parser.add_argument('times', type=str, help='times of the observations') + parser = argparse.ArgumentParser( + description='Generate obs_seq.out files from an experiment') + parser.add_argument('times', type=str, help='times of the observations') args = parser.parse_args() times = args.times.split(',') - + # before running perfect_model_obs, we need to set up the run_DART folder from dartwrf import assimilate as aso from dartwrf import dart_nml @@ -123,6 +164,5 @@ if __name__ == '__main__': for time in times: print("time", time) time = dt.datetime.strptime(time, '%Y-%m-%d_%H:%M') - - aso.prepare_nature_dart(time) # link WRF files to DART directory - generate_obsseq_out(time) + generate_new_obsseq_out(time) + diff --git a/dartwrf/obs/obskind.py b/dartwrf/obs/obskind.py index 73e38d013757bd05155f21b3a3cb915ee1c83cfa..8011b5a32386b2c8bbd7c70b624ff1e4e4f056a9 100644 --- a/dartwrf/obs/obskind.py +++ b/dartwrf/obs/obskind.py @@ -1,5 +1,8 @@ -# this file is autogenerated -obs_kind_nrs = {"RADIOSONDE_U_WIND_COMPONENT": 1, +""" NOTE: This file is autogenerated! +Use dartwrf/create_obskind_table.py to regenerate! +""" +obs_kind_nrs = { +"RADIOSONDE_U_WIND_COMPONENT": 1, "RADIOSONDE_V_WIND_COMPONENT": 2, "RADIOSONDE_GEOPOTENTIAL_HGT": 3, "RADIOSONDE_SURFACE_PRESSURE": 4, diff --git a/dartwrf/obs/obsseq.py b/dartwrf/obs/obsseq.py index e6147cb99cb5bcc3c59c88e15eaa45fa3cfeca9c..7f0541fea13162663ff730b694a622f797c0b79b 100755 --- a/dartwrf/obs/obsseq.py +++ b/dartwrf/obs/obsseq.py @@ -5,21 +5,8 @@ Examples: >>> from dartwrf.obs.obsseq import ObsSeq >>> osf = ObsSeq('path/to/obs_seq.final') - The content is a pandas.DataFrame with all observations (rows) - >>> osf.df - observations truth prior ensemble mean posterior ensemble mean ... kind metadata time variance - 0 0.292800 0.289466 0.360284 0.330799 ... 262 [ visir\n, 180.000000000000 45.00000... (50400, 148864) 0.0009 - 1 0.292800 0.289466 0.398444 0.380152 ... 262 [ visir\n, 180.000000000000 45.00000... (50400, 148864) 0.0009 - 2 0.310016 0.289466 0.355061 0.369988 ... 262 [ visir\n, 180.000000000000 45.00000... (50400, 148864) 0.0009 - 3 0.297182 0.289466 0.305424 0.302489 ... 262 [ visir\n, 180.000000000000 45.00000... (50400, 148864) 0.0009 - 4 0.292800 0.293797 0.306238 0.303252 ... 262 [ visir\n, 180.000000000000 45.00000... (50400, 148864) 0.0009 - .. ... ... ... ... ... ... ... ... ... - 956 0.762274 0.796486 0.664451 0.833559 ... 262 [ visir\n, 180.000000000000 45.00000... (50400, 148864) 0.0009 - 957 0.525743 0.500751 0.534391 0.653267 ... 262 [ visir\n, 180.000000000000 45.00000... (50400, 148864) 0.0009 - 958 0.341627 0.348115 0.405534 0.447314 ... 262 [ visir\n, 180.000000000000 45.00000... (50400, 148864) 0.0009 - 959 0.826649 0.835491 0.374459 0.785951 ... 262 [ visir\n, 180.000000000000 45.00000... (50400, 148864) 0.0009 - 960 0.320477 0.343154 0.303468 0.325203 ... 262 [ visir\n, 180.000000000000 45.00000... (50400, 148864) 0.0009 - [961 rows x 93 columns] + osf.df is a pandas.DataFrame with all observations as rows. + Its keys are: e.g. 
'observations', 'truth', 'prior ensemble mean', 'prior ensemble spread', 'Quality Control', 'obdef', 'loc3d', 'kind', 'metadata', 'time', 'variance' To get arrays of prior and posterior use >>> osf.df.get_prior_Hx() @@ -32,12 +19,14 @@ Note: Can not create obs_seq from scratch, since it does not know which metadata is necessary for each observation type """ -import os, warnings +import os +import warnings import numpy as np import pandas as pd missing_value = -888888.0 + def _plot_box(m, lat, lon, label="", **kwargs): """"Draw bounding box @@ -66,12 +55,14 @@ def _plot_box(m, lat, lon, label="", **kwargs): **kwargs ) + def _degrees_to_rad(degr): """Convert to DART convention = radians""" if degr < 0: degr += 360 return degr / 360 * 2 * np.pi + def _rad_to_degrees(rad): """Convert to degrees from DART convention (radians)""" assert rad >= 0, "no negative radians allowed" @@ -94,7 +85,7 @@ class ObsRecord(pd.DataFrame): def get_prior_Hx(self): """Retrieve H(x_prior) for all ensemble members - + Returns: np.array (n_obs, n_ens) """ @@ -102,7 +93,7 @@ class ObsRecord(pd.DataFrame): def get_posterior_Hx(self): """Retrieve H(x_posterior) for all ensemble members - + Returns: np.array (n_obs, n_ens) """ @@ -129,7 +120,7 @@ class ObsRecord(pd.DataFrame): Works with all observations (self = self.self) or a subset of observations (self = self.self[343:348]) """ - if what not in ['prior', 'posterior']: + if what not in ['prior', 'posterior']: raise ValueError(what, 'must be prior or posterior') # which columns do we need? @@ -143,8 +134,6 @@ class ObsRecord(pd.DataFrame): # assert np.allclose(Hx.mean(axis=1).values, self[what+' ensemble mean'].values, rtol=1e-6) return Hx.values - - def get_model_grid_indices(self, wrf_file_with_grid): """Retrieve the grid indices closest to the observations @@ -192,11 +181,11 @@ class ObsRecord(pd.DataFrame): # find indices of observations in wrf grid for i, row in lon_lat.iterrows(): - ilat_ilon[i,:] = find_index_from_coords_tree(tree, xlat.shape[0], row.lat, row.lon) - - return pd.DataFrame(index=self.index, - data=dict(wrf_i=ilat_ilon[:,0], wrf_j=ilat_ilon[:,1])) + ilat_ilon[i, :] = find_index_from_coords_tree( + tree, xlat.shape[0], row.lat, row.lon) + return pd.DataFrame(index=self.index, + data=dict(wrf_i=ilat_ilon[:, 0], wrf_j=ilat_ilon[:, 1])) def get_lon_lat(self): """Retrieve longitude and latitude of observations @@ -243,7 +232,8 @@ class ObsRecord(pd.DataFrame): nlayers = int(len(self)/obs_per_layer) else: - warnings.warn('I can only guess the number of layers from this file.') + warnings.warn( + 'I can only guess the number of layers from this file.') return nlayers def superob(self, window_km): @@ -280,10 +270,10 @@ class ObsRecord(pd.DataFrame): def calc_km_from_deg(deg_lat, deg_lon, center_lat): dist_km_lat = deg_lat * km_per_degrees - dist_km_lon = deg_lon * km_per_degrees * np.cos(center_lat * np.pi / 180) + dist_km_lon = deg_lon * km_per_degrees * \ + np.cos(center_lat * np.pi / 180) return dist_km_lat, dist_km_lon - debug = False radius_earth_meters = 6.371 * 1e6 m_per_degrees = np.pi * radius_earth_meters / 180 # m per degree latitude @@ -307,9 +297,9 @@ class ObsRecord(pd.DataFrame): # superob in case of multiple layers, only implemented for single obstype nlayers = self._determine_nlayers() - + # indices of observations (starting from 0) - i_obs_grid = self.index.values + i_obs_grid = self.index.values # get the observation indices from obs_seq (list) # onto a cartesian grid (ix, iy, iz) @@ -326,7 +316,7 @@ class ObsRecord(pd.DataFrame): out = 
self.drop(self.index) # this df will be filled boxes = [] - for i in range(0, nx+1 - win_obs, win_obs): + for i in range(0, nx+1 - win_obs, win_obs): # i is the number of observations in x direction # but in steps of "number of observations in superob window" # i.e. i = 0, win_obs, 2*win_obs, 3*win_obs, ... @@ -337,7 +327,8 @@ class ObsRecord(pd.DataFrame): for k in range(0, nlayers): # k is the index of the vertical layer - if debug: print(i,j,k) + if debug: + print(i, j, k) # find indices of observations within superob window # i_obs_box = i_obs_grid[i : i + win_obs, j : j + win_obs, k].ravel() @@ -345,24 +336,29 @@ class ObsRecord(pd.DataFrame): if debug: print("index x from", i, 'to', i + win_obs) print("index y from", j, 'to', j + win_obs) - print("obs indices box=", i_obs_grid[i : i + win_obs, j : j + win_obs, k]) + print("obs indices box=", + i_obs_grid[i: i + win_obs, j: j + win_obs, k]) # find observations within superob window obs_box = self._get_from_cartesian_grid(slice(i, i + win_obs), - slice(j, j + win_obs), - k) + slice( + j, j + win_obs), + k) - # save boundary of box to list, for plotting later eps = dx_obs_lat_deg/2 # for plotting eps2 = eps*0.8 # for plotting - lat1, lon1 = self._get_from_cartesian_grid(i, j, k).get_lon_lat().values[0] - lat2, lon2 = self._get_from_cartesian_grid(i+win_obs-1, j, k).get_lon_lat().values[0] - lat3, lon3 = self._get_from_cartesian_grid(i, j+win_obs-1, k).get_lon_lat().values[0] - lat4, lon4 = self._get_from_cartesian_grid(i+win_obs-1, j+win_obs-1, k).get_lon_lat().values[0] + lat1, lon1 = self._get_from_cartesian_grid( + i, j, k).get_lon_lat().values[0] + lat2, lon2 = self._get_from_cartesian_grid( + i+win_obs-1, j, k).get_lon_lat().values[0] + lat3, lon3 = self._get_from_cartesian_grid( + i, j+win_obs-1, k).get_lon_lat().values[0] + lat4, lon4 = self._get_from_cartesian_grid( + i+win_obs-1, j+win_obs-1, k).get_lon_lat().values[0] boxes.append(([lat1-eps2, lat2+eps2, lat3-eps2, lat4+eps2], - [lon1-eps, lon2-eps, lon3+eps, lon4+eps])) + [lon1-eps, lon2-eps, lon3+eps, lon4+eps])) # average the subset # metadata are assumed to be equal @@ -374,10 +370,12 @@ class ObsRecord(pd.DataFrame): pass # these parameters are not averaged elif 'spread' in key: # stdev of mean of values = sqrt(mean of variances) - obs_mean.at[key] = np.sqrt((obs_box[key]**2).mean()) + obs_mean.at[key] = np.sqrt( + (obs_box[key]**2).mean()) elif key == 'variance': # variance of mean = sum(variances)/n^2 - obs_mean.at[key] = obs_box[key].sum()/obs_box[key].size**2 + obs_mean.at[key] = obs_box[key].sum() / \ + obs_box[key].size**2 else: obs_mean.at[key] = obs_box[key].mean() @@ -391,7 +389,8 @@ class ObsRecord(pd.DataFrame): # -> there is an observation in the middle # take the location of that obs # int(win_obs/2) is the index of the center element when indices start at 0 - i_obs_center = i_obs_grid[i + int(win_obs/2), j + int(win_obs/2), k] + i_obs_center = i_obs_grid[i + + int(win_obs/2), j + int(win_obs/2), k] obs_mean.at['loc3d'] = self.iloc[i_obs_center]['loc3d'] # check if all obs share the same vertical position @@ -409,12 +408,13 @@ class ObsRecord(pd.DataFrame): out.attrs['boxes'] = boxes # quick after check - does the output obs number match with the expected number? 
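Worked example (illustration, not part of the patch) of the consistency check that follows, assuming a square, single-layer set of observations:

# 961 obs on one layer (31 x 31) with a superob window of 5 obs per direction
n_pre_superob, nlayers, win_obs = 961, 1, 5
n_windows_x = int((n_pre_superob / nlayers)**.5 / win_obs)   # int(31/5) = 6 windows per direction
n_target_post = n_windows_x**2 * nlayers                     # 6*6 = 36 superobservations expected
assert (n_windows_x, n_target_post) == (6, 36)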
- n_windows_x = int((n_pre_superob/nlayers)**.5/win_obs) # assume square of obs + n_windows_x = int((n_pre_superob/nlayers)**.5 / + win_obs) # assume square of obs n_target_post = n_windows_x**2 * nlayers # number of windows print('superob from', n_pre_superob, 'obs to', n_post_superob, 'obs') if n_post_superob != n_target_post: - raise RuntimeError('expected', n_target_post, 'superobservations, but created', - n_post_superob) + raise RuntimeError('expected', n_target_post, 'superobservations, but created', + n_post_superob) out.attrs['df_pre_superob'] = self # original data self = out # overwrite dataframe @@ -463,10 +463,12 @@ class ObsSeq(object): # check if we found definitions before end of file if i == len(self.preamble)-1: # end of file - raise RuntimeError('did not find `obs_type_definitions` or `obs_kind_definitions` in file') + raise RuntimeError( + 'did not find `obs_type_definitions` or `obs_kind_definitions` in file') line_n_obstypes = i + 1 - n_obstypes = int(self.preamble[line_n_obstypes]) # read integer from file + # read integer from file + n_obstypes = int(self.preamble[line_n_obstypes]) # read obs type kind (number and description) obstypes = [] @@ -476,16 +478,20 @@ class ObsSeq(object): obstypes.append((kind_nr, kind_type)) self.obstypes = obstypes - # read num_copies (2 for obs_seq.out, 44 for obs_seq.final with 40 ens members) + # read num_copies + # num_copies=1 ... obs_seq.out without truth value + # num_copies=2 ... obs_seq.out with truth value + # num_copies=86 ... obs_seq.final with 40 ens members (prior+post) + obs + truth + 2x mean + 2x spread num_copies = False for line in self.preamble: if 'num_copies:' in line: _, num_copies, _, num_qc = line.split() break if not num_copies: - raise RuntimeError('did not find `num_copies:` in '+str(self.preamble)) - num_copies = int(num_copies) - num_qc = int(num_qc) + raise RuntimeError( + 'did not find `num_copies:` in '+str(self.preamble)) + self.num_copies = int(num_copies) + self.num_qc = int(num_qc) # read num_obs num_obs = False @@ -494,14 +500,15 @@ class ObsSeq(object): _, num_obs, _, max_num_obs = line.split() break if not num_obs: - raise RuntimeError('did not find `num_obs:` in '+str(self.preamble)) + raise RuntimeError( + 'did not find `num_obs:` in '+str(self.preamble)) assert num_obs == max_num_obs, NotImplementedError() self.num_obs = int(num_obs) # read keys for values (e.g. 
'observations', 'truth', 'prior ensemble mean',) keys = [] line_start_keys = i+1 - for j in range(line_start_keys, line_start_keys+num_copies+num_qc): + for j in range(line_start_keys, line_start_keys+self.num_copies+self.num_qc): line = self.preamble[j] keys.append(line.strip()) @@ -520,7 +527,7 @@ class ObsSeq(object): Args: content (list of str) : contains lines of file - + Returns list of list of str """ @@ -539,12 +546,12 @@ class ObsSeq(object): if obs_begin_str in line: # then this line is beginning of obs obs_end = i - 1 # previous line - obs_list.append(content[obs_begin : obs_end + 1]) + obs_list.append(content[obs_begin: obs_end + 1]) obs_begin = i # next obs starts here if i == len(content) - 1: # last line obs_end = i - obs_list.append(content[obs_begin : obs_end + 1]) + obs_list.append(content[obs_begin: obs_end + 1]) if not len(obs_list) > 1: warnings.warn('len(obs_list)='+str(len(obs_list))) @@ -552,7 +559,7 @@ class ObsSeq(object): # consistency check to ensure that all observations have been detected if len(obs_list) != self.num_obs: raise RuntimeError('num_obs read in does not match preamble num_obs ' - +str(len(obs_list))+' != '+str(self.num_obs)) + + str(len(obs_list))+' != '+str(self.num_obs)) return obs_list def one_obs_to_dict(obs_list_entry): @@ -583,11 +590,10 @@ class ObsSeq(object): else: out[key] = v - x, y, z, z_coord = lines[line_loc].split() out["loc3d"] = float(x), float(y), float(z), int(z_coord) out["kind"] = int(lines[line_kind].strip()) - out["metadata"] = lines[line_kind + 1 : -2] + out["metadata"] = lines[line_kind + 1: -2] out["time"] = tuple(lines[-2].split()) out["variance"] = float(lines[-1].strip()) return out @@ -615,10 +621,10 @@ class ObsSeq(object): def append_obsseq(self, list_of_obsseq): """Append a list of ObsSeq objects - + Args: list_of_obsseq (list of ObsSeq()) - + Example: Combine two ObsSeq() objects >>> oso1 = ObsSeq('path/to/obs_seq.out1') @@ -628,7 +634,7 @@ class ObsSeq(object): Returns: ObsSeq() with combined data """ - from dartwrf.obs.obskind import obs_kind_nrs # dictionary string => DART internal indices + from dartwrf.obs.obskind import obs_kind_nrs # dictionary string => DART internal indices inverted_obs_kind_nrs = {v: k for k, v in obs_kind_nrs.items()} for a in list_of_obsseq: @@ -640,8 +646,8 @@ class ObsSeq(object): list_of_obsseq_df.extend([a.df for a in list_of_obsseq]) combi_df = pd.concat(list_of_obsseq_df, - ignore_index=True # we use a new observation index now - ) + ignore_index=True # we use a new observation index now + ) n_obstypes = combi_df.kind.nunique() list_kinds = combi_df.kind.unique() @@ -651,10 +657,39 @@ class ObsSeq(object): obstypes.append((kind, inverted_obs_kind_nrs[kind])) oso3 = self - oso3.df = combi_df - oso3.obstypes = obstypes + oso3.df = combi_df + oso3.obstypes = obstypes return oso3 + def remove_obs_of_type(self, kind_str=False, kind=False): + """Remove all observations of a certain type + + Args: + kind_str (str): observation type as string + kind (int): observation type as integer + + Returns: + self + """ + + if kind_str != False: + # dictionary string => DART internal indices + from dartwrf.obs.obskind import obs_kind_nrs + kind_remove = obs_kind_nrs[kind_str] + if kind != False: + kind_remove = kind + + # remove data from table + self.df = self.df[self.df.kind != kind_remove] + + # remove obstypes from obstypes-list + obstypes = self.obstypes + obstypes_new = [] + for kind, kindstr in obstypes: + if kind != kind_remove: + obstypes_new.append((kind, kindstr)) + self.obstypes = 
obstypes_new + return self def to_pandas(self): """Create pd.DataFrame with rows=observations @@ -701,11 +736,13 @@ class ObsSeq(object): for (nr, obstype) in self.obstypes: txt += "\n " + str(nr) + " " + obstype nobs = str(n_obs) - txt += "\n num_copies: 2 num_qc: 1" - txt += "\n num_obs: " + nobs - txt += " max_num_obs: " + nobs - txt += "\n observations \n truth \n Quality Control \n" - txt += " first: 1 last: " + nobs + txt += "\n".join(["\n num_copies: "+str(self.num_copies) + + " num_qc: "+str(self.num_qc), + " num_obs: " + nobs+" max_num_obs: " + nobs, + " observations"]) + if self.num_copies > 1: + txt += "\n truth " + txt += "\n Quality Control \n first: 1 last: " + nobs return txt def write_obs(i, obs, next_i_obs=None, prev_i_obs=None): @@ -724,37 +761,42 @@ class ObsSeq(object): """ if next_i_obs: - line_link = " -1 " + str(next_i_obs) + " -1" + line_link = " -1 " + \ + str(next_i_obs) + " -1" else: # last observation in file - line_link = " " + str(prev_i_obs) + " -1 -1" + line_link = " " + \ + str(prev_i_obs) + " -1 -1" lon_rad = str(obs["loc3d"][0]) lat_rad = str(obs["loc3d"][1]) - out = ( - " \n".join( + content = ["OBS " + str(i), + str(obs["observations"]),] + if "truth" in obs: + content.append(str(obs["truth"])) + + content.extend([ + str(obs["Quality Control"]), + line_link, "obdef", "loc3d", + " ".join( [ - "\nOBS " + str(i), - str(obs["observations"]), - str(obs["truth"]), - str(obs["Quality Control"]), - line_link, - "obdef", - "loc3d", " ".join( - [ - lon_rad, lat_rad, - str(obs["loc3d"][2]), - str(obs["loc3d"][3]), - ] - ), - "kind", " " + str(int(obs["kind"])), - "".join(obs["metadata"]), + lon_rad, lat_rad, + str(obs["loc3d"][2]), + str(obs["loc3d"][3]), ] - ) - + " \n " + obs["time"][0] + " " + obs["time"][1] - + " \n " + str(obs["variance"]) - ) - return out + ), + "kind", + " " + str(int(obs["kind"])), + ]) + if "metadata" in obs: + content.append("".join(obs["metadata"])) + + content.extend([ + obs["time"][0] + " " + obs["time"][1], + str(obs["variance"]), + ]) + # print(content) + return "\n" + " \n".join(content) n_obs = len(self.df) outstr = write_preamble(n_obs) @@ -775,7 +817,6 @@ class ObsSeq(object): write_file(outstr, output_path=f) - def plot(self, f_out="./map_obs_superobs.png"): print('plotting obs...') import matplotlib as mpl @@ -784,7 +825,7 @@ class ObsSeq(object): import matplotlib.pyplot as plt import xarray as xr - georef = xr.open_dataset(cluster.geo_em_for_WRF_ideal) + georef = xr.open_dataset(cluster.geo_em_nature) lon = georef.XLONG_M.values.squeeze() lat = georef.XLAT_M.values.squeeze() @@ -813,7 +854,7 @@ class ObsSeq(object): m.drawcoastlines(color="white") m.drawcountries(color="white") - _plot_box(m, lat, lon, label="domain", color="green", lw=1) #4) + _plot_box(m, lat, lon, label="domain", color="green", lw=1) # 4) # OBSERVATIONS original_df = self.df.attrs['df_pre_superob'] @@ -822,16 +863,16 @@ class ObsSeq(object): longs = coords.lon.values coords = zip(lats, longs) - label="observed pixel" + label = "observed pixel" for lati, long in coords: m.plot(long, lati, ".", - markersize=0.4, #4, - latlon=True, - color="grey", - label=label, - zorder=4, - ) - label='' + markersize=0.4, # 4, + latlon=True, + color="grey", + label=label, + zorder=4, + ) + label = '' # after superob coords = self.df.get_lon_lat() @@ -842,22 +883,22 @@ class ObsSeq(object): label = 'superobservation' for lati, long in coords: m.plot(long, lati, ".", - markersize=0.5, #5, - latlon=True, - color="red", - label=label, - zorder=4, - ) + markersize=0.5, # 5, + 
latlon=True, + color="red", + label=label, + zorder=4, + ) label = '' - - #from IPython import embed; embed() + # from IPython import embed; embed() if self.df.attrs['boxes']: label = 'superob' for lats, lons in self.df.attrs['boxes']: lats, lons = np.meshgrid(lats, lons) - _plot_box(m, lats, lons, label=label, color="white", lw=0.1) #1) + _plot_box(m, lats, lons, label=label, + color="white", lw=0.1) # 1) label = '' plt.legend() diff --git a/dartwrf/prep_IC_prior.py b/dartwrf/prep_IC_prior.py index 95efb7f7f62bf5f0e99773dd93114857aa368f34..b91c82388a7185cd0d0ce6ed88139b73d30238a9 100755 --- a/dartwrf/prep_IC_prior.py +++ b/dartwrf/prep_IC_prior.py @@ -19,10 +19,10 @@ Ad 2: copies wrfrst to run_WRF directory """ -def create_wrfrst_in_WRF_rundir(time, prior_init_time, prior_path_exp): - """copies wrfrst to run_WRF directory (for next WRF run) +def create_wrfrst_in_WRF_rundir(time: dt.datetime, prior_init_time: dt.datetime, prior_path_exp: str) -> None: + """Copy WRF restart files to run_WRF directory + These files will be used as initial conditions for the next WRF run """ - for iens in range(1, exp.n_ens+1): clean_wrfdir(cluster.wrf_rundir(iens)) @@ -32,16 +32,21 @@ def create_wrfrst_in_WRF_rundir(time, prior_init_time, prior_path_exp): print('copy prior (wrfrst)', prior_wrfrst, 'to', wrfrst) copy(prior_wrfrst, wrfrst) - # remove all wrfrst (but not the one used) - WHY? NO! - # files_rst = glob.glob(prior_path_exp + prior_init_time.strftime('/%Y-%m-%d_%H:%M/'+str(iens)+'/wrfrst_*')) - # files_rst.remove(prior_wrfrst) - # for f in files_rst: - # print('removing', f) - # try_remove(f) - -def create_updated_wrfinput_from_wrfout(time, prior_init_time, prior_path_exp, new_start_time): - """Same as create_wrfout_in_archivedir, but output is `wrfinput` in WRF run directory""" +def create_updated_wrfinput_from_wrfout(time: dt.datetime, prior_init_time: dt.datetime, prior_path_exp: str, new_start_time: dt.datetime) -> None: + """Create a new wrfinput file from wrfout file + Output is created inside the WRF run directory + + Args: + time: time of the wrfout file + prior_init_time: initial time of the prior run + prior_path_exp: path to the prior run + new_start_time: time of the new wrfinput file + If provided, overwrites the valid time of the initial conditions; + This hack allows you to use a prior of a different time than your forecast start time. + Usually, you don't want to do this. 
+ + """ print('writing updated wrfout to WRF run directory as wrfinput') for iens in range(1, exp.n_ens+1): prior_wrfout = prior_path_exp + prior_init_time.strftime('/%Y-%m-%d_%H:%M/') \ @@ -61,12 +66,14 @@ if __name__ == '__main__': prior_init_time = dt.datetime.strptime(sys.argv[2], '%Y-%m-%d_%H:%M') prior_valid_time = dt.datetime.strptime(sys.argv[3], '%Y-%m-%d_%H:%M') - if len(sys.argv) == 5: + if len(sys.argv) == 4: + create_wrfrst_in_WRF_rundir(prior_valid_time, prior_init_time, prior_path_exp) + elif len(sys.argv) == 5: # to start new simulation at different time than prior_valid_time new_start_time = dt.datetime.strptime(sys.argv[4], '%Y-%m-%d_%H:%M') # use_wrfout_as_wrfinput + # Caution: Even without assimilation increments, this will change the model forecast create_updated_wrfinput_from_wrfout(prior_valid_time, prior_init_time, prior_path_exp, new_start_time) else: - # restart - create_wrfrst_in_WRF_rundir(prior_valid_time, prior_init_time, prior_path_exp) + raise ValueError('Usage: python prep_IC_prior.py prior_path_exp prior_init_time prior_valid_time [new_start_time]') \ No newline at end of file diff --git a/dartwrf/prepare_namelist.py b/dartwrf/prepare_namelist.py index d762f9646f56be4b72c3faa600ae9e22993335f7..cffbef86ef05678a628a4f457166c492f1277c02 100755 --- a/dartwrf/prepare_namelist.py +++ b/dartwrf/prepare_namelist.py @@ -57,23 +57,20 @@ def run(iens, begin, end, hist_interval_s=5*60, radt=5, archive=True, '<HH2>': '%H', '<MM2>': '%M', '<SS2>': '%S'}.items(): sed_inplace(rundir+'/namelist.input', k, end.strftime(v)) - # print(rundir+'/namelist.input created') - # print('WRF namelist begin:', begin, 'end:', end, 'output to', archdir) - ######################### + print('saved', rundir+'/namelist.input') + + if archive: - init_dir = cluster.archivedir+begin.strftime('/%Y-%m-%d_%H:%M/')+str(iens) os.makedirs(init_dir, exist_ok=True) - print('copy namelist to archive') copy(rundir+'/namelist.input', init_dir+'/namelist.input') - try: - if not restart: - print('copy wrfinput of this run to archive') - wrfin_old = rundir+'/wrfinput_d01' - wrfin_arch = init_dir+'/wrfinput_d01' - copy(wrfin_old, wrfin_arch) - except Exception as e: - warnings.warn(str(e)) + print('archived', init_dir+'/namelist.input') + + if not restart: + wrfin_old = rundir+'/wrfinput_d01' + wrfin_arch = init_dir+'/wrfinput_d01' + copy(wrfin_old, wrfin_arch) + print('archived', wrfin_arch) if __name__ == '__main__': diff --git a/dartwrf/prepare_wrfrundir.py b/dartwrf/prepare_wrfrundir.py index 434412c0f73b0686d06b1873dc50cfb386830e80..cd85e64f8281384936e2c2f169dc92b18456edb3 100755 --- a/dartwrf/prepare_wrfrundir.py +++ b/dartwrf/prepare_wrfrundir.py @@ -16,29 +16,30 @@ import datetime as dt from dartwrf.exp_config import exp from dartwrf.server_config import cluster -from dartwrf.utils import symlink, copy, link_contents +from dartwrf.utils import symlink, link_contents, try_remove from dartwrf import prepare_namelist if __name__ == '__main__': - init_time = dt.datetime.strptime(sys.argv[1], '%Y-%m-%d_%H:%M') - for iens in range(1, exp.n_ens+1): - # print('preparing ens', iens) - rundir = cluster.wrf_rundir(iens) os.makedirs(rundir, exist_ok=True) link_contents(cluster.srcdir, rundir) - # print('linking ideal and wrf.exe:') - symlink(cluster.ideal, rundir+'/ideal.exe') symlink(cluster.wrfexe, rundir+'/wrf.exe') + + if hasattr(cluster, 'ideal'): + symlink(cluster.ideal, rundir+'/ideal.exe') # prepare input profiles + try_remove(rundir+'/input_sounding') # remove existing file if hasattr(exp, 
'input_profile'): + init_time = dt.datetime.strptime(sys.argv[1], '%Y-%m-%d_%H:%M') # prep namelist for ./ideal.exe prepare_namelist.run(iens, begin=init_time, end=init_time, archive=False) # time not important input_prof = (exp.input_profile).replace('<iens>', str(iens).zfill(3)) + if not os.path.isfile(input_prof): + raise FileNotFoundError(f'Input profile {input_prof} does not exist.') symlink(input_prof, rundir+'/input_sounding') print('All run_WRF directories have been set up.') diff --git a/dartwrf/utils.py b/dartwrf/utils.py index cc19b5d4dcad75b9c1758c8c0bfe40b51015a872..12054419e31a7ad52a9d0a082bc1fa3c73ae1522 100755 --- a/dartwrf/utils.py +++ b/dartwrf/utils.py @@ -24,7 +24,6 @@ class Experiment(object): expname (str): Name of the experiment model_dx (int): WRF grid spacing in meters n_ens (int): Ensemble size - do_quality_control (bool): If True, activate "quality control" function in assim_synth_obs.py nature_wrfout_pattern (str): Path to the nature run, where we take observations from; the path can contain wildcards (*,?), e.g. '/jetfs/exp1/*/1/wrfout_d01_%Y-%m-%d_%H:%M:%S' diff --git a/dartwrf/workflows.py b/dartwrf/workflows.py index 3371d2e657feaa24ddba62b49f19e86edca88ede..a90dc61aa62dc61f66bccf3d8423cfdff5d189fe 100644 --- a/dartwrf/workflows.py +++ b/dartwrf/workflows.py @@ -5,10 +5,14 @@ e.g. assimilate() calls dartwrf/assimilate.py through the shell. This would not be necessary, but some users might want to use queueing systems (e.g. SLURM) which must call scripts. """ -import os, sys, shutil, warnings +import os +import sys +import shutil +import warnings import datetime as dt -from dartwrf.utils import script_to_str +from dartwrf.utils import script_to_str, shell + class WorkFlows(object): def __init__(self, exp_config='cfg.py', server_config='server.py'): @@ -30,85 +34,43 @@ class WorkFlows(object): exp (obj): experiment configuration as defined in exp_config file """ - def _copy_dartwrf_to_archive(): - # Copy scripts to self.cluster.archivedir folder + def _copy_dartwrf_to_archive(dirs_exist_ok=False): + # Copy DART-WRF/dartwrf/ to self.cluster.archivedir folder + # copy the dartwrf python package + shutil.copytree(self.cluster.dartwrf_dir+'/dartwrf/', + self.cluster.archivedir+'/DART-WRF/dartwrf/', + ignore=shutil.ignore_patterns('*.git',), + dirs_exist_ok=dirs_exist_ok) + print('>>> DART-WRF scripts: "' + + self.cluster.archivedir+'/DART-WRF/"') + + # copy this script for reproducibility + script_executing_this_process = sys.argv[0] + shutil.copy(script_executing_this_process, + self.cluster.archivedir+'/DART-WRF/') + + def _save_config_to(config_fname, destination): try: - shutil.copytree(self.cluster.dartwrf_dir, self.cluster.archivedir+'/DART-WRF/', - ignore=shutil.ignore_patterns('*.git',)) # don't copy the git repo - print('>>> DART-WRF scripts: "'+self.cluster.archivedir+'/DART-WRF/"') - except FileExistsError as e: - warnings.warn(str(e)) - if input('The experiment name already exists! Scripts will not be overwritten. Continue? 
(Y/n) ') in ['Y', 'y']: - pass - else: - raise e - except: - raise - - def _obskind_read(): - """Read dictionary of observation types + ID numbers ("kind") - from DART f90 script and return it as python dictionary - """ - definitionfile = self.cluster.dart_srcdir+'/../../../assimilation_code/modules/observations/obs_kind_mod.f90' - with open(definitionfile, 'r') as f: - kind_def_f = f.readlines() - - obskind_nrs = {} - for i, line in enumerate(kind_def_f): - if 'Integer definitions for DART OBS TYPES' in line: - # data starts below this line - i_start = i - break - for line in kind_def_f[i_start+1:]: - if 'MAX_DEFINED_TYPES_OF_OBS' in line: - # end of data - break - if '::' in line: - # a line looks like this - # integer, parameter, public :: MSG_4_SEVIRI_TB = 261 - data = line.split('::')[-1].split('=') - kind_str = data[0].strip() - kind_nr = int(data[1].strip()) - obskind_nrs[kind_str] = kind_nr - return obskind_nrs - - def _dict_to_py(d, outfile): - """Write a python dictionary to a .py file - - Args: - d (dict): dictionary to write - outfile (str): path to output file - - Returns: - None - """ - with open(outfile, 'w') as f: - txt = '# this file is autogenerated \nobs_kind_nrs = {' - for k,v in d.items(): - txt += '"'+k+'": '+str(v)+', \n' - txt += '}' - f.write(txt) + shutil.copyfile('config/'+config_fname, destination) + except shutil.SameFileError: + pass print('------------------------------------------------------') print('>>> Starting experiment ... ') print('>>> Experiment configuration: "./config/'+exp_config+'" ') print('>>> Server configuration: "./config/'+server_config+'"') - #### 1 + # 1 # copy the selected config files (arguments to Workflows(...)) to the scripts directory # ./DART-WRF/dartwrf/server_config.py and ./DART-WRF/dartwrf/exp_config.py # these config files will be used later, and no others! 
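For orientation, a top-level call that drives this initialization could look like this (illustration, not part of the patch; file names are examples of configs in ./config/):

from dartwrf.workflows import WorkFlows
w = WorkFlows(exp_config='exp_hires.py', server_config='jet.py')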
- original_scripts_dir = '/'.join(__file__.split('/')[:-1]) # usually /home/DART-WRF/dartwrf/ - try: - shutil.copyfile('config/'+server_config, original_scripts_dir+'/server_config.py') - except shutil.SameFileError: - pass - try: - shutil.copyfile('config/'+exp_config, original_scripts_dir+'/exp_config.py') - except shutil.SameFileError: - pass + # usually /home/DART-WRF/dartwrf/ + original_scripts_dir = '/'.join(__file__.split('/')[:-1]) + _save_config_to(server_config, original_scripts_dir + + '/server_config.py') + _save_config_to(exp_config, original_scripts_dir+'/exp_config.py') - #### 2 + # 2 # import the configuration files from where we copied them just before sys.path.append(original_scripts_dir) from server_config import cluster @@ -116,40 +78,50 @@ class WorkFlows(object): from exp_config import exp self.exp = exp + print(" ") print('>>> Main data folder: "'+self.cluster.archivedir+'"') - print('>>> Temporary DART run folder: "'+self.cluster.dart_rundir+'"') - print('>>> Temporary WRF run folder: "'+self.cluster.wrf_rundir_base+'"') + print('>>> Temporary DART folder: "'+self.cluster.dart_rundir+'"') + print('>>> Temporary WRF folder: "' + + self.cluster.wrf_rundir_base+'"') - #### 3 + # 3 # Set paths and backup scripts self.cluster.log_dir = self.cluster.archivedir+'/logs/' - print('>>> In case of error, check logs at:"'+self.cluster.log_dir+'"') + print(" ") + print('>>> Log-files: "'+self.cluster.log_dir+'"') if self.cluster.use_slurm: self.cluster.slurm_scripts_dir = self.cluster.archivedir+'/slurm-scripts/' - print('>>> SLURM scripts stored in: "', self.cluster.slurm_scripts_dir+'"') + print('>>> SLURM scripts: "'+self.cluster.slurm_scripts_dir+'"') + print(" ") - #### 4 + # 4 # to be able to generate obs_seq.in files, we need a dictionary to convert obs kinds to numbers - # a) we read the obs kind definitions (obs_kind_mod.f90 from DART code) + # a) we read the obs kind definitions (obs_kind_mod.f90 from DART code) # b) we generate a python file with this dictionary - # Note: to include it in the documentary, the file needs to exist also in the repository - # (so the documentation generator SPHINX can read it) - _dict_to_py(_obskind_read(), original_scripts_dir+'/obs/obskind.py') + import create_obskind_table + create_obskind_table.run(server_config) - #### 5 + # 5 # Copy scripts and config files to `self.cluster.archivedir` folder - _copy_dartwrf_to_archive() - - #### 6 + try: + _copy_dartwrf_to_archive() + except FileExistsError as e: + if input('The experiment name already exists! Overwrite existing experiment? (Y/n) ') in ['Y', 'y']: + _copy_dartwrf_to_archive(dirs_exist_ok=True) + else: + raise e + + # 6 # we set the path from where python should import dartwrf modules # every python command then imports DART-WRF from self.cluster.archivedir+'/DART-WRF/dartwrf/' - self.cluster.python = 'export PYTHONPATH='+self.cluster.scripts_rundir+'/../; '+self.cluster.python + self.cluster.python = 'export PYTHONPATH=' + \ + self.cluster.scripts_rundir+'/../; '+self.cluster.python print('>>> DART-WRF experiment initialized. 
') print('------------------------------------------------------') - + def prepare_WRFrundir(self, init_time): """Prepare WRF run directories for all ensemble members - + Note: Optionally copy input sounding profiles to WRF run directories if defined in cfg.py @@ -160,9 +132,9 @@ class WorkFlows(object): Returns: None """ - cmd = self.cluster.python+' '+self.cluster.scripts_rundir+'/prepare_wrfrundir.py '+init_time.strftime('%Y-%m-%d_%H:%M') - print(cmd) - os.system(cmd) + cmd = 'python '+self.cluster.scripts_rundir + \ + '/prepare_wrfrundir.py '+init_time.strftime('%Y-%m-%d_%H:%M') + shell(cmd) def generate_obsseq_out(self, times, depends_on=None): """Creates observations from a nature run for a list of times @@ -175,23 +147,25 @@ class WorkFlows(object): """ times_str = ','.join([t.strftime('%Y-%m-%d_%H:%M') for t in times]) - cmd = self.cluster.python+' '+self.cluster.scripts_rundir+'/obs/create_obsseq_out.py '+times_str + cmd = self.cluster.python+' '+self.cluster.scripts_rundir + \ + '/obs/create_obsseq_out.py '+times_str - id = self.cluster.run_job(cmd, "obsgen-"+self.exp.expname, cfg_update={"ntasks": "12", "time": "30", - "mem": "50G", "ntasks-per-node": "12", "ntasks-per-core": "2"}, depends_on=[depends_on]) + id = self.cluster.run_job(cmd, "obsgen-"+self.exp.expname, + cfg_update={"ntasks": "20", "time": "30", "mem": "200G", "ntasks-per-node": "20"}, depends_on=[depends_on]) return id def run_ideal(self, depends_on=None): """Run WRF's ideal.exe for every ensemble member - + Args: depends_on (str, optional): job ID of a previous job after which to run this job - + Returns: str: job ID of the submitted job """ - cmd = """# run ideal.exe in parallel + cmd = self.cluster.wrf_modules+""" export SLURM_STEP_GRES=none + # run ideal.exe in parallel for ((n=1; n<="""+str(self.exp.n_ens)+"""; n++)) do rundir="""+self.cluster.wrf_rundir_base+'/'+self.exp.expname+"""/$n @@ -209,8 +183,9 @@ class WorkFlows(object): mv $rundir/rsl.out.0000 $rundir/rsl.out.input done """ - id = self.cluster.run_job(cmd, "ideal-"+self.exp.expname, cfg_update={"ntasks": str(self.exp.n_ens), - "time": "10", "mem": "100G"}, depends_on=[depends_on]) + id = self.cluster.run_job(cmd, "ideal-"+self.exp.expname, + cfg_update={"ntasks": "40", "ntasks-per-node": "40", + "time": "30", "mem": "200G"}, depends_on=[depends_on]) return id def wrfinput_insert_wbubble(self, perturb=True, depends_on=None): @@ -229,81 +204,90 @@ class WorkFlows(object): pstr = ' ' if perturb: pstr = ' perturb' - cmd = self.cluster.python+' '+self.cluster.scripts_rundir+'/create_wbubble_wrfinput.py'+pstr + cmd = self.cluster.python+' '+self.cluster.scripts_rundir + \ + '/create_wbubble_wrfinput.py'+pstr - id = self.cluster.run_job(cmd, "ins_wbub-"+self.exp.expname, cfg_update={"time": "5"}, depends_on=[depends_on]) + id = self.cluster.run_job( + cmd, "ins_wbub-"+self.exp.expname, cfg_update={"time": "5"}, depends_on=[depends_on]) return id - def run_ENS(self, begin, end, depends_on=None, first_minutes=False, - input_is_restart=True, output_restart_interval=720, hist_interval=5, radt=5): + def run_ENS(self, begin, end, first_second=False, + input_is_restart=True, output_restart_interval=360, hist_interval_s=300, + depends_on=None): """Run the forecast ensemble Args: begin (datetime): start time of the forecast end (datetime): end time of the forecast depends_on (str, optional): job ID of a previous job after which to run this job - first_minutes (bool, optional): if True, get wrfout of first 2 minutes + first_second (bool, optional): if True, get 
wrfout of first second input_is_restart (bool, optional): if True, start WRF from WRFrst file (restart mode) output_restart_interval (int, optional): interval in minutes between output of WRFrst files - hist_interval (float, optional): interval in minutes between output of WRF history files - radt (int, optional): time step of radiation scheme + hist_interval_s (float, optional): interval in seconds between output of WRF history files Returns: str: job ID of the submitted job """ - def prepare_WRF_inputfiles(begin, end, hist_interval_s=300, radt=5, output_restart_interval=False, depends_on=None): - + def prepare_WRF_inputfiles(begin, end, hist_interval_s=300, radt=1, + output_restart_interval=False, depends_on=None): + args = [self.cluster.python, self.cluster.scripts_rundir+'/prepare_namelist.py', - begin.strftime('%Y-%m-%d_%H:%M:%S'), end.strftime('%Y-%m-%d_%H:%M:%S'), + begin.strftime( + '%Y-%m-%d_%H:%M:%S'), end.strftime('%Y-%m-%d_%H:%M:%S'), str(hist_interval_s), '--radt='+str(radt), '--restart='+restart_flag,] - if output_restart_interval: - args.append('--restart_interval='+str(int(float(output_restart_interval)))) + if output_restart_interval != False: + args.append('--restart_interval=' + + str(int(float(output_restart_interval)))) + + return self.cluster.run_job(' '.join(args), "preWRF", + cfg_update=dict(time="2"), depends_on=[depends_on]) - return self.cluster.run_job(' '.join(args), "preWRF", - cfg_update=dict(time="2"), depends_on=[depends_on]) - id = depends_on restart_flag = '.false.' if not input_is_restart else '.true.' wrf_cmd = script_to_str(self.cluster.run_WRF - ).replace('<exp.expname>', self.exp.expname - ).replace('<cluster.wrf_rundir_base>', self.cluster.wrf_rundir_base - ).replace('<cluster.wrf_modules>', self.cluster.wrf_modules, - ).replace('<exp.np_WRF>', str(self.cluster.np_WRF)) - - # every minute output within first 2 minutes (needed for validating a radiance assimilation) - if first_minutes: - id = prepare_WRF_inputfiles(begin, begin+dt.timedelta(minutes=2), - hist_interval_s=30, # to get an output every 30 seconds - radt = 1, # to get a cloud fraction CFRAC after 1 minute - output_restart_interval=output_restart_interval, - depends_on=id) - - id = self.cluster.run_job(wrf_cmd, "WRF-"+self.exp.expname, - cfg_update={"array": "1-"+str(self.cluster.size_WRF_jobarray), "ntasks": "10", "nodes": "1", - "time": "10", "mem": "40G"}, + ).replace('<exp.expname>', self.exp.expname + ).replace('<cluster.wrf_rundir_base>', self.cluster.wrf_rundir_base + ).replace('<cluster.wrf_modules>', self.cluster.wrf_modules, + ).replace('<exp.np_WRF>', str(self.cluster.np_WRF)) + + if first_second: + id = prepare_WRF_inputfiles(begin, begin+dt.timedelta(seconds=1), + hist_interval_s=1, # to get an output every 1 s + radt=0, # to get a cloud fraction CFRAC after 1 s + output_restart_interval=output_restart_interval, + depends_on=id) + + id = self.cluster.run_job(wrf_cmd, "WRF-"+self.exp.expname, + cfg_update={"array": "1-"+str(self.cluster.size_WRF_jobarray), + "nodes": "1", "ntasks": str(self.cluster.np_WRF), "ntasks-per-core": "1", + "time": "5", "mem": "100G"}, depends_on=[id]) - # forecast for the whole forecast duration - id = prepare_WRF_inputfiles(begin, end, - hist_interval_s=hist_interval*60, - radt=radt, + # forecast for the whole forecast duration + id = prepare_WRF_inputfiles(begin, end, + hist_interval_s=hist_interval_s, output_restart_interval=output_restart_interval, depends_on=id) time_in_simulation_hours = (end-begin).total_seconds()/3600 - 
runtime_wallclock_mins_expected = int(8+time_in_simulation_hours*9) # usually below 9 min/hour - cfg_update = {"array": "1-"+str(self.cluster.size_WRF_jobarray), "ntasks": "10", "nodes": "1", - "time": str(runtime_wallclock_mins_expected), "mem": "40G", } - # if runtime_wallclock_mins_expected > 10: - # cfg_update.update({"nodelist": "jet05"}) - id = self.cluster.run_job(wrf_cmd, "WRF-"+self.exp.expname, cfg_update=cfg_update, depends_on=[id]) + runtime_wallclock_mins_expected = int( + time_in_simulation_hours*30 + 10) # usually <15 min/hour + cfg_update = {"array": "1-"+str(self.cluster.size_WRF_jobarray), + "nodes": "1", "ntasks": str(self.cluster.np_WRF), "ntasks-per-core": "1", + "time": str(runtime_wallclock_mins_expected), "mem": "90G", } + + if runtime_wallclock_mins_expected > 25: + cfg_update.update({"partition": "amd"}) + # #cfg_update.update({"exclude": "jet03"}) + + id = self.cluster.run_job( + wrf_cmd, "WRF-"+self.exp.expname, cfg_update=cfg_update, depends_on=[id]) return id - - def assimilate(self, assim_time, prior_init_time, prior_valid_time, prior_path_exp, - depends_on=None): + def assimilate(self, assim_time, prior_init_time, prior_valid_time, prior_path_exp, + depends_on=None): """Creates observations from a nature run and assimilates them. Args: @@ -318,16 +302,16 @@ class WorkFlows(object): raise IOError('prior_path_exp does not exist: '+prior_path_exp) cmd = (self.cluster.python+' '+self.cluster.scripts_rundir+'/assimilate.py ' - +assim_time.strftime('%Y-%m-%d_%H:%M ') - +prior_init_time.strftime('%Y-%m-%d_%H:%M ') - +prior_valid_time.strftime('%Y-%m-%d_%H:%M ') - +prior_path_exp) - - id = self.cluster.run_job(cmd, "Assim-"+self.exp.expname, cfg_update={"ntasks": "12", "time": "60", - "mem": "60G", "ntasks-per-node": "12", "ntasks-per-core": "2"}, depends_on=[depends_on]) + + assim_time.strftime('%Y-%m-%d_%H:%M ') + + prior_init_time.strftime('%Y-%m-%d_%H:%M ') + + prior_valid_time.strftime('%Y-%m-%d_%H:%M ') + + prior_path_exp) + + id = self.cluster.run_job(cmd, "Assim-"+self.exp.expname, + cfg_update={"ntasks": "20", "time": "30", "mem": "110G", + "ntasks-per-node": "20", "ntasks-per-core": "1"}, depends_on=[depends_on]) return id - def prepare_IC_from_prior(self, prior_path_exp, prior_init_time, prior_valid_time, new_start_time=None, depends_on=None): """Create initial conditions from prior wrfrst files @@ -339,7 +323,7 @@ class WorkFlows(object): This hack allows you to use a prior of a different time than your forecast start time. Usually, you don't want to do this. 
depends_on (str, optional): job ID of a previous job after which to run this job - + Returns: str: job ID of the submitted job """ @@ -349,14 +333,14 @@ class WorkFlows(object): tnew = '' cmd = (self.cluster.python+' '+self.cluster.scripts_rundir+'/prep_IC_prior.py ' - +prior_path_exp - +prior_init_time.strftime(' %Y-%m-%d_%H:%M') - +prior_valid_time.strftime(' %Y-%m-%d_%H:%M') - +tnew) - id = self.cluster.run_job(cmd, "IC-prior-"+self.exp.expname, cfg_update=dict(time="18"), depends_on=[depends_on]) + + prior_path_exp + + prior_init_time.strftime(' %Y-%m-%d_%H:%M') + + prior_valid_time.strftime(' %Y-%m-%d_%H:%M') + + tnew) + id = self.cluster.run_job( + cmd, "IC-prior-"+self.exp.expname, cfg_update=dict(time="18"), depends_on=[depends_on]) return id - def update_IC_from_DA(self, assim_time, depends_on=None): """Update existing initial conditions with the output from the assimilation @@ -367,53 +351,58 @@ class WorkFlows(object): Returns: str: job ID of the submitted job """ - cmd = self.cluster.python+' '+self.cluster.scripts_rundir+'/update_IC.py '+assim_time.strftime('%Y-%m-%d_%H:%M') - id = self.cluster.run_job(cmd, "IC-update-"+self.exp.expname, cfg_update=dict(time="18"), depends_on=[depends_on]) + cmd = self.cluster.python+' '+self.cluster.scripts_rundir + \ + '/update_IC.py '+assim_time.strftime('%Y-%m-%d_%H:%M') + id = self.cluster.run_job(cmd, "IC-update-"+self.exp.expname, + cfg_update=dict(time="18"), depends_on=[depends_on]) return id - def create_satimages(self, init_time, depends_on=None): """Run a job array, one job per ensemble member, to create satellite images""" - cmd = 'module purge; module load rttov; ' \ + cmd = 'module purge; module load rttov/v13.2-gcc-8.5.0; ' \ + 'python ~/RTTOV-WRF/run_init.py '+self.cluster.archivedir+init_time.strftime('/%Y-%m-%d_%H:%M/ ') \ + '$SLURM_ARRAY_TASK_ID' - id = self.cluster.run_job(cmd, "RTTOV-"+self.exp.expname, + id = self.cluster.run_job(cmd, "RTTOV-"+self.exp.expname, cfg_update={"ntasks": "1", "time": "60", "mem": "10G", "array": "1-"+str(self.exp.n_ens)}, depends_on=[depends_on]) return id - def gen_obsseq(self, depends_on=None): """(not included in DART-WRF)""" cmd = self.cluster.python+' '+self.cluster.scripts_rundir+'/obsseq_to_netcdf.py' - id = self.cluster.run_job("obsseq_netcdf", cfg_update={"time": "10", "mail-type": "FAIL,END"}, - depends_on=[depends_on]) + id = self.cluster.run_job("obsseq_netcdf", cfg_update={"time": "10", "mail-type": "FAIL,END"}, + depends_on=[depends_on]) return id - + def evaluate_obs_posterior_after_analysis(self, init, valid, depends_on=None): - - cmd = self.cluster.python+' '+self.cluster.scripts_rundir+'/evaluate_obs_space.py '+init.strftime('%Y-%m-%d_%H:%M,') + valid.strftime('%Y-%m-%d_%H:%M:%S') - id = self.cluster.run_job(cmd, 'eval+1'+self.exp.expname, cfg_update={"ntasks": "12", "mem": "50G", "ntasks-per-node": "12", "ntasks-per-core": "2", - "time": "15", "mail-type": "FAIL"}, - depends_on=[depends_on]) - - cmd = self.cluster.python+' '+self.cluster.scripts_rundir+'/calc_linear_posterior.py '+init.strftime('%Y-%m-%d_%H:%M') - id = self.cluster.run_job(cmd, 'linpost'+self.exp.expname, cfg_update={"ntasks": "12", "mem": "50G", "ntasks-per-node": "12", "ntasks-per-core": "2", - "time": "15", "mail-type": "FAIL"}, - depends_on=[id]) + + cmd = self.cluster.python+' '+self.cluster.scripts_rundir+'/evaluate_obs_space.py ' + \ + init.strftime('%Y-%m-%d_%H:%M,') + \ + valid.strftime('%Y-%m-%d_%H:%M:%S') + id = self.cluster.run_job(cmd, 'eval+1'+self.exp.expname, cfg_update={"ntasks": "16", 
"mem": "80G", "ntasks-per-node": "16", "ntasks-per-core": "2", + "time": "9", "mail-type": "FAIL"}, + depends_on=[depends_on]) + + # cmd = self.cluster.python+' '+self.cluster.scripts_rundir + \ + # '/calc_linear_posterior.py '+init.strftime('%Y-%m-%d_%H:%M') + # id = self.cluster.run_job(cmd, 'linpost'+self.exp.expname, cfg_update={"ntasks": "16", "mem": "80G", "ntasks-per-node": "16", "ntasks-per-core": "2", + # "time": "15", "mail-type": "FAIL"}, + # depends_on=[id]) return id def verify_sat(self, depends_on=None): """(not included in DART-WRF)""" - cmd = '/jetfs/home/lkugler/miniforge3/envs/verif/bin/python /jetfs/home/lkugler/osse_analysis/plot_from_raw/analyze_fc.py '+self.exp.expname+' has_node sat verif1d FSS BS' + cmd = self.cluster.python+' /jetfs/home/lkugler/osse_analysis/plot_from_raw/analyze_fc.py ' + \ + self.exp.expname+' '+self.exp.nature_exp + ' sat has_node np=2 mem=110G' - self.cluster.run_job(cmd, "verif-SAT-"+self.exp.expname, - cfg_update={"time": "60", "mail-type": "FAIL,END", "ntasks": "15", - "ntasks-per-node": "15", "ntasks-per-core": "1", "mem": "100G",}, depends_on=[depends_on]) + self.cluster.run_job(cmd, "verif-SAT-"+self.exp.expname, + cfg_update={"time": "60", "mail-type": "FAIL,END", "ntasks": "2", + "ntasks-per-node": "1", "ntasks-per-core": "2", "mem": "110G", }, depends_on=[depends_on]) def verify_wrf(self, depends_on=None): """(not included in DART-WRF)""" - cmd = '/jetfs/home/lkugler/miniforge3/envs/verif/bin/python /jetfs/home/lkugler/osse_analysis/plot_from_raw/analyze_fc.py '+self.exp.expname+' has_node wrf verif1d FSS BS' - - self.cluster.run_job(cmd, "verif-WRF-"+self.exp.expname, - cfg_update={"time": "180", "mail-type": "FAIL,END", "ntasks": "21", - "ntasks-per-node": "21", "ntasks-per-core": "1", "mem": "230G"}, depends_on=[depends_on]) + cmd = self.cluster.python+' /jetfs/home/lkugler/osse_analysis/plot_from_raw/analyze_fc.py ' + \ + self.exp.expname+' '+self.exp.nature_exp + ' wrf has_node np=10 mem=250G' + + self.cluster.run_job(cmd, "verif-WRF-"+self.exp.expname, + cfg_update={"time": "210", "mail-type": "FAIL,END", "ntasks": "10", + "ntasks-per-node": "10", "ntasks-per-core": "1", "mem": "250G"}, depends_on=[depends_on]) diff --git a/dartwrf/wrfout_add_geo.py b/dartwrf/wrfout_add_geo.py index 86c7ae8417ee41edea4d5cd9a6d63b524f433934..15c1020a600ffaf8b5f79befa8fe6f437ce9d2cf 100755 --- a/dartwrf/wrfout_add_geo.py +++ b/dartwrf/wrfout_add_geo.py @@ -1,11 +1,16 @@ -import os, sys +import os +import sys import netCDF4 as nc from dartwrf.server_config import cluster -fields_old = ["XLAT_M", "XLONG_M", "CLAT", - "XLONG_U", "XLONG_V", "XLAT_U", "XLAT_V"] -fields_new = ["XLAT", "XLONG", "CLAT", - "XLONG_U", "XLONG_V", "XLAT_U", "XLAT_V"] +fields_old = ["XLAT_M", "XLONG_M",] + # "XLONG_U", "XLONG_V", + # "XLAT_U", "XLAT_V"] +# DART expects XLAT, XLAT_U, XLAT_V, XLONG_U, XLONG_V +fields_new = ["XLAT", "XLONG",] + # "XLONG_U", "XLONG_V", + # "XLAT_U", "XLAT_V"] + def run(geo_data_file, wrfout_file): """Add geogrid data to a wrfout file @@ -13,11 +18,11 @@ def run(geo_data_file, wrfout_file): Takes LAT,LON, mapfac from geogrid, so that they are consistent. 
Does not change E, F, HGT_M as they would alter the dynamics and have no impact on assimilation - + Args: geo_data_file (str): Path to WRF's geo_em file wrfout_file (str): Path to WRF history (wrfout) file - + Returns: None """ @@ -27,12 +32,27 @@ def run(geo_data_file, wrfout_file): print('updating geodata in', wrfout_file, 'from', geo_data_file) geo_ds = nc.Dataset(geo_data_file, 'r') wrfinp_ds = nc.Dataset(wrfout_file, 'r+') + print('wrfinput.variables', list(wrfinp_ds.variables)) + print('geo_em.variables', list(geo_ds.variables)) for old, new in zip(fields_old, fields_new): - if debug: - print('moving old field', old, 'into new field', new) - print(geo_ds.variables[old][:].shape, wrfinp_ds.variables[new][:].shape) - wrfinp_ds.variables[new][:] = geo_ds.variables[old][:] + + # check + if old not in list(geo_ds.variables): + if old.endswith('_M'): + old = old[:-2] # without _M + + if old not in list(geo_ds.variables): + raise KeyError(old, 'not in', geo_data_file, 'variables') + + geo_em_coord = geo_ds.variables[old][:] + + # check + if new not in list(wrfinp_ds.variables): + raise KeyError(new, 'not in', wrfout_file, 'variables', + 'however, DART expects this variable to localize impact etc!') + + wrfinp_ds.variables[new][:] = geo_em_coord wrfinp_ds.close() geo_ds.close() diff --git a/free_forecast.py b/free_forecast.py index 4bad85707b46cd32339bc415593ddd73ef690683..17286a898f7317ac1f909ba7dcf8404fe1c439c7 100755 --- a/free_forecast.py +++ b/free_forecast.py @@ -2,179 +2,201 @@ """ running the forecast model without assimilation """ -import os, sys, shutil +import os +import sys import datetime as dt import pandas as pd from dartwrf.workflows import WorkFlows -w = WorkFlows(exp_config='exp_template.py', server_config='jet.py') +w = WorkFlows(exp_config='exp_noda.py', server_config='jet.py') id = None -if False: # generate_nature +if False: # generate_nature begin = dt.datetime(2008, 7, 30, 7) - id = w.prepare_WRFrundir(begin) # create initial conditions + w.prepare_WRFrundir(begin) # create initial conditions id = w.run_ideal(depends_on=id) - #id = wrfinput_insert_wbubble(perturb=False, depends_on=id) + # id = wrfinput_insert_wbubble(perturb=False, depends_on=id) end = dt.datetime(2008, 7, 30, 12) id = w.run_ENS(begin=begin, end=end, - input_is_restart=False, - output_restart_interval=(end-begin).total_seconds()/60, - depends_on=id) + input_is_restart=False, + output_restart_interval=(end-begin).total_seconds()/60, + depends_on=id) # id = w.create_satimages(begin, depends_on=id) if False: # to continue a nature - start = dt.datetime(2008, 7, 30, 12) - id = w.prepare_WRFrundir(start) # create initial conditions + start = dt.datetime(2008, 7, 30, 13, 45) + w.prepare_WRFrundir(start) # create initial conditions # id = w.run_ideal(depends_on=id) - prior_path_exp = '/jetfs/scratch/lkugler/data/sim_archive//exp_v1.18_P1_nature+1/' # w.cluster.archivedir + # w.cluster.archivedir + prior_path_exp = '/jetfs/scratch/lkugler/data/sim_archive//exp_v1.18_P1_nature+1b/' time = start - prior_init_time = dt.datetime(2008, 7, 30, 6) - end = start + dt.timedelta(minutes=30) + prior_init_time = dt.datetime(2008, 7, 30, 13, 30) + end = start + dt.timedelta(minutes=15) + + id = w.prepare_IC_from_prior(prior_path_exp, prior_init_time, start, + depends_on=id) + id = w.run_ENS(begin=start, end=end, + input_is_restart=True, + first_second=True, # to get a +1 minute forecast after each restart + # output_restart_interval=(end-start).total_seconds()/60, + output_restart_interval=9999, + depends_on=id) - 
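# Illustrative note (a sketch only, relying on pandas' default inclusive
# date_range behaviour): the restart cycle set up below steps through
#
#   import pandas as pd
#   import datetime as dt
#   restarts = pd.date_range(start=dt.datetime(2008, 7, 30, 12, 30),
#                            end=dt.datetime(2008, 7, 30, 14),
#                            freq=dt.timedelta(minutes=30))
#   # -> 12:30, 13:00, 13:30, 14:00 UTC
#
# i.e. each iteration pulls initial conditions from the prior experiment,
# integrates the ensemble to the next restart time and renders satellite images.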
restarts = pd.date_range(start=dt.datetime(2008, 7, 30, 12,30), - end=dt.datetime(2008, 7, 30, 14), - freq=dt.timedelta(minutes=30)) + + restarts = pd.date_range(start=dt.datetime(2008, 7, 30, 12, 30), + end=dt.datetime(2008, 7, 30, 14), + freq=dt.timedelta(minutes=30)) for i, next_restart in enumerate(restarts): prior_valid_time = time - id = w.prepare_IC_from_prior(prior_path_exp, prior_init_time, prior_valid_time, - depends_on=id) + id = w.prepare_IC_from_prior(prior_path_exp, prior_init_time, prior_valid_time, + depends_on=id) # integration time start = time end = next_restart id = w.run_ENS(begin=start, end=end, - input_is_restart=True, - first_minute=True, # to get a +1 minute forecast after each restart - output_restart_interval=(end-start).total_seconds()/60, - #output_restart_interval=9999, - depends_on=id) - + input_is_restart=True, + first_second=True, # to get a +1 minute forecast after each restart + output_restart_interval=(end-start).total_seconds()/60, + # output_restart_interval=9999, + depends_on=id) + w.create_satimages(start, depends_on=id) prior_init_time = time # this iteration's start time = end # this iteration's end = next iteration's start - + # after last restart prior_valid_time = time - id = w.prepare_IC_from_prior(prior_path_exp, prior_init_time, prior_valid_time, - depends_on=id) - + id = w.prepare_IC_from_prior(prior_path_exp, prior_init_time, prior_valid_time, + depends_on=id) + end = time + dt.timedelta(minutes=5) id = w.run_ENS(begin=time, end=end, - input_is_restart=True, - first_minute=True, # to get a +1 minute forecast after each restart - output_restart_interval=9999, - depends_on=id) - - w.create_satimages(start, depends_on=id) + input_is_restart=True, + first_second=True, # to get a +1 minute forecast after each restart + output_restart_interval=9999, + depends_on=id) + w.create_satimages(start, depends_on=id) if True: # do a free run (all inits) - begin = dt.datetime(2008, 7, 30, 12,30) - #id = w.prepare_WRFrundir(begin) # create initial conditions - # id = w.run_ideal(depends_on=id) - - #id = wrfinput_insert_wbubble(perturb=True, depends_on=id) - restarts = pd.date_range(start=dt.datetime(2008, 7, 30, 13), - end=dt.datetime(2008, 7, 30, 14,30), - freq=dt.timedelta(minutes=30)) - # restarts = [dt.datetime(2008, 7, 30, 12, 30)] + inits = [dt.datetime(2008, 7, 30, 8)] + inits += list(pd.date_range(start=dt.datetime(2008, 7, 30, 11), + end=dt.datetime(2008, 7, 30, 14), + freq=dt.timedelta(minutes=15))) + + input_is_restart = False - input_is_restart = True - time = begin - last_init = dt.datetime(2008, 7, 30, 12) + # w.prepare_WRFrundir(inits[0]) # create initial conditions + #id = w.run_ideal(depends_on=id) + #sys.exit() - for i, next_restart in enumerate(restarts): - print('run_WRF from', time, 'to', next_restart) + # id = wrfinput_insert_wbubble(perturb=True, depends_on=id) + time = inits[0] + last_init = dt.datetime(2008, 7, 30, 8) + + for i, next_restart in enumerate(inits[1:]): + print('run_WRF from', time, 'to', next_restart, 'rst intv', (next_restart-time).total_seconds()/60) - prior_path_exp = '/jetfs/scratch/lkugler/data/sim_archive/exp_v1.19_P2_noDA+1/' #w.cluster.archivedir + prior_path_exp = w.cluster.archivedir #'/jetfs/scratch/lkugler/data/sim_archive/exp_v1.23_P2_noDA+1/' prior_init_time = last_init prior_valid_time = time - id = w.prepare_IC_from_prior(prior_path_exp, prior_init_time, prior_valid_time, depends_on=id) + + if input_is_restart: + id = w.prepare_IC_from_prior( + prior_path_exp, prior_init_time, prior_valid_time, 
depends_on=id) - id = w.run_ENS(begin=time, end=next_restart, - input_is_restart=input_is_restart, - output_restart_interval=(next_restart-time).total_seconds()/60, - first_minute=True, - #output_restart_interval=720, - depends_on=id) + id = w.run_ENS(begin=time, end=next_restart, + input_is_restart=input_is_restart, + output_restart_interval=(next_restart-time).total_seconds()/60, + depends_on=id) + id_sat = w.create_satimages(time, depends_on=id) last_init = time time = next_restart input_is_restart = True - id_sat = w.create_satimages(last_init, depends_on=id) + # free run, no restart files anymore - # end = dt.datetime(2008, 7, 30, 18) - # print('run WRF from', time, 'until', end) - # id = w.run_ENS(begin=time, end=end, - # input_is_restart=input_is_restart, - # #output_restart_interval=(next_restart-time).total_seconds()/60, - # output_restart_interval=9999, - # depends_on=id) - + prior_init_time = last_init + prior_valid_time = time + id = w.prepare_IC_from_prior(prior_path_exp, prior_init_time, prior_valid_time, depends_on=id) + end = dt.datetime(2008, 7, 30, 18) + print('run WRF from', time, 'until', end) + id = w.run_ENS(begin=time, end=end, + input_is_restart=input_is_restart, + #output_restart_interval=(next_restart-time).total_seconds()/60, + output_restart_interval=9999, + depends_on=id) + id = w.create_satimages(time, depends_on=id) + w.verify_wrf(depends_on=id) - #id = w.create_satimages(time, depends_on=id) w.verify_sat(depends_on=id_sat) if False: # to continue a free run - start = dt.datetime(2008, 7, 30, 7) + start = dt.datetime(2008, 7, 30, 13, 30) + end = dt.datetime(2008, 7, 30, 18) - id = w.prepare_WRFrundir(start) # create initial conditions - - prior_path_exp = '/jetfs/scratch/lkugler/data/sim_archive/exp_v1.19_P2_noDA' # w.cluster.archivedir - prior_init_time = dt.datetime(2008, 7, 30, 7) - prior_valid_time = start - - id = w.prepare_IC_from_prior(prior_path_exp, prior_init_time, prior_valid_time, depends_on=id) + w.prepare_WRFrundir(start) + id = w.run_ideal(depends_on=id) - id = w.run_ENS(begin=start, end=end, - input_is_restart=True, - output_restart_interval=(end-start).total_seconds()/60, - #output_restart_interval=9999, - depends_on=id) - - id = w.create_satimages(start, depends_on=id) - w.verify(depends_on=id) + # prior_path_exp = w.cluster.archivedir + # # prior_path_exp = '/jetfs/scratch/lkugler/data/sim_archive/exp_v1.23_P2_noDA+1' + # prior_init_time = dt.datetime(2008, 7, 30, 13, 15) + # prior_valid_time = start + + # id = w.prepare_IC_from_prior( + # prior_path_exp, prior_init_time, prior_valid_time, depends_on=id) + + # id = w.run_ENS(begin=start, end=end, + # input_is_restart=True, + # #output_restart_interval=(end-start).total_seconds()/60, + # output_restart_interval=9999, + # depends_on=id) + # id = w.create_satimages(start, depends_on=id) + # w.verify_sat(id) + # w.verify_wrf(id) if False: # to continue a free run after spinup - start = dt.datetime(2008, 7, 30, 13,30) + start = dt.datetime(2008, 7, 30, 12) end = dt.datetime(2008, 7, 30, 14) - id = w.prepare_WRFrundir(start) # create initial conditions + w.prepare_WRFrundir(start) # create initial conditions # id = w.run_ideal(depends_on=id) - prior_path_exp = '/jetfs/home/lkugler/data/sim_archive/exp_v1.19_P2_noDA' # w.cluster.archivedir + # w.cluster.archivedir + prior_path_exp = '/jetfs/home/lkugler/data/sim_archive/exp_v1.19_P2_noDA' prior_init_time = dt.datetime(2008, 7, 30, 13) - prior_valid_time = dt.datetime(2008, 7, 30, 13,30) + prior_valid_time = dt.datetime(2008, 7, 30, 13, 30) - id = 
w.prepare_IC_from_prior(prior_path_exp, prior_init_time, prior_valid_time, - # new_start_time=start, # <---------- to overwrite start time / leads to a call to `create_updated_wrfinput_from_wrfout()` - depends_on=id) + id = w.prepare_IC_from_prior(prior_path_exp, prior_init_time, prior_valid_time, + # new_start_time=start, # <---------- to overwrite start time / leads to a call to `create_updated_wrfinput_from_wrfout()` + depends_on=id) - #frequency_restart = (end-start).total_seconds()/60 + # frequency_restart = (end-start).total_seconds()/60 frequency_restart = dt.timedelta(minutes=30).total_seconds()/60 id = w.run_ENS(begin=start, end=end, - input_is_restart=True, - output_restart_interval=frequency_restart, - #output_restart_interval=9999, - depends_on=id) - + input_is_restart=True, + output_restart_interval=frequency_restart, + # output_restart_interval=9999, + depends_on=id) + # id = w.create_satimages(start, depends_on=id) - + # # continue now with free run # # no restart files anymore # prior_path_exp = w.cluster.archivedir @@ -192,4 +214,3 @@ if False: # to continue a free run after spinup # depends_on=id) # id = w.create_satimages(start, depends_on=id) # w.verify(depends_on=id) - diff --git a/generate_observations.py b/generate_observations.py index ae1c68dae496029ec34a561cf638b40dca4ef787..89d3f96035b15a8b2d20056fc38a384647ae5f63 100755 --- a/generate_observations.py +++ b/generate_observations.py @@ -3,15 +3,12 @@ Generate observation files from an experiment """ import datetime as dt +import pandas as pd from dartwrf.workflows import WorkFlows -w = WorkFlows(exp_config='nature.py', server_config='jet.py') +w = WorkFlows(exp_config='exp_hires.py', server_config='jet.py') -obs_times = [dt.datetime(2008,7,30,12,15),] -# dt.datetime(2008,7,30,12), dt.datetime(2008,7,30,12,1), -# dt.datetime(2008,7,30,12,30), dt.datetime(2008,7,30,12,31), -# dt.datetime(2008,7,30,13), dt.datetime(2008,7,30,13,1), -# dt.datetime(2008,7,30,13,30), dt.datetime(2008,7,30,13,31), -# dt.datetime(2008,7,30,14), dt.datetime(2008,7,30,14,1),] +#obs_times = [dt.datetime(2008,7,30,14,),] +obs_times = pd.date_range(start='2008-07-30 13:00:00', end='2008-07-30 14:00:00', freq='15min') w.generate_obsseq_out(obs_times) diff --git a/setup.py b/setup.py index 2f41a7dba502c1d8d3bfe31edf70cf9d44fb7ee0..c103a24be98715aa38e76119274050ea2514ff59 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def read_requirements(fname): setuptools.setup( name="dartwrf", - version="2024.1", + version="2025.1", author="Lukas Kugler", author_email="lukas.kugler@univie.ac.at", description="Observing system simulation experiments with WRF and DART", diff --git a/tests/test_inputnml.py b/tests/test_inputnml.py index 78bf73170855de7af78fab3093dcf3e95e890104..286471198ce18419f999a2f53e5fd8f8c69e1c81 100644 --- a/tests/test_inputnml.py +++ b/tests/test_inputnml.py @@ -6,6 +6,10 @@ from dartwrf import dart_nml def test_input_nml(): + """A simple test, to read an existing input.nml, modify one parameter and save it again. + + The test is successful if the saved input.nml is identical to the desired output. 
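    A stand-alone sketch of that success criterion (the test itself compares
    the parsed namelist values one by one, so a mismatch reports which entry
    differs):

        with open('./input.nml.output') as got, \
             open('./input.nml.desired_output') as want:
            assert got.read() == want.read()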
+ """ test_input = './input.nml.original' test_output = './input.nml.output' desired_output = './input.nml.desired_output' @@ -49,9 +53,9 @@ def test_input_nml(): pass # print(this, expected) else: - raise ValueError('expected', expected, 'got', have[i][j]) + raise ValueError('expected: '+(expected)+' got: '+have[i][j]+' this: '+this) - os.remove(test_output) + # os.remove(test_output) def test_get_list_of_localizations(): @@ -62,4 +66,4 @@ def test_get_list_of_localizations(): if __name__ == '__main__': test_input_nml() - test_get_list_of_localizations() \ No newline at end of file + #test_get_list_of_localizations() \ No newline at end of file
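
For reference, a minimal sketch of a free-forecast driver using the updated
WorkFlows interface (the renamed first_second / hist_interval_s arguments of
run_ENS and the split verify_sat / verify_wrf calls). It mirrors the call
pattern of free_forecast.py above; the exp_config file name is a placeholder,
and verify_sat()/verify_wrf() assume that exp.nature_exp is set in that config:

    import datetime as dt
    from dartwrf.workflows import WorkFlows

    # 'exp_example.py' is a placeholder experiment config under config/
    w = WorkFlows(exp_config='exp_example.py', server_config='jet.py')

    begin = dt.datetime(2008, 7, 30, 12)
    end = dt.datetime(2008, 7, 30, 14)

    w.prepare_WRFrundir(begin)         # create WRF run directories for all members
    id = w.run_ideal(depends_on=None)  # initial conditions via ideal.exe

    id = w.run_ENS(begin=begin, end=end,
                   input_is_restart=False,
                   first_second=False,  # True adds a +1 s history output after the start
                   hist_interval_s=300,
                   output_restart_interval=(end - begin).total_seconds() / 60,
                   depends_on=id)

    id_sat = w.create_satimages(begin, depends_on=id)
    w.verify_sat(depends_on=id_sat)
    w.verify_wrf(depends_on=id)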