diff --git a/dartwrf/assim_synth_obs.py b/dartwrf/assim_synth_obs.py index 9d42cc437eec7403f4d0208b08907c2e1f9248c0..6c51da0ea32b58a3ca60b70edca3ac22af657e0f 100755 --- a/dartwrf/assim_synth_obs.py +++ b/dartwrf/assim_synth_obs.py @@ -18,30 +18,24 @@ wrfout_format = 'wrfout_d01_%Y-%m-%d_%H:%M:%S' def _prepare_DART_grid_template(): # DART needs a wrfinput file as a template for the grid # No data will be read from this file, but the grid information must match exactly. - symlink(cluster.dartrundir + "/prior_ens1/wrfout_d01", - cluster.dartrundir + "/wrfinput_d01") + symlink(cluster.dart_rundir + "/prior_ens1/wrfout_d01", + cluster.dart_rundir + "/wrfinput_d01") def _copy_nature_to_dart(time): """Copies wrfout_d01 from nature run to DART directory - - TODO: This is a bit of a hack, because it is not explicit about where to take the nature from. """ - # find the file in any init directory - fformat = 'wrfout_d01_%Y-%m-%d_%H:%M:%S' - f_nat = glob.glob(cluster.archive_base + '/' + exp.nature_expname + '/*/1/'+time.strftime(fformat))[0] - shutil.copy(f_nat, cluster.dart_rundir + "/wrfout_d01") + glob_pattern = time.strftime(exp.nature_wrfout_pattern) # replace time in pattern + print('searching for nature in pattern:', glob_pattern) + f_nat = glob.glob(glob_pattern)[0] # find the nature wrfout-file - # DART may need a wrfinput file as well ?! - symlink(cluster.dart_rundir + "/wrfout_d01", - cluster.dart_rundir + "/wrfinput_d01") - print("linked", f_nat, "to", cluster.dart_rundir + "/wrfout_d01") + # check user input + if not 'wrfout' in f_nat.split('/')[-1]: + warnings.warn(f+" does not contain 'wrfout' in filename, are you sure this is a valid nature file?") - f_wrfout_nature = time.strftime(exp.nature+'/'+wrfout_format) - assert os.path.exists(f_wrfout_nature) + # copy nature wrfout to DART directory + shutil.copy(f_nat, cluster.dart_rundir + "/wrfout_d01") - print("linking nature to DART & georeferencing") - shutil.copy(f_wrfout_nature, cluster.dartrundir + "/wrfout_d01") - print("linked", f_wrfout_nature, "to", cluster.dartrundir + "/wrfout_d01") + # add coordinates if necessary if cluster.geo_em_for_WRF_ideal: wrfout_add_geo.run(cluster.geo_em_for_WRF_ideal, cluster.dart_rundir + "/wrfout_d01") @@ -53,7 +47,7 @@ def prepare_nature_dart(time): """ try: _copy_nature_to_dart(time) - except FileExistsError: # if nature is not available due to any reason + except (FileNotFoundError, AttributeError) as e: # if nature is not available due to any reason print('-> has no nature, not copying nature') @@ -369,8 +363,13 @@ def get_obsseq_out(time): if exp.use_existing_obsseq != False: # use an existing obs_seq.out file f_obsseq = time.strftime(exp.use_existing_obsseq) - copy(f_obsseq, cluster.dart_rundir+'/obs_seq.out') + copy(f_obsseq, cluster.dart_rundir+'/obs_seq.out') # copy to run_DART folder print(f_obsseq, 'copied to', cluster.dart_rundir+'/obs_seq.out') + + # copy to sim_archive + os.makedirs(cluster.archivedir + "/obs_seq_out/", exist_ok=True) + copy(f_obsseq, time.strftime(cluster.archivedir+'/obs_seq_out/%Y-%m-%d_%H:%M_obs_seq.out')) + oso = obsseq.ObsSeq(cluster.dart_rundir + "/obs_seq.out") # read the obs_seq.out file else: # do NOT use an existing obs_seq.out file diff --git a/dartwrf/obs/create_obsseq_out.py b/dartwrf/obs/create_obsseq_out.py index af9eaa503b05dc6803879784d1db802fc5970ff9..8eb99b6b2d427eca50f9432a7951835c89968d28 100644 --- a/dartwrf/obs/create_obsseq_out.py +++ b/dartwrf/obs/create_obsseq_out.py @@ -1,14 +1,22 @@ import os, shutil, warnings -from dartwrf.utils import try_remove, print, shell +from dartwrf.utils import try_remove, print, shell, symlink import dartwrf.obs.create_obsseq_in as osi from dartwrf.obs import obsseq from dartwrf.exp_config import exp from dartwrf.server_config import cluster + +def _prepare_DART_grid_template(): + # DART needs a wrfinput file as a template for the grid + # No data will be read from this file, but the grid information must match exactly. + symlink(cluster.dart_rundir + "/wrfout_d01", + cluster.dart_rundir + "/wrfinput_d01") + def generate_obsseq_out(time): """Generate an obs_seq.out file from the current experiment + Expects an existing nature file in the cluster.dart_rundir Args: time (datetime): time of the observations @@ -47,6 +55,8 @@ def generate_obsseq_out(time): os.makedirs(dir_obsseq, exist_ok=True) osi.create_obs_seq_in(time, exp.observations) + + _prepare_DART_grid_template() run_perfect_model_obs() # generate observation, draws from gaussian print(" 2.1) obs preprocessing") diff --git a/dartwrf/obs/obsseq.py b/dartwrf/obs/obsseq.py index 49f3bd80498f2a3888014166b294af31cb187d44..b01c9ec6b75ff73577b2bb504a664d3154172cd2 100755 --- a/dartwrf/obs/obsseq.py +++ b/dartwrf/obs/obsseq.py @@ -36,6 +36,8 @@ import os, warnings import numpy as np import pandas as pd +missing_value = -888888.0 + def _plot_box(m, lat, lon, label="", **kwargs): """"Draw bounding box @@ -141,6 +143,61 @@ class ObsRecord(pd.DataFrame): assert np.allclose(Hx.mean(axis=1), self[what+' ensemble mean']) return Hx.values + + + def get_model_grid_indices(self, wrf_file_with_grid): + """Retrieve the grid indices closest to the observations + + Note: + Only the horizontal grid is considered + + Args: + wrf_file_with_grid (str): path to wrf file with grid information + + Returns: + pd.DataFrame (n_obs, 2) columns: i, j + """ + from scipy import spatial + import xarray as xr + + def find_index_from_coords_tree(tree, len_latitudes, lat=45., lon=0.): + """Find Lat & Lon indices in array + to find the state space values nearest to the observation + + Args: + len_latitudes (int) : usually xlat.shape[0] + actually this could also have to be len of longitudes (i dont know!) + but it works if len(xlon)==len(xlat) + + Returns: + ilat, ilon (int) + """ + dd, ii = tree.query([[lat, lon],]) + ilat = int(ii/len_latitudes) + ilon = int(ii % len_latitudes) + return ilat, ilon + + # load coordinates of wrf grid + grid = xr.open_dataset(wrf_file_with_grid) + xlat = grid.XLAT_M.values.squeeze() + xlon = grid.XLONG_M.values.squeeze() + + # build search tree + tree = spatial.KDTree(np.c_[xlat.ravel(), xlon.ravel()]) + + # get lat lon of observations + lon_lat = self.get_lon_lat() + + ilat_ilon = np.empty((len(lon_lat), 2), np.int32) + + # find indices of observations in wrf grid + for i, row in lon_lat.iterrows(): + ilat_ilon[i,:] = find_index_from_coords_tree(tree, xlat.shape[0], row.lat, row.lon) + + return pd.DataFrame(index=self.index, + data=dict(wrf_i=ilat_ilon[:,0], wrf_j=ilat_ilon[:,1])) + + def get_lon_lat(self): """Retrieve longitude and latitude of observations @@ -190,21 +247,26 @@ class ObsRecord(pd.DataFrame): return nlayers def superob(self, window_km): - """Select subset, average, overwrite existing obs with average - - TODO: allow different obs types (KIND) - TODO: loc3d overwrite with mean - Metadata is copied from the first obs in a superob-box + """Create super-observations (averaged observations) Note: This routine discards observations (round off) - e.g. 31 obs with 5 obs-window => obs #31 is not processed + e.g. 31 obs with 5 obs-window => obs #31 is not processed. + + Metadata is copied from the first observation in a superob-box + + The location (loc3d) of new observation is taken from the center observation + + TODO: allow different obs types (KIND) Args: window_km (numeric): horizontal window edge length includes obs on edge 25x25 km with 5 km obs density = average 5 x 5 observations + + Returns: + ObsRecord """ def calc_deg_from_km(distance_km, center_lat): """Approximately calculate distance in degrees from meters @@ -264,9 +326,17 @@ class ObsRecord(pd.DataFrame): out = self.drop(self.index) # this df will be filled boxes = [] - for i in range(0, nx+1 - win_obs, win_obs): + for i in range(0, nx+1 - win_obs, win_obs): + # i is the number of observations in x direction + # but in steps of "number of observations in superob window" + # i.e. i = 0, win_obs, 2*win_obs, 3*win_obs, ... + for j in range(0, nx+1 - win_obs, win_obs): + # same as i but in y direction + for k in range(0, nlayers): + # k is the index of the vertical layer + if debug: print(i,j,k) # find indices of observations within superob window @@ -301,7 +371,7 @@ class ObsRecord(pd.DataFrame): # average spread and other values for key in obs_box: if key in ['loc3d', 'kind', 'metadata', 'time']: - pass + pass # these parameters are not averaged elif 'spread' in key: # stdev of mean of values = sqrt(mean of variances) obs_mean.at[key] = np.sqrt((obs_box[key]**2).mean()) @@ -503,8 +573,16 @@ class ObsSeq(object): if "kind" in line: # find obs kind line_kind = i + 1 + # read values like 'observations', 'truth', 'prior ensemble mean' for k, key in enumerate(self.keys_for_values): - out[key] = float(lines[1+k].strip()) + + v = float(lines[1+k].strip()) # value in obs_seq file + + if v == missing_value: # e.g. -888888.0 + out[key] = np.nan + else: + out[key] = v + x, y, z, z_coord = lines[line_loc].split() out["loc3d"] = float(x), float(y), float(z), int(z_coord)