diff --git a/config/cfg.py b/config/cfg.py index b101eeec30d76adc33a2f39fb7e51aed6a3cf760..acae00f59829aab718aee2a18c87710c700da4ee 100755 --- a/config/cfg.py +++ b/config/cfg.py @@ -7,7 +7,7 @@ class ExperimentConfiguration(object): pass exp = ExperimentConfiguration() -exp.expname = "exp_v1.19_wb-random_WV_superob4-12" +exp.expname = "exp_v1.19_wb-random_Radar_zero" exp.model_dx = 2000 exp.n_ens = 40 exp.n_nodes = 10 @@ -23,9 +23,9 @@ exp.input_profile = '/home/fs71386/lkugler/wrf_profiles/data/wrf/ens/2021-05-04/ # needs a horizontal scale too, to calculate the vertical normalization # since you can not specify different vertical localizations for diff. variables exp.cov_loc_vert_km_horiz_km = (1, 30) -exp.superob_km = 12 +#exp.superob_km = 12 -n_obs = 5776 #5776: 4km, 121: 30km, 256:16x16 (20km); 961: 10km resoltn # radar: n_obs for each observation height level +n_obs = 961 #5776: 4km, 121: 30km, 256:16x16 (20km); 961: 10km resoltn # radar: n_obs for each observation height level vis = dict(plotname='VIS 0.6µm', plotunits='[1]', kind='MSG_4_SEVIRI_BDRF', sat_channel=1, n_obs=n_obs, @@ -65,7 +65,7 @@ psfc = dict(plotname='SYNOP Pressure', plotunits='[dBz]', cov_loc_radius_km=32) -exp.observations = [wv73] # 108, wv73, vis] +exp.observations = [radar] # 108, wv73, vis] #exp.update_vars = ['T', 'QVAPOR', 'QCLOUD', 'QICE','CLDFRA'] exp.update_vars = ['U', 'V', 'T', 'PH', 'MU', 'QVAPOR', 'QCLOUD', 'QICE', 'CLDFRA'] diff --git a/dartwrf/assim_synth_obs.py b/dartwrf/assim_synth_obs.py index 29845ecc9cdc82a02aefc897afa539d7189bc20e..18606920d8d5a67a60af994787d70aa59d0ad0ce 100755 --- a/dartwrf/assim_synth_obs.py +++ b/dartwrf/assim_synth_obs.py @@ -5,10 +5,10 @@ import numpy as np from scipy.interpolate import interp1d from config.cfg import exp, cluster -from .utils import symlink, copy, sed_inplace, append_file, mkdir, try_remove, print -from . import create_obsseq as osq -from . import wrfout_add_geo -from . import obsseq +from utils import symlink, copy, sed_inplace, append_file, mkdir, try_remove, print +import create_obsseq as osq +import wrfout_add_geo +import obsseq earth_radius_km = 6370 @@ -168,13 +168,18 @@ def set_DART_nml(just_prior_values=False): append_file(cluster.dartrundir + "/input.nml", rttov_nml) -def get_Hx_truth_prior(): - +def run_Hx(time, obscfg): + """ # assumes that prior ensemble is already linked to advance_temp<i>/wrfout_d01 - print("running obs operator on ensemble forecast") - os.chdir(cluster.dartrundir) - - t = time_module.time() + Creates + obs_seq.final (file): observations on (non-averaged) grid + """ + # existing obs_seq.out is a precondition for filter + # needs to know where to take observations + osq.create_obsseqin_alltypes(time, [obscfg, ], 0) + run_perfect_model_obs() + + print("running H(x) : obs operator on ensemble prior") # set input.nml to calculate prior Hx set_DART_nml(just_prior_values=True) @@ -182,22 +187,6 @@ def get_Hx_truth_prior(): # run filter to calculate prior Hx os.system("mpirun -np 12 ./filter &> log.filter.preassim") - # read prior Hx values from obs_seq.final - osf = obsseq.ObsSeq(cluster.dartrundir + '/obs_seq.final') - - if hasattr(exp, "superob_km"): - print("superobbing to", exp.superob_km, "km") - - t = time_module.time() - osf.superob(window_km=exp.superob_km) - print("superob took", int(time_module.time() - t), "seconds") - - prior_Hx = osf.get_prior_Hx_matrix().T - truth_Hx = osf.get_truth_Hx() - - print("obs operator ensemble took", int(time_module.time() - t), "seconds") - return truth_Hx, prior_Hx - def obs_operator_nature(time): print("getting true values in obs space from nature run") @@ -294,6 +283,15 @@ def prepare_prior_ensemble(assim_time, prior_init_time, prior_valid_time, prior_ def calc_obserr_WV73(Hx_nature, Hx_prior): + """Calculate parametrized error (for assimilation) + Args: + Hx_nature (np.array): dim (observations) + Hx_prior (np.array): dim (ensemble_members, observations) + + Returns + np.array Observation error std-deviation with dim (observations) + """ + debug = False n_obs = len(Hx_nature) OEs = np.ones(n_obs) @@ -322,8 +320,7 @@ def run_perfect_model_obs(): if not os.path.exists(cluster.dartrundir + "/obs_seq.out"): raise RuntimeError( "obs_seq.out does not exist in " + cluster.dartrundir, - "\n look for " + cluster.dartrundir + "/log.perfect_model_obs", - ) + "\n look for " + cluster.dartrundir + "/log.perfect_model_obs") def assimilate(nproc=96): @@ -332,15 +329,15 @@ def assimilate(nproc=96): os.chdir(cluster.dartrundir) try_remove(cluster.dartrundir + "/obs_seq.final") t = time_module.time() - os.system( - "mpirun -genv I_MPI_PIN_PROCESSOR_LIST=0-" - + str(int(nproc) - 1) - + " -np " - + str(int(nproc)) - + " ./filter > log.filter" - ) + #os.system('mpirun -np 12 ./filter &> log.filter') + os.system("".join([ + "mpirun -genv I_MPI_PIN_PROCESSOR_LIST=0-", + str(int(nproc) - 1)," -np ",str(int(nproc))," ./filter > log.filter"])) print("./filter took", int(time_module.time() - t), "seconds") - + if not os.path.isfile(cluster.dartrundir + "/obs_seq.final"): + raise RuntimeError( + "obs_seq.final does not exist in " + cluster.dartrundir, + "\n look for " + cluster.dartrundir + "/log.filter") # currently unused # def recycle_output(): @@ -368,10 +365,8 @@ def assimilate(nproc=96): # print('updating', updates, 'in', dart_input, 'from', dart_output) # os.system(cluster.ncks+' -A -v '+updates+' '+dart_output+' '+dart_input) - ############### archiving - def archive_osq_final(time, posterior_1min=False): """Save obs_seq.final file for later. time (dt.datetime) : time of sampling values from files @@ -425,11 +420,49 @@ def archive_osq_out(time): os.makedirs(dir_obsseq, exist_ok=True) copy(cluster.dartrundir + "/obs_seq.out", dir_obsseq + time.strftime("/%Y-%m-%d_%H:%M_obs_seq.out")) - try: - copy(cluster.dartrundir + "/obs_seq.out-orig", - dir_obsseq + time.strftime("/%Y-%m-%d_%H:%M_obs_seq.out-orig")) - except Exception as e: - warnings.warn(str(e)) + if os.path.isfile(cluster.dartrundir + "/obs_seq.out-orig"): + try: + copy(cluster.dartrundir + "/obs_seq.out-orig", + dir_obsseq + time.strftime("/%Y-%m-%d_%H:%M_obs_seq.out-orig")) + except Exception as e: + warnings.warn(str(e)) + +def is_assim_error_parametrized(obscfg): + if (obscfg["error_assimilate"] == False) and ( + obscfg.get("sat_channel") == 6): + return True + else: + return False + +def get_parametrized_error(obscfg, df_obs): + """Calculate the parametrized error for an ObsRecord + This can be all observations or just a subset of all + + Args + obscfg () + df_obs (obsseq.ObsRecord): contains observations from obs_seq.out + + Returns + np.array observation error std-dev for assimilation + """ + + # run obs operator (through filter program) + # creates obs_seq.final containing truth & prior Hx + run_Hx(time, obscfg) + + if hasattr(exp, "superob_km"): + print("superobbing to", exp.superob_km, "km") + t = time_module.time() + df_obs.superob(window_km=exp.superob_km) + print("superob took", int(time_module.time() - t), "seconds") + + Hx_prior = df_obs.get_prior_Hx().T + Hx_truth = df_obs.get_truth_Hx() + + # compute the obs error for assimilation on the averaged grid + # since the assimilation is done on the averaged grid + return calc_obserr_WV73(Hx_truth, Hx_prior) + if __name__ == "__main__": """Assimilate observations (different obs types) @@ -484,49 +517,14 @@ if __name__ == "__main__": prepare_prior_ensemble(time, prior_init_time, prior_valid_time, prior_path_exp) ################################################ - print(" 1) get the assimilation errors in a single vector ") - error_assimilate = [] - # to get the obs-error for assimilation, - # we need to get the true obs-space values for the parametrized variable - # and collect the obs-error for assimilation in a single vector/list - - for i, obscfg in enumerate(exp.observations): - print('for', obscfg) - n_obs = obscfg["n_obs"] - levels = obscfg.get("heights", [1,]) - n_obs_z = len(levels) - n_obs_3d = n_obs * n_obs_z - - do_error_parametrization = (obscfg["error_assimilate"] == False) and ( - obscfg.get("sat_channel") == 6) - - if do_error_parametrization: - # get observations for sat 6 - osq.create_obsseqin_alltypes(time, [obscfg, ], 0) - - # create observations at full resolution - run_perfect_model_obs() - - # run obs operator (through filter program) - # to create truth and prior ensemble Hx - # superob the high-res observations - # return the averaged observations / prior Hx - Hx_truth, Hx_prior = get_Hx_truth_prior() - - # compute the obs error for assimilation on the averaged grid - # since the assimilation is done on the averaged grid - err_assim = calc_obserr_WV73(Hx_truth, Hx_prior) - else: - err_assim = np.zeros(n_obs_3d) + obscfg["error_assimilate"] - - error_assimilate.extend(err_assim) - - ################################################ - print(" 2) generate observations ") + print(" 1) generate observations with specified obs-error") # the obs-error we use for generating obs is user-defined error_generate = [] for i, obscfg in enumerate(exp.observations): + levels = obscfg.get("heights", [1,]) + n_obs_z = len(levels) + n_obs_3d = obscfg["n_obs"] * n_obs_z error_generate.extend(np.zeros(n_obs_3d) + obscfg["error_generate"]) osq.create_obsseqin_alltypes(time, exp.observations, error_generate) @@ -534,8 +532,14 @@ if __name__ == "__main__": run_perfect_model_obs() # actually create observations that are used to assimilate + # read in the actual observations oso = obsseq.ObsSeq(cluster.dartrundir + "/obs_seq.out") + if False: # only refl < 6 dBz + # oso = obsseq.ObsSeq(cluster.dartrundir + "/obs_seq.out") + oso.df = oso.df[oso.df['truth'].values < 6] + oso.to_dart(f=cluster.dartrundir + "/obs_seq.out") + if hasattr(exp, "superob_km"): print("superobbing to", exp.superob_km, "km") oso.superob(window_km=exp.superob_km) @@ -543,15 +547,26 @@ if __name__ == "__main__": oso.to_dart(f=cluster.dartrundir + "/obs_seq.out") ################################################ - print(" 3) assimilate with observation-errors for assimilation") + print(" 2) define observation-errors for assimilation ") + + for obscfg in exp.observations: + kind = obscfg['kind'] + mask_kind = oso.df.kind == kind + + if is_assim_error_parametrized(obscfg): + + assim_err = get_parametrized_error(obscfg, oso.df[mask_kind]) + + oso.df[mask_kind] = assim_err**2 + else: + # overwrite with user-defined values + oso.df[mask_kind] = obscfg["error_assimilate"]**2 - oso.df['variance'] = error_assimilate # replace error variance oso.to_dart(cluster.dartrundir + "/obs_seq.out") - archive_osq_out(time) - t = time_module.time() + print(" 3) assimilate ") + archive_osq_out(time) assimilate() - print("filter took", time_module.time() - t, "seconds") archive_filteroutput(time) archive_osq_final(time) diff --git a/dartwrf/cleanup_exp.py b/dartwrf/cleanup_exp.py index 244c30690de1871c08d580ea28c9afe29033757b..bee76fe30311448c0d3771e8aac6677f4adbd5cf 100644 --- a/dartwrf/cleanup_exp.py +++ b/dartwrf/cleanup_exp.py @@ -1,6 +1,6 @@ import os, glob from config.cfg import exp, cluster -from .utils import try_remove +from utils import try_remove """Run this script after an experiment to reduce cluster disk usage. diff --git a/dartwrf/link_dart_rttov.py b/dartwrf/link_dart_rttov.py index c5f8d849ed518e328d8fb1f738c3c7282535e0f8..ec6d0aed9ae4a29419be63d7da6cc7eab41a9062 100644 --- a/dartwrf/link_dart_rttov.py +++ b/dartwrf/link_dart_rttov.py @@ -1,6 +1,6 @@ import os from config.cfg import exp, cluster -from .utils import symlink, copy_scp_srvx8, copy, sed_inplace +from utils import symlink, copy_scp_srvx8, copy, sed_inplace joinp = os.path.join diff --git a/dartwrf/obsseq.py b/dartwrf/obsseq.py index adb1ca69567ba96089b8bf0d69197818c905f434..3229858682569cd1b0e493cd7bd029b5e6f9bff2 100755 --- a/dartwrf/obsseq.py +++ b/dartwrf/obsseq.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd from config.cfg import exp, cluster -from .utils import symlink, copy, sed_inplace, append_file, mkdir, try_remove +from utils import symlink, copy, sed_inplace, append_file, mkdir, try_remove def plot_box(m, lat, lon, label="", **kwargs): @@ -47,6 +47,177 @@ def rad_to_degrees(rad): return degr + + +class ObsRecord(pd.DataFrame): + """Content of obsseq.ObsSeq instances + provides additional methods for pd.DataFrame + created inside an ObsSeq instance + """ + @property + def _constructor(self): + # This ensures that pandas operations return ObsRecord instances + # e.g. subsetting df with df[3:4] returns ObsRecord + return ObsRecord + + def get_prior_Hx(self): + """Return prior Hx array (n_obs, n_ens)""" + return self._get_model_Hx(self, 'prior') + + def get_posterior_Hx(self): + """Return posterior Hx array (n_obs, n_ens)""" + return self._get_model_Hx(self, 'posterior') + + def get_truth_Hx(self): + return self['truth'].values + + def _get_model_Hx(self, what): + """Return ensemble member info + + Args: + self (pd.DataFrame): usually self.self + what (str): 'prior' or 'posterior' + + Returns: + np.array + + Works with all observations (self = self.self) + or a subset of observations (self = self.self[343:348]) + """ + if what not in ['prior', 'posterior']: + raise ValueError + + # which columns do we need? + keys = self.columns + keys_bool = np.array([what+' ensemble member' in a for a in keys]) + + # select columns in DataFrame + Hx = self.iloc[:, keys_bool] + + # consistency check: compute mean over ens - compare with value from file + assert np.allclose(Hx.mean(axis=1), self[what+' ensemble mean']) + return Hx.values + + def get_lon_lat(self): + lats = np.empty(len(self), np.float32) + lons = lats.copy() + + for i_obs, values in self.loc3d.items(): + x, y, z, z_coord = values + + # convert radian to degrees lon/lat + lon = rad_to_degrees(x) + lat = rad_to_degrees(y) + lons[i_obs] = lon + lats[i_obs] = lat + + return pd.DataFrame(index=self.index, data=dict(lat=lats, lon=lons)) + + def superob(self, window_km): + """Select subset, average, overwrite existing obs with average + + TODO: allow different obs types (KIND) + TODO: loc3d overwrite with mean + Metadata is copied from the first obs in a superob-box + + Note: + This routine discards observations (round off) + e.g. 31 obs with 5 obs-window => obs #31 is not processed + + Args: + window_km (numeric): horizontal window edge length + includes obs on edge + 25x25 km with 5 km obs density + = average 5 x 5 observations + """ + debug = False + radius_earth_meters = 6.371 * 1e6 + m_per_degrees = np.pi * radius_earth_meters / 180 # m per degree latitude + km_per_degrees = m_per_degrees / 1000 + + def calc_deg_from_km(distance_km, center_lat): + """Approximately calculate distance in degrees from meters + Input: distance in km; degree latitude + Output: distance in degrees of latitude, longitude + """ + assert distance_km > 0, "window size <= 0, must be > 0" + dist_deg_lat = distance_km / km_per_degrees + dist_deg_lon = dist_deg_lat * np.cos(center_lat * np.pi / 180) + return dist_deg_lat, dist_deg_lon + + def calc_km_from_deg(deg_lat, deg_lon, center_lat): + dist_km_lat = deg_lat * km_per_degrees + dist_km_lon = deg_lon * km_per_degrees * np.cos(center_lat * np.pi / 180) + return dist_km_lat, dist_km_lon + + # assume cartesian grid of observations + i_obs_grid = self.index.values + nx = int(len(i_obs_grid) ** 0.5) + i_obs_grid = i_obs_grid.reshape(nx, nx) + + # loop through columns/rows + # avoid loop in (lat,lon) space as coordinates are non-cartesian + # i.e. first column of observations does not have same longitude + + # determine obs density (approx) + coords = self.get_lon_lat() + dx_obs_lat_deg = coords.lat.diff().max() + km_lat, _ = calc_km_from_deg(dx_obs_lat_deg, np.nan, 45) + obs_spacing_km = int(km_lat) + + # how many observations in x/y direction? + win_obs = int(window_km / obs_spacing_km) + if debug: + print('window_km=', window_km) + print('obs spacing=', obs_spacing_km) + print("window (#obs in x/y)=", win_obs) + + out = self.drop(self.index) # this df will be filled + + for i in range(0, nx - win_obs, win_obs): + for j in range(0, nx - win_obs, win_obs): + + # find indices of observations which lie in the superob box + i_obs_box = i_obs_grid[i : i + win_obs, j : j + win_obs].ravel() + + if debug: + print("index x from", i, 'to', i + win_obs) + print("index y from", j, 'to', j + win_obs) + print("obs indices box=", i_obs_grid[i : i + win_obs, j : j + win_obs]) + + # average the subset + # metadata are assumed to be equal + obs_box = self.iloc[i_obs_box] + + obs_mean = obs_box.iloc[0] + # despite its name, 'observations' is a single value + for key in obs_box: + if key in ['loc3d', 'kind', 'metadata', 'time']: + pass + elif 'spread' in key: # mean of std deviations + obs_mean.at[key] = np.sqrt((obs_box[key]**2).mean()) + else: + obs_mean.at[key] = obs_box[key].mean() + + if debug: + print("pre_avg:", obs_box.head()) + print("avg:", obs_mean) + + out = out.append(obs_mean) + + n_pre_superob = len(self) + n_post_superob = len(out) + + # assume square of obs + n_windows_x = int(n_pre_superob**.5/win_obs) # in x-direction + n_target_post = n_windows_x**2 # number of windows + print('superob from', n_pre_superob, 'obs to', n_post_superob, 'obs') + assert n_post_superob == n_target_post, RuntimeError() + + self = out # overwrite dataframe + return self # output itself (allows to write it to a new name) + + class ObsSeq(object): """Read, manipulate, save obs_seq.out / final files """ @@ -57,8 +228,7 @@ class ObsSeq(object): self.get_preamble_content() self.read_preamble() - self.dict = self.obs_to_dict() - self.df = self.to_pandas() + self.df = ObsRecord(self.to_pandas()) def __str__(self): return self.df.__str__() @@ -172,6 +342,11 @@ class ObsSeq(object): obs_list.append(content[obs_begin : obs_end + 1]) assert len(obs_list) > 1 + + # consistency check to ensure that all observations have been detected + if len(obs_list) != self.num_obs: + raise RuntimeError('num_obs read in does not match preamble num_obs ' + +str(len(obs_list))+' != '+str(self.num_obs)) return obs_list def one_obs_to_dict(obs_list_entry): @@ -194,9 +369,7 @@ class ObsSeq(object): for k, key in enumerate(self.keys_for_values): out[key] = float(lines[1+k].strip()) - #out["obs"] = float(lines[1].strip()) - #out["truth"] = float(lines[2].strip()) - #out["qc"] = float(lines[3].strip()) + x, y, z, z_coord = lines[line_loc].split() out["loc3d"] = float(x), float(y), float(z), int(z_coord) out["kind"] = int(lines[line_kind].strip()) @@ -226,150 +399,6 @@ class ObsSeq(object): list_of_obsdict = obs_list_to_dict(obs_list) return list_of_obsdict - def _get_model_Hx(self, what): - if what not in ['prior', 'posterior']: - raise ValueError - - # which columns do we need? - keys = self.df.columns - keys_bool = np.array([what+' ensemble member' in a for a in keys]) - - # select columns in DataFrame - Hx = self.df.iloc[:, keys_bool] - - # consistency check: compute mean over ens - compare with value from file - assert np.allclose(Hx.mean(axis=1), self.df[what+' ensemble mean']) - - return Hx.values - - def get_prior_Hx(self): - """Return prior Hx array (n_obs, n_ens)""" - return self._get_model_Hx('prior') - - def get_posterior_Hx(self): - """Return posterior Hx array (n_obs, n_ens)""" - return self._get_model_Hx('posterior') - - def get_truth_Hx(self): - return self.df['truth'].values - - def get_lon_lat(self): - lats = np.empty(len(self.df), np.float32) - lons = lats.copy() - - for i_obs, values in self.df.loc3d.items(): - x, y, z, z_coord = values - - # convert radian to degrees lon/lat - lon = rad_to_degrees(x) - lat = rad_to_degrees(y) - lons[i_obs] = lon - lats[i_obs] = lat - - return pd.DataFrame(index=self.df.index, data=dict(lat=lats, lon=lons)) - - def superob(self, window_km): - """Select subset, average, overwrite existing obs with average - - TODO: allow different obs types (KIND) - TODO: loc3d overwrite with mean - Metadata is copied from the first obs in a superob-box - - Note: - This routine discards observations (round off) - e.g. 31 obs with 5 obs-window => obs #31 is not processed - - Args: - window_km (numeric): horizontal window edge length - includes obs on edge - 25x25 km with 5 km obs density - = average 5 x 5 observations - """ - debug = False - radius_earth_meters = 6.371 * 1e6 - m_per_degrees = np.pi * radius_earth_meters / 180 # m per degree latitude - km_per_degrees = m_per_degrees / 1000 - - def calc_deg_from_km(distance_km, center_lat): - """Approximately calculate distance in degrees from meters - Input: distance in km; degree latitude - Output: distance in degrees of latitude, longitude - """ - assert distance_km > 0, "window size <= 0, must be > 0" - dist_deg_lat = distance_km / km_per_degrees - dist_deg_lon = dist_deg_lat * np.cos(center_lat * np.pi / 180) - return dist_deg_lat, dist_deg_lon - - def calc_km_from_deg(deg_lat, deg_lon, center_lat): - dist_km_lat = deg_lat * km_per_degrees - dist_km_lon = deg_lon * km_per_degrees * np.cos(center_lat * np.pi / 180) - return dist_km_lat, dist_km_lon - - # assume cartesian grid of observations - i_obs_grid = self.df.index.values - nx = int(len(i_obs_grid) ** 0.5) - i_obs_grid = i_obs_grid.reshape(nx, nx) - - # loop through columns/rows - # avoid loop in (lat,lon) space as coordinates are non-cartesian - # i.e. first column of observations does not have same longitude - - # determine obs density (approx) - coords = self.get_lon_lat() - dx_obs_lat_deg = coords.lat.diff().max() - km_lat, _ = calc_km_from_deg(dx_obs_lat_deg, np.nan, 45) - obs_spacing_km = int(km_lat) - - # how many observations in x/y direction? - win_obs = int(window_km / obs_spacing_km) - if debug: - print('window_km=', window_km) - print('obs spacing=', obs_spacing_km) - print("window (#obs in x/y)=", win_obs) - - out = self.df.drop(self.df.index) # this df will be filled - - for i in range(0, nx - win_obs, win_obs): - for j in range(0, nx - win_obs, win_obs): - - # find indices of observations which lie in the superob box - i_obs_box = i_obs_grid[i : i + win_obs, j : j + win_obs].ravel() - - if debug: - print("index x from", i, 'to', i + win_obs) - print("index y from", j, 'to', j + win_obs) - print("obs indices box=", i_obs_grid[i : i + win_obs, j : j + win_obs]) - - # average the subset - # metadata are assumed to be equal - obs_box = self.df.iloc[i_obs_box] - - obs_mean = obs_box.iloc[0] - # despite its name, 'observations' is a single value - for key in obs_box: - if key in ['loc3d', 'kind', 'metadata', 'time']: - pass - elif 'spread' in key: # mean of std deviations - obs_mean.at[key] = np.sqrt((obs_box[key]**2).mean()) - else: - obs_mean.at[key] = obs_box[key].mean() - - if debug: - print("pre_avg:", obs_box.head()) - print("avg:", obs_mean) - - out = out.append(obs_mean) - - self.df = out # overwrite input - n_pre_superob = self.num_obs - n_post_superob = len(self.df) - - # assume square of obs - n_windows_x = int(n_pre_superob**.5/win_obs) # in x-direction - n_target_post = n_windows_x**2 # number of windows - print('superob from', n_pre_superob, 'obs to', n_post_superob, 'obs') - assert n_post_superob == n_target_post, RuntimeError() - def to_pandas(self): """Create xr.Dataset containing observations Variables = observation types @@ -462,17 +491,12 @@ class ObsSeq(object): str(obs["loc3d"][3]), ] ), - "kind", " " + str(obs["kind"]), + "kind", " " + str(int(obs["kind"])), "".join(obs["metadata"]), ] ) - + str(i) - + "\n " - + obs["time"][0] - + " " - + obs["time"][1] - + "\n" - + str(obs["variance"]) + + " \n " + obs["time"][0] + " " + obs["time"][1] + + " \n " + str(obs["variance"]) ) return out @@ -580,9 +604,11 @@ if __name__ == "__main__": # for testing purposes obs = ObsSeq(cluster.scriptsdir + "/../tests/obs_seq.orig.out") + print(type(obs)) # select a subset (lat-lon) - obs.superob(window_km=50) + obs.df = obs.df[:].superob(window_km=50) + print(type(obs.df)) # write to obs_seq.out in DART format obs.to_dart(f=cluster.dartrundir + "/obs_seq.out") diff --git a/dartwrf/prep_IC_prior.py b/dartwrf/prep_IC_prior.py index 1c5372ff9a9b714f26e2eac76889f0140d3f70d8..a4d2a73d47c89cc2b983adb80dd16c0a9878cccd 100755 --- a/dartwrf/prep_IC_prior.py +++ b/dartwrf/prep_IC_prior.py @@ -2,7 +2,7 @@ import os, sys, warnings, glob import datetime as dt from config.cfg import exp, cluster -from .utils import copy, clean_wrfdir, try_remove +from utils import copy, clean_wrfdir, try_remove """ Sets initial condition data (wrfinput/wrfrst file) in the run_WRF directory for each ensemble member diff --git a/dartwrf/prepare_namelist.py b/dartwrf/prepare_namelist.py index 28bac3ede79721a0b1528894bd279ca4e53e072a..4f2672b68204ad8963d27b2ed3fb4b5c3478524b 100755 --- a/dartwrf/prepare_namelist.py +++ b/dartwrf/prepare_namelist.py @@ -12,7 +12,7 @@ import os, sys, shutil, warnings import datetime as dt from docopt import docopt from config.cfg import exp, cluster -from .utils import sed_inplace, copy, symlink, mkdir +from utils import sed_inplace, copy, symlink, mkdir def run(iens, begin, end, hist_interval=5, radt=5, archive=True, restart=False, restart_interval=720): diff --git a/dartwrf/prepare_wrfrundir.py b/dartwrf/prepare_wrfrundir.py index d6ba5f6fb6a82c6b8af4ca9e88f43cf079066e56..772e0c4981721c6ad3dff2ab81502db5381e6e65 100755 --- a/dartwrf/prepare_wrfrundir.py +++ b/dartwrf/prepare_wrfrundir.py @@ -1,26 +1,28 @@ import os, sys, shutil import datetime as dt + from config.cfg import exp, cluster -from .utils import symlink, copy, link_contents +from utils import symlink, copy, link_contents +import prepare_namelist -from . import prepare_namelist +if __name__ == '__main__': -init_time = dt.datetime.strptime(sys.argv[1], '%Y-%m-%d_%H:%M') + init_time = dt.datetime.strptime(sys.argv[1], '%Y-%m-%d_%H:%M') -for iens in range(1, exp.n_ens+1): - print('preparing ens', iens) - input_prof = (exp.input_profile).replace('<iens>', str(iens).zfill(3)) + for iens in range(1, exp.n_ens+1): + print('preparing ens', iens) + input_prof = (exp.input_profile).replace('<iens>', str(iens).zfill(3)) - rundir = cluster.wrf_rundir(iens) - os.makedirs(rundir, exist_ok=True) - link_contents(cluster.srcdir, rundir) - print('linking ideal and wrf.exe:') - symlink(cluster.ideal, rundir+'/ideal.exe') - symlink(cluster.wrfexe, rundir+'/wrf.exe') + rundir = cluster.wrf_rundir(iens) + os.makedirs(rundir, exist_ok=True) + link_contents(cluster.srcdir, rundir) + print('linking ideal and wrf.exe:') + symlink(cluster.ideal, rundir+'/ideal.exe') + symlink(cluster.wrfexe, rundir+'/wrf.exe') - # time not important, but general settings - prepare_namelist.run(iens, begin=init_time, end=dt.datetime(2008, 7, 30, 23), - archive=False) + # time not important, but general settings + prepare_namelist.run(iens, begin=init_time, end=dt.datetime(2008, 7, 30, 23), + archive=False) - symlink(input_prof, rundir+'/input_sounding') -print('finished.') + symlink(input_prof, rundir+'/input_sounding') + print('finished.') diff --git a/dartwrf/run_obs_diag.py b/dartwrf/run_obs_diag.py index 15064774f1aa07f95cc3b539cf713f6411844292..d3d3feb79d72e983a3e05e11d0e42516e3445629 100644 --- a/dartwrf/run_obs_diag.py +++ b/dartwrf/run_obs_diag.py @@ -1,6 +1,6 @@ import os, sys, shutil, glob from config.cfg import exp, cluster -from .utils import symlink, copy, sed_inplace, append_file +from utils import symlink, copy, sed_inplace, append_file rundir_program = '/home/fs71386/lkugler/data/run_DART/'