diff --git a/dartwrf/assim_synth_obs.py b/dartwrf/assim_synth_obs.py
index 9d42cc437eec7403f4d0208b08907c2e1f9248c0..6c51da0ea32b58a3ca60b70edca3ac22af657e0f 100755
--- a/dartwrf/assim_synth_obs.py
+++ b/dartwrf/assim_synth_obs.py
@@ -18,30 +18,24 @@ wrfout_format = 'wrfout_d01_%Y-%m-%d_%H:%M:%S'
 def _prepare_DART_grid_template():
     # DART needs a wrfinput file as a template for the grid
     # No data will be read from this file, but the grid information must match exactly.
-    symlink(cluster.dartrundir + "/prior_ens1/wrfout_d01", 
-            cluster.dartrundir + "/wrfinput_d01")
+    symlink(cluster.dart_rundir + "/prior_ens1/wrfout_d01", 
+            cluster.dart_rundir + "/wrfinput_d01")
 
 def _copy_nature_to_dart(time):
     """Copies wrfout_d01 from nature run to DART directory
-
-    TODO: This is a bit of a hack, because it is not explicit about where to take the nature from.
     """
-    # find the file in any init directory
-    fformat = 'wrfout_d01_%Y-%m-%d_%H:%M:%S'
-    f_nat = glob.glob(cluster.archive_base + '/' + exp.nature_expname + '/*/1/'+time.strftime(fformat))[0]
-    shutil.copy(f_nat, cluster.dart_rundir + "/wrfout_d01")
+    glob_pattern = time.strftime(exp.nature_wrfout_pattern)  # replace time in pattern
+    print('searching for nature in pattern:', glob_pattern)
+    f_nat = glob.glob(glob_pattern)[0]  # find the nature wrfout-file
 
-    # DART may need a wrfinput file as well ?!
-    symlink(cluster.dart_rundir + "/wrfout_d01", 
-            cluster.dart_rundir + "/wrfinput_d01")
-    print("linked", f_nat, "to", cluster.dart_rundir + "/wrfout_d01")
+    # check user input
+    if not 'wrfout' in f_nat.split('/')[-1]:
+        warnings.warn(f+" does not contain 'wrfout' in filename, are you sure this is a valid nature file?")
 
-    f_wrfout_nature = time.strftime(exp.nature+'/'+wrfout_format)
-    assert os.path.exists(f_wrfout_nature)
+    # copy nature wrfout to DART directory
+    shutil.copy(f_nat, cluster.dart_rundir + "/wrfout_d01")
 
-    print("linking nature to DART & georeferencing")
-    shutil.copy(f_wrfout_nature, cluster.dartrundir + "/wrfout_d01")
-    print("linked", f_wrfout_nature, "to", cluster.dartrundir + "/wrfout_d01")
+    # add coordinates if necessary
     if cluster.geo_em_for_WRF_ideal:
         wrfout_add_geo.run(cluster.geo_em_for_WRF_ideal, cluster.dart_rundir + "/wrfout_d01")
 
@@ -53,7 +47,7 @@ def prepare_nature_dart(time):
     """
     try:
         _copy_nature_to_dart(time)
-    except FileExistsError:  # if nature is not available due to any reason
+    except (FileNotFoundError, AttributeError) as e:  # if nature is not available due to any reason
         print('-> has no nature, not copying nature')
 
 
@@ -369,8 +363,13 @@ def get_obsseq_out(time):
     if exp.use_existing_obsseq != False: 
         # use an existing obs_seq.out file
         f_obsseq = time.strftime(exp.use_existing_obsseq)
-        copy(f_obsseq, cluster.dart_rundir+'/obs_seq.out')
+        copy(f_obsseq, cluster.dart_rundir+'/obs_seq.out')  # copy to run_DART folder        
         print(f_obsseq, 'copied to', cluster.dart_rundir+'/obs_seq.out')
+
+        # copy to sim_archive
+        os.makedirs(cluster.archivedir + "/obs_seq_out/", exist_ok=True)
+        copy(f_obsseq,  time.strftime(cluster.archivedir+'/obs_seq_out/%Y-%m-%d_%H:%M_obs_seq.out'))
+
         oso = obsseq.ObsSeq(cluster.dart_rundir + "/obs_seq.out")  # read the obs_seq.out file
     else: 
         # do NOT use an existing obs_seq.out file
diff --git a/dartwrf/obs/create_obsseq_out.py b/dartwrf/obs/create_obsseq_out.py
index af9eaa503b05dc6803879784d1db802fc5970ff9..8eb99b6b2d427eca50f9432a7951835c89968d28 100644
--- a/dartwrf/obs/create_obsseq_out.py
+++ b/dartwrf/obs/create_obsseq_out.py
@@ -1,14 +1,22 @@
 import os, shutil, warnings
 
-from dartwrf.utils import try_remove, print, shell
+from dartwrf.utils import try_remove, print, shell, symlink
 import dartwrf.obs.create_obsseq_in as osi
 from dartwrf.obs import obsseq
 
 from dartwrf.exp_config import exp
 from dartwrf.server_config import cluster
 
+
+def _prepare_DART_grid_template():
+    # DART needs a wrfinput file as a template for the grid
+    # No data will be read from this file, but the grid information must match exactly.
+    symlink(cluster.dart_rundir + "/wrfout_d01", 
+            cluster.dart_rundir + "/wrfinput_d01")
+
 def generate_obsseq_out(time):
     """Generate an obs_seq.out file from the current experiment
+    Expects an existing nature file in the cluster.dart_rundir
     
     Args:
         time (datetime): time of the observations
@@ -47,6 +55,8 @@ def generate_obsseq_out(time):
     os.makedirs(dir_obsseq, exist_ok=True)
 
     osi.create_obs_seq_in(time, exp.observations)
+
+    _prepare_DART_grid_template()
     run_perfect_model_obs()  # generate observation, draws from gaussian
 
     print(" 2.1) obs preprocessing")
diff --git a/dartwrf/obs/obsseq.py b/dartwrf/obs/obsseq.py
index 49f3bd80498f2a3888014166b294af31cb187d44..b01c9ec6b75ff73577b2bb504a664d3154172cd2 100755
--- a/dartwrf/obs/obsseq.py
+++ b/dartwrf/obs/obsseq.py
@@ -36,6 +36,8 @@ import os, warnings
 import numpy as np
 import pandas as pd
 
+missing_value = -888888.0
+
 def _plot_box(m, lat, lon, label="", **kwargs):
     """"Draw bounding box
 
@@ -141,6 +143,61 @@ class ObsRecord(pd.DataFrame):
         assert np.allclose(Hx.mean(axis=1), self[what+' ensemble mean'])
         return Hx.values
 
+
+
+    def get_model_grid_indices(self, wrf_file_with_grid):
+        """Retrieve the grid indices closest to the observations
+
+        Note:
+            Only the horizontal grid is considered
+
+        Args:
+            wrf_file_with_grid (str):   path to wrf file with grid information
+
+        Returns:
+            pd.DataFrame (n_obs, 2)     columns: i, j
+        """
+        from scipy import spatial
+        import xarray as xr
+
+        def find_index_from_coords_tree(tree, len_latitudes, lat=45., lon=0.):
+            """Find Lat & Lon indices in array
+            to find the state space values nearest to the observation
+
+            Args:
+                len_latitudes (int) : usually xlat.shape[0]
+                    actually this could also have to be len of longitudes (i dont know!)
+                    but it works if len(xlon)==len(xlat)
+
+            Returns:
+                ilat, ilon (int)
+            """
+            dd, ii = tree.query([[lat, lon],])
+            ilat = int(ii/len_latitudes)
+            ilon = int(ii % len_latitudes)
+            return ilat, ilon
+
+        # load coordinates of wrf grid
+        grid = xr.open_dataset(wrf_file_with_grid)
+        xlat = grid.XLAT_M.values.squeeze()
+        xlon = grid.XLONG_M.values.squeeze()
+
+        # build search tree
+        tree = spatial.KDTree(np.c_[xlat.ravel(), xlon.ravel()])
+
+        # get lat lon of observations
+        lon_lat = self.get_lon_lat()
+
+        ilat_ilon = np.empty((len(lon_lat), 2), np.int32)
+
+        # find indices of observations in wrf grid
+        for i, row in lon_lat.iterrows():
+            ilat_ilon[i,:] = find_index_from_coords_tree(tree, xlat.shape[0], row.lat, row.lon)
+            
+        return pd.DataFrame(index=self.index, 
+            data=dict(wrf_i=ilat_ilon[:,0], wrf_j=ilat_ilon[:,1]))
+
+
     def get_lon_lat(self):
         """Retrieve longitude and latitude of observations
 
@@ -190,21 +247,26 @@ class ObsRecord(pd.DataFrame):
         return nlayers
 
     def superob(self, window_km):
-        """Select subset, average, overwrite existing obs with average
-
-        TODO: allow different obs types (KIND)
-        TODO: loc3d overwrite with mean
-        Metadata is copied from the first obs in a superob-box
+        """Create super-observations (averaged observations)
 
         Note:
             This routine discards observations (round off)
-            e.g. 31 obs with 5 obs-window => obs #31 is not processed
+            e.g. 31 obs with 5 obs-window => obs #31 is not processed.
+
+            Metadata is copied from the first observation in a superob-box
+
+            The location (loc3d) of new observation is taken from the center observation
+
+        TODO: allow different obs types (KIND)
 
         Args:
             window_km (numeric):        horizontal window edge length
                                         includes obs on edge
                                         25x25 km with 5 km obs density
                                         = average 5 x 5 observations
+
+        Returns:
+            ObsRecord
         """
         def calc_deg_from_km(distance_km, center_lat):
             """Approximately calculate distance in degrees from meters
@@ -264,9 +326,17 @@ class ObsRecord(pd.DataFrame):
         out = self.drop(self.index)  # this df will be filled
         boxes = []
 
-        for i in range(0, nx+1 - win_obs, win_obs):
+        for i in range(0, nx+1 - win_obs, win_obs):  
+            # i is the number of observations in x direction
+            # but in steps of "number of observations in superob window"
+            # i.e. i = 0, win_obs, 2*win_obs, 3*win_obs, ...
+
             for j in range(0, nx+1 - win_obs, win_obs):
+                # same as i but in y direction
+
                 for k in range(0, nlayers):
+                    # k is the index of the vertical layer
+
                     if debug: print(i,j,k)
 
                     # find indices of observations within superob window
@@ -301,7 +371,7 @@ class ObsRecord(pd.DataFrame):
                     # average spread and other values
                     for key in obs_box:
                         if key in ['loc3d', 'kind', 'metadata', 'time']:
-                            pass
+                            pass  # these parameters are not averaged
                         elif 'spread' in key:
                             # stdev of mean of values = sqrt(mean of variances)
                             obs_mean.at[key] = np.sqrt((obs_box[key]**2).mean())
@@ -503,8 +573,16 @@ class ObsSeq(object):
                 if "kind" in line:  # find obs kind
                     line_kind = i + 1
 
+            # read values like 'observations', 'truth', 'prior ensemble mean'
             for k, key in enumerate(self.keys_for_values):
-                out[key] = float(lines[1+k].strip())
+
+                v = float(lines[1+k].strip())  # value in obs_seq file
+
+                if v == missing_value:  # e.g. -888888.0
+                    out[key] = np.nan
+                else:
+                    out[key] = v
+
 
             x, y, z, z_coord = lines[line_loc].split()
             out["loc3d"] = float(x), float(y), float(z), int(z_coord)