diff --git a/.gitignore b/.gitignore
index e0a66811123dee4def21f9db91477ab99bf7ce82..309f0818b37afde1fd5064ba5c2e5805f2f43380 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,5 @@ slurm-scripts/
 logs/
 dask-worker-space/
 DART_WRF.egg*
-build*
\ No newline at end of file
+build*
+._*
\ No newline at end of file
diff --git a/scripts/obsseq.py b/scripts/obsseq.py
new file mode 100644
index 0000000000000000000000000000000000000000..8dae9a32254f6699c477e8994cb56214ec234acd
--- /dev/null
+++ b/scripts/obsseq.py
@@ -0,0 +1,489 @@
+import os, sys, shutil, warnings
+import numpy as np
+import pandas as pd
+
+from config.cfg import exp, cluster
+from utils import symlink, copy, sed_inplace, append_file, mkdir, try_remove
+
+
+import matplotlib as mpl
+
+mpl.use("agg")
+import matplotlib.pyplot as plt
+
+
+def plot_box(m, lat, lon, label="", **kwargs):
+
+    m.drawgreatcircle(
+        lon[0, -1], lat[0, -1], lon[0, 0], lat[0, 0], del_s=20, zorder=4, **kwargs
+    )
+    m.drawgreatcircle(
+        lon[0, -1], lat[0, -1], lon[-1, -1], lat[-1, -1], del_s=20, zorder=4, **kwargs
+    )
+    m.drawgreatcircle(
+        lon[-1, 0], lat[-1, 0], lon[-1, -1], lat[-1, -1], del_s=20, zorder=4, **kwargs
+    )
+    m.drawgreatcircle(
+        lon[0, 0],
+        lat[0, 0],
+        lon[-1, 0],
+        lat[-1, 0],
+        del_s=20,
+        zorder=4,
+        label=label,
+        **kwargs
+    )
+
+
+def degrees_to_rad(degr):
+    """Convert to DART convention = radians"""
+    if degr < 0:
+        degr += 360
+    return degr / 360 * 2 * np.pi
+
+
+def rad_to_degrees(rad):
+    """Convert to degrees from DART convention (radians)"""
+    assert rad >= 0, "no negative radians allowed"
+    degr = rad / np.pi * 180
+
+    # convert degr (180,360) to (-180,0)
+    if degr > 180:
+        degr -= 360
+    return degr
+
+
+class ObsSeqOut(object):
+    """
+    Attributes:
+        obstypes (list) :       contains (kind_nr, kind_name) tuples
+    """
+
+    def __init__(self, filepath):
+        print(filepath)
+        self.ascii = open(filepath, "r").readlines()
+
+        self.preamble, self.content = self.get_preamble_content()
+        self.obstypes = self.get_obstypes()
+
+        self.dict = self.obs_to_dict()
+        self.df = self.to_pandas()
+
+    def __str__(self):
+        return self.df.__str__()
+
+    def get_preamble_content(self):
+        """Split the obs_seq.out file into two parts
+        1) First lines of obs_seq.out file until the first observation message
+        2) Observation contents
+        """
+        for i, line in enumerate(self.ascii):
+            if " OBS  " in line:
+                break
+
+        return self.ascii[:i], self.ascii[i:]
+
+    def get_obstypes(self):
+        """Return a list of tuples (kind_nr, kind_descriptor) for each obs type"""
+        # how many obstypes
+        for i, line in enumerate(self.ascii):
+            if "obs_type_definitions" in line:
+                break
+        line_n_obstypes = i + 1
+        n_obstypes = int(self.ascii[line_n_obstypes])
+
+        # read obs type kind (number and description)
+        obstypes = []
+        for k in range(1, n_obstypes + 1):
+            kind_nr, kind_type = self.ascii[line_n_obstypes + k].split()
+            kind_nr = int(kind_nr)
+            obstypes.append((kind_nr, kind_type))
+        return obstypes
+
+    def to_dart(self, f):
+        """Write to obs_seq.out file in DART format
+
+        Args:
+            f (str):    path of file to write
+        """
+
+        def write_file(msg, output_path="./"):
+            try:
+                os.remove(output_path)
+            except OSError:
+                pass
+
+            with open(output_path, "w") as f:
+                f.write(msg)
+                print(output_path, "saved.")
+
+        def write_preamble(n_obs):
+
+            num_obstypes = str(len(self.obstypes))
+            txt = " obs_sequence \n obs_kind_definitions \n     " + num_obstypes
+
+            for (nr, obstype) in self.obstypes:
+                txt += "\n         " + str(nr) + " " + obstype
+            nobs = str(n_obs)
+            txt += "\n num_copies:            2  num_qc:            1"
+            txt += "\n num_obs:           " + nobs
+            txt += "   max_num_obs:            " + nobs
+            txt += "\n observations \n truth \n Quality Control \n"
+            txt += " first:            1  last:            " + nobs
+            return txt
+
+        def write_obs(i, obs, next_i_obs=None, prev_i_obs=None):
+            """Write the observation section of a obs_seq.out file
+            Args:
+                i   (int):      index of observation
+                obs (dict):     observation data
+
+                next_i_obs (int):   index of next observation
+                prev_i_obs (int):   index of previous observation
+                                    (in case it is the last)
+
+            Returns
+                str
+            """
+
+            if next_i_obs:
+                line_link = "        -1           " + str(next_i_obs) + "          -1"
+            else:  # last observation in file
+                line_link = "          " + str(prev_i_obs) + "           -1          -1"
+
+            lon_rad = str(obs["loc3d"][0])
+            lat_rad = str(obs["loc3d"][1])
+
+            out = (
+                " \n".join(
+                    [
+                        "\nOBS         " + str(i),
+                        str(obs["obs"]),
+                        str(obs["truth"]),
+                        str(obs["qc"]),
+                        line_link,
+                        "obdef",
+                        "loc3d",
+                        "    ".join(
+                            [
+                                lon_rad,
+                                lat_rad,
+                                str(obs["loc3d"][2]),
+                                str(obs["loc3d"][3]),
+                            ]
+                        ),
+                        "kind",
+                        "         " + str(obs["kind"]),
+                        "".join(obs["metadata"]),
+                    ]
+                )
+                + str(i)
+                + "\n "
+                + obs["time"][0]
+                + "     "
+                + obs["time"][1]
+                + "\n"
+                + str(obs["variance"])
+            )
+            return out
+
+        n_obs = len(self.df)
+        outstr = write_preamble(n_obs)
+
+        # loop through observations, concatenate obs sections
+        # DART format is linked list, needs index of next observation
+        # k     ...         0, ..., len(df)-1
+        # i_obs_this ...    starts at 1
+        for k, (_, obs) in enumerate(self.df.iterrows()):
+            i_obs_this = k + 1
+
+            if k < len(self.df) - 1:
+                i_obs_next = k + 2
+                outstr += write_obs(i_obs_this, obs, next_i_obs=i_obs_next)
+            else:  # last obs in file
+                i_obs_prev = k
+                outstr += write_obs(i_obs_this, obs, prev_i_obs=i_obs_prev)
+
+        write_file(outstr, output_path=f)
+
+    def obs_to_dict(self):
+        """Convert an obs_seq.out file to a dictionary"""
+
+        def check_obs_begin(line):
+            if not " OBS  " in line:
+                raise RuntimeError("wrong line in observation")
+
+        def content_to_list(content):
+            """Split obs_seq.out content into list of observation-sections"""
+            obs_list = []
+            i = 0
+            check_obs_begin(content[0])
+            obs_begin = 0
+
+            for i, line in enumerate(content):
+                if i == 0:
+                    continue
+
+                if " OBS  " in line:  # then this line is beginning of obs
+                    obs_end = i - 1  # previous line
+                    obs_list.append(content[obs_begin : obs_end + 1])
+                    obs_begin = i  # next obs starts here
+
+                if i == len(content) - 1:  # last line
+                    obs_end = i
+                    obs_list.append(content[obs_begin : obs_end + 1])
+
+            assert len(obs_list) > 1
+            return obs_list
+
+        def one_obs_to_dict(obs_list_entry):
+            """"""
+            out = dict()
+            lines = obs_list_entry
+
+            check_obs_begin(lines[0])
+
+            for i, line in enumerate(lines):
+                if "loc3d" in line:  # find location
+                    line_loc = i + 1
+
+                if "kind" in line:  # find obs kind
+                    line_kind = i + 1
+
+            out["obs"] = float(lines[1].strip())
+            out["truth"] = float(lines[2].strip())
+            out["qc"] = float(lines[3].strip())
+            x, y, z, z_coord = lines[line_loc].split()
+            out["loc3d"] = float(x), float(y), float(z), int(z_coord)
+            out["kind"] = int(lines[line_kind].strip())
+            out["metadata"] = lines[line_kind + 1 : -3]
+            out["time"] = tuple(lines[-2].split())
+            out["variance"] = float(lines[-1].strip())
+            return out
+
+        def obs_list_to_dict(obs_list):
+            # wraps `one_obs_to_dict`
+            obs_list_dict = []  # list of dict
+            for entry in obs_list:
+
+                # convert list of lines to dictionary
+                # with (kind, loc3d, values, ...) as keys
+                obs_dict = one_obs_to_dict(entry)
+
+                obs_list_dict.append(obs_dict)  # append dict to list
+            return obs_list_dict
+
+        # content = [line1, ...]
+        # transform to [obs1, obs2, obs3, ...]
+        # obs1 = [obsline1, obsline2, ...]
+        obs_list = content_to_list(self.content)
+        return obs_list_to_dict(obs_list)
+
+    def to_pandas(self):
+        """Create xr.Dataset containing observations
+        Variables = observation types
+        """
+        obs_dict_list = self.obs_to_dict()
+
+        # convert to pandas.DataFrame
+        # each observation is one line
+        # columns: all observation contents
+
+        # set keys from first obs (kind, loc3d, values)
+        keys = obs_dict_list[0].keys()
+        data = {key: [] for key in keys}
+
+        # fill the data lists for each column of the DataFrame
+        for obs in obs_dict_list:
+            for key in keys:
+                data[key].append(obs[key])
+
+        return pd.DataFrame(index=range(len(obs_dict_list)), data=data)
+
+    def get_lon_lat(self):
+        lats = np.empty(len(self.df), np.float32)
+        lons = lats.copy()
+
+        for i_obs, values in self.df.loc3d.items():
+            x, y, z, z_coord = values
+
+            # convert radian to degrees lon/lat
+            lon = rad_to_degrees(x)
+            lat = rad_to_degrees(y)
+            lons[i_obs] = lon
+            lats[i_obs] = lat
+
+        return pd.DataFrame(index=self.df.index, data=dict(lat=lats, lon=lons))
+
+    def superob(self, window_km):
+        """Select subset, average, overwrite existing obs with average
+
+        TODO: allow different obs types (KIND)
+        TODO: loc3d overwrite with mean
+        Metadata is copied from the first obs in a superob-box
+
+        Note:
+            This routine discards observations (round off)
+            e.g. 31 obs with 5 obs-window => obs #31 is not processed
+
+        Args:
+            window_km (numeric):        horizontal window edge length
+                                        includes obs on edge
+                                        25x25 km with 5 km obs density
+                                        = average 5 x 5 observations
+        """
+        debug = False
+        radius_earth_meters = 6.371 * 1e6
+        m_per_degrees = np.pi * radius_earth_meters / 180  # m per degree latitude
+        km_per_degrees = m_per_degrees / 1000
+
+        def calc_deg_from_km(distance_km, center_lat):
+            """Approximately calculate distance in degrees from meters
+            Input: distance in km; degree latitude
+            Output: distance in degrees of latitude, longitude
+            """
+            assert distance_km > 0, "window size <= 0, must be > 0"
+            dist_deg_lat = distance_km / km_per_degrees
+            dist_deg_lon = dist_deg_lat * np.cos(center_lat * np.pi / 180)
+            return dist_deg_lat, dist_deg_lon
+
+        def calc_km_from_deg(deg_lat, deg_lon, center_lat):
+            dist_km_lat = deg_lat * km_per_degrees
+            dist_km_lon = deg_lon * km_per_degrees * np.cos(center_lat * np.pi / 180)
+            return dist_km_lat, dist_km_lon
+
+        # assume cartesian grid of observations
+        i_obs_grid = self.df.index.values
+        nx = int(len(i_obs_grid) ** 0.5)
+        i_obs_grid = i_obs_grid.reshape(nx, nx)
+
+        # loop through columns/rows
+        # avoid loop in (lat,lon) space as coordinates are non-cartesian
+        # i.e. first column of observations does not have same longitude
+
+        # determine obs density (approx)
+        coords = self.get_lon_lat()
+        dx_obs_lat_deg = coords.lat.diff().max()
+        km_lat, _ = calc_km_from_deg(dx_obs_lat_deg, np.nan, 45)
+        obs_spacing_km = int(km_lat)
+
+        # how many observations in x/y direction?
+        win_obs = int(window_km / obs_spacing_km)
+        if debug:
+            print("window (#obs in x/y)=", win_obs)
+
+        out = self.df.drop(self.df.index)  # this df will be filled
+
+        for i in range(0, nx - win_obs, win_obs):
+            for j in range(0, nx - win_obs, win_obs):
+
+                # find indices of observations which lie in the superob box
+                i_obs_box = i_obs_grid[i : i + win_obs, j : j + win_obs].ravel()
+
+                if debug:
+                    print("box=", i, i + win_obs, j, j + win_obs)
+                    print("i_obs box=", i_obs_grid[i : i + win_obs, j : j + win_obs])
+
+                # average the subset
+                # metadata are assumed to be equal
+                obs_box = self.df.iloc[i_obs_box]
+
+                obs_mean = obs_box.iloc[0]
+                obs_mean.at["obs"] = obs_box["obs"].mean()
+                obs_mean.at["truth"] = obs_box["truth"].mean()
+                obs_mean.at["qc"] = obs_box["qc"].mean()
+                obs_mean.at["variance"] = obs_box["variance"].mean()
+                if debug:
+                    print("pre_avg:", obs_box.head())
+                    print("avg:", obs_mean)
+
+                out = out.append(obs_mean)
+
+        self.df = out  # overwrite input
+
+    def plot(self, box=None):
+        import xarray as xr
+
+        georef = xr.open_dataset("/gpfs/data/fs71386/lkugler/run_DART/geo_em.d01.nc")
+        lon = georef.XLONG_M.values.squeeze()
+        lat = georef.XLAT_M.values.squeeze()
+
+        from mpl_toolkits.basemap import Basemap
+
+        fig, ax = plt.subplots(figsize=(12, 12))
+
+        # m = Basemap(projection='geos', lon_0=lon[100,100], resolution='i',
+        #             llcrnrlon=lon[0,0]-1,llcrnrlat=lat[0,0]-3,
+        #             urcrnrlon=lon[-1,-1]+1,urcrnrlat=lat[-1,-1]+3)
+        m = Basemap(
+            projection="lcc",
+            resolution="h",
+            lon_0=lon[100, 100],
+            lat_0=lat[25, 25],
+            lat_1=lat[100, 100],
+            lat_2=lat[175, 175],
+            llcrnrlon=lon[0, 0] - 2,
+            llcrnrlat=lat[0, 0] - 2,
+            urcrnrlon=lon[-1, -1] + 2,
+            urcrnrlat=lat[-1, -1] + 2,
+        )
+        # m.fillcontinents(color='0.1', lake_color='0.2')
+
+        m.drawlsmask(land_color="0.1", ocean_color="0.1")
+        m.drawcoastlines(color="white")
+        m.drawcountries(color="white")
+
+        plot_box(m, lat, lon, label="domain", color="green", lw=4)
+
+        # OBSERVATIONS
+        coords = self.get_lon_lat()
+        lats = coords.lat.values
+        longs = coords.lon.values
+        coords = zip(lats, longs)
+
+        for lati, long in coords:
+            m.plot(
+                long,
+                lati,
+                ".",
+                markersize=5,
+                latlon=True,
+                color="red",
+                zorder=4,
+            )
+
+        m.plot(
+            [],
+            [],
+            "s",
+            markersize=0.3,
+            label="observations",
+            color="red",
+            zorder=4,
+        )
+
+        if box:
+            lats = box["lat"]
+            longs = box["lon"]
+            lats, longs = np.meshgrid(lats, longs)
+            print(lats, longs)
+
+            plot_box(m, lats, longs, label="superob", color="white", lw=1)
+
+        plt.legend()
+        plt.tight_layout()
+        f = "/home/fs71386/lkugler/data/analysis/map_obs_superobs.png"
+        plt.savefig(f, dpi=300)
+        print(f, "saved.")
+
+
+if __name__ == "__main__":
+    # for testing purposes
+
+    obs = ObsSeqOut(cluster.scriptsdir + "/../tests/obs_seq.orig.out")
+
+    # select a subset (lat-lon)
+    obs.superob(window_km=50)
+
+    # write to obs_seq.out in DART format
+    obs.to_dart(f=cluster.dartrundir + "/obs_seq.out")