From 802426762647836fcfaa8138a1158f63856183a6 Mon Sep 17 00:00:00 2001
From: lkugler <lukas.kugler@gmail.com>
Date: Wed, 15 Mar 2023 10:36:30 +0100
Subject: [PATCH] improve 2D obs

---
 dartwrf/obsseq_2dim.py | 59 +++++++++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/dartwrf/obsseq_2dim.py b/dartwrf/obsseq_2dim.py
index 966d709..46bd605 100755
--- a/dartwrf/obsseq_2dim.py
+++ b/dartwrf/obsseq_2dim.py
@@ -1,5 +1,16 @@
 """Create obs_seq.out files with collapsed vertical dimension
 Specifically, one observation per column which is the maximum of the column
+
+Use this script before running the OSSE workflow, to prepare obs_seq.out files.
+
+Note:
+     path_3d_obsseq = '/path/exp_obs10_loc20/obs_seq_out/2008-07-30_%H:%M_obs_seq.out'  
+
+Note:
+     Only works in case there is 1 observation type!
+
+Example:
+     python obsseq_2dim.py exp_v1.22_P2_rr_REFL_obs10_loc20_oe2.5 2008-07-30_13:00
 """
 
 from copy import copy
@@ -8,41 +19,46 @@ import time as time_module
 import datetime as dt
 import numpy as np
 
-from config.cfg import exp
 from config.cluster import cluster
+from dartwrf import utils
 from dartwrf import assim_synth_obs as aso
 from dartwrf import obsseq
 
+def _get_n_obs_per_layer(oso):     
+     """Get number of observations per layer"""
+     height_all = np.array([a[2] for a in oso.df.loc3d])
 
-if __name__ == "__main__":
-
-     assim_time = dt.datetime.strptime(sys.argv[1], "%Y-%m-%d_%H:%M")
+     height_first = height_all[0]
 
-    # prepare an obsseq without rejected observations
-     if exp.use_existing_obsseq:  # from another exp
-          oso_input = assim_time.strftime(exp.use_existing_obsseq)
+     # count how often this height appears
+     n_obs_per_layer = int(np.sum(height_all == height_first))
+     return n_obs_per_layer
 
-     # only assured to work with single obstype
-     if len(exp.observations) > 1:
-          raise NotImplementedError()
-     n_obs = exp.observations[0]['n_obs']
 
+if __name__ == "__main__":
+     exp = sys.argv[1]
+     assim_time = dt.datetime.strptime(sys.argv[2], "%Y-%m-%d_%H:%M")
+     
+     path_3d_obsseq = cluster.archive_base+exp+'/obs_seq_out/%Y-%m-%d_%H:%M_obs_seq.out'
+     oso_input = assim_time.strftime(path_3d_obsseq)
+     
      # existing obsseq with multi levels
      oso = obsseq.ObsSeq(oso_input)
+     
+     n_obs_3d = len(oso.df)
+     n_obs_per_layer = _get_n_obs_per_layer(oso)
+     nlev = int(n_obs_3d/n_obs_per_layer)
+     assert np.allclose(nlev, n_obs_3d/n_obs_per_layer), 'n_obs not evenly divisible!'
 
-     nlev = len(oso.df)/n_obs
-     if nlev - int(nlev) != 0:
-          raise RuntimeError()
-     nlev = int(nlev)  # levels per obs
+     print('n_obs_per_layer', n_obs_per_layer)
+     print('n_obs_3d', n_obs_3d)
      
-     # copy will be modified
-     output = copy(oso)
+     output = copy(oso)  # copy will be modified
+     # output.df = output.df.copy()  # without this, we get a SettingWithCopyWarning
      output.df = output.df.iloc[0::nlev]  #  every nth level = first level
 
-     #print(output.df, oso.df)
-
      # iterate through, set value to max
-     for i_obs in range(0, n_obs):  # go through n_obs (all columns)
+     for i_obs in range(0, ):  # go through n_obs (all columns)
 
           i_obs_subset = i_obs*nlev  # jumps by nlev (from one to next column)
           column = oso.df.loc[0 + i_obs_subset:nlev + i_obs_subset, :]  # select column
@@ -50,8 +66,9 @@ if __name__ == "__main__":
           output.df.loc[i_obs_subset, ('observations')] = float(column['observations'].max())
           output.df.loc[i_obs_subset, ('truth')] = float(column['truth'].max())
 
-     print(output.df) #, 'observations'], output.df.loc[i_obs, 'observations'])
+     print(output.df)
 
      fout = cluster.archivedir + assim_time.strftime("/obs_seq_out/%Y-%m-%d_%H:%M_obs_seq.out")
      os.makedirs(cluster.archivedir+'/obs_seq_out', exist_ok=True)
      output.to_dart(fout)
+     utils.write_txt(["created from", oso_input,], fout[:-3]+'.txt')
-- 
GitLab