From 434d045ceaa40184f442e05a3172d3cdf68f55d6 Mon Sep 17 00:00:00 2001
From: lkugler <lukas.kugler@gmail.com>
Date: Wed, 5 May 2021 16:17:10 +0200
Subject: [PATCH] use different values of obserr in obs generation and
 assimilation

---
 config/cfg.py              | 35 ++++++++++--------
 scripts/assim_synth_obs.py | 72 +++++++++++++++++++++++++++++++++-----
 scripts/create_obsseq.py   | 71 ++++++++++++++++++++++++-------------
 3 files changed, 130 insertions(+), 48 deletions(-)

diff --git a/config/cfg.py b/config/cfg.py
index caac5e7..98027d6 100755
--- a/config/cfg.py
+++ b/config/cfg.py
@@ -9,42 +9,49 @@ class ExperimentConfiguration(object):
 
 
 exp = ExperimentConfiguration()
-exp.expname = "exp_v1.16_P1_40mem"
+exp.expname = "exp_v1.16_P1-1_Radar"
 exp.model_dx = 2000
-exp.timestep = 8
 exp.n_ens = 40
 exp.n_nodes = 10
 
 n_obs = 1600  # 50km res: 64:8x8; 10km res: 1600:40x40  # radar: n_obs for each observation height level
 
 vis = dict(plotname='VIS 0.6µm',  plotunits='[1]',
-           kind='MSG_4_SEVIRI_BDRF',
-           sat_channel=1, n_obs=n_obs, err_std=0.03,
+           kind='MSG_4_SEVIRI_BDRF', sat_channel=1, n_obs=n_obs, 
+           err_std=0.03,
+           error_generate=0.03, error_assimilate=0.06,
            cov_loc_radius_km=32)
+
 wv73 = dict(plotname='Brightness temperature WV 7.3µm',  plotunits='[K]',
-            kind='MSG_4_SEVIRI_TB',
-            sat_channel=6, n_obs=n_obs, err_std=False,
+            kind='MSG_4_SEVIRI_TB', sat_channel=6, n_obs=n_obs, 
+            err_std=False,
+            error_generate=False, error_assimilate=False,
             cov_loc_radius_km=32)
+
 ir108 = dict(plotname='Brightness temperature IR 10.8µm', plotunits='[K]',
-             kind='MSG_4_SEVIRI_TB',
-             sat_channel=9, n_obs=n_obs, err_std=5.,
+             kind='MSG_4_SEVIRI_TB', sat_channel=9, n_obs=n_obs, 
+             err_std=5.,
+             error_generate=5., error_assimilate=10.,
              cov_loc_radius_km=32)
 
 radar = dict(plotname='Radar reflectivity', plotunits='[dBz]',
-             kind='RADAR_REFLECTIVITY', 
-             n_obs=n_obs, err_std=5.,
+             kind='RADAR_REFLECTIVITY', n_obs=n_obs, 
+             error_generate=2.5, error_assimilate=5.,
              heights=np.arange(1000, 15001, 1000),
-             cov_loc_radius_km=30, cov_loc_vert_km=4)
+             cov_loc_radius_km=32, cov_loc_vert_km=4)
 
 t2m = dict(plotname='SYNOP Temperature', plotunits='[K]',
-           kind='SYNOP_TEMPERATURE', n_obs=n_obs, err_std=0.1, 
+           kind='SYNOP_TEMPERATURE', n_obs=n_obs, 
+           error_generate=0.1, error_assimilate=1.,
            cov_loc_radius_km=20, cov_loc_vert_km=3)
+
 psfc = dict(plotname='SYNOP Pressure', plotunits='[dBz]',
-            kind='SYNOP_SURFACE_PRESSURE', n_obs=n_obs, err_std=50.,
+            kind='SYNOP_SURFACE_PRESSURE', n_obs=n_obs, 
+            error_generate=50., error_assimilate=100.,
             cov_loc_radius_km=32, cov_loc_vert_km=5)
 
 
-exp.observations = [ ] #ir108, vis, wv73]  # ir108, wv73, vis]
+exp.observations = [radar, ] # 108, wv73, vis]
 
 # directory paths depend on the name of the experiment
 cluster.expname = exp.expname
diff --git a/scripts/assim_synth_obs.py b/scripts/assim_synth_obs.py
index 818fdfb..063d510 100755
--- a/scripts/assim_synth_obs.py
+++ b/scripts/assim_synth_obs.py
@@ -53,7 +53,8 @@ def read_prior_obs(f_obs_prior):
             OBSs.append(dict(observed=observed, truth=truth, prior_ens=np.array(prior_ens)))
     return OBSs
 
-def read_obsseqout(f):
+def read_truth_obs_obsseq(f):
+    """Reads observed and true values from obs_seq.out/final files."""
     obsseq = open(f, 'r').readlines()
     true = []
     obs = []
@@ -66,6 +67,44 @@ def read_obsseqout(f):
             obs.append(observed)
     return true, obs
 
+def replace_errors_obsseqout(f, new_errors):
+    """Replaces existing observation errors in obs_seq.final files
+    
+    new_errors (np.array) : standard deviation,
+                            shape must match the number of observations
+    """
+    obsseq = open(f, 'r').readlines()
+
+    # find number of lines between two ' OBS   ' lines:
+    first_obs = None
+    for i, line in enumerate(obsseq):
+        if ' OBS ' in line:
+            if first_obs is None:
+                first_obs = i
+            else:
+                second_obs = i
+                break
+    lines_between = second_obs - first_obs
+    lines_obserr_after_obsnr = lines_between - 1  # obserr line is one before ' OBS   ' line
+
+    # replace values in list obsseq
+    i_obs = 0
+    for i, line in enumerate(obsseq):
+        if ' OBS ' in line:
+            line_error_obs_i = i+lines_obserr_after_obsnr
+
+            previous_err_var = obsseq[line_error_obs_i]
+            new_err_obs_i = new_errors[i_obs]**2  # variance in obs_seq.out
+            #print('previous err var ', previous_err_var, 'new error', new_err_obs_i)
+            obsseq[line_error_obs_i] = ' '+str(new_err_obs_i)+' \n'
+
+            i_obs += 1  # next iteration
+
+    with open(f, 'w') as file:
+        for line in obsseq:
+            file.write(line)
+        print(f, 'saved.')
+
 
 def set_DART_nml(sat_channel=False, cov_loc_radius_km=32, cov_loc_vert_km=False,
                  just_prior_values=False):
@@ -129,7 +168,7 @@ def obs_operator_ensemble(istage):
             os.system('mpirun -np 12 ./perfect_model_obs > /dev/null')
 
             # truth values in obs_seq.out are H(x) values
-            true, _ = read_obsseqout(cluster.dartrundir+'/obs_seq.out')
+            true, _ = read_truth_obs_obsseq(cluster.dartrundir+'/obs_seq.out')
             list_ensemble_truths.append(true)
         
         n_obs = len(list_ensemble_truths[0])
@@ -144,7 +183,7 @@ def obs_operator_nature(time):
     print('running obs operator on nature run')
     prepare_nature_dart(time)
     run_perfect_model_obs()
-    true, _ = read_obsseqout(cluster.dartrundir+'/obs_seq.out')
+    true, _ = read_truth_obs_obsseq(cluster.dartrundir+'/obs_seq.out')
     return true
 
 
@@ -286,32 +325,47 @@ if __name__ == "__main__":
 
         archive_stage = archive_time + '/assim_stage'+str(istage)+'/'
         n_obs = obscfg['n_obs']
+        n_obs_3d = n_obs * len(obscfg['heights'])
         sat_channel = obscfg.get('sat_channel', False)
+        error_generate = obscfg['error_generate']
+        error_assimilate = obscfg['error_assimilate']
 
         set_DART_nml(sat_channel=sat_channel, 
                      cov_loc_radius_km=obscfg['cov_loc_radius_km'],
                      cov_loc_vert_km=obscfg.get('cov_loc_vert_km', False))
 
-        use_error_parametrization = obscfg['err_std'] == False
+        use_error_parametrization = error_generate == False
         if use_error_parametrization:
+
             if sat_channel != 6:
                 raise NotImplementedError('sat channel '+str(sat_channel))
 
-            osq.create_obsseq_in(time, obscfg, zero_error=True)  # zero error to get truth vals
+            osq.create_obsseq_in(time, obscfg, obs_errors=0)  # zero error to get truth vals
 
             Hx_nat = obs_operator_nature(time) 
             Hx_prior = obs_operator_ensemble(istage)  # files are already linked to DART directory
-            
-            obscfg['err_std'] = calc_obserr_WV73(Hx_nat, Hx_prior)
-
+            error_generate = calc_obserr_WV73(Hx_nat, Hx_prior)
+ 
         # create obs template file, now with correct errors
-        osq.create_obsseq_in(time, obscfg, archive_obs_coords=archive_stage+'/obs_coords.pkl')
+        osq.create_obsseq_in(time, obscfg, obs_errors=error_generate,
+                             archive_obs_coords=archive_stage+'/obs_coords.pkl')
         prepare_nature_dart(time)  # link WRF files to DART directory
         run_perfect_model_obs()  # actually create observations that are used to assimilate
 
+        # debug option: overwrite time in prior files
         # for iens in range(1,41):
         #    os.system('ncks -A -v Times '+cluster.dartrundir+'/wrfout_d01 '+cluster.dartrundir+'/advance_temp'+str(iens)+'/wrfout_d01')
 
+        if error_assimilate is not False:
+            # overwrite error values in obs_seq.out
+
+            if isinstance(error_assimilate, int):
+                error_assimilate = np.zeros(n_obs_3d) + error_assimilate
+            else:
+                assert len(error_assimilate) == n_obs_3d
+
+            replace_errors_obsseqout(cluster.dartrundir+'/obs_seq.out', error_assimilate)
+
         assimilate()
         dir_obsseq = cluster.archivedir()+'/obs_seq_final/assim_stage'+str(istage)
         archive_diagnostics(dir_obsseq, time)
diff --git a/scripts/create_obsseq.py b/scripts/create_obsseq.py
index 8d050ed..400b50d 100755
--- a/scripts/create_obsseq.py
+++ b/scripts/create_obsseq.py
@@ -211,13 +211,15 @@ kind
 """+str(obs['obserr_var'])
 
 
-def create_obsseq_in(time_dt, obscfg, zero_error=False,
+def create_obsseq_in(time_dt, obscfg, obs_errors=False,
                      archive_obs_coords=False):
     """Create obs_seq.in
 
     Args:
         time_dt (dt.datetime): time of observation
         obscfg (dict)
+        obs_errors (int, np.array) : values of observation errors (standard deviations)
+            e.g. 0 = use zero error
         archive_obs_coords (str, False): path to folder
 
     channel_id (int): SEVIRI channel number
@@ -255,8 +257,8 @@ def create_obsseq_in(time_dt, obscfg, zero_error=False,
 
     # define obs error
     obserr_std = np.zeros(n_obs_3d) 
-    if not zero_error:
-        obserr_std += obscfg['err_std']
+    if obs_errors:
+        obserr_std += obs_errors
 
     # other stuff for obsseq.in
     obs_kind_nr = obs_kind_nrs[kind]
@@ -305,25 +307,44 @@ def create_obsseq_in(time_dt, obscfg, zero_error=False,
 if __name__ == '__main__':
     # for testing
     time_dt = dt.datetime(2008, 7, 30, 9, 0)
-    n_obs = 16  # radar: n_obs for each observation height level
-
-    vis = dict(kind='MSG_4_SEVIRI_BDRF',
-            sat_channel=1, n_obs=n_obs, err_std=0.03,
-            cov_loc_radius_km=10)
-    wv = dict(kind='MSG_4_SEVIRI_TB',
-            sat_channel=6, n_obs=n_obs, err_std=False,
-            cov_loc_radius_km=10)
-    ir108 = dict(kind='MSG_4_SEVIRI_TB',
-                sat_channel=9, n_obs=n_obs, err_std=5.,
-                cov_loc_radius_km=10)
-
-    radar = dict(kind='RADAR_REFLECTIVITY', n_obs=n_obs, err_std=5.,
-                heights=np.arange(1000, 2001, 1000),
-                cov_loc_radius_km=10, cov_loc_vert_km=2)
-
-    t2m = dict(kind='SYNOP_TEMPERATURE', n_obs=n_obs, err_std=1.0, 
-            cov_loc_radius_km=32, cov_loc_vert_km=1)
-    psfc = dict(kind='SYNOP_SURFACE_PRESSURE', n_obs=n_obs, err_std=50.,
-            cov_loc_radius_km=32, cov_loc_vert_km=5)
-
-    create_obsseq_in(time_dt, radar, archive_obs_coords='./coords_stage1.pkl')
+    n_obs = 1600  # radar: n_obs for each observation height level
+
+    vis = dict(plotname='VIS 0.6µm',  plotunits='[1]',
+            kind='MSG_4_SEVIRI_BDRF', sat_channel=1, n_obs=n_obs, 
+            err_std=0.03,
+            error_generate=0.03, error_assimilate=0.06,
+            cov_loc_radius_km=32)
+
+    wv73 = dict(plotname='Brightness temperature WV 7.3µm',  plotunits='[K]',
+                kind='MSG_4_SEVIRI_TB', sat_channel=6, n_obs=n_obs, 
+                err_std=False,
+                error_generate=False, error_assimilate=False,
+                cov_loc_radius_km=32)
+
+    ir108 = dict(plotname='Brightness temperature IR 10.8µm', plotunits='[K]',
+                kind='MSG_4_SEVIRI_TB', sat_channel=9, n_obs=n_obs, 
+                err_std=5.,
+                error_generate=5., error_assimilate=10.,
+                cov_loc_radius_km=32)
+
+    radar = dict(plotname='Radar reflectivity', plotunits='[dBz]',
+                kind='RADAR_REFLECTIVITY', n_obs=n_obs, 
+                error_generate=2.5, error_assimilate=5.,
+                heights=np.arange(1000, 15001, 1000),
+                cov_loc_radius_km=30, cov_loc_vert_km=4)
+
+    t2m = dict(plotname='SYNOP Temperature', plotunits='[K]',
+            kind='SYNOP_TEMPERATURE', n_obs=n_obs, 
+            error_generate=0.1, error_assimilate=1.,
+            cov_loc_radius_km=20, cov_loc_vert_km=3)
+
+    psfc = dict(plotname='SYNOP Pressure', plotunits='[dBz]',
+                kind='SYNOP_SURFACE_PRESSURE', n_obs=n_obs, 
+                error_generate=50., error_assimilate=100.,
+                cov_loc_radius_km=32, cov_loc_vert_km=5)
+
+    #create_obsseq_in(time_dt, radar, archive_obs_coords=False) #'./coords_stage1.pkl')
+
+    error_assimilate = 5.*np.ones(n_obs*len(radar['heights']))
+    import assim_synth_obs as aso
+    aso.replace_errors_obsseqout(cluster.dartrundir+'/obs_seq.out', error_assimilate)
-- 
GitLab