Commit 8c018ee5 authored by Philipp Griewank's avatar Philipp Griewank

Adding localization

Changed the various functions needed to include localization in the EnKF. Examples still need to be added to the getting-started notebook, and so far only a Gaussian localization is available.
parent 0222f5d6
......@@ -5,7 +5,9 @@
import numpy as np
from model_functions import *
def set_da_constants(ncyc=10,nt=1,dt=500,u_std=0.5,dhdt_std=1e-4,True_std_obs=0.1,used_std_obs=0.1,pert_std_obs=0.00,obs_loc_h=np.arange(5,101,15),nens=100,nexp=1,init_noise=0.,fixed_seed=True,loc=None,init_spread = False, init_spread_h=0.5,init_spread_x = 500.):
def set_da_constants(ncyc=10,nt=1,dt=500,u_std=0.5,dhdt_std=1e-4,True_std_obs=0.1,used_std_obs=0.1,pert_std_obs=0.00,obs_loc_h=np.arange(5,101,15),nens=100,nexp=1,init_noise=0.,fixed_seed=True,
loc=None,init_spread = False, init_spread_h=0.5,init_spread_x = 500.,
loc_length = 1000,loc_type='gaussian'):
"""
Sets constants needed for data assimilation and stores them in a dictionary.
There is some confusing misnaming going on, e.g. "u_std_ens" = u_std, but nothing that needs to be changed immediately
......@@ -50,6 +52,9 @@ def set_da_constants(ncyc=10,nt=1,dt=500,u_std=0.5,dhdt_std=1e-4,True_std_obs=0.
#Localization
DA_const["loc"] = loc # localization yes or no, no for now
DA_const["loc_length"] = loc_length # localization yes or no, no for now
DA_const["loc_type"] = loc_type # localization yes or no, no for now
return DA_const
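For example, enabling localization when building the constants dictionary would presumably look like the call below (whether loc expects True or some other truthy value is not shown in this diff, so treat the flag as an assumption):

da_const = set_da_constants(loc=True, loc_length=2000, loc_type='gaussian')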
def create_states_dict(j,states,m_const,da_const):
......@@ -121,20 +126,23 @@ def create_states_dict(j,states,m_const,da_const):
return DA_const
def generate_obs(truth,states,m_const,da_const,j,t):
"""
Generates the truth and observations for the next assimilation step.
If a fixed_seed is used, the model "errors" of the truth remain the same, but the noise of the obs is still reseeded differently each time.
To avoid diffusion over time in the truth, I take advantage of the linearity of the model to compute it directly from the initial conditions.
This only works if the truth is not perturbed each timestep, though.
"""
#Generate new truth constants and integrate in time
if da_const["fixed_seed"]==True: np.random.seed(j)
u_truth = np.random.normal(m_const["u_ref"],m_const["u_std_truth"])
dhdt_truth = np.random.normal(m_const["dhdt_ref"],m_const["dhdt_std_truth"])
if da_const["fixed_seed"]==True:
truth = linear_advection_model(states['truth'][0],u_truth,dhdt_truth,m_const["dx"],da_const["dt"]*t,da_const["nt"])
else:
truth = linear_advection_model(truth,u_truth,dhdt_truth,m_const["dx"],da_const["dt"],da_const["nt"])
#Make obs by adding some noise; the fixed seed is changed over time so that the differences are not always the same at each measurement location
if da_const["fixed_seed"]==True: np.random.seed((j+1)*t)
obs = truth + np.random.normal(0,da_const["True_std_obs"],m_const["nx"])
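The linearity trick described in the docstring can be checked with a toy stand-in for linear_advection_model (the stand-in below is an assumption, not the repository's model): advecting by an integer number of grid cells for t steps of length dt yields exactly the same field as a single step of length dt*t, so no numerical diffusion accumulates in the truth.

import numpy as np

def toy_linear_advection(h, u, dhdt, dx, dt, nt=1):
    # Stand-in for linear_advection_model (an assumption): shift the field by
    # an integer number of grid cells and add a constant growth term, nt times.
    for _ in range(nt):
        h = np.roll(h, int(round(u * dt / dx))) + dhdt * dt
    return h

h0 = np.random.randn(101)
u, dhdt, dx, dt, t = 2.0, 1e-4, 100.0, 500.0, 7
stepped = h0.copy()
for _ in range(t):
    stepped = toy_linear_advection(stepped, u, dhdt, dx, dt)
# One call over dt*t reproduces t calls over dt -- the linearity used above
assert np.allclose(stepped, toy_linear_advection(h0, u, dhdt, dx, dt * t))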
......@@ -144,6 +152,8 @@ def generate_obs(truth,states,m_const,da_const,j,t):
def predict(analysis,states,da_const,m_const,j):
"""
Runs the analysis ensemble forward in time using the model to generate the next forecast/background ensemble for the next assimilation step.
......@@ -164,15 +174,13 @@ def predict(analysis,states,da_const,m_const,j):
def update_noloc(background,obs,R,H,states,da_const,m_const):
def update(background,obs,R,H,C,states,da_const,m_const):
"""
Computes the analysis by individually changing each ensemble member of the forecast/background through the shared ensemble Kalman Gain and the observations.
Todo:
-Figure out exactly which version of the EnKF is used here, so that I can refer to it properly
Now also uses the localization matrix, C.
"""
# Compute the background error covariance matrix
P = np.cov(background)
P = np.cov(background)*C
# define relative weights of observations and background in terms of the Kalman gain matrix
K = KalmanGain(P, R, H)
......@@ -184,6 +192,26 @@ def update_noloc(background,obs,R,H,states,da_const,m_const):
states["an"] = states["an"]+[an]
return an, states
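A minimal self-contained sketch of this localized update, using the default sizes from set_da_constants; the gain below is the textbook formula that KalmanGain is assumed to compute, and the per-member observation perturbation (pert_std_obs) inside get_analysis is omitted for brevity:

import numpy as np

nx, nens = 101, 100                                   # default grid and ensemble sizes
background = np.random.randn(nx, nens)                # background ensemble (nx x nens)
obs_loc = np.arange(5, 101, 15)                       # default obs_loc_h from set_da_constants
H = np.identity(nx)[obs_loc, :]                       # observation operator
R = 0.1**2 * np.identity(len(obs_loc))                # observation error covariance
C = np.ones([nx, nx])                                 # matrix of ones = no localization
obs = np.random.randn(nx)                             # stand-in obs field on the full grid

P = np.cov(background) * C                            # Schur product tapers the sampled covariance
K = P @ H.T @ np.linalg.inv(H @ P @ H.T + R)          # Kalman gain
an = background + K @ (H @ obs[:, None] - H @ background)  # nudge each member toward the obs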
#def update_noloc(background,obs,R,H,states,da_const,m_const):
# """
# Computes the analysis by individually changing each ensemble member of the forecast/background through the shared ensemble Kalman Gain and the observations.
#
# Todo:
# -Figure out exactly which version of the EnKF is used here, so that I can refer to it properly
# """
# # Compute the background error covariance matrix
# P = np.cov(background)
#
# # define relative weights of observations and background in terms of the Kalman Gain matrix of size
# K = KalmanGain(P, R, H)
#
# # Compute the analysis for each ensemble members
# an = np.zeros((m_const["nx"],da_const["nens"]))
# for i in range(da_const["nens"]):
# an[:,i] = get_analysis(background[:,i],obs,K,H,da_const)
# states["an"] = states["an"]+[an]
# return an, states
def KalmanGain(P,R,H):
......@@ -234,9 +262,10 @@ def run_linear_advection_EnKF(m_const,da_const):
#Construct localization matrix C if loc is set, otherwise use a matrix of ones (no localization)
if da_const["loc"]:
C = LocMat(da_const["loc"])
if da_const["loc_type"]=='gaussian':
C = gaussian_loc_matrix(da_const,m_const)
else:
C=None
C=np.ones([m_const["nx"],m_const["nx"]])
"""
Loop over experiments
......@@ -248,6 +277,7 @@ def run_linear_advection_EnKF(m_const,da_const):
"""
analysis, truth, states = create_states_dict(j,states,m_const,da_const)
"""
START DATA ASSIMILATION CYCLE
"""
......@@ -268,8 +298,9 @@ def run_linear_advection_EnKF(m_const,da_const):
"""
Update
"""
# Combine background and observations to get the analysis
analysis, states[j] = update_noloc(background,obs,R,H,states[j],da_const,m_const)
## Combine background and observations to get the analysis
#analysis, states[j] = update_noloc(background,obs,R,H,states[j],da_const,m_const)
analysis, states[j] = update(background,obs,R,H,C,states[j],da_const,m_const)
"""
END DATA ASSIMILATION CYCLE
"""
......@@ -278,6 +309,7 @@ def run_linear_advection_EnKF(m_const,da_const):
"""
return states
def get_spread_and_rmse(states,da_const,m_const):
"""
Computes spread and RMSE over space and time respectively, averaged over all experiments, for the background and analysis ensemble means.
......@@ -471,7 +503,7 @@ def add_response(states,func_J=sum_mid_tri):
return states
def var_reduction_estimate(states,m_const,da_const,j=0,obs = []):
def var_reduction_estimate(states,m_const,da_const,j=0,obs = [],ncyc=0,n_start = 10):
"""
Loops over a full experiment and calculates an estimated variance reduction of the response function for all observations together, as well as for each observation individually
......@@ -490,14 +522,14 @@ def var_reduction_estimate(states,m_const,da_const,j=0,obs = []):
nens = da_const["nens"]
ncyc = len(states[j]['response']['bf'])
if ncyc ==0: ncyc = len(states[j]['response']['bf'])
var_reduction_total = []
var_reduction_individual = []
real_reduction = []
for t in range(1,ncyc-1):
for t in range(n_start,ncyc-1):
R = da_const["used_std_obs"]**2*np.identity(len(obs)) # Observation error covariance matrix
H = np.identity(m_const["nx"])[da_const["obs_loc"][obs],:]
......@@ -677,3 +709,15 @@ def var_reduction_estimate_iterative(states,m_const,da_const,j=0,dJ_update_flag=
var_reduction_individual.append(vr_individual)
return var_reduction_individual
def gaussian_loc_matrix(da_const,m_const):
C = np.zeros([m_const['nx'],m_const['nx']])
#The Gaussian is slightly less trivial because of the periodic boundary domain, so the first column reuses the function that generates the initial conditions, and each subsequent column is a cyclic shift of the previous one
#Caveat: it has not been verified that the dimensions are not switched
C[:,0] = gaussian_initial_condition(m_const["x_grid"],da_const["loc_length"])
for i in range(1,m_const['nx']):
C[:,i] = np.hstack([C[-1,i-1],C[:-1,i-1]])
return C
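Since gaussian_initial_condition lives in model_functions and is not shown here, the following is a self-contained sketch of the same circulant construction under an assumed stand-in bump; note that the hstack shift above is equivalent to np.roll(C[:,i-1], 1):

import numpy as np

nx, dx, loc_length = 101, 100.0, 1000.0               # illustrative values
x_grid = np.arange(nx) * dx

def periodic_gaussian_bump(x, scale):
    # Stand-in for gaussian_initial_condition (an assumption): a Gaussian
    # bump centered at x[0], wrapped around the periodic domain.
    period = len(x) * (x[1] - x[0])
    d = np.abs(x - x[0])
    d = np.minimum(d, period - d)
    return np.exp(-0.5 * (d / scale) ** 2)

C = np.zeros([nx, nx])
C[:, 0] = periodic_gaussian_bump(x_grid, loc_length)
for i in range(1, nx):
    C[:, i] = np.roll(C[:, i - 1], 1)                 # same cyclic shift as the hstack above
assert np.allclose(C, C.T)                            # symmetric bump => symmetric circulant C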