diff --git a/ENDA.py b/ENDA.py index 17910dbd70e0ec21ca0a60e3317e5ebaaf226169..6852981adf2ded9a9840d38ef32bbb53f088d7bf 100644 --- a/ENDA.py +++ b/ENDA.py @@ -1,7 +1,11 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import json +import os import numpy as np -from scipy.special import erfinv,erf +import pickle +from scipy.interpolate import CubicSpline +from copy import deepcopy # Own modules from models import CBL @@ -21,35 +25,6 @@ from observations import * verbose = True -def transform_none(x,kind): - if kind=='dir': - return x - if kind=='inv': - return x - -def transform_log(x,kind): - if kind=='dir': - return np.log(x) - if kind=='inv': - return np.exp(x) - -def transform_pit(x,kind): - # mu and sigma could be guessed from the data, but... - a = 0.5 - b = 4.5 - mu = 0.5*(a+b) - sigma = (b-a)/12**0.5 - # takes a uniform, returns a normal - if kind=='dir': - p = (x-a)/(b-a) - xt = mu+sigma*2**0.5*erfinv(2*p-1) - return xt - # takes a normal), returns a uniform - if kind=='inv': - p = 0.5*(1+erf((x-mu)/sigma/2**0.5)) - xt = a+p*(b-a) - return xt - def observation_operator(fp,xp,x): f = np.interp(x, xp, fp) return f @@ -95,13 +70,13 @@ def eakf(xb,o,var_o,xcoord,ocoord,dist,\ def increments_in_obs_space(y_p,o,var_o): - # Compute ensemble mean and ensemble variance of prior + # Compute ensemble mean, ensemble variance and weight of prior mean_p = np.mean(y_p) var_p = np.var(y_p) - - # Observation and prior weights - weight_o = 1./var_o weight_p = 1./var_p + + # Observation weight + weight_o = 1./var_o # Compute weight of posterior, then ensemble variance of posterior weight_u = weight_o + weight_p @@ -118,7 +93,7 @@ def eakf(xb,o,var_o,xcoord,ocoord,dist,\ alpha = (var_u*weight_p)**0.5 #delta_y = mean_u*np.ones(nens) + (alpha-1)*y_p - alpha*mean_p*np.ones(nens) delta_y = mean_u*np.ones(nens) + alpha*(y_p - mean_p*np.ones(nens)) - y_p - + return delta_y,weight_p # Read out relevant dimensions @@ -274,19 +249,30 @@ class cycle: nens,\ FILTER,\ trun,\ + tspinup_assim,\ assimilation_interval,\ ocoord,\ observations,\ obs_error_sdev_assimilate,\ localization_cutoff,\ - inflate_rtps,\ inflation_rtps_alpha,\ + simulate_underdispersiveness,\ rnseed): + # Sanity check + assert trun>tspinup_assim, \ + f"Pre-assimilation spinup time must be shorter than run length, got: {trun, tspinup_assim}" + # How many cycles do you need to run? ncycles = int(trun/assimilation_interval) time = (np.arange(ncycles)+1)*assimilation_interval + # Do you want to have a spinup time for the assimilation? + # I.e., an interval between ensemble initialization + # and the first assimilation + nspinupcycles = int(tspinup_assim/assimilation_interval) + analysis_times = np.arange(nspinupcycles,ncycles) + # Define state vector length state_vector_length = nr.x0.size initial_state_vector = nr.x0 @@ -325,106 +311,155 @@ class cycle: if isinstance(nr,CBL): # Initialize assimilation ensemble by adding # random perturbations to initial state of nature run. 
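# A minimal sketch (with hypothetical values) of the assimilation-spinup bookkeeping
# introduced above: tspinup_assim delays the first analysis, so the first
# nspinupcycles cycles are free ensemble runs and only the remaining cycle
# indices appear in analysis_times.
import numpy as np
trun = 21600                                             # run length in s (hypothetical)
assimilation_interval = 1800                             # cycle length in s (hypothetical)
tspinup_assim = 7200                                     # pre-assimilation spinup in s (hypothetical)
ncycles = int(trun/assimilation_interval)                # 12 cycles in total
nspinupcycles = int(tspinup_assim/assimilation_interval) # first 4 cycles are spinup
analysis_times = np.arange(nspinupcycles,ncycles)        # analyses at cycles 4..11
print(analysis_times)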
- da = CBL(cbl_settings) + model = CBL(cbl_settings) x0 = np.tile(initial_state_vector,(nens,1)).T - da.initialize(x0,coordinates=xcoord) + model.initialize(x0,coordinates=xcoord) # In case of parameter estimation, update variance inflation settings - if da.do_parameter_estimation: - inflation_coefficients_rtps[-da.parameter_number:] = da.parameter_inflation_rtps_alpha + if model.do_parameter_estimation: + inflation_coefficients_rtps[-model.parameter_number:] = model.parameter_inflation_rtps_alpha # Store initial ensemble state - initial_state[0,:,:] = da.x0[:,:] + initial_state[0,:,:] = model.x0[:,:] + + # If array of inflation coefficients is filled with zeros, + # turn off posterior variance inflation + if np.sum(inflation_coefficients_rtps) == 0.0: + inflate_rtps = False + else: + inflate_rtps = True history = [] # Start cycling for k in range(ncycles): if verbose: print('Cycle %04u : '%k,end='') - # Compute distances between current observations and state vector variables - dist = compute_distances(xcoord,ocoord[k,:]) - - # Replace any NaN distances with 0 - # NaN distances only occur when the state is augmented with global - # parameters, whose location is undefined. Setting the distance to - # global parameters equal to zero ensures that the covariance - # matrix for global parameters is identity, as required by - # global updating. - np.nan_to_num(dist,copy=False) - # Integrate up to analysis time - da.maxtime = assimilation_interval + model.maxtime = assimilation_interval output_full_history = False if output_full_history: - da.run(output_full_history=True) - history.append(da.history) + model.run(output_full_history=True) + history.append(model.history) + else: + model.run() + + # If you want to simulate ensemble underdispersiveness + # (=spread smaller than actual error of the ensemble mean), + # perturb the ensemble mean before the analysis. + # Only for the state, not for the parameter! + if simulate_underdispersiveness: + # Ensemble mean repeated nens times + ensmean = model.x[:-model.parameter_number,:].mean(axis=1) + xbm = np.tile(ensmean,(nens,1)) + # Ensemble perturbations + xbp = model.x[:-model.parameter_number,:].T-xbm + # Define positions of random perturbations + # enforce two at boundaries to avoid extrapolation + randomsize = model.perturbations_smooth_number + ipert = np.arange(0,randomsize-1)*(model.nz//randomsize) + ipert = np.append(ipert,model.nz-1) + # Generate random perturbations + # Change the seed every time you go through this, + # otherwise the perturbations are always the same; + # and you have bias instead of randomness + RNG = np.random.default_rng(seed=rnseed+k*10000) + pert_random = RNG.standard_normal(randomsize) + f = CubicSpline(ipert,pert_random) + pert_interpolated = f(np.arange(model.nz))*model.error_growth_perturbations_amplitude + # Perturbed ensemble mean + xbm_new = np.tile(ensmean+pert_interpolated,(nens,1)) + # Store the perturbed ensemble mean plus original ensemble perturbations + bkgd = np.zeros(model.x.shape) + bkgd[:-model.parameter_number,:] = (xbm_new+xbp).T + bkgd[-model.parameter_number:,:] = model.x[-model.parameter_number:,:] else: - da.run() + bkgd = model.x[:,:] # Store prior - backgrounds[k,:,:] = da.x[:,:] - - # If you want to add/simulate some error growth in the run, before the analysis, - # perturb the prior state here. 
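# A minimal, self-contained sketch of the smooth random perturbation of the
# ensemble mean used above to simulate underdispersiveness: random values at a
# few anchor levels (with the top level pinned to avoid extrapolation) are
# interpolated with a cubic spline over the whole column and scaled by the
# perturbation amplitude. nz, randomsize and amplitude below are hypothetical.
import numpy as np
from scipy.interpolate import CubicSpline
nz = 40          # number of model levels (hypothetical)
randomsize = 5   # number of spline anchor points (hypothetical)
amplitude = 0.1  # perturbation amplitude (hypothetical)
ipert = np.arange(0,randomsize-1)*(nz//randomsize)  # anchor levels 0, 8, 16, 24
ipert = np.append(ipert,nz-1)                       # pin the top level (39)
rng = np.random.default_rng(seed=1)
pert_random = rng.standard_normal(randomsize)       # one random value per anchor
spline = CubicSpline(ipert,pert_random)
pert_profile = spline(np.arange(nz))*amplitude      # smooth profile, one value per level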
- if isinstance(da,CBL): - if cbl_settings["simulate_error_growth"]: - backgrounds[k,:,:] = CBL.perturb_state(da,backgrounds[k,:,:]) - - # Compute prior spread (necessary for variance inflation) - if inflate_rtps: - sigp = np.std(backgrounds[k,:,:],axis=1) - - # Assimilate - if FILTER == 'EAKF': - if cbl_settings['do_parameter_estimation'] and cbl_settings['return_covariances_increments_and_innovations']: - updates,model_equivalents[k,:,:],cov_xx,cov_xy,cov_yy_dum,innov_dum,increm_dum = eakf(backgrounds[k,:,:],observations[k,:], + backgrounds[k,:,:] = bkgd + + if k not in analysis_times: # Do not assimilate anything at this time; simply let the ensemble run + + if verbose: print('~analysis skipped~') + + # Propagate the prior state as is to the next iteration + #x0 = model.x[:,:] + x0 = backgrounds[k,:,:] + + # Compute the corresponding model equivalents + for i in range(nobs): + for j in range(nens): + model_equivalents[k,i,j] = observation_operator(x0[:,j],xcoord,ocoord[k,i]) + + else: # Assimilate + + # Compute prior spread (necessary for variance inflation) + if inflate_rtps: + sigp = np.std(backgrounds[k,:,:],axis=1) + + # Compute distances between current observations and state vector variables + dist = compute_distances(xcoord,ocoord[k,:]) + + # Replace any NaN distances with 0 + # NaN distances only occur when the state is augmented with global + # parameters, whose location is undefined. Setting the distance to + # global parameters equal to zero ensures that the covariance + # matrix for global parameters is identity, as required by + # global updating. + np.nan_to_num(dist,copy=False) + + # Assimilate + if FILTER == 'EAKF': + if cbl_settings['do_parameter_estimation'] and cbl_settings['return_covariances_increments_and_innovations']: + analysis,model_equivalents[k,:,:],cov_xx,cov_xy,cov_yy_dum,innov_dum,increm_dum = eakf(backgrounds[k,:,:],observations[k,:], + obs_error_sdev_assimilate, + xcoord, + ocoord[k,:], + dist, + localization_cutoff=localization_cutoff,return_covariances_increments_and_innovations=True) + increments[k,:,-npar] = increm_dum[:,-npar] + cov_pp[k,:,-npar] = cov_xx[:,-npar] + cov_py[k,:,-npar] = cov_xy[:,-npar] + cov_yy[k,:] = cov_yy_dum + innov[k,:] = innov_dum + else: + analysis,model_equivalents[k,:,:] = eakf(backgrounds[k,:,:],observations[k,:], + obs_error_sdev_assimilate, + xcoord, + ocoord[k,:], + dist, + localization_cutoff=localization_cutoff) + elif FILTER == 'LETKF': + analysis,model_equivalents[k,:,:] = letkf(backgrounds[k,:,:],observations[k,:], obs_error_sdev_assimilate, xcoord, ocoord[k,:], dist, - localization_cutoff=localization_cutoff,return_covariances_increments_and_innovations=True) - increments[k,:,-npar] = increm_dum[:,-npar] - cov_pp[k,:,-npar] = cov_xx[:,-npar] - cov_py[k,:,-npar] = cov_xy[:,-npar] - cov_yy[k,:] = cov_yy_dum - innov[k,:] = innov_dum - else: - updates,model_equivalents[k,:,:] = eakf(backgrounds[k,:,:],observations[k,:], - obs_error_sdev_assimilate, - xcoord, - ocoord[k,:], - dist, - localization_cutoff=localization_cutoff) - elif FILTER == 'LETKF': - updates,model_equivalents[k,:,:] = letkf(backgrounds[k,:,:],observations[k,:], - obs_error_sdev_assimilate, - xcoord, - ocoord[k,:], - dist, - localization_cutoff=localization_cutoff, - inflation_rho=1.0) - - x0 = updates - - # Posterior variance inflation (relaxation to prior spread) - # http://dx.doi.org/10.1175/MWR-D-11-00276.1 - # Variance inflation is optional for regular state vector elements - if inflate_rtps: - # Add 1e-10 to posterior variance (denominator) to 
avoid NaN - sigu = np.std(x0,axis=1) - inflation_factor = np.tile(inflation_coefficients_rtps*(sigp-sigu)/(sigu+1e-10),(nens,1)) - xbm = np.tile(x0.mean(axis=1),(nens,1)) - xbp = x0.T-xbm - x0 = (xbm+xbp*(1.+inflation_factor)).T + localization_cutoff=localization_cutoff, + inflation_rho=1.0) + + x0 = analysis + + # Posterior variance inflation (relaxation to prior spread) + # http://dx.doi.org/10.1175/MWR-D-11-00276.1 + # Variance inflation is optional for regular state vector elements + if inflate_rtps: + # Add 1e-10 to posterior variance (denominator) to avoid NaN + sigu = np.std(x0,axis=1) + inflation_factor = np.tile(inflation_coefficients_rtps*(sigp-sigu)/(sigu+1e-10),(nens,1)) + xbm = np.tile(x0.mean(axis=1),(nens,1)) + xbp = x0.T-xbm + x0 = (xbm+xbp*(1.+inflation_factor)).T # Store posterior analyses[k,:,:] = x0 # Save initial conditions for next cycle - da.update(x0) + model.update(x0) # Store the output self.ncycles = ncycles self.time = time + self.analysis_times = analysis_times self.initial_state = initial_state self.backgrounds = backgrounds self.analyses = analyses @@ -437,7 +472,7 @@ class cycle: self.nz = nr.nz self.zt = nr.zt self.nens = nens - self.initial_perturbed_parameters = da.initial_perturbed_parameters + self.initial_perturbed_parameters = model.initial_perturbed_parameters if cbl_settings['do_parameter_estimation'] and cbl_settings['return_covariances_increments_and_innovations']: self.cov_py = cov_py self.cov_pp = cov_pp @@ -445,130 +480,64 @@ class cycle: self.innov = innov self.increments = increments -""" -class experiment: - def __init__(self,settings): - # Read attributes from dictionary of settings - for k, v in settings.items(): - setattr(self, k, v) - if 0 < self.inflation_rtps_alpha <= 1: - self.inflate_rtps = True - else: - self.inflate_rtps = False - - # Consistency check on observation info - nobs = self.obs_coordinates.size - assert (self.obs_coordinates.size==self.obs_kinds.size==self.obs_error_sdev_assimilate.size==self.obs_error_sdev_generate.size),\ - f'obs_coordinates, obs_kinds and obs_error_sdev must have the same size,\ - got {self.obs_coordinates.size,self.obs_kinds.size,self.obs_error_sdev_assimilate.size,self.obs_error_sdev_assimilate.size}' - - # Consistency check on time coordinates - nassim = self.trun/self.assimilation_interval - assert (nassim.is_integer()),\ - f'trun/assimilation_interval must be integer,\ - got {self.trun,self.assimilation_interval,self.trun/self.assimilation_interval}' - - # If you don't have a pre-existing nature run, you need to run it - if not hasattr(self,'nr'): - # Define initial conditions of nature run - if self.tspinup > 0: - # Create initial state with random perturbations, then integrate - # one deterministic run for the spinup period - # Initial condition for the nature run is the last state of the spinup run - if verbose: print('Spinup : ',end='') - sp = CBL(self.cbl_settings) - sp.maxtime = self.tspinup - sp.initialize(1) - sp.run() - initial_conditions = sp.x - coords = sp.state_coordinates - else: - # No spinup, nature run gets unperturbed initial condition - sp = CBL(self.cbl_settings) - sp.initialize(1) - initial_conditions = sp.x0 - coords = sp.state_coordinates - - # Nature run: initialize, then integrate - if verbose: print('Nature run : ',end='') - nr = CBL(self.cbl_settings) - nr.maxtime = self.trun - nr.initialize(initial_conditions,coordinates=coords) - nr.run(output_full_history=True) - - # Draw observations from nature run - # Need nested loops because observation times (k) are 
independent - # and observation error variance depends on location (kk) - state_coordinates = nr.zt - truths = np.zeros((int(nassim),nobs))+np.nan - observations = np.zeros((int(nassim),nobs))+np.nan - for i in range(int(nassim)): - valid_time = (i+1)*self.assimilation_interval - assert(np.mod(self.assimilation_interval,nr.dt)==0), f'Assimilation interval must be a multiple of dt, got {self.assimilation_interval,nr.dt}' - time_index = np.argwhere(nr.history['time'] == valid_time)[0][0] - for j in range(nobs): - obs_coordinate = self.obs_coordinates[j] - if self.obs_kinds[j]=='theta': - variable = nr.history['theta'][:,time_index] - elif self.obs_kinds[j]=='u': - variable = nr.history['u'][:,time_index] - elif self.obs_kinds[j]=='v': - variable = nr.history['v'][:,time_index] - truths[i,j] = observation_operator(variable,state_coordinates,obs_coordinate) - # Change the seed every time you go through this, - # otherwise the perturbations are always the same; - # and you have bias instead of randomness - seed = self.rnseed+j*1000+i*100000 - RNG = np.random.default_rng(seed=seed) - observations[i,j] = truths[i,j]+\ - RNG.normal(0,self.obs_error_sdev_generate[j]) - - # Store truths and observations - self.truths = truths - self.observations = observations - - # Store nature run - self.nr = nr - - # Run assimilation cycle - if not hasattr(self,'da'): - self.da = cycle(self.cbl_settings, - self.nr, - self.nens, - self.FILTER, - self.trun, - self.assimilation_interval, - self.obs_coordinates, - self.truths, - self.observations, - self.obs_error_sdev_assimilate, - self.localization_cutoff, - self.inflate_rtps, - self.inflation_rtps_alpha, - self.rnseed) - - # Compute diagnostics - if not hasattr(self,'dg'): - self.dg = diagnostics(self.da) -""" class experiment: def __init__(self,settings): # Read attributes from dictionary of settings for k, v in settings.items(): - setattr(self, k, v) - if 0 < self.inflation_rtps_alpha <= 1: - self.inflate_rtps = True + setattr(self, k, v) + + """ + # Create additional attributes + self.obs_error_sdev_generate = np.ones(self.nobs)*self.obs_error_sdev_generate + self.obs_error_sdev_assimilate = np.ones(self.nobs)*self.obs_error_sdev_assimilate + if self.obs_kinds == 'theta': + self.obs_kinds = np.array(['theta']*self.nobs) + + self.obs_coordinates = np.linspace(self.obs_coordinates_min,self.obs_coordinates_max,self.nobs) + """ + + # If necessary, get nature run and related information from an external file + if self.nature_run_from_file: + assert os.path.isfile(self.nature_run_from_file),\ + f'Nature run file {self.nature_run_from_file} does not exist' + exp = pickle.load(open(self.nature_run_from_file, "rb")) + self.nr = deepcopy(exp.nr) + self.truths = deepcopy(exp.truths) + self.observations = deepcopy(exp.observations) + self.obs_coordinates = deepcopy(exp.obs_coordinates) + if not self.do_parameter_estimation: + self.nr.do_parameter_estimation = False + + # Get CBL settings from file + assert os.path.isfile(self.cbl_settings_file),\ + f'CBL settings file {self.cbl_settings_file} does not exist' + with open(self.cbl_settings_file, 'r') as fp: + self.cbl_settings = dict(json.load(fp)) + + # Propagate parameter estimation settings to CBL model + if self.do_parameter_estimation: + self.cbl_settings['do_parameter_estimation'] = True else: - self.inflate_rtps = False + self.cbl_settings['do_parameter_estimation'] = False ################################################################################################ # CASE 1: synthetic observations (OSSE with 
nature run) if not hasattr(self,'nr') and self.type == 'OSSE': + + # Create additional attributes + self.obs_error_sdev_generate = np.ones(self.nobs)*self.obs_error_sdev_generate + self.obs_error_sdev_assimilate = np.ones(self.nobs)*self.obs_error_sdev_assimilate + if self.obs_kinds == 'theta': + self.obs_kinds = np.array(['theta']*self.nobs) + + self.obs_coordinates = np.linspace(self.obs_coordinates_min,self.obs_coordinates_max,self.nobs) + # Consistency check on observation info - nobs = self.obs_coordinates.size - assert (self.obs_coordinates.size==self.obs_kinds.size==self.obs_error_sdev_assimilate.size==self.obs_error_sdev_generate.size),\ - f'obs_coordinates, obs_kinds and obs_error_sdev must have the same size,\ - got {self.obs_coordinates.size,self.obs_kinds.size,self.obs_error_sdev_assimilate.size,self.obs_error_sdev_assimilate.size}' + nobs = self.nobs + if self.obs_coordinates.ndim == 1: + assert (self.obs_coordinates.size==self.obs_kinds.size==self.obs_error_sdev_assimilate.size==self.obs_error_sdev_generate.size),\ + f'obs_coordinates, obs_kinds and obs_error_sdev must have the same size,\ + got {self.obs_coordinates.size,self.obs_kinds.size,self.obs_error_sdev_assimilate.size,self.obs_error_sdev_assimilate.size}' # Consistency check on time coordinates nassim = self.trun/self.assimilation_interval @@ -603,6 +572,8 @@ class experiment: nr.maxtime = self.trun nr.initialize(initial_conditions,coordinates=coords) nr.run(output_full_history=True) + if self.cbl_settings["do_parameter_estimation"]: + nr.has_parameters_in_state = True # Draw observations from nature run # Need nested loops because observation times (k) are independent @@ -678,12 +649,16 @@ class experiment: # There is no nature run and no truth in this case. # The only thing you can store is the observations. + self.nobs = observations.shape[1] self.observations = observations self.obs_coordinates = ocoords + self.obs_error_sdev_assimilate = np.ones(self.nobs)*self.obs_error_sdev_assimilate + if self.obs_kinds == 'theta': + self.obs_kinds = np.array(['theta']*self.nobs) # The da cycle depends on a "nr" structure to get information # about the model properties. - # Depending on the configuration, the da cycle could + # Depending on the configuration, the da cycle can # create the initial ensemble perturbations around # a spun-up model state, which is stored in nr.x0. # So, run the forecast model for the spinup time if necessary. @@ -706,7 +681,8 @@ class experiment: initial_conditions = sp.x0 coords = sp.state_coordinates - # Nature run: initialize, then integrate + # Nature run: initialize only + # I.e., store "initial_conditions" in nr.x0 self.nr = CBL(self.cbl_settings) self.nr.maxtime = self.trun self.nr.initialize(initial_conditions,coordinates=coords) @@ -719,14 +695,16 @@ class experiment: self.nens, self.FILTER, self.trun, + self.tspinup_assim, self.assimilation_interval, self.obs_coordinates, self.observations, self.obs_error_sdev_assimilate, self.localization_cutoff, - self.inflate_rtps, self.inflation_rtps_alpha, - self.rnseed) + self.simulate_underdispersiveness, + self.rnseed + ) # If truths (from nature run) exist, add them to the da cycle structure. # That's necessary for the diagnostics package to work properly. 
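# A minimal sketch of the two external inputs read by experiment.__init__ above:
# a pickled experiment object providing the pre-computed nature run, truths and
# observations, and a JSON file with the CBL model settings. The file names are
# hypothetical; using context managers also guarantees the files are closed.
import json
import pickle
with open("nature_run.pickle","rb") as fp:   # hypothetical file name
    previous_exp = pickle.load(fp)           # exposes .nr, .truths, .observations, .obs_coordinates
with open("cbl_settings.json","r") as fp:    # hypothetical file name
    cbl_settings = dict(json.load(fp))
cbl_settings['do_parameter_estimation'] = False  # toggled per experiment, as above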
@@ -745,38 +723,62 @@ class diagnostics: # nobs: number of observations # Rename variables for shorthand - ntim = da.observations.shape[0] + time_indices = da.analysis_times + ntim = time_indices.size nobs = da.observations.shape[1] nens = da.backgrounds.shape[2] - ocoords = da.obs_coordinates # nobs or ntim*nobs - xcoords = da.state_coordinates # nvar - o = da.observations # ntim*nobs + if da.obs_coordinates.ndim == 1: # nobs or ntim*nobs + ocoords = da.obs_coordinates + elif da.obs_coordinates.ndim == 2: # nobs or ntim*nobs + ocoords = da.obs_coordinates[time_indices,:] + xcoords = da.state_coordinates # nvar + o = da.observations[time_indices,:] # ntim*nobs if hasattr(da,'truths'): - t = da.truths # ntim*nobs + t = da.truths[time_indices,:] # ntim*nobs truth_exists = True else: truth_exists = False # Get observation-space priors from da output - b = da.model_equivalents # ntim*nobs*nens + # Consider only the model equivalents beyond the assimilation spinup + b = da.model_equivalents[time_indices,:,:] # ntim*nobs*nens # To get observation-space posteriors (ntim*nvar*nens), # apply observation operator to analyses + # First allocate an empty array. a = np.zeros((ntim,nobs,nens))+np.nan + # Consider only the analyses beyond the assimilation spinup + analyses = da.analyses[time_indices,:,:] + # In this case, observation coordinates do not change between analysis times if ocoords.ndim == 1: for i in range(ntim): for j in range(nobs): for k in range(nens): - a[i,j,k] = observation_operator(da.analyses[i,:,k],xcoords,ocoords[j]) + a[i,j,k] = observation_operator(analyses[i,:,k],xcoords,ocoords[j]) # In this case, observation coordinates are different at each analysis time elif ocoords.ndim == 2: for i in range(ntim): for j in range(nobs): for k in range(nens): - a[i,j,k] = observation_operator(da.analyses[i,:,k],xcoords,ocoords[i,j]) + a[i,j,k] = observation_operator(analyses[i,:,k],xcoords,ocoords[i,j]) + + # For the computation of time-averaged statistics, all + # data arrays must be properly sorted in space (otherwise the elements + # of the time average refer to different positions). 
+ # Re-sort the coordinates and the data arrays, in case they are not + for k in range(ntim): + if ocoords.ndim == 2: + indices = np.argsort(ocoords[k,:]) + elif ocoords.ndim == 1: + indices = np.argsort(ocoords) + a[k,:,:] = a[k,indices,:] + b[k,:,:] = b[k,indices,:] + o[k,:] = o[k,indices] + if truth_exists: + t[k,:] = t[k,indices] # Compute ensemble mean and spread meana = a.mean(axis=2) # ntim*nobs @@ -784,36 +786,64 @@ class diagnostics: sigmaa = a.std(axis=2) # ntim*nobs sigmab = b.std(axis=2) # ntim*nobs - # Compute space-averaged analysis RMSE and spread - aRMSD_x = np.sqrt((meana-o)**2).mean(axis=1) # ntim - bRMSD_x = np.sqrt((meanb-o)**2).mean(axis=1) # ntim - if truth_exists: - aRMSE_x = np.sqrt((meana-t)**2).mean(axis=1) # ntim - bRMSE_x = np.sqrt((meanb-t)**2).mean(axis=1) # ntim - aSprd_x = sigmaa.mean(axis=1) # ntim - bSprd_x = sigmab.mean(axis=1) # ntim - - # Compute time-averaged analysis RMSE and spread - aRMSD_t = np.sqrt((meana-o)**2).mean(axis=0) # nobs - bRMSD_t = np.sqrt((meanb-o)**2).mean(axis=0) # nobs - if truth_exists: - aRMSE_t = np.sqrt((meana-t)**2).mean(axis=0) # nobs - bRMSE_t = np.sqrt((meanb-t)**2).mean(axis=0) # nobs - aSprd_t = sigmaa.mean(axis=0) # nobs - bSprd_t = sigmab.mean(axis=0) # nobs - # Compute deviations omina = o-meana # ntim*nobs ominb = o-meanb # ntim*nobs aminb = meana-meanb # ntim*nobs - - # Time-averaged Desroziers statistics - vara_ta = (sigmaa**2).mean(axis=0) # nobs - varb_ta = (sigmab**2).mean(axis=0) # nobs - dobdob_ta = (ominb*ominb).mean(axis=0) # nobs - dabdob_ta = (aminb*ominb).mean(axis=0) # nobs - doadob_ta = (omina*ominb).mean(axis=0) # nobs - dabdoa_ta = (aminb*omina).mean(axis=0) # nobs + if truth_exists: + tmina = t-meana # ntim*nobs + tminb = t-meanb # ntim*nobs + + # Time-averaged Desroziers statistics (size nobs) + vara_ta = (sigmaa**2).mean(axis=0) + varb_ta = (sigmab**2).mean(axis=0) + dobdob_ta = (ominb*ominb).mean(axis=0) + dabdob_ta = (aminb*ominb).mean(axis=0) + doadob_ta = (omina*ominb).mean(axis=0) + dabdoa_ta = (aminb*omina).mean(axis=0) + + # Time-averaged RMSE and spread (size nobs) + aRMSD_t = np.sqrt( (omina*omina).mean(axis=0) ) + bRMSD_t = np.sqrt(dobdob_ta) + aSprd_t = np.sqrt(vara_ta) + bSprd_t = np.sqrt(varb_ta) + if truth_exists: + aRMSE_t = np.sqrt( (tmina*tmina).mean(axis=0) ) + bRMSE_t = np.sqrt( (tminb*tminb).mean(axis=0) ) + + # Space-averaged Desroziers statistics (size ntim) + vara_xa = (sigmaa**2).mean(axis=1) + varb_xa = (sigmab**2).mean(axis=1) + dobdob_xa = (ominb*ominb).mean(axis=1) + dabdob_xa = (aminb*ominb).mean(axis=1) + doadob_xa = (omina*ominb).mean(axis=1) + dabdoa_xa = (aminb*omina).mean(axis=1) + + # Space-averaged RMSE and spread (size ntim) + aRMSD_x = np.sqrt( (omina*omina).mean(axis=1) ) + bRMSD_x = np.sqrt(dobdob_xa) + aSprd_x = np.sqrt(vara_xa) + bSprd_x = np.sqrt(varb_xa) + if truth_exists: + aRMSE_x = np.sqrt( (tmina*tmina).mean(axis=1) ) + bRMSE_x = np.sqrt( (tminb*tminb).mean(axis=1) ) + + # Space/time-averaged Desroziers statistics (scalar) + vara_xta = (sigmaa**2).mean() + varb_xta = (sigmab**2).mean() + dobdob_xta = (ominb*ominb).mean() + dabdob_xta = (aminb*ominb).mean() + doadob_xta = (omina*ominb).mean() + dabdoa_xta = (aminb*omina).mean() + + # Space/time-averaged RMSE and spread (scalar) + aRMSD_xt = np.sqrt( (omina*omina).mean() ) + bRMSD_xt = np.sqrt(dobdob_xta) + aSprd_xt = np.sqrt(vara_xta) + bSprd_xt = np.sqrt(varb_xta) + if truth_exists: + aRMSE_xt = np.sqrt( (tmina*tmina).mean() ) + bRMSE_xt = np.sqrt( (tminb*tminb).mean() ) # Compute histograms of 
ensemble mean departures. # Histograms are meaningless if you only have 1 observation. @@ -841,49 +871,67 @@ class diagnostics: for i in range(nobs): hist_aminb_local.append(np.histogram(aminb[:,i],bins=edges)[0]/aminb[:,i].size/(edges[1]-edges[0])) - # Analysis & background mean error and spread - self.aRMSD_x = aRMSD_x # ntim - self.aSprd_x = aSprd_x # ntim - self.aRMSD_t = aRMSD_t # nobs - self.aSprd_t = aSprd_t # nobs - self.bRMSD_x = bRMSD_x # ntim - self.bSprd_x = bSprd_x # ntim - self.bRMSD_t = bRMSD_t # nobs - self.bSprd_t = bSprd_t # nobs - if truth_exists: - self.aRMSE_x = aRMSE_x # ntim - self.aRMSE_t = aRMSE_t # nobs - self.bRMSE_x = bRMSE_x # ntim - self.bRMSE_t = bRMSE_t # nobs - # All these with dimensions ntim*nobs - self.meana = meana - self.meanb = meanb + self.meana = meana + self.meanb = meanb self.sigmaa = sigmaa self.sigmab = sigmab - self.omina = omina - self.ominb = ominb - self.aminb = aminb + self.omina = omina + self.ominb = ominb + self.aminb = aminb + + # All these with dimensions ntim + self.aRMSD_x = aRMSD_x + self.aSprd_x = aSprd_x + self.bRMSD_x = bRMSD_x + self.bSprd_x = bSprd_x + self.dobdob_xa = dobdob_xa + self.dabdob_xa = dabdob_xa + self.doadob_xa = doadob_xa + self.dabdoa_xa = dabdoa_xa + self.vara_xa = vara_xa + self.varb_xa = varb_xa # All these with dimensions nobs - self.varo = da.obs_error_sdev_assimilate**2 - self.vara_ta = vara_ta - self.varb_ta = varb_ta + self.aRMSD_t = aRMSD_t + self.aSprd_t = aSprd_t + self.bRMSD_t = bRMSD_t + self.bSprd_t = bSprd_t self.dobdob_ta = dobdob_ta self.dabdob_ta = dabdob_ta self.doadob_ta = doadob_ta self.dabdoa_ta = dabdoa_ta + self.vara_ta = vara_ta + self.varb_ta = varb_ta + self.varo = da.obs_error_sdev_assimilate**2 self.sigma_ominb = ominb.std(axis=0) self.mean_ominb = ominb.mean(axis=0) self.sigma_omina = omina.std(axis=0) self.mean_omina = omina.mean(axis=0) # All these are scalars + self.aRMSD_xt = aRMSD_xt + self.aSprd_xt = aSprd_xt + self.bRMSD_xt = bRMSD_xt + self.bSprd_xt = bSprd_xt + self.dobdob_xta = dobdob_xta + self.dabdob_xta = dabdob_xta + self.doadob_xta = doadob_xta + self.dabdoa_xta = dabdoa_xta self.sigma_ominb_global = ominb.std() self.mean_ominb_global = ominb.mean() self.sigma_omina_global = omina.std() self.mean_omina_global = omina.mean() + # Additional stuff if truth exists + if truth_exists: + self.aRMSE_x = aRMSE_x # ntim + self.bRMSE_x = bRMSE_x # ntim + self.aRMSE_t = aRMSE_t # nobs + self.bRMSE_t = bRMSE_t # nobs + self.aRMSE_xt = aRMSE_xt # scalar + self.bRMSE_xt = bRMSE_xt # scalar + # Store histograms, if they were computed if o.size > 1: # All these with dimensions nobs * nbins
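# A minimal sketch of how the time-averaged Desroziers statistics stored above
# can be used as consistency checks (Desroziers et al. 2005). In a well-tuned
# system, at each observation location,
#   dobdob_ta ≈ varb_ta + varo   (innovation variance = background + obs error)
#   doadob_ta ≈ varo             (diagnosed observation-error variance)
#   dabdob_ta ≈ varb_ta          (background-error variance in observation space)
#   dabdoa_ta ≈ vara_ta          (analysis-error variance in observation space)
# dg is assumed to be a diagnostics instance; the ratios should be close to 1.
import numpy as np

def desroziers_obs_error_ratio(dg):
    # Diagnosed over assumed observation-error variance, one value per observation
    return dg.doadob_ta/dg.varo

# Example usage:
# dg = diagnostics(da)
# print(np.round(desroziers_obs_error_ratio(dg),2))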