#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np
import glob
import pandas as pd

def is_sorted(a):
    """Return True when 1-D array `a` is in non-decreasing order."""
    return np.all(a[:-1] <= a[1:])

def decode_observations(observation_files):
    """Read a glob of observation CSV files into 2-D arrays.

    Based on:
    https://stackoverflow.com/questions/20906474/import-multiple-csv-files-into-pandas-and-concatenate-into-one-dataframe

    Parameters
    ----------
    observation_files : str
        Glob pattern matching the CSV files (one file per assimilation time).
        Each file must have "z (m)" and "theta (K)" columns.

    Returns
    -------
    observations, ocoords : ndarray, shape (nassim, nobs)
        Potential temperature and height values, one row per file (sorted by
        filename). Files with fewer than nobs records are padded with NaN.
    """
    # One dataframe per file; sort the file list so times come in order.
    frames = [pd.read_csv(filename, index_col=None, header=0)
              for filename in sorted(glob.glob(observation_files))]
    nassim = len(frames)

    # Maximum number of observation records over all files.
    nobs = max((df.shape[0] for df in frames), default=0)

    # NaN-filled target arrays, so shorter files stay padded with NaN.
    observations = np.full((nassim, nobs), np.nan)
    ocoords = np.full((nassim, nobs), np.nan)

    # Fill only the records actually present in each file; assigning the
    # whole row (as before) raised a shape mismatch for any shorter file,
    # which defeated the NaN padding set up above.
    for i, df in enumerate(frames):
        n = df.shape[0]
        ocoords[i, :n] = df["z (m)"].to_numpy()
        observations[i, :n] = df["theta (K)"].to_numpy()

    return observations, ocoords

def random_sort_observations(observations, ocoords, truths=None):
    """Shuffle observations and coordinates with an independent random
    permutation at each assimilation time.

    Parameters
    ----------
    observations, ocoords : ndarray, shape (nassim, nobs)
        Observation values and their coordinates; must have equal shapes.
    truths : ndarray, optional
        If given (same shape), it is shuffled with the same permutations.

    Returns
    -------
    (shuffled_obs, shuffled_coords) when truths is None, otherwise
    (shuffled_obs, shuffled_coords, shuffled_truths).
    """
    # Check that input arrays have consistent dimensions
    assert observations.shape == ocoords.shape, \
        'observation and coordinate arrays must have the same size'
    if truths is not None:
        assert truths.shape == ocoords.shape, \
            'truths and coordinate arrays must have the same size'

    # Preallocate output arrays
    nassim, nobs = observations.shape
    shuffled_obs = np.full(observations.shape, np.nan)
    shuffled_coords = np.full(observations.shape, np.nan)
    shuffled_truths = None if truths is None else np.full(ocoords.shape, np.nan)

    # Randomize arrays, with a different permutation at each time.
    # (Single loop replaces the two near-identical branches of the original.)
    indices = np.arange(nobs)
    for i in range(nassim):
        np.random.shuffle(indices)
        shuffled_obs[i, :] = observations[i, indices]
        shuffled_coords[i, :] = ocoords[i, indices]
        if shuffled_truths is not None:
            shuffled_truths[i, :] = truths[i, indices]

    if shuffled_truths is None:
        return shuffled_obs, shuffled_coords
    return shuffled_obs, shuffled_coords, shuffled_truths

if __name__ == '__main__':

    # Plotting is only needed when run as a script.
    from matplotlib import pyplot as p

    printout = False

    # Read the observations
    observations, ocoords = decode_observations('./LES_data/Theta/*csv')

    if printout:
        ntim = 0
        for i in range(observations.shape[1]):
            print(ocoords[ntim, i], observations[ntim, i])

    # Sort the observations randomly
    observations, ocoords = random_sort_observations(observations, ocoords)
    if printout:
        for i in range(observations.shape[1]):
            print(ocoords[ntim, i], observations[ntim, i])

    makeplots = True
    # Make plots
    if makeplots:

        # If observations are shuffled, sort them again by height
        ntimes = ocoords.shape[0]
        for k in range(ntimes):
            if not is_sorted(ocoords[k, :]):
                indices = np.argsort(ocoords[k, :])
                ocoords[k, :] = ocoords[k, indices]
                observations[k, :] = observations[k, indices]

        fig, ax1 = p.subplots(1, 1, constrained_layout=True)
        fig.set_size_inches(4, 3)

        # Observation times: one column of times broadcast to the obs shape,
        # converted from seconds to hours.
        tint = 300
        t = np.linspace(0, tint*(ocoords.shape[0]-1), ocoords.shape[0])[:, None]+np.zeros(ocoords.shape)
        t = t/3600.
+ c = ax1.scatter(t,ocoords,s=5,c=observations,vmin=290,vmax=296) + ax1.contour(t,ocoords,observations, + np.linspace(290,290+0.003*2000,13), + colors='black', + linestyles='--', + linewidths=0.75) + ax1.set_ylim([0,2000]) + ax1.set_ylabel(r'Height (m)') + ax1.set_xlabel(r'Time (s)') + ax1.set_title(r'$\overline{\theta}$ (K) obs in t=(%u-%u) h'%(t.min(),t.max())) + ax1.legend(frameon=False) + p.colorbar(c,orientation='horizontal') + fig.savefig('LES_obs.png',format='png',dpi=300) + p.close(fig) diff --git a/utils/replace_text.py b/utils/replace_text.py new file mode 100644 index 0000000000000000000000000000000000000000..23218028979fde3c06e98d89ddd94fef4daff0d3 --- /dev/null +++ b/utils/replace_text.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Useful in order to edit +# a set of config files all at once + +# https://stackoverflow.com/questions/74191831/how-to-replace-a-text-with-another-text-in-several-txt-files-at-the-same-time + +import glob +import os + +# Input and output text files +pathSource = "./runs/seed=181612_enssize=20_EAKF_6hrs/" +pathDest = "./runs/seed=181612_enssize=20_EAKF_6hrs/" +#pathDest = "./backup/config_files_new/" +textFiles_Source = glob.glob(pathSource+"*.json") +textFiles_Dest = [i.replace(pathSource,pathDest) for i in textFiles_Source] + +if not os.path.exists(pathDest): + os.makedirs(pathDest) + +# searched text and replacement text +search_text = '"cbl_settings_file": "./config_files/' +replace_text = '"cbl_settings_file": "' +#search_text = '"nature_run_from_file": "./runs/seed=181612_enssize=200_EAKF_6hrs/' +#replace_text = '"nature_run_from_file": "./runs/seed=181612_enssize=200_LETKF_6hrs/' +#search_text = '"nens": 20,' +#replace_text = '"nens": 200,' + +# Loop over each text file in our list from above +for source,dest in zip(textFiles_Source,textFiles_Dest): + # Opening our text file in read only + # mode using the open() function + with open(source, 'r') as file: + + # Reading the content of the file 
+ # using the read() function and storing + # them in a new variable + data = file.read() + + # Searching and replacing the text + # using the replace() function + data = data.replace(search_text, replace_text) + + # Opening our text file in write only + # mode to write the replaced content + with open(dest, 'w') as file: + + # Writing the replaced data in our + # text file + file.write(data) \ No newline at end of file diff --git a/utils/verification.py b/utils/verification.py new file mode 100644 index 0000000000000000000000000000000000000000..5a8f2f385782c26c0c792bce58ab82612c3c3d25 --- /dev/null +++ b/utils/verification.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import numpy as np +from matplotlib import pyplot as p + +from PE_CBL_models import CBL +from observations import decode_observations + +# Default CBL model settings +default_cbl_settings ={ + # Physical parameters + 'g' : 9.806, + 'f' : 1e-4, + 'kvonk' : 0.4, + 'z0' : 0.1, + 'ug' : 0, + 'vg' : 10, + 'theta_0' : 290, + 'gamma' : 3e-3, + 'Hmax' : 0.12, + 'H0_perturbation_ampl_init' : 0.0, + 'H0_perturbation_ampl_time' : 0.0, + 'exct' : 0.3, + 'pfac' : 1.5, + 'Kmin' : 0.1, + 'Kmax' : 200, + # Model formulation + 'is_bwind' : False, + 'is_cgrad' : True, + 'is_cycle' : False, + # Domain parameters (Kmax determines dt) + 'dz' : 50, + 'ztop' : 4000, + # Ensemble generation parameters (only effective for ensemble runs) + # Part 1: perturbation of initial state + # perturbation_type can be "smooth", "random", "uniform" + # the adjective refers to vertical variability + 'rnseed' : 181612, + 'perturb_ensemble_state' : True, + 'perturbations_type' : "smooth", + 'perturbations_theta_amplitude' : 0.1, + 'perturbations_uv_amplitude' : 0.1, + 'perturbations_smooth_number' : 11, + 'perturbations_symmetric' : True, + 'simulate_error_growth' : False, + 'error_growth_perturbations_amplitude' : 0.0, + # Part 2: perturbation of parameters + 'perturb_ensemble_parameters' : True, + 
'parameter_number' : 1, + 'parameter_transform' : None, + 'parameter_ensemble_min' : np.array([0.5]), + 'parameter_ensemble_max' : np.array([4.5]), + 'parameter_true' : np.array([1.5]), + # In case of parameter estimation + 'do_parameter_estimation' : False, + 'parameter_inflation_rtps_alpha' : np.array([0.8]), + 'return_covariances_increments_and_innovations' : True + } + +def observation_operator(fp,xp,x): + f = np.interp(x, xp, fp) + return f + +if __name__ == '__main__': + + # Settings + energy_diagnostics = False + sensitivity_to_p = True + + # Read the observations + theta,z = decode_observations('./LES_data/Theta/*csv') + nassim,nobs = theta.shape + thicknesses = np.zeros(z.shape)+np.nan + for k in range(nassim): + thicknesses[k,:]=np.hstack((z[k,0],np.diff(z[k,:]))) + tint = 300 + t = np.linspace(0,tint*(z.shape[0]-1),z.shape[0])[:,None]+np.zeros(z.shape) + + if energy_diagnostics: + + # Compute delta theta wrt IC + theta_init = 290+z*0.003 + delta_theta = theta-theta_init + + # Set the sensible heat flux (kinematic units) + hfx = 0.12 + + # Run model + run_settings = dict(default_cbl_settings) + run = CBL(run_settings) + run.maxtime = 25200 + run.initialize(1) + run.run(output_full_history=True) + zt = run.zt + + # Model state + model_theta = run.history['theta'] + model_delta_theta = model_theta-model_theta[:,0][:,None] + model_thicknesses = run.dz*(np.ones(zt.size))[:,None]+np.zeros(model_theta.shape) + model_delta_theta = model_delta_theta.T + model_thicknesses = model_thicknesses.T + model_times = run.history['time'] + + # Model equivalents + model_equivalents = np.zeros(theta.shape)+np.nan + for i in range(int(nassim)): + valid_time = i*tint + time_index = np.argwhere(run.history['time'] == valid_time)[0][0] + for j in range(nobs): + model_equivalents[i,j] = observation_operator(model_theta[:,time_index],zt,z[i,j]) + model_e_delta_theta = model_equivalents-theta_init + model_e_thicknesses = thicknesses + model_e_times = t + + # Make plots + fig, 
[[ax1,ax2],[ax3,ax4]] = p.subplots(2,2,constrained_layout=True) + fig.set_size_inches(6,6) + + # Time coordinate is hours + t = t/3600. + + # Observations + c = ax1.pcolormesh(t,z,delta_theta,vmin=-3,vmax=3) + ax1.contour(t,z,theta, + np.linspace(290,290+0.003*2000,13), + colors='white', + linestyles='--', + linewidths=0.75) + ax1.set_ylim([0,2000]) + ax1.set_ylabel(r'Height (m)') + ax1.set_xlabel(r'Time (h)') + ax1.set_title(r'$\Delta\overline{\theta}$ (K) observations') + p.colorbar(c,orientation='horizontal') + + # Observations + d = ax2.pcolormesh(t,z,model_e_delta_theta,vmin=-3,vmax=3) + ax2.contour(t,z,model_equivalents, + np.linspace(290,290+0.003*2000,13), + colors='white', + linestyles='--', + linewidths=0.75) + ax2.set_ylim([0,2000]) + ax2.set_ylabel(r'Height (m)') + ax2.set_xlabel(r'Time (h)') + ax2.set_title(r'$\Delta\overline{\theta}$ (K) model equivalents') + p.colorbar(d,orientation='horizontal') + + # Difference + d = ax3.pcolormesh(t,z,model_equivalents-theta,vmin=-0.5,vmax=0.5,cmap='RdBu_r') + ax3.set_ylim([0,2000]) + ax3.set_ylabel(r'Height (m)') + ax3.set_xlabel(r'Time (h)') + ax3.set_title(r'$\overline{\theta}_{fc}-\overline{\theta}_{obs}$ (K) differences') + ax3.text(3.5,100,'RMSD=%6.4f K'%np.mean((theta-model_equivalents)**2)) + p.colorbar(d,orientation='horizontal') + + # Energy diagnostics + ax4.plot(t[:,0],t[:,0]*3600*hfx,color='k',dashes=[3,1],zorder=10,label=r'$\int H dt$') + ax4.plot(model_times/3600,np.sum(model_delta_theta*model_thicknesses,axis=1),color='r',label=r'$\int\Delta\theta dz$, model state') + ax4.plot(model_e_times[:,0]/3600,np.sum(model_e_delta_theta*model_e_thicknesses,axis=1),color='orange',label=r'$\int\Delta\theta dz$, model equivalents') + ax4.plot(t[:,0],np.sum(delta_theta*thicknesses,axis=1),label=r'$\int\Delta\theta dz$, observations') + ax4.set_ylabel(r'$E/(\rho c_p)$ (K m)') + ax4.set_xlabel(r'Time (h)') + ax4.legend(prop={'size': 6}) + + fig.savefig('verification_1.png',format='png',dpi=300) + p.close(fig) + + 
if sensitivity_to_p: + + p_values = np.linspace(0.5,2,31) + rmsd = np.zeros(p_values.shape) + + for k in range(p_values.size): + print('Run %02u'%k) + + # Run model + run_settings = dict(default_cbl_settings) + run_settings['pfac'] = p_values[k] + run = CBL(run_settings) + run.maxtime = 25200 + run.initialize(1) + run.run(output_full_history=True) + + # Model equivalents + model_theta = run.history['theta'] + model_equivalents = np.zeros(theta.shape)+np.nan + for i in range(int(nassim)): + valid_time = i*tint + time_index = np.argwhere(run.history['time'] == valid_time)[0][0] + for j in range(nobs): + model_equivalents[i,j] = observation_operator(model_theta[:,time_index],run.zt,z[i,j]) + + rmsd[k] = np.sqrt(np.mean((theta-model_equivalents)**2)) + + # Make plots + fig, ax1 = p.subplots(1,1,constrained_layout=True) + fig.set_size_inches(4,3) + + ax1.plot(p_values,rmsd) + ax1.set_xlabel('$p$') + ax1.set_ylabel('RMSD (K)') + + fig.savefig('verification_2.png',format='png',dpi=300) + p.close(fig)