From 4d74d07f4ed6d416105dc5321c6626bef8186b6f Mon Sep 17 00:00:00 2001 From: lkugler <lukas.kugler@gmail.com> Date: Tue, 5 Jul 2022 10:41:31 +0200 Subject: [PATCH] add superob for multiple obs layers --- dartwrf/obsseq.py | 186 ++++++++++++++++++++++++---------------------- 1 file changed, 98 insertions(+), 88 deletions(-) diff --git a/dartwrf/obsseq.py b/dartwrf/obsseq.py index e9340ed..cd71c54 100755 --- a/dartwrf/obsseq.py +++ b/dartwrf/obsseq.py @@ -124,20 +124,11 @@ class ObsRecord(pd.DataFrame): return pd.DataFrame(index=self.index, data=dict(lat=lats, lon=lons)) - def get_from_cartesian_grid(self, i, j): - """Get the observation using cartesian grid indices (ix, iy) + def get_from_cartesian_grid(self, i, j, k): + """Get the observation using cartesian grid indices (ix, iy, iz) """ - # indices of observations (starting from 0) - i_obs_grid = self.index.values - - # number of obs in one direction - nx = int(len(i_obs_grid) ** 0.5) - - # indices to the pandas.DataFrame - i_obs_grid = i_obs_grid.reshape(nx, nx) - # find indices of observations within pandas.DataFrame - return self.iloc[i_obs_grid[i, j].ravel()] + return self.iloc[self.i_obs_grid[i, j, k].ravel()] def superob(self, window_km): """Select subset, average, overwrite existing obs with average @@ -156,11 +147,6 @@ class ObsRecord(pd.DataFrame): 25x25 km with 5 km obs density = average 5 x 5 observations """ - debug = False - radius_earth_meters = 6.371 * 1e6 - m_per_degrees = np.pi * radius_earth_meters / 180 # m per degree latitude - km_per_degrees = m_per_degrees / 1000 - def calc_deg_from_km(distance_km, center_lat): """Approximately calculate distance in degrees from meters Input: distance in km; degree latitude @@ -176,16 +162,11 @@ class ObsRecord(pd.DataFrame): dist_km_lon = deg_lon * km_per_degrees * np.cos(center_lat * np.pi / 180) return dist_km_lat, dist_km_lon - # indices of observations (starting from 0) - i_obs_grid = self.index.values - - # get the observation indices on a cartesian grid (ix, iy) - nx = int(len(i_obs_grid) ** 0.5) - i_obs_grid = i_obs_grid.reshape(nx, nx) - # loop through columns/rows - # avoid loop in (lat,lon) space as coordinates are non-cartesian - # i.e. first column of observations has different longitudes! + debug = False + radius_earth_meters = 6.371 * 1e6 + m_per_degrees = np.pi * radius_earth_meters / 180 # m per degree latitude + km_per_degrees = m_per_degrees / 1000 # determine obs density (approx) # TODO: error prone section @@ -203,79 +184,108 @@ class ObsRecord(pd.DataFrame): print('obs spacing=', obs_spacing_km) print("window (#obs in x/y)=", win_obs) + # superob in case of multiple layers, only implemented for single obstype + nlayers = 1 + if len(exp.observations) == 1: + obscfg = exp.observations[0] + if 'heights' in obscfg: + nlayers = len(obscfg['heights']) + self.nlayers = nlayers + + i_obs_grid = self.index.values # indices of observations (starting from 0) + + # get the observation indices from obs_seq (list) + # onto a cartesian grid (ix, iy, iz) + gridpoints_per_layer = len(i_obs_grid)/nlayers + nx = int(gridpoints_per_layer ** 0.5) + self.nx = nx + i_obs_grid = i_obs_grid.reshape(nx, nx, nlayers) + self.i_obs_grid = i_obs_grid + + # loop through columns/rows + # avoid loop in (lat,lon) space as coordinates are non-cartesian + # i.e. first column of observations has different longitudes! + out = self.drop(self.index) # this df will be filled boxes = [] for i in range(0, nx+1 - win_obs, win_obs): for j in range(0, nx+1 - win_obs, win_obs): - - # find indices of observations within superob window - i_obs_box = i_obs_grid[i : i + win_obs, j : j + win_obs].ravel() - - if debug: - print("index x from", i, 'to', i + win_obs) - print("index y from", j, 'to', j + win_obs) - print("obs indices box=", i_obs_grid[i : i + win_obs, j : j + win_obs]) - - # find observations within superob window - obs_box = self.get_from_cartesian_grid(slice(i, i + win_obs), - slice(j, j + win_obs)) - # from IPython import embed; embed() - # save boundary of box to list, for plotting later - eps = dx_obs_lat_deg/2 # for plotting - eps2 = eps*0.8 # for plotting - lat1, lon1 = self.get_from_cartesian_grid(i, j).get_lon_lat().values[0] - lat2, lon2 = self.get_from_cartesian_grid(i+win_obs-1, j).get_lon_lat().values[0] - lat3, lon3 = self.get_from_cartesian_grid(i, j+win_obs-1).get_lon_lat().values[0] - lat4, lon4 = self.get_from_cartesian_grid(i+win_obs-1, j+win_obs-1).get_lon_lat().values[0] - - boxes.append(([lat1-eps2, lat2+eps2, lat3-eps2, lat4+eps2], - [lon1-eps, lon2-eps, lon3+eps, lon4+eps])) - - # average the subset - # metadata are assumed to be equal - obs_mean = obs_box.iloc[0] - - # average spread and other values - for key in obs_box: - if key in ['loc3d', 'kind', 'metadata', 'time']: - pass - elif 'spread' in key: - # stdev of mean of values = sqrt(mean of variances) - obs_mean.at[key] = np.sqrt((obs_box[key]**2).mean()) - elif key == 'variance': - # variance of mean = sum(variances)/n^2 - obs_mean.at[key] = obs_box[key].sum()/obs_box[key].size**2 + for k in range(0, nlayers): + if debug: print(i,j,k) + + # find indices of observations within superob window + # i_obs_box = i_obs_grid[i : i + win_obs, j : j + win_obs, k].ravel() + + if debug: + print("index x from", i, 'to', i + win_obs) + print("index y from", j, 'to', j + win_obs) + print("obs indices box=", i_obs_grid[i : i + win_obs, j : j + win_obs, k]) + + # find observations within superob window + obs_box = self.get_from_cartesian_grid(slice(i, i + win_obs), + slice(j, j + win_obs), + k) + + + # save boundary of box to list, for plotting later + eps = dx_obs_lat_deg/2 # for plotting + eps2 = eps*0.8 # for plotting + lat1, lon1 = self.get_from_cartesian_grid(i, j, k).get_lon_lat().values[0] + lat2, lon2 = self.get_from_cartesian_grid(i+win_obs-1, j, k).get_lon_lat().values[0] + lat3, lon3 = self.get_from_cartesian_grid(i, j+win_obs-1, k).get_lon_lat().values[0] + lat4, lon4 = self.get_from_cartesian_grid(i+win_obs-1, j+win_obs-1, k).get_lon_lat().values[0] + + boxes.append(([lat1-eps2, lat2+eps2, lat3-eps2, lat4+eps2], + [lon1-eps, lon2-eps, lon3+eps, lon4+eps])) + + # average the subset + # metadata are assumed to be equal + obs_mean = obs_box.iloc[0] + + # average spread and other values + for key in obs_box: + if key in ['loc3d', 'kind', 'metadata', 'time']: + pass + elif 'spread' in key: + # stdev of mean of values = sqrt(mean of variances) + obs_mean.at[key] = np.sqrt((obs_box[key]**2).mean()) + elif key == 'variance': + # variance of mean = sum(variances)/n^2 + obs_mean.at[key] = obs_box[key].sum()/obs_box[key].size**2 + else: + obs_mean.at[key] = obs_box[key].mean() + + # define location of superobservation + if win_obs % 2 == 0: + # even number of observations in x-direction + # there is no center obs + raise NotImplementedError() else: - obs_mean.at[key] = obs_box[key].mean() - - # define location of superobservation - if win_obs % 2 == 0: - # even number of observations in x-direction - # there is no center obs - raise NotImplementedError() - else: - # odd number of observations in x-direction - # -> there is an observation in the middle - # take the location of that obs - # int(win_obs/2) is the index of the center element when indices start at 0 - i_obs_center = i_obs_grid[i + int(win_obs/2), - j + int(win_obs/2)] - obs_mean.at['loc3d'] = self.iloc[i_obs_center]['loc3d'] - - if debug: - print("pre_avg:", obs_box.head()) - print("avg:", obs_mean) - - out = out.append(obs_mean) + # odd number of observations in x-direction + # -> there is an observation in the middle + # take the location of that obs + # int(win_obs/2) is the index of the center element when indices start at 0 + i_obs_center = i_obs_grid[i + int(win_obs/2), j + int(win_obs/2), k] + obs_mean.at['loc3d'] = self.iloc[i_obs_center]['loc3d'] + + # check if all obs share the same vertical position + heights_in_box = np.array([a[2] for a in obs_box['loc3d']]) + assert np.allclose(heights_in_box, obs_mean['loc3d'][2]) + + if debug: + print("pre_avg:", obs_box.head()) + print("avg:", obs_mean) + + out = out.append(obs_mean) n_pre_superob = len(self) n_post_superob = len(out) out.attrs['boxes'] = boxes - # assume square of obs - n_windows_x = int(n_pre_superob**.5/win_obs) # in x-direction - n_target_post = n_windows_x**2 # number of windows + # quick after check - does the output obs number match with the expected number? + n_windows_x = int((n_pre_superob/nlayers)**.5/win_obs) # assume square of obs + n_target_post = n_windows_x**2 * nlayers # number of windows print('superob from', n_pre_superob, 'obs to', n_post_superob, 'obs') if n_post_superob != n_target_post: raise RuntimeError('expected', n_target_post, 'superobservations, but created', -- GitLab