Skip to content
Snippets Groups Projects
Commit 3c4745b0 authored by Aiko Voigt's avatar Aiko Voigt
Browse files

Implement alternatives for computing 2d connected components based on...

Implement alternatives for computing 2d connected components based on self-programmed breadth first search (own-bfs) and breadth first search and graph construction by means of networkx library (networkx)
parent 6190a6e5
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id:65e78811-2a04-463e-b94a-3e72bf4eee4c tags:
# Alternative implementation using graph construction and breadth-first search implemented by networkx python library
https://networkx.org/
![grafik.png](attachment:3f058920-7fc9-4aed-8362-427d19722cc2.png)
%% Cell type:code id:e0dcf723-d338-4b62-b5bc-d2b665527fab tags:
``` python
import numpy as np
import xarray as xr
import tricco
from tricco.alternatives import nwx
```
%% Cell type:markdown id:3b89f508-be18-470c-a595-6b3c00ffe1bf tags:
## 1. Load grid and construct full graph of the grid that includes all connection edges between all cells
Do this separately for vertex and edge connectivity.
%% Cell type:code id:46fa620f-0e90-492a-8ea4-7b0218bf5c72 tags:
``` python
gridfile = "./data/icon-grid_nawdex_78w40e23n80n_R80000m.nc"
grid = nwx.prepare_grid(model="ICON", file=gridfile)
```
%% Cell type:code id:0b3926a9-ecbe-4619-8ee2-98b6d07f84eb tags:
``` python
fgraph_vert = nwx.compute_fullgraph(grid, connectivity="vertex")
fgraph_edge = nwx.compute_fullgraph(grid, connectivity="edge")
```
%% Cell type:markdown id:bc23ddda-7010-47ff-9bc7-8360b243e057 tags:
## 2. Load cloud data and identify connected components. Result is a list of connected components ordered by size, where each connected component is a list of cell indices.
%% Cell type:code id:5e57f10e-a411-4021-af4a-3d6adaa28842 tags:
``` python
datafile = "./data/nawdexnwp-80km-mis-0001_2016092200_2d_30min_DOM01_ML_0060.nc"
data = xr.open_dataset(datafile)["clct"].squeeze()
components_vert = nwx.compute_connected_components_2d(data, fgraph_vert, 85.0)
components_edge = nwx.compute_connected_components_2d(data, fgraph_edge, 85.0)
```
%% Cell type:markdown id:aff336f5-6d16-48f5-8fc8-191a9de67759 tags:
Map connected components on the structure of the field input data, with the value indicating the connected component. Needed for plotting.
%% Cell type:code id:51a1bff0-3fab-46f8-a822-4ce37bdb5fc0 tags:
``` python
ncells = grid.cell.size
field_cc_vert = np.zeros(ncells)
for icomp in range(len(components_vert)):
field_cc_vert[list(components_vert[icomp])]=icomp+1
field_cc_edge = np.zeros(ncells)
for icomp in range(len(components_edge)):
field_cc_edge[list(components_edge[icomp])]=icomp+1
```
%% Cell type:markdown id:f5c472e6-d101-42eb-9ad6-876cc1f4821c tags:
## 3. Plotting. Same as in example-2d.ipynb for the tricco cubulation implementation.
%% Cell type:markdown id:2592faa4-0263-4bc7-9ca0-bb53bc9faaed tags:
Define a qualitative colormap that cycles through the colors of the matplotlib Set1 colormap, and that always plots 0 as white.
%% Cell type:code id:250ef78f-b9b7-4cff-990b-3bd2c7b416ed tags:
``` python
def make_colormap(ncolors):
import matplotlib as mpl
cmap_base = plt.cm.Set1 # note: number of colors of base color map is given by cmap_base.N
cmaplist_base = [cmap_base(i) for i in range(cmap_base.N)]
cmaplist = [(1, 1, 1, 1.0)] # need to have white as first color for the triangles with no connected component
for i in range(ncolors-1):
icolor = np.mod(i,cmap_base.N)
cmaplist.append(cmaplist_base[icolor])
return mpl.colors.LinearSegmentedColormap.from_list('Custom cmap', cmaplist, ncolors)
```
%% Cell type:markdown id:27e8f072-364c-4947-b1ab-e35d5a6cb814 tags:
Create 2x2 panel plot with original cloud field, thresholded cloud field and connected components.
%% Cell type:code id:b4a01f77-110b-4237-b62c-b87e82d6569e tags:
``` python
import matplotlib.pyplot as plt
import matplotlib.tri as tri
rad2deg=180.0/np.pi
import xarray as xr
grid_plot = xr.load_dataset("./data/icon-grid_nawdex_78w40e23n80n_R80000m.nc")
vlat = rad2deg*grid_plot["vlat"].values
vlon = rad2deg*grid_plot["vlon"].values
# we need to subtract -1 from vertex_of_cell as python starts counting at 0, but fortran starts at 1
vertex_of_cell= grid_plot["vertex_of_cell"].values-1
del grid_plot
datafile = "./data/nawdexnwp-80km-mis-0001_2016092200_2d_30min_DOM01_ML_0060.nc"
field = xr.open_dataset(datafile)["clct"].squeeze()
field_thresholded = np.where(field<85.0, 0, 1)
def make_niceplot(ax):
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_bounds(20,80)
ax.spines['bottom'].set_bounds(-80,40)
ax.yaxis.set_ticks([20,40,60,80])
ax.yaxis.set_ticklabels(['20N','40N','60N','80N'], fontsize=6)
ax.xaxis.set_ticks([-80,-60,-40,-20,0,20,40])
ax.xaxis.set_ticklabels(['80W','60W','40W','20W','0','20E','40E'], fontsize=6)
plt.xlim(-85,45); plt.ylim(20,81);
plt.ylabel('latitude', fontsize=8)
# plotting
plt.figure(figsize=(12,7))
ax=plt.subplot(2,2,1); make_niceplot(ax)
plt.title('Total cloud cover (%)', fontsize=10)
c=plt.tripcolor(vlon, vlat, vertex_of_cell.transpose(), facecolors=field,
vmin=0, vmax=100, edgecolors='none', cmap=plt.get_cmap('Blues_r'))
plt.colorbar(c, ticks=[0,100]);
plt.text(-90,88, 'a)', color='k', size=14, ha='left', va='top')
ax=plt.subplot(2,2,3); make_niceplot(ax)
plt.title('Total cloud cover (thresholded at 85%)', fontsize=10)
c=plt.tripcolor(vlon, vlat, vertex_of_cell.transpose(), facecolors=field_thresholded,
vmin=0, vmax=1, edgecolors='none', cmap=plt.get_cmap('Blues_r'))
plt.colorbar(c, ticks=[0,1]);
plt.text(-90,88, 'b)', color='k', size=14, ha='left', va='top')
ax=plt.subplot(2,2,2); make_niceplot(ax)
plt.title('Vertex connectivity', fontsize=10); make_niceplot(ax)
c=plt.tripcolor(vlon, vlat, vertex_of_cell.transpose(), facecolors=field_cc_vert,
vmin=-0.5, vmax=len(components_vert)+0.5, edgecolors='none', cmap=make_colormap(ncolors=len(components_vert)+1))
plt.colorbar(c, ticks=[i for i in range(0,len(components_vert)+1,10)]);
plt.text(-90,88, 'c)', color='k', size=14, ha='left', va='top')
ax=plt.subplot(2,2,4); make_niceplot(ax)
plt.title('Edge connectivity', fontsize=10); make_niceplot(ax)
c=plt.tripcolor(vlon, vlat, vertex_of_cell.transpose(), facecolors=field_cc_edge,
vmin=-0.5, vmax=len(components_edge)+0.5, edgecolors='none', cmap=make_colormap(ncolors=len(components_edge)+1))
plt.colorbar(c, ticks=[i for i in range(0,len(components_edge)+1,10)]);
plt.xlabel('longitude', fontsize=8);
plt.text(-90,88, 'd)', color='k', size=14, ha='left', va='top');
plt.savefig("./alternative_networkx_2d.pdf")
```
%% Output
File added
This diff is collapsed.
File added
# Implementation by means of graphs and networkx python library
import numpy as np
import xarray as xr
import networkx as nx
def prepare_grid(model, file):
"""
Return required grid information as a xarray dataset.
Input:
model string model name
file string name of the file containing the grid (including full path)
Note that we only need the following information about the horizontal triangular grid:
- the edge neighbors of a given triangle (for edge connectivity)
- the triangles that share a given vertex (for vertex connectivity)
"""
if model == "ICON":
_grid = _grid_icon(file)
return _grid
# implementation for ICON model
def _grid_icon(_gridfile):
_grid = xr.open_dataset(_gridfile)
_grid = _grid[["neighbor_cell_index", "cells_of_vertex"]]
# in the ICON model grid, the indexing of the triangle cells and vertices
# starts with 1 and not with 0 as assumed by tricco --> we need to subtract 1 here
_grid["neighbor_cell_index"] = _grid["neighbor_cell_index"] - 1
_grid["cells_of_vertex"] = _grid["cells_of_vertex"] - 1
# after the substraction of 1, "missing" triangles at the border of the grid domain are indexed as -1
# --> we set these values to -9999 to clearly flag these "missing" triangles
_grid["neighbor_cell_index"] = xr.where(_grid["neighbor_cell_index"]!=-1, _grid["neighbor_cell_index"], -9999)
_grid["cells_of_vertex"] = xr.where(_grid["cells_of_vertex"]!=-1, _grid["cells_of_vertex"], -9999)
return _grid
def compute_fullgraph(grid, connectivity="vertex"):
"""
Returns full graph of the grid that contains all cells as nodes and with edges between all neighboring cells,
depending on the type of connectivity
Input:
grid xarray dataset grid information
connectivity string type of connectivity, either vertex or edge
"""
ncells = grid.cell.size
fgraph = nx.Graph()
fgraph.add_nodes_from(range(0, ncells))
# full graph for vertex connectivity
if connectivity == "vertex":
nverts = grid.vertex.size
for vertex in range(0, nverts):
cells_of_vertex = grid.cells_of_vertex[:, vertex].values
for cell1 in cells_of_vertex:
for cell2 in cells_of_vertex:
if cell1 >=0 and cell2>=0:
fgraph.add_edge(cell1, cell2)
# full graph for edge connectivity
elif connectivity == "edge":
for cell in range(0, ncells):
for neighbor in grid.neighbor_cell_index[:, cell].values:
if neighbor >=0:
fgraph.add_edge(neighbor, cell)
# unknown type of connectivity
else:
print("compute fullgraph: unknown type of connectivity:", connectivity, "--> full graph of grid not defined!")
fgraph = None
return fgraph
def compute_connected_components_2d(field, fgraph, threshold):
"""
Returns connected components for 2-d data based on a thresholded field and the full graph of the grid.
Input:
field : 1d-data for which connected components will be calculated, defined on each grid cell
fgraph: full graph of the grid with all grid cells being nodes and edges defined between all neighboring
cells, hence the graph encodes whether edge of vertex connectivity will be considered
threshold: set field to False if smaller than threshold, and to True otherwise
Output:
list of connected components sorted by size of connected components (largest component first),
each connected component is a list of grid cells
"""
# copy full graph to local graph
_graph = fgraph.copy(as_view=False)
# threshold field
field = np.where(field<threshold, False, True)
# remove grid cells/nodes from graph for which field < threshold, i.e., False
for i in range(0, field.size):
if not field[i]: _graph.remove_node(i)
# compute connected components and sort by size
components = []
[components.append(list(cc)) for cc in nx.connected_components(_graph)];
components = sorted(components, key=len, reverse=True)
return components
\ No newline at end of file
# Implementation by means of a self-programmed breadth-first search
import numpy as np
import xarray as xr
from collections import deque # for using lists as queues
def prepare_grid(model, file):
"""
Return required grid information as a xarray dataset.
Input:
model string model name
file string name of the file containing the grid (including full path)
Note that we only need the following information about the horizontal triangular grid:
- the edge neighbors of a given triangle (for edge connectivity)
- the vertices of each triangle and the triangles that share a given vertex (for vertex connectivity)
"""
if model == "ICON":
_grid = _grid_icon(file)
return _grid
# implementation for ICON model
def _grid_icon(_gridfile):
_grid = xr.open_dataset(_gridfile)
_grid = _grid[["neighbor_cell_index", "vertex_of_cell", "cells_of_vertex"]]
# in the ICON model grid, the indexing of the triangle cells and vertices
# starts with 1 and not with 0 as assumed by tricco --> we need to subtract 1 here
_grid["neighbor_cell_index"] = _grid["neighbor_cell_index"] - 1
_grid["vertex_of_cell"] = _grid["vertex_of_cell"] - 1
_grid["cells_of_vertex"] = _grid["cells_of_vertex"] - 1
# after the substraction of 1, "missing" triangles at the border of the grid domain are indexed as -1
# --> we set these values to -9999 to clearly flag these "missing" triangles
_grid["neighbor_cell_index"] = xr.where(_grid["neighbor_cell_index"]!=-1, _grid["neighbor_cell_index"], -9999)
_grid["vertex_of_cell"] = xr.where(_grid["vertex_of_cell"]!=-1, _grid["vertex_of_cell"], -9999)
_grid["cells_of_vertex"] = xr.where(_grid["cells_of_vertex"]!=-1, _grid["cells_of_vertex"], -9999)
return _grid
def _add_neighbors_to_queue(cell, field, grid, connectivity, explored, queue):
"""
Add neighbors of a given cell to the queue if they are in the same connected component and not yet explored.
Input:
cell int the index of the cell currently considered
field numpy array thresholded 1-d array with values of True or False,
defined on each grid cell
grid xarray dataset grid information
connectivity string type of connectivity, either vertex or edge
explored 1-darray information on whether cells have been explored yet
Input/Output:
queue list-queue cell indices for breadth-first search of current connected component
"""
ncells=field.size
# vertex connectivity
if connectivity == "vertex":
vertex_of_cell = grid.vertex_of_cell[:, cell].values
for vertex in vertex_of_cell:
for neigh in grid.cells_of_vertex[:,vertex].values:
if neigh!=cell and neigh>=0 and field[neigh] and not explored[neigh] and neigh not in queue:
queue.append(neigh)
# edge connectivity
elif connectivity == "edge":
for neigh in grid.neighbor_cell_index[:, cell].values:
if neigh>=0 and field[neigh] and not explored[neigh] and neigh not in queue:
queue.append(neigh)
# unknown type of connectivity
else:
print("_add_neighbors_to_queue: unknown type of connectivity:", connectivity, "--> ownbfs cannot work!")
def compute_components_2d(grid, field, threshold, connectivity="vertex"):
"""
Returns connected components for given grid, field and connectivity type.
The implementation runs through all cells. If a cell is not yet explored and has a value>=threshold, then
its connected component is explored via breadth-first-search marking all considered cells as explored.
Input:
grid xarray dataset grid information
field numpy array 1-d array of field for which connected components will be calculated,
defined on each grid cell
threshold float set field to False if smaller than threshold, and to True otherwise
connectivity string type of connectivity, either vertex or edge
Output:
list of connected components sorted by size of connected components (largest component first),
each connected component is a list of grid cells
"""
ncells = field.size
# threshold field
field = np.where(field<threshold, False, True)
# init lists for connected components and explored cells
components = []
explored = [False] * ncells
# loop over all cells
for cell in range(ncells):
if not explored[cell]: # cell must not be explored yet
explored[cell] = True
if field[cell]: # if cell is covered, it belongs to a new connected component
# init component list and queue for BFS of this connected component
comp = [cell]
queue = deque([])
_add_neighbors_to_queue(cell, field, grid, connectivity, explored, queue)
# perform BFS
while queue:
cell = queue.pop()
explored[cell] = True
comp.append(cell)
_add_neighbors_to_queue(cell, field, grid, connectivity, explored, queue)
# append found component to list of components
components.append(comp)
# sort connected components by size, starting with the largest component
components = sorted(components, key=len, reverse=True)
return components
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment