Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
DART-WRF
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
DataAssimilation
DART-WRF
Commits
861ac9b1
Commit
861ac9b1
authored
3 years ago
by
lkugler
Browse files
Options
Downloads
Patches
Plain Diff
superob
parent
1d1a4b2b
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
.gitignore
+2
-1
2 additions, 1 deletion
.gitignore
scripts/obsseq.py
+489
-0
489 additions, 0 deletions
scripts/obsseq.py
with
491 additions
and
1 deletion
.gitignore
+
2
−
1
View file @
861ac9b1
...
@@ -3,4 +3,5 @@ slurm-scripts/
...
@@ -3,4 +3,5 @@ slurm-scripts/
logs/
logs/
dask-worker-space/
dask-worker-space/
DART_WRF.egg*
DART_WRF.egg*
build*
build*
\ No newline at end of file
._*
\ No newline at end of file
This diff is collapsed.
Click to expand it.
scripts/obsseq.py
0 → 100644
+
489
−
0
View file @
861ac9b1
import
os
,
sys
,
shutil
,
warnings
import
numpy
as
np
import
pandas
as
pd
from
config.cfg
import
exp
,
cluster
from
utils
import
symlink
,
copy
,
sed_inplace
,
append_file
,
mkdir
,
try_remove
import
matplotlib
as
mpl
mpl
.
use
(
"
agg
"
)
import
matplotlib.pyplot
as
plt
def plot_box(m, lat, lon, label="", **kwargs):
    """Outline a 2D lat/lon grid by drawing its four edges as great circles.

    Args:
        m: map object providing ``drawgreatcircle`` (a Basemap in this file)
        lat (2D array): latitudes; only the four corner values are used
        lon (2D array): longitudes; only the four corner values are used
        label (str): legend label, attached to the last edge only so that
            the box shows up once in the legend
        **kwargs: forwarded unchanged to every ``drawgreatcircle`` call
    """
    # (start corner, end corner) of the three unlabelled edges
    unlabelled_edges = (
        ((0, -1), (0, 0)),
        ((0, -1), (-1, -1)),
        ((-1, 0), (-1, -1)),
    )
    for start, end in unlabelled_edges:
        m.drawgreatcircle(
            lon[start], lat[start], lon[end], lat[end],
            del_s=20, zorder=4, **kwargs
        )

    # closing edge carries the label
    first, last = (0, 0), (-1, 0)
    m.drawgreatcircle(
        lon[first], lat[first], lon[last], lat[last],
        del_s=20, zorder=4, label=label, **kwargs
    )
def degrees_to_rad(degr):
    """Convert an angle in degrees to radians (DART convention).

    Negative angles are shifted by one full turn (+360 degrees) before the
    conversion, e.g. -90 degrees is returned as 1.5*pi.

    Args:
        degr (numeric): angle in degrees

    Returns:
        angle in radians
    """
    if degr < 0:
        degr += 360  # map negative angles onto the positive range
    fraction_of_circle = degr / 360
    return fraction_of_circle * 2 * np.pi
def rad_to_degrees(rad):
    """Convert an angle in radians (DART convention) to degrees.

    Results above 180 degrees are wrapped by -360, i.e. (180, 360] is
    returned as (-180, 0].

    Args:
        rad (numeric): angle in radians, must not be negative

    Returns:
        angle in degrees

    Raises:
        AssertionError: if `rad` is negative
    """
    assert rad >= 0, "no negative radians allowed"
    degr = rad / np.pi * 180
    return degr - 360 if degr > 180 else degr
class ObsSeqOut(object):
    """Read, modify and write a DART observation sequence file (ASCII).

    The file is parsed once in the constructor; afterwards the observations
    are available as a pandas.DataFrame (`df`) that can be modified
    (e.g. by `superob`) and written back with `to_dart`.

    Attributes:
        ascii (list of str): all lines of the input file
        preamble (list of str): lines before the first observation
        content (list of str): lines from the first observation onwards
        obstypes (list): contains (kind_nr, kind_name) tuples
        dict (list of dict): one dictionary per observation
        df (pd.DataFrame): one row per observation
    """

    def __init__(self, filepath):
        """Parse the obs_seq file at `filepath`."""
        print(filepath)
        # context manager so that the file handle is closed again
        with open(filepath, "r") as fh:
            self.ascii = fh.readlines()

        self.preamble, self.content = self.get_preamble_content()
        self.obstypes = self.get_obstypes()

        self.dict = self.obs_to_dict()
        self.df = self.to_pandas()

    def __str__(self):
        return str(self.df)

    def get_preamble_content(self):
        """Split the obs_seq.out file into two parts

        1) First lines of obs_seq.out file until the first observation message
        2) Observation contents

        Returns:
            tuple (list of str, list of str): preamble lines, content lines

        Raises:
            RuntimeError: if the file does not contain any line with "OBS"
        """
        for i, line in enumerate(self.ascii):
            if "OBS" in line:
                return self.ascii[:i], self.ascii[i:]
        raise RuntimeError("no observation ('OBS' line) found in file")

    def get_obstypes(self):
        """Return a list of tuples (kind_nr, kind_descriptor) for each obs type

        Both header spellings are accepted: "obs_type_definitions" and
        "obs_kind_definitions" (the latter is what `to_dart` writes), so that
        files written by this class can be read again.

        Raises:
            RuntimeError: if the obs type table header is missing
        """
        headers = ("obs_type_definitions", "obs_kind_definitions")
        for i, line in enumerate(self.ascii):
            if any(header in line for header in headers):
                break
        else:
            raise RuntimeError("obs type/kind definitions not found in file")

        # the line after the header holds the number of obs types
        line_n_obstypes = i + 1
        n_obstypes = int(self.ascii[line_n_obstypes])

        # read obs type kind (number and description)
        obstypes = []
        for k in range(1, n_obstypes + 1):
            kind_nr, kind_type = self.ascii[line_n_obstypes + k].split()
            obstypes.append((int(kind_nr), kind_type))
        return obstypes

    def to_dart(self, f):
        """Write to obs_seq.out file in DART format

        Observations are numbered 1..N in the row order of `self.df` and
        linked as (previous, next, -1); -1 marks "no such observation".
        The metadata block and its key line are only written for
        observations that carry metadata.

        NOTE(review): the column padding inside the string literals below
        could not be recovered from the reviewed copy of this file and is
        written with single spaces - confirm against the repository if the
        exact layout matters.

        Args:
            f (str):    path of file to write
        """

        def write_file(msg, output_path="./"):
            # an existing file is removed first; presumably so that a
            # symlink is replaced instead of written through - TODO confirm
            try:
                os.remove(output_path)
            except OSError:
                pass

            with open(output_path, "w") as fh:
                fh.write(msg)
            print(output_path, "saved.")

        def write_preamble(n_obs):
            nobs = str(n_obs)
            txt = "obs_sequence\nobs_kind_definitions\n" + str(len(self.obstypes))
            for (nr, obstype) in self.obstypes:
                txt += "\n" + str(nr) + " " + obstype
            txt += "\n num_copies: 2 num_qc: 1"
            txt += "\n num_obs: " + nobs
            txt += " max_num_obs: " + nobs
            txt += "\nobservations\ntruth\nQuality Control\n"
            txt += "first: 1 last: " + nobs
            return txt

        def write_obs(i, obs, next_i_obs=-1, prev_i_obs=-1):
            """Write the observation section of a obs_seq.out file

            Args:
                i (int):    index of observation (starts at 1)
                obs (dict-like): observation data
                next_i_obs (int):   index of next observation, -1 if last
                prev_i_obs (int):   index of previous observation, -1 if first

            Returns
                str
            """
            line_link = " " + str(prev_i_obs) + " " + str(next_i_obs) + " -1"

            section = "\n".join(
                [
                    "\nOBS " + str(i),
                    str(obs["obs"]),
                    str(obs["truth"]),
                    str(obs["qc"]),
                    line_link,
                    "obdef",
                    "loc3d",
                    " ".join(str(obs["loc3d"][n]) for n in range(4)),
                    "kind",
                    " " + str(obs["kind"]),
                ]
            )
            section += "\n"
            if len(obs["metadata"]) > 0:
                # metadata lines keep their own line endings from the input;
                # they are followed by the key (= index) of this observation
                section += "".join(obs["metadata"]) + str(i) + "\n"
            section += obs["time"][0] + " " + obs["time"][1] + "\n"
            section += str(obs["variance"])
            return section

        n_obs = len(self.df)
        sections = [write_preamble(n_obs)]

        # DART format is a linked list, each obs needs the index of its
        # neighbours;  k ... 0, ..., len(df)-1;  i_obs_this ... starts at 1
        for k, (_, obs) in enumerate(self.df.iterrows()):
            i_obs_this = k + 1
            i_obs_prev = i_obs_this - 1 if i_obs_this > 1 else -1
            i_obs_next = i_obs_this + 1 if i_obs_this < n_obs else -1
            sections.append(
                write_obs(
                    i_obs_this, obs, next_i_obs=i_obs_next, prev_i_obs=i_obs_prev
                )
            )

        write_file("".join(sections), output_path=f)

    def obs_to_dict(self):
        """Convert an obs_seq.out file to a list of dictionaries

        Returns:
            list of dict: one dictionary per observation with the keys
                obs, truth, qc, loc3d, kind, metadata, time, variance
        """

        def check_obs_begin(line):
            if "OBS" not in line:
                raise RuntimeError("wrong line in observation")

        def content_to_list(content):
            """Split obs_seq.out content into list of observation-sections

            A section starts at a line containing "OBS" and ends before the
            next such line (or at the end of the file).
            """
            check_obs_begin(content[0])
            starts = [
                i for i, line in enumerate(content) if i == 0 or "OBS" in line
            ]
            ends = starts[1:] + [len(content)]
            # files with a single observation are valid too
            return [content[begin:end] for begin, end in zip(starts, ends)]

        def one_obs_to_dict(obs_list_entry):
            """Convert the lines of one observation section to a dictionary"""
            lines = obs_list_entry
            check_obs_begin(lines[0])

            line_loc = line_kind = None
            for i, line in enumerate(lines):
                if "loc3d" in line:  # location is on the following line
                    line_loc = i + 1
                if "kind" in line:  # obs kind is on the following line
                    line_kind = i + 1
            if line_loc is None or line_kind is None:
                raise RuntimeError("observation without 'loc3d' or 'kind' line")

            out = dict()
            out["obs"] = float(lines[1].strip())
            out["truth"] = float(lines[2].strip())
            out["qc"] = float(lines[3].strip())
            x, y, z, z_coord = lines[line_loc].split()
            out["loc3d"] = float(x), float(y), float(z), int(z_coord)
            out["kind"] = int(lines[line_kind].strip())
            # everything between the kind and the last three lines;
            # lines[-3] is rewritten as the obs index by `to_dart`
            out["metadata"] = lines[line_kind + 1 : -3]
            out["time"] = tuple(lines[-2].split())
            out["variance"] = float(lines[-1].strip())
            return out

        # content = [line1, ...]
        # transform to [obs1, obs2, obs3, ...]
        # obs1 = [obsline1, obsline2, ...]
        obs_list = content_to_list(self.content)
        return [one_obs_to_dict(entry) for entry in obs_list]

    def to_pandas(self):
        """Create pd.DataFrame containing observations

        Each observation is one row; the columns are the keys of the
        observation dictionaries (obs, truth, qc, loc3d, kind, ...).
        """
        obs_dict_list = self.obs_to_dict()

        # set keys from first obs (kind, loc3d, values)
        keys = obs_dict_list[0].keys()
        data = {key: [obs[key] for obs in obs_dict_list] for key in keys}
        return pd.DataFrame(index=range(len(obs_dict_list)), data=data)

    def get_lon_lat(self):
        """Return longitude and latitude of all observations in degrees

        The conversion is done here (vectorized) instead of calling the
        scalar helper `rad_to_degrees`, because latitudes may be negative
        (southern hemisphere) while that helper rejects negative input.
        Longitudes above 180 degrees are wrapped to (-180, 0).

        Returns:
            pd.DataFrame: float32 columns `lat`, `lon`; index as in `self.df`

        Raises:
            ValueError: if a longitude is negative
        """
        n_obs = len(self.df)
        loc3d = self.df["loc3d"]
        # values are filled by position, so any index of `self.df` works
        lon_rad = np.fromiter((loc[0] for loc in loc3d), dtype=float, count=n_obs)
        lat_rad = np.fromiter((loc[1] for loc in loc3d), dtype=float, count=n_obs)

        if (lon_rad < 0).any():
            raise ValueError("no negative radians allowed")

        lon_deg = lon_rad / np.pi * 180
        lon_deg[lon_deg > 180] -= 360  # convert (180,360) to (-180,0)
        lat_deg = lat_rad / np.pi * 180

        return pd.DataFrame(
            index=self.df.index,
            data=dict(lat=lat_deg.astype(np.float32), lon=lon_deg.astype(np.float32)),
        )

    def superob(self, window_km):
        """Select subset, average, overwrite existing obs with average

        TODO: allow different obs types (KIND)
        TODO: loc3d overwrite with mean
        Metadata (and all other non-averaged columns) is copied from the
        first obs in a superob-box. `self.df` is overwritten with the result;
        each row keeps the index label of the first obs in its box.

        Note:
            Observations are assumed to lie on a square (nx x nx) grid.
            This routine discards observations (round off)
            e.g. 31 obs with 5 obs-window => obs #31 is not processed

        Args:
            window_km (numeric):        horizontal window edge length
                                        includes obs on edge
                                        25x25 km with 5 km obs density
                                        = average 5 x 5 observations

        Raises:
            ValueError: if the number of observations is not a square number,
                the obs spacing cannot be determined or is below 1 km, or
                the window is smaller than the obs spacing
        """
        radius_earth_meters = 6.371 * 1e6
        m_per_degrees = np.pi * radius_earth_meters / 180  # m per degree latitude
        km_per_degrees = m_per_degrees / 1000

        # assume cartesian grid of observations
        n_total = len(self.df)
        nx = int(round(n_total ** 0.5))
        if n_total == 0 or nx * nx != n_total:
            raise ValueError(
                "superob needs a square grid of observations, got "
                + str(n_total) + " observations"
            )
        # positions (not index labels) since rows are selected with `iloc`
        i_obs_grid = np.arange(n_total).reshape(nx, nx)

        # loop through columns/rows
        # avoid loop in (lat,lon) space as coordinates are non-cartesian
        # i.e. first column of observations does not have same longitude

        # determine obs density (approx) from the largest latitude step
        coords = self.get_lon_lat()
        dx_obs_lat_deg = coords.lat.diff().max()
        km_lat = dx_obs_lat_deg * km_per_degrees
        if not km_lat >= 1:  # also catches NaN (e.g. only one observation)
            raise ValueError("obs spacing could not be determined or is < 1 km")
        obs_spacing_km = int(km_lat)

        # how many observations in x/y direction?
        win_obs = int(window_km / obs_spacing_km)
        if win_obs < 1:
            raise ValueError("window size must be at least the obs spacing")

        averaged = ("obs", "truth", "qc", "variance")
        first_in_box = []
        means = {column: [] for column in averaged}

        # "+ 1": the last complete box starts at nx - win_obs
        box_starts = range(0, nx - win_obs + 1, win_obs)
        for i in box_starts:
            for j in box_starts:
                # find indices of observations which lie in the superob box
                i_obs_box = i_obs_grid[i : i + win_obs, j : j + win_obs].ravel()

                # average the subset
                # metadata are assumed to be equal
                obs_box = self.df.iloc[i_obs_box]
                first_in_box.append(i_obs_box[0])
                for column in averaged:
                    means[column].append(obs_box[column].mean())

        out = self.df.iloc[first_in_box].copy()
        for column in averaged:
            out[column] = means[column]

        self.df = out  # overwrite input

    def plot(
        self,
        box=None,
        georef_file="/gpfs/data/fs71386/lkugler/run_DART/geo_em.d01.nc",
        f_out="/home/fs71386/lkugler/data/analysis/map_obs_superobs.png",
    ):
        """Plot the model domain and the observation locations on a map

        Args:
            box (dict, optional): superob box to draw, with the keys
                "lat" and "lon" (edge coordinates in degrees)
            georef_file (str): netCDF file providing the 2D fields
                XLONG_M and XLAT_M of the model domain
            f_out (str): path of the figure to write
        """
        import xarray as xr
        from mpl_toolkits.basemap import Basemap

        with xr.open_dataset(georef_file) as georef:
            lon = georef.XLONG_M.values.squeeze()
            lat = georef.XLAT_M.values.squeeze()

        fig, ax = plt.subplots(figsize=(12, 12))

        # NOTE(review): the fixed indices below assume a domain of at
        # least 176 x 176 grid points - confirm for other domains
        m = Basemap(
            projection="lcc",
            resolution="h",
            lon_0=lon[100, 100],
            lat_0=lat[25, 25],
            lat_1=lat[100, 100],
            lat_2=lat[175, 175],
            llcrnrlon=lon[0, 0] - 2,
            llcrnrlat=lat[0, 0] - 2,
            urcrnrlon=lon[-1, -1] + 2,
            urcrnrlat=lat[-1, -1] + 2,
        )
        m.drawlsmask(land_color="0.1", ocean_color="0.1")
        m.drawcoastlines(color="white")
        m.drawcountries(color="white")

        plot_box(m, lat, lon, label="domain", color="green", lw=4)

        # OBSERVATIONS
        coords = self.get_lon_lat()
        for lati, long in zip(coords.lat.values, coords.lon.values):
            m.plot(
                long,
                lati,
                ".",
                markersize=5,
                latlon=True,
                color="red",
                zorder=4,
            )

        # empty plot, only creates the legend entry
        m.plot(
            [],
            [],
            "s",
            markersize=0.3,
            label="observations",
            color="red",
            zorder=4,
        )

        if box:
            lats, longs = np.meshgrid(box["lat"], box["lon"])
            print(lats, longs)
            plot_box(m, lats, longs, label="superob", color="white", lw=1)

        plt.legend()
        plt.tight_layout()
        plt.savefig(f_out, dpi=300)
        plt.close(fig)  # release the figure
        print(f_out, "saved.")
if __name__ == "__main__":
    # manual test run: read the test file, superob it and write the
    # result into the DART run directory
    f_in = cluster.scriptsdir + "/../tests/obs_seq.orig.out"
    obs = ObsSeqOut(f_in)

    # average observations within 50 km boxes
    obs.superob(window_km=50)

    # write to obs_seq.out in DART format
    f_out = cluster.dartrundir + "/obs_seq.out"
    obs.to_dart(f=f_out)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment