diff --git a/Python/QA-010-Reading-Bufr-Synop.ipynb b/Python/QA-010-Reading-Bufr-Synop.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8ae7c107a1b85ee38c2b1ed6f5adc4b1fb486eaf --- /dev/null +++ b/Python/QA-010-Reading-Bufr-Synop.ipynb @@ -0,0 +1,824 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f4885378-6bf9-40b2-9046-9f1efd1e32ec", + "metadata": {}, + "source": [ + "# Reading BUFR files\n", + "\n", + "Information about BUFR at [ECMWF](https://confluence.ecmwf.int/display/UDOC/BUFR+structure+-+ecCodes+BUFR+FAQ) inside ECCODES" + ] + }, + { + "cell_type": "markdown", + "id": "4579e58c-b4cf-43f4-bd31-873598a81a95", + "metadata": {}, + "source": [ + "install via \n", + "\n", + "`pip install --user pdbufr`\n", + "\n", + "or install the newest version\n", + "\n", + "`pip install --user git+https://github.com/ecmwf/pdbufr.git`\n", + "\n", + "it has eccodes as dependency" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ecf6bff7-91e7-4cfa-8d7a-509e4e4ae229", + "metadata": {}, + "outputs": [], + "source": [ + "import pdbufr" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4935a745-b963-4aef-a8a1-dad695f3aa71", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.11.0'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pdbufr.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e60003da-fc6b-4a9f-a568-53bc2beed9d0", + "metadata": {}, + "outputs": [], + "source": [ + "SYNOP_BUFR_FILE = '/data/boden/BUFR/2022/01/31/ISMD12_LOWM.311200'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d3032d01-bbe4-49ca-bc43-117f3defa4ab", + "metadata": {}, + "outputs": [], + "source": [ + " keys = ['blockNumber', \n", + " 'stationNumber', \n", + " 'latitude', \n", + " 'longitude', \n", + " 'airTemperature', \n", + " 'heightOfStationGroundAboveMeanSeaLevel',\n", + " 
'dewpointTemperature', \n", + " 'windSpeed', \n", + " 'windDirection', \n", + " 'WMO_station_id',\n", + " 'cloudType',\n", + " ]\n", + "df_all = pdbufr.read_bufr(SYNOP_BUFR_FILE, columns=keys)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "27f35abe-56f7-4afd-80b5-a3f93cc87b61", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>blockNumber</th>\n", + " <th>stationNumber</th>\n", + " <th>latitude</th>\n", + " <th>longitude</th>\n", + " <th>heightOfStationGroundAboveMeanSeaLevel</th>\n", + " <th>airTemperature</th>\n", + " <th>dewpointTemperature</th>\n", + " <th>cloudType</th>\n", + " <th>windDirection</th>\n", + " <th>windSpeed</th>\n", + " <th>WMO_station_id</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>11</td>\n", + " <td>8</td>\n", + " <td>48.56972</td>\n", + " <td>13.99417</td>\n", + " <td>597.0</td>\n", + " <td>273.35</td>\n", + " <td>272.95</td>\n", + " <td>NaN</td>\n", + " <td>260</td>\n", + " <td>5.0</td>\n", + " <td>11008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>11</td>\n", + " <td>20</td>\n", + " <td>48.61778</td>\n", + " <td>15.20361</td>\n", + " <td>502.0</td>\n", + " <td>276.25</td>\n", + " <td>271.55</td>\n", + " <td>NaN</td>\n", + " <td>240</td>\n", + " <td>4.0</td>\n", + " <td>11020</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>11</td>\n", + " <td>21</td>\n", + " <td>48.95472</td>\n", + " <td>15.03833</td>\n", + " <td>558.0</td>\n", + " <td>274.45</td>\n", + " <td>272.05</td>\n", + " <td>NaN</td>\n", + " 
<td>270</td>\n", + " <td>1.0</td>\n", + " <td>11021</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>11</td>\n", + " <td>32</td>\n", + " <td>48.66917</td>\n", + " <td>16.63750</td>\n", + " <td>198.0</td>\n", + " <td>277.35</td>\n", + " <td>271.75</td>\n", + " <td>NaN</td>\n", + " <td>230</td>\n", + " <td>4.0</td>\n", + " <td>11032</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>11</td>\n", + " <td>72</td>\n", + " <td>47.94917</td>\n", + " <td>16.82889</td>\n", + " <td>117.0</td>\n", + " <td>276.25</td>\n", + " <td>270.45</td>\n", + " <td>NaN</td>\n", + " <td>190</td>\n", + " <td>2.0</td>\n", + " <td>11072</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>11</td>\n", + " <td>78</td>\n", + " <td>48.02806</td>\n", + " <td>15.58750</td>\n", + " <td>696.0</td>\n", + " <td>273.45</td>\n", + " <td>272.75</td>\n", + " <td>NaN</td>\n", + " <td>210</td>\n", + " <td>6.0</td>\n", + " <td>11078</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>11</td>\n", + " <td>119</td>\n", + " <td>47.32472</td>\n", + " <td>11.17556</td>\n", + " <td>1182.0</td>\n", + " <td>272.85</td>\n", + " <td>271.95</td>\n", + " <td>NaN</td>\n", + " <td>100</td>\n", + " <td>1.0</td>\n", + " <td>11119</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>11</td>\n", + " <td>129</td>\n", + " <td>47.00722</td>\n", + " <td>11.51083</td>\n", + " <td>1412.0</td>\n", + " <td>273.15</td>\n", + " <td>268.25</td>\n", + " <td>NaN</td>\n", + " <td>280</td>\n", + " <td>2.0</td>\n", + " <td>11129</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>11</td>\n", + " <td>138</td>\n", + " <td>47.13500</td>\n", + " <td>12.62583</td>\n", + " <td>2317.0</td>\n", + " <td>264.65</td>\n", + " <td>262.25</td>\n", + " <td>NaN</td>\n", + " <td>170</td>\n", + " <td>1.0</td>\n", + " <td>11138</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>11</td>\n", + " <td>140</td>\n", + " <td>47.58445</td>\n", + " <td>12.69528</td>\n", + " <td>622.0</td>\n", 
+ " <td>273.55</td>\n", + " <td>272.65</td>\n", + " <td>NaN</td>\n", + " <td>330</td>\n", + " <td>1.0</td>\n", + " <td>11140</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>11</td>\n", + " <td>141</td>\n", + " <td>47.40667</td>\n", + " <td>13.22111</td>\n", + " <td>550.0</td>\n", + " <td>272.35</td>\n", + " <td>270.55</td>\n", + " <td>NaN</td>\n", + " <td>320</td>\n", + " <td>2.0</td>\n", + " <td>11141</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>11</td>\n", + " <td>161</td>\n", + " <td>47.52167</td>\n", + " <td>14.95417</td>\n", + " <td>1215.0</td>\n", + " <td>270.95</td>\n", + " <td>268.15</td>\n", + " <td>NaN</td>\n", + " <td>340</td>\n", + " <td>3.0</td>\n", + " <td>11161</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>11</td>\n", + " <td>170</td>\n", + " <td>47.85444</td>\n", + " <td>15.06750</td>\n", + " <td>612.0</td>\n", + " <td>274.15</td>\n", + " <td>272.65</td>\n", + " <td>NaN</td>\n", + " <td>30</td>\n", + " <td>1.0</td>\n", + " <td>11170</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>11</td>\n", + " <td>182</td>\n", + " <td>47.83222</td>\n", + " <td>16.23139</td>\n", + " <td>275.0</td>\n", + " <td>277.15</td>\n", + " <td>270.55</td>\n", + " <td>3.0</td>\n", + " <td>0</td>\n", + " <td>2.0</td>\n", + " <td>11182</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>11</td>\n", + " <td>192</td>\n", + " <td>47.20750</td>\n", + " <td>16.33556</td>\n", + " <td>265.0</td>\n", + " <td>274.45</td>\n", + " <td>270.15</td>\n", + " <td>NaN</td>\n", + " <td>230</td>\n", + " <td>1.0</td>\n", + " <td>11192</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>11</td>\n", + " <td>201</td>\n", + " <td>46.74639</td>\n", + " <td>12.42361</td>\n", + " <td>1081.0</td>\n", + " <td>276.05</td>\n", + " <td>261.85</td>\n", + " <td>NaN</td>\n", + " <td>130</td>\n", + " <td>1.0</td>\n", + " <td>11201</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>11</td>\n", + " 
<td>213</td>\n", + " <td>46.61806</td>\n", + " <td>13.87389</td>\n", + " <td>493.0</td>\n", + " <td>274.95</td>\n", + " <td>266.45</td>\n", + " <td>NaN</td>\n", + " <td>100</td>\n", + " <td>1.0</td>\n", + " <td>11213</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>11</td>\n", + " <td>214</td>\n", + " <td>46.93806</td>\n", + " <td>14.91500</td>\n", + " <td>1034.0</td>\n", + " <td>274.55</td>\n", + " <td>265.05</td>\n", + " <td>NaN</td>\n", + " <td>270</td>\n", + " <td>3.0</td>\n", + " <td>11214</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>11</td>\n", + " <td>225</td>\n", + " <td>46.84917</td>\n", + " <td>14.19083</td>\n", + " <td>704.0</td>\n", + " <td>275.05</td>\n", + " <td>264.85</td>\n", + " <td>NaN</td>\n", + " <td>130</td>\n", + " <td>1.0</td>\n", + " <td>11225</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>11</td>\n", + " <td>241</td>\n", + " <td>47.19861</td>\n", + " <td>15.46639</td>\n", + " <td>1443.0</td>\n", + " <td>270.85</td>\n", + " <td>263.55</td>\n", + " <td>NaN</td>\n", + " <td>320</td>\n", + " <td>5.0</td>\n", + " <td>11241</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>11</td>\n", + " <td>274</td>\n", + " <td>47.37833</td>\n", + " <td>15.08667</td>\n", + " <td>544.0</td>\n", + " <td>277.35</td>\n", + " <td>268.35</td>\n", + " <td>NaN</td>\n", + " <td>170</td>\n", + " <td>1.0</td>\n", + " <td>11274</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " blockNumber stationNumber latitude longitude \\\n", + "0 11 8 48.56972 13.99417 \n", + "1 11 20 48.61778 15.20361 \n", + "2 11 21 48.95472 15.03833 \n", + "3 11 32 48.66917 16.63750 \n", + "4 11 72 47.94917 16.82889 \n", + "5 11 78 48.02806 15.58750 \n", + "6 11 119 47.32472 11.17556 \n", + "7 11 129 47.00722 11.51083 \n", + "8 11 138 47.13500 12.62583 \n", + "9 11 140 47.58445 12.69528 \n", + "10 11 141 47.40667 13.22111 \n", + "11 11 161 47.52167 14.95417 \n", + "12 11 170 47.85444 15.06750 
\n", + "13 11 182 47.83222 16.23139 \n", + "14 11 192 47.20750 16.33556 \n", + "15 11 201 46.74639 12.42361 \n", + "16 11 213 46.61806 13.87389 \n", + "17 11 214 46.93806 14.91500 \n", + "18 11 225 46.84917 14.19083 \n", + "19 11 241 47.19861 15.46639 \n", + "20 11 274 47.37833 15.08667 \n", + "\n", + " heightOfStationGroundAboveMeanSeaLevel airTemperature \\\n", + "0 597.0 273.35 \n", + "1 502.0 276.25 \n", + "2 558.0 274.45 \n", + "3 198.0 277.35 \n", + "4 117.0 276.25 \n", + "5 696.0 273.45 \n", + "6 1182.0 272.85 \n", + "7 1412.0 273.15 \n", + "8 2317.0 264.65 \n", + "9 622.0 273.55 \n", + "10 550.0 272.35 \n", + "11 1215.0 270.95 \n", + "12 612.0 274.15 \n", + "13 275.0 277.15 \n", + "14 265.0 274.45 \n", + "15 1081.0 276.05 \n", + "16 493.0 274.95 \n", + "17 1034.0 274.55 \n", + "18 704.0 275.05 \n", + "19 1443.0 270.85 \n", + "20 544.0 277.35 \n", + "\n", + " dewpointTemperature cloudType windDirection windSpeed WMO_station_id \n", + "0 272.95 NaN 260 5.0 11008 \n", + "1 271.55 NaN 240 4.0 11020 \n", + "2 272.05 NaN 270 1.0 11021 \n", + "3 271.75 NaN 230 4.0 11032 \n", + "4 270.45 NaN 190 2.0 11072 \n", + "5 272.75 NaN 210 6.0 11078 \n", + "6 271.95 NaN 100 1.0 11119 \n", + "7 268.25 NaN 280 2.0 11129 \n", + "8 262.25 NaN 170 1.0 11138 \n", + "9 272.65 NaN 330 1.0 11140 \n", + "10 270.55 NaN 320 2.0 11141 \n", + "11 268.15 NaN 340 3.0 11161 \n", + "12 272.65 NaN 30 1.0 11170 \n", + "13 270.55 3.0 0 2.0 11182 \n", + "14 270.15 NaN 230 1.0 11192 \n", + "15 261.85 NaN 130 1.0 11201 \n", + "16 266.45 NaN 100 1.0 11213 \n", + "17 265.05 NaN 270 3.0 11214 \n", + "18 264.85 NaN 130 1.0 11225 \n", + "19 263.55 NaN 320 5.0 11241 \n", + "20 268.35 NaN 170 1.0 11274 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(df_all)" + ] + }, + { + "cell_type": "markdown", + "id": "26cd0da1-9c5f-44c6-8527-09815102c014", + "metadata": {}, + "source": [ + "## Reading Radiosonde information" + ] + }, + { + "cell_type": "code", + 
"execution_count": 33, + "id": "df6a645b-01d2-499d-b2fc-0fe9cd0c393c", + "metadata": {}, + "outputs": [], + "source": [ + "df = pdbufr.read_bufr('/data/raso/BUFR/2022/01/31/IUSD04_LOWM.310300',\n", + " columns=('stationNumber', 'data_datetime', 'pressure', 'airTemperature', 'dewpointTemperature','windDirection', 'windSpeed', \n", + " 'nonCoordinateGeopotentialHeight', 'timePeriod', 'longitude', 'latitude', 'height'),\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "f98cb422-02e2-4351-91d4-b40bc208266f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>stationNumber</th>\n", + " <th>latitude</th>\n", + " <th>longitude</th>\n", + " <th>height</th>\n", + " <th>timePeriod</th>\n", + " <th>pressure</th>\n", + " <th>nonCoordinateGeopotentialHeight</th>\n", + " <th>airTemperature</th>\n", + " <th>dewpointTemperature</th>\n", + " <th>windDirection</th>\n", + " <th>windSpeed</th>\n", + " <th>data_datetime</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>240</td>\n", + " <td>46.99396</td>\n", + " <td>15.44706</td>\n", + " <td>338</td>\n", + " <td>NaN</td>\n", + " <td>100000.0</td>\n", + " <td>140</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2022-01-31 03:15:11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>240</td>\n", + " <td>46.99396</td>\n", + " <td>15.44706</td>\n", + " <td>338</td>\n", + " <td>0.0</td>\n", + " <td>97590.0</td>\n", + " <td>338</td>\n", + " <td>268.45</td>\n", + " 
<td>265.37</td>\n", + " <td>140.0</td>\n", + " <td>1.5</td>\n", + " <td>2022-01-31 03:15:11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>240</td>\n", + " <td>46.99396</td>\n", + " <td>15.44706</td>\n", + " <td>338</td>\n", + " <td>1.0</td>\n", + " <td>97460.0</td>\n", + " <td>349</td>\n", + " <td>271.66</td>\n", + " <td>266.31</td>\n", + " <td>231.0</td>\n", + " <td>0.5</td>\n", + " <td>2022-01-31 03:15:11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>240</td>\n", + " <td>46.99396</td>\n", + " <td>15.44706</td>\n", + " <td>338</td>\n", + " <td>2.0</td>\n", + " <td>97410.0</td>\n", + " <td>353</td>\n", + " <td>272.38</td>\n", + " <td>266.65</td>\n", + " <td>273.0</td>\n", + " <td>0.9</td>\n", + " <td>2022-01-31 03:15:11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>240</td>\n", + " <td>46.99396</td>\n", + " <td>15.44706</td>\n", + " <td>338</td>\n", + " <td>3.0</td>\n", + " <td>97350.0</td>\n", + " <td>358</td>\n", + " <td>272.69</td>\n", + " <td>266.59</td>\n", + " <td>278.0</td>\n", + " <td>1.1</td>\n", + " <td>2022-01-31 03:15:11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3864</th>\n", + " <td>240</td>\n", + " <td>46.99396</td>\n", + " <td>15.44706</td>\n", + " <td>338</td>\n", + " <td>4442.0</td>\n", + " <td>3530.0</td>\n", + " <td>22376</td>\n", + " <td>210.84</td>\n", + " <td>183.63</td>\n", + " <td>258.0</td>\n", + " <td>18.0</td>\n", + " <td>2022-01-31 03:15:11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3865</th>\n", + " <td>240</td>\n", + " <td>46.99396</td>\n", + " <td>15.44706</td>\n", + " <td>338</td>\n", + " <td>4445.0</td>\n", + " <td>3520.0</td>\n", + " <td>22393</td>\n", + " <td>210.72</td>\n", + " 
<td>183.51</td>\n", + " <td>258.0</td>\n", + " <td>17.9</td>\n", + " <td>2022-01-31 03:15:11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3866</th>\n", + " <td>240</td>\n", + " <td>46.99396</td>\n", + " <td>15.44706</td>\n", + " <td>338</td>\n", + " <td>4448.0</td>\n", + " <td>3510.0</td>\n", + " <td>22411</td>\n", + " <td>210.58</td>\n", + " <td>183.39</td>\n", + " <td>258.0</td>\n", + " <td>17.8</td>\n", + " <td>2022-01-31 03:15:11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3867</th>\n", + " <td>240</td>\n", + " <td>46.99396</td>\n", + " <td>15.44706</td>\n", + " <td>338</td>\n", + " <td>4451.0</td>\n", + " <td>3500.0</td>\n", + " <td>22427</td>\n", + " <td>210.50</td>\n", + " <td>183.34</td>\n", + " <td>258.0</td>\n", + " <td>17.7</td>\n", + " <td>2022-01-31 03:15:11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3868</th>\n", + " <td>240</td>\n", + " <td>46.99396</td>\n", + " <td>15.44706</td>\n", + " <td>338</td>\n", + " <td>4624.0</td>\n", + " <td>3000.0</td>\n", + " <td>23386</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2022-01-31 03:15:11</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>3869 rows × 12 columns</p>\n", + "</div>" + ], + "text/plain": [ + " stationNumber latitude longitude height timePeriod pressure \\\n", + "0 240 46.99396 15.44706 338 NaN 100000.0 \n", + "1 240 46.99396 15.44706 338 0.0 97590.0 \n", + "2 240 46.99396 15.44706 338 1.0 97460.0 \n", + "3 240 46.99396 15.44706 338 2.0 97410.0 \n", + "4 240 46.99396 15.44706 338 3.0 97350.0 \n", + "... ... ... ... ... ... ... 
\n", + "3864 240 46.99396 15.44706 338 4442.0 3530.0 \n", + "3865 240 46.99396 15.44706 338 4445.0 3520.0 \n", + "3866 240 46.99396 15.44706 338 4448.0 3510.0 \n", + "3867 240 46.99396 15.44706 338 4451.0 3500.0 \n", + "3868 240 46.99396 15.44706 338 4624.0 3000.0 \n", + "\n", + " nonCoordinateGeopotentialHeight airTemperature dewpointTemperature \\\n", + "0 140 NaN NaN \n", + "1 338 268.45 265.37 \n", + "2 349 271.66 266.31 \n", + "3 353 272.38 266.65 \n", + "4 358 272.69 266.59 \n", + "... ... ... ... \n", + "3864 22376 210.84 183.63 \n", + "3865 22393 210.72 183.51 \n", + "3866 22411 210.58 183.39 \n", + "3867 22427 210.50 183.34 \n", + "3868 23386 NaN NaN \n", + "\n", + " windDirection windSpeed data_datetime \n", + "0 NaN NaN 2022-01-31 03:15:11 \n", + "1 140.0 1.5 2022-01-31 03:15:11 \n", + "2 231.0 0.5 2022-01-31 03:15:11 \n", + "3 273.0 0.9 2022-01-31 03:15:11 \n", + "4 278.0 1.1 2022-01-31 03:15:11 \n", + "... ... ... ... \n", + "3864 258.0 18.0 2022-01-31 03:15:11 \n", + "3865 258.0 17.9 2022-01-31 03:15:11 \n", + "3866 258.0 17.8 2022-01-31 03:15:11 \n", + "3867 258.0 17.7 2022-01-31 03:15:11 \n", + "3868 NaN NaN 2022-01-31 03:15:11 \n", + "\n", + "[3869 rows x 12 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2bbdbbb8-8aa6-495a-90ac-65f434119d8a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Enstools v2021.11 - 3.8", + "language": "python", + "name": "etv2021.11" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Python/QA-011-Reading-ODB.ipynb b/Python/QA-011-Reading-ODB.ipynb new file mode 
100644 index 0000000000000000000000000000000000000000..1742cc2542f11836f50c0c0db16eb35cbcc8f516 --- /dev/null +++ b/Python/QA-011-Reading-ODB.ipynb @@ -0,0 +1,815 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5baf4cdc-a640-47ac-97c9-0684c0d67055", + "metadata": {}, + "source": [ + "# Read ODB files with Python\n", + "\n", + "**What is ODB?**\n", + "ODB (Observation DataBase) is a file-based database-like system developed at ECMWF to store and retrieve large volumes of meteorological observational and feedback data efficiently for use within the IFS.\n", + "\n", + "Currently, ODB files come in two flavours:\n", + "\n", + "- ODB-1 (the original hierarchical table format capable of running in a parallel environment within IFS)\n", + "- ODB-2 (a flat format with a modern API used for archiving in MARS).\n", + "\n", + "Data from ODB can be extracted using the ODB/SQL query language, which is generally a small subset of SQL with some useful extensions.\n", + "\n", + "More information on ODB: [Metview - ODB](https://confluence.ecmwf.int/display/METV/ODB+Overview)\n", + "\n", + "Reading with Python: [PyODC](https://pyodc.readthedocs.io/en/latest/)\n", + "\n", + "Library for reading ODB: [ODC](https://odc.readthedocs.io/en/latest/)" + ] + }, + { + "cell_type": "markdown", + "id": "acd5d785-d591-4647-820a-ad5c86abbef6", + "metadata": {}, + "source": [ + "## Using Python to read ODB files\n", + "\n", + "`pip install --user pyodc`\n", + "\n", + "There are two interfaces: pyodc is pure Python and slow, while codc is fast but requires the odc library to be installed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a275a268-9e81-4cf8-9595-cd469f7b3636", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------- \u001b[1;94m/home/swd/spack/share/spack/modules/linux-rhel8-skylake_avx512\u001b[0m --------\n", + "\u001b[1modc\u001b[22m/1.4.5-gcc-8.5.0 \n" + ] + } + ], + "source": [ + "!module av --no-pager odc" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4075b65a-726f-40cc-a893-16bd99b15518", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------------------------------------------------\n", + "\u001b[1m/home/swd/spack/share/spack/modules/linux-rhel8-skylake_avx512/odc/1.4.5-gcc-8.5.0\u001b[22m:\n", + "\n", + "\u001b[92mmodule-whatis\u001b[0m\t{ECMWF encoding and decoding of observational data in ODB2 format.}\n", + "\u001b[92mmodule\u001b[0m\t\tload eckit/1.24.4-gcc-8.5.0\n", + "\u001b[92mconflict\u001b[0m\todc\n", + "\u001b[92mprepend-path\u001b[0m\t--delim : LIBRARY_PATH /home/swd/spack/opt/spack/linux-rhel8-skylake_avx512/gcc-8.5.0/odc-1.4.5-2jkj7xe2uu672npnmxjiw2z7q5gvqvny/lib64\n", + "\u001b[92mprepend-path\u001b[0m\t--delim : LD_LIBRARY_PATH /home/swd/spack/opt/spack/linux-rhel8-skylake_avx512/gcc-8.5.0/odc-1.4.5-2jkj7xe2uu672npnmxjiw2z7q5gvqvny/lib64\n", + "\u001b[92mprepend-path\u001b[0m\t--delim : CPATH /home/swd/spack/opt/spack/linux-rhel8-skylake_avx512/gcc-8.5.0/odc-1.4.5-2jkj7xe2uu672npnmxjiw2z7q5gvqvny/include\n", + "\u001b[92mprepend-path\u001b[0m\t--delim : INCLUDE /home/swd/spack/opt/spack/linux-rhel8-skylake_avx512/gcc-8.5.0/odc-1.4.5-2jkj7xe2uu672npnmxjiw2z7q5gvqvny/include\n", + "\u001b[92mprepend-path\u001b[0m\t--delim : PATH /home/swd/spack/opt/spack/linux-rhel8-skylake_avx512/gcc-8.5.0/odc-1.4.5-2jkj7xe2uu672npnmxjiw2z7q5gvqvny/bin\n", + "\u001b[92mprepend-path\u001b[0m\t--delim : PKG_CONFIG_PATH 
/home/swd/spack/opt/spack/linux-rhel8-skylake_avx512/gcc-8.5.0/odc-1.4.5-2jkj7xe2uu672npnmxjiw2z7q5gvqvny/lib64/pkgconfig\n", + "\u001b[92mprepend-path\u001b[0m\t--delim : CMAKE_PREFIX_PATH /home/swd/spack/opt/spack/linux-rhel8-skylake_avx512/gcc-8.5.0/odc-1.4.5-2jkj7xe2uu672npnmxjiw2z7q5gvqvny/.\n", + "-------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "!module show --no-pager odc" + ] + }, + { + "cell_type": "markdown", + "id": "49284281-1d26-42e3-8e9d-68918adce223", + "metadata": {}, + "source": [ + "Set the environment variable `ODC_DIR` to the odc installation prefix so that the library can be found and codc can be used." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4b505a3d-259e-43f4-809a-404100d6b82e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: ODC_DIR=/home/swd/spack/opt/spack/linux-rhel8-skylake_avx512/gcc-8.5.0/odc-1.4.5-2jkj7xe2uu672npnmxjiw2z7q5gvqvny\n" + ] + } + ], + "source": [ + "%env ODC_DIR=/home/swd/spack/opt/spack/linux-rhel8-skylake_avx512/gcc-8.5.0/odc-1.4.5-2jkj7xe2uu672npnmxjiw2z7q5gvqvny" + ] + }, + { + "cell_type": "markdown", + "id": "66e490b5-49b4-49a0-a913-d2fd7a756c6c", + "metadata": {}, + "source": [ + "Install the pyodc package:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "65a7f8c0-e2a9-40be-9be2-28ae29a2091a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting pyodc\n", + " Downloading pyodc-1.3.0.tar.gz (28 kB)\n", + " Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing metadata (pyproject.toml) ... 
\u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: pandas in /home/swd/manual/nwp/2023.1/lib/python3.10/site-packages (from pyodc) (1.5.3)\n", + "Requirement already satisfied: cffi in /home/swd/manual/nwp/2023.1/lib/python3.10/site-packages (from pyodc) (1.15.1)\n", + "Requirement already satisfied: pycparser in /home/swd/manual/nwp/2023.1/lib/python3.10/site-packages (from cffi->pyodc) (2.21)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/swd/manual/nwp/2023.1/lib/python3.10/site-packages (from pandas->pyodc) (2022.7.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /home/swd/manual/nwp/2023.1/lib/python3.10/site-packages (from pandas->pyodc) (2.8.2)\n", + "Requirement already satisfied: numpy>=1.21.0 in /home/swd/manual/nwp/2023.1/lib/python3.10/site-packages (from pandas->pyodc) (1.23.5)\n", + "Requirement already satisfied: six>=1.5 in /home/swd/manual/nwp/2023.1/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas->pyodc) (1.16.0)\n", + "Building wheels for collected packages: pyodc\n", + " Building wheel for pyodc (pyproject.toml) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for pyodc: filename=pyodc-1.3.0-py3-none-any.whl size=29866 sha256=a9e092f59bbe53178efcf5a95a88e4682068f9bb36e4113b529d6615fc9ce488\n", + " Stored in directory: /mnt/users/staff/mblaschek/.cache/pip/wheels/9a/08/f0/7fde07980857fb4bec365d72c929d91d7a512c903ae6847e1c\n", + "Successfully built pyodc\n", + "Installing collected packages: pyodc\n", + "Successfully installed pyodc-1.3.0\n" + ] + } + ], + "source": [ + "!pip install --user pyodc" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "44d608b3-a4dc-4a5e-8ffe-8bd865dc98b2", + "metadata": {}, + "outputs": [], + "source": [ + "# import\n", + "import pyodc\n", + "import codc" + ] + }, + { + "cell_type": "markdown", + "id": "390fd1f0-9a2d-4db6-ba0c-6b76a2f5d28f", + "metadata": {}, + "source": [ + "reading an example file of 190MB from an Aeolus experiment." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0ffb5590-80a4-485b-9950-839886782c11", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 3.75 s, sys: 1.2 s, total: 4.95 s\n", + "Wall time: 3.28 s\n" + ] + } + ], + "source": [ + "%%time\n", + "df_decoded = codc.read_odb('../scratch/data/Aeolus/test20201201.odb', single=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4ede231c-3afc-4ac2-80fd-857a1b20771e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>type</th>\n", + " <th>class</th>\n", + " <th>stream</th>\n", + " <th>andate</th>\n", + " 
<th>antime</th>\n", + " <th>reportype</th>\n", + " <th>restricted@hdr</th>\n", + " <th>enda_member@desc</th>\n", + " <th>numtsl@desc</th>\n", + " <th>timeslot@timeslot_index</th>\n", + " <th>...</th>\n", + " <th>arg_lat@sat</th>\n", + " <th>t_ref@aeolus_l2b</th>\n", + " <th>p_ref@aeolus_l2b</th>\n", + " <th>beta@aeolus_l2b</th>\n", + " <th>dhlos_dt@aeolus_l2b</th>\n", + " <th>dhlos_dp@aeolus_l2b</th>\n", + " <th>dhlos_dbeta@aeolus_l2b</th>\n", + " <th>horiz_length@aeolus_l2b</th>\n", + " <th>vert_length@aeolus_l2b</th>\n", + " <th>expver</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>263</td>\n", + " <td>2</td>\n", + " <td>1247</td>\n", + " <td>20201201</td>\n", + " <td>0</td>\n", + " <td>45001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>25</td>\n", + " <td>1</td>\n", + " <td>...</td>\n", + " <td>5.679873</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>11243.0</td>\n", + " <td>1010.0</td>\n", + " <td>hls0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>263</td>\n", + " <td>2</td>\n", + " <td>1247</td>\n", + " <td>20201201</td>\n", + " <td>0</td>\n", + " <td>45001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>25</td>\n", + " <td>1</td>\n", + " <td>...</td>\n", + " <td>5.679873</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>11243.0</td>\n", + " <td>1261.0</td>\n", + " <td>hls0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>263</td>\n", + " <td>2</td>\n", + " <td>1247</td>\n", + " <td>20201201</td>\n", + " <td>0</td>\n", + " <td>45001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>25</td>\n", + " <td>1</td>\n", + " <td>...</td>\n", + " <td>5.679873</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " 
<td>NaN</td>\n", + " <td>14053.0</td>\n", + " <td>1009.0</td>\n", + " <td>hls0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>263</td>\n", + " <td>2</td>\n", + " <td>1247</td>\n", + " <td>20201201</td>\n", + " <td>0</td>\n", + " <td>45001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>25</td>\n", + " <td>1</td>\n", + " <td>...</td>\n", + " <td>5.679873</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>14053.0</td>\n", + " <td>757.0</td>\n", + " <td>hls0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>263</td>\n", + " <td>2</td>\n", + " <td>1247</td>\n", + " <td>20201201</td>\n", + " <td>0</td>\n", + " <td>45001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>25</td>\n", + " <td>1</td>\n", + " <td>...</td>\n", + " <td>5.679873</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>8432.0</td>\n", + " <td>757.0</td>\n", + " <td>hls0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1513516</th>\n", + " <td>263</td>\n", + " <td>2</td>\n", + " <td>1247</td>\n", + " <td>20201201</td>\n", + " <td>120000</td>\n", + " <td>45001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>25</td>\n", + " <td>24</td>\n", + " <td>...</td>\n", + " <td>2.268997</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " 
<td>NaN</td>\n", + " <td>5618.0</td>\n", + " <td>1008.0</td>\n", + " <td>hls0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1513517</th>\n", + " <td>263</td>\n", + " <td>2</td>\n", + " <td>1247</td>\n", + " <td>20201201</td>\n", + " <td>120000</td>\n", + " <td>45001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>25</td>\n", + " <td>24</td>\n", + " <td>...</td>\n", + " <td>2.268997</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2809.0</td>\n", + " <td>1008.0</td>\n", + " <td>hls0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1513518</th>\n", + " <td>263</td>\n", + " <td>2</td>\n", + " <td>1247</td>\n", + " <td>20201201</td>\n", + " <td>120000</td>\n", + " <td>45001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>25</td>\n", + " <td>24</td>\n", + " <td>...</td>\n", + " <td>2.268997</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2809.0</td>\n", + " <td>1007.0</td>\n", + " <td>hls0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1513519</th>\n", + " <td>263</td>\n", + " <td>2</td>\n", + " <td>1247</td>\n", + " <td>20201201</td>\n", + " <td>120000</td>\n", + " <td>45001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>25</td>\n", + " <td>24</td>\n", + " <td>...</td>\n", + " <td>2.267844</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>8426.0</td>\n", + " <td>504.0</td>\n", + " <td>hls0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1513520</th>\n", + " <td>263</td>\n", + " <td>2</td>\n", + " <td>1247</td>\n", + " <td>20201201</td>\n", + " <td>120000</td>\n", + " <td>45001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>25</td>\n", + " <td>24</td>\n", + " <td>...</td>\n", + " <td>2.267844</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " 
<td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>14044.0</td>\n", + " <td>503.0</td>\n", + " <td>hls0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>1513521 rows × 61 columns</p>\n", + "</div>" + ], + "text/plain": [ + " type class stream andate antime reportype restricted@hdr \\\n", + "0 263 2 1247 20201201 0 45001 0 \n", + "1 263 2 1247 20201201 0 45001 0 \n", + "2 263 2 1247 20201201 0 45001 0 \n", + "3 263 2 1247 20201201 0 45001 0 \n", + "4 263 2 1247 20201201 0 45001 0 \n", + "... ... ... ... ... ... ... ... \n", + "1513516 263 2 1247 20201201 120000 45001 0 \n", + "1513517 263 2 1247 20201201 120000 45001 0 \n", + "1513518 263 2 1247 20201201 120000 45001 0 \n", + "1513519 263 2 1247 20201201 120000 45001 0 \n", + "1513520 263 2 1247 20201201 120000 45001 0 \n", + "\n", + " enda_member@desc numtsl@desc timeslot@timeslot_index ... \\\n", + "0 0 25 1 ... \n", + "1 0 25 1 ... \n", + "2 0 25 1 ... \n", + "3 0 25 1 ... \n", + "4 0 25 1 ... \n", + "... ... ... ... ... \n", + "1513516 0 25 24 ... \n", + "1513517 0 25 24 ... \n", + "1513518 0 25 24 ... \n", + "1513519 0 25 24 ... \n", + "1513520 0 25 24 ... \n", + "\n", + " arg_lat@sat t_ref@aeolus_l2b p_ref@aeolus_l2b beta@aeolus_l2b \\\n", + "0 5.679873 NaN NaN NaN \n", + "1 5.679873 NaN NaN NaN \n", + "2 5.679873 NaN NaN NaN \n", + "3 5.679873 NaN NaN NaN \n", + "4 5.679873 NaN NaN NaN \n", + "... ... ... ... ... \n", + "1513516 2.268997 NaN NaN NaN \n", + "1513517 2.268997 NaN NaN NaN \n", + "1513518 2.268997 NaN NaN NaN \n", + "1513519 2.267844 NaN NaN NaN \n", + "1513520 2.267844 NaN NaN NaN \n", + "\n", + " dhlos_dt@aeolus_l2b dhlos_dp@aeolus_l2b dhlos_dbeta@aeolus_l2b \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "... ... ... ... 
\n", + "1513516 NaN NaN NaN \n", + "1513517 NaN NaN NaN \n", + "1513518 NaN NaN NaN \n", + "1513519 NaN NaN NaN \n", + "1513520 NaN NaN NaN \n", + "\n", + " horiz_length@aeolus_l2b vert_length@aeolus_l2b expver \n", + "0 11243.0 1010.0 hls0 \n", + "1 11243.0 1261.0 hls0 \n", + "2 14053.0 1009.0 hls0 \n", + "3 14053.0 757.0 hls0 \n", + "4 8432.0 757.0 hls0 \n", + "... ... ... ... \n", + "1513516 5618.0 1008.0 hls0 \n", + "1513517 2809.0 1008.0 hls0 \n", + "1513518 2809.0 1007.0 hls0 \n", + "1513519 8426.0 504.0 hls0 \n", + "1513520 14044.0 503.0 hls0 \n", + "\n", + "[1513521 rows x 61 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(df_decoded)" + ] + }, + { + "cell_type": "markdown", + "id": "1f9313c7-91c4-4528-bd8d-00c110eada98", + "metadata": {}, + "source": [ + "this is the pure python version of reading odb files" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4d47f510-a30c-4f0e-83f9-6a830f3904e4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1min 4s, sys: 1.79 s, total: 1min 5s\n", + "Wall time: 1min 5s\n" + ] + } + ], + "source": [ + "%%time\n", + "df_decoded = pyodc.read_odb('../scratch/data/Aeolus/test20201201.odb', single=True)" + ] + }, + { + "cell_type": "markdown", + "id": "92a9e3f7-1b16-47cb-9af4-ff53699254e1", + "metadata": {}, + "source": [ + "## using ODC\n", + "\n", + "odc is also a [command line tool](https://odc.readthedocs.io/en/latest/content/tools.html) to query an odb file or create a subset.\n", + "You need to load the module and then you can execute a command with odc syntax.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "005f7b1d-d77a-4dd4-9120-ffc8258f5dca", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading odc/1.4.5-gcc-8.5.0\n", + " Loading requirement: eckit/1.24.4-gcc-8.5.0\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "compare:\tCompares two ODB files\n", + "Usage:\n", + "\tcompare [-excludeColumns <list-of-columns>] [-excludeColumnsTypes <list-of-columns>] [-dontCheckMissing] <file1.odb> <file2.odb>\n", + "\n", + "count:\tCounts number of rows in files\n", + "Usage:\n", + "\tcount <file.odb>\n", + "\n", + "header:\tShows header(s) and metadata(s) of file\n", + "Usage:\n", + "\theader [-offsets] [-ddl] [-table <table-name-in-the-generated-ddl>] <file-name>\n", + "\n", + "import:\tImports data from a text file\n", + "Usage:\n", + "\timport\t[-d delimiter] <input.file> <output.file>\n", + "\n", + "\tdelimiter can be a single character (e.g.: ',') or TAB. As a data example:\n", + "\n", + "\tcol1:INTEGER,col2:REAL\n", + "\t1,2.0\n", + "\t3,4.0\n", + "\n", + "\n", + "index:\tCreates index of reports for a given file\n", + "Usage:\n", + "\tindex <file.odb> [<file.odb.idx>] \n", + "\n", + "\tSpecifically the index file is an ODB file with (INTEGER) columns: block_begin, block_length, seqno, n_rows\n", + "\tOne entry is made for each unique seqno - block pair within the source ODB file.\n", + "\n", + "\n", + "ls:\tShows file's contents\n", + "Usage:\n", + "\tls [-o <output-file>] <file-name>\n", + "\n", + "\n", + "\n", + "mdset:\tCreates a new file resetting types or values (constants only) of columns.\n", + "Usage:\n", + "\tmdset <update-list> <input.odb> <output.odb>\n", + "\n", + "\t<update-list> is a comma separated list of expressions of the form:\n", + "\t <column-name> : <type> = <value>\n", + "\n", + "\t<type> can be one of: integer, real, double, string. If ommited, the existing type of the column will not be changed.\n", + "\tBoth type and value are optional; at least one of the two should be present. 
For example:\n", + "\t odb mdset \"expver=' 0008'\" input.odb patched.odb \n", + "\n", + "\n", + "merge:\tMerges rows from files\n", + "Usage:\n", + "\tmerge -o <output-file.odb> <input1.odb> <input2.odb> ...\n", + "\n", + "\t or \n", + "\n", + "merge\t -S -o <output-file.odb> <input1.odb> <sql-select1> <input2.odb> <sql-select2> ...\n", + "\n", + "\n", + "set:\tCreates a new file setting columns to given values\n", + "Usage:\n", + "\tset <update-list> <input.odb> <output.odb>\n", + "\n", + "split:\tSplits file according to given template\n", + "Usage:\n", + "\tsplit [-no_verification] [-maxopenfiles <N>] <input.odb> <output_template.odb>\n", + "\n", + "sql:\tExecutes SQL statement\n", + "Usage:\n", + "\tsql <select-statement> | <script-filename>\n", + " [-T] Disables printing of column names\n", + " [-offset <offset>] Start processing file at a given offset\n", + " [-length <length>] Process only given bytes of data\n", + " [-N] Do not write NULLs, but proper missing data values\n", + " [-i <inputfile>] ODB input file\n", + " [-o <outputfile>] ODB output file\n", + " [-f default|wide|ascii|odb] ODB output format (odb is binary ODB, ascii and wide are ascii formatted with bitfield definitions in header. 
Default is ascii on stdout and odb to file)\n", + " [-delimiter <delim>] Changes the default values' delimiter (TAB by default)\n", + " delim can be any character or string\n", + " [--binary|--bin] Print bitfields in binary notation\n", + " [--no_alignment] Do not align columns\n", + " [--full_precision] Print with full precision\n", + "\n", + "\n" + ] + } + ], + "source": [ + "%%bash\n", + "# just load the module in this cell.\n", + "module load odc\n", + "# show help\n", + "odc help" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "96aa6e97-3439-42e3-88d0-f5de527885b7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading odc/1.4.5-gcc-8.5.0\n", + " Loading requirement: eckit/1.24.4-gcc-8.5.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " type\texpver \t class\t stream\t andate\t antime\t reportype\trestricted@hdr\tenda_member@desc\t numtsl@desc\ttimeslot@timeslot_index\t seqno@hdr\t bufrtype@hdr\t subtype@hdr\t groupid@hdr\t obstype@hdr\t codetype@hdr\t sensor@hdr\t date@hdr\t time@hdr\t rdbdate@hdr\t rdbtime@hdr\treport_status@hdr\treport_event1@hdr\t report_rdbflag@hdr\t lat@hdr\t lon@hdr\t lsm@modsurf\tseaice@modsurf\t entryno@body\t obsvalue@body\t varno@body\tvertco_type@body\tvertco_reference_1@body\t datum_anflag@body\tdatum_status@body\t datum_event1@body\t datum_rdbflag@body\t biascorr@body\tbiascorr_fg@body\t qc_pge@body\t an_depar@body\t fg_depar@body\tobs_error@errstat\tfinal_obs_error@errstat\tfg_error@errstat\teda_spread@errstat\t azimuth@sat\t retrtype@hdr\t zenith@sat\t range@sat\t arg_lat@sat\tt_ref@aeolus_l2b\tp_ref@aeolus_l2b\tbeta@aeolus_l2b\tdhlos_dt@aeolus_l2b\tdhlos_dp@aeolus_l2b\tdhlos_dbeta@aeolus_l2b\thoriz_length@aeolus_l2b\tvert_length@aeolus_l2b\tconf_flag@aeolus_l2b\n", + " 263\t' hls0'\t 2\t 1247\t 20201201\t 0\t 45001\t 0\t 0\t 25\t 1\t 1\t 23\t 251\t 46\t 15\t 187\t NULL\t 20201130\t 210238\t 20201130\t 214455\t 12\t 2\t 0\t 
-5.706610\t 131.284409\t 0.000000\t 0.000000\t 1\t 64.730003\t 187\t 1\t 5820.886719\t 0\t 12\t 131584\t 0\t 0.000000\t 0.000000\t NULL\t 63.164753\t 64.002914\t 37.153870\t 37.153870\t 1.076056\t 0.976365\t 1.750565\t 210065741\t 0.927293\t 363476.000000\t 5.679873\t NULL\t NULL\t NULL\t NULL\t NULL\t NULL\t 11243.000000\t 1010.000000\t 1\n", + " 263\t' hls0'\t 2\t 1247\t 20201201\t 0\t 45001\t 0\t 0\t 25\t 1\t 2\t 23\t 251\t 46\t 15\t 187\t NULL\t 20201130\t 210238\t 20201130\t 214455\t 12\t 2\t 0\t -5.705090\t 131.276031\t 0.000000\t 0.000000\t 1\t NULL\t 187\t 1\t 7051.859375\t 0\t 12\t 131590\t 0\t 0.000000\t 0.000000\t NULL\t NULL\t NULL\t 122.078880\t 122.078880\t 1.201507\t 1.122098\t 1.750565\t 210065742\t 0.927119\t 364895.000000\t 5.679873\t NULL\t NULL\t NULL\t NULL\t NULL\t NULL\t 11243.000000\t 1261.000000\t 1\n", + " 263\t' hls0'\t 2\t 1247\t 20201201\t 0\t 45001\t 0\t 0\t 25\t 1\t 3\t 23\t 251\t 46\t 15\t 187\t NULL\t 20201130\t 210238\t 20201130\t 214455\t 12\t 2\t 0\t -5.703870\t 131.269333\t 0.000000\t 0.000000\t 1\t 105.169998\t 187\t 1\t 8552.428711\t 48\t 12\t 131712\t 0\t 0.000000\t 0.000000\t NULL\t 94.133278\t 95.480774\t 12.658989\t 12.658989\t 1.487877\t 1.433541\t 1.750565\t 210065743\t 0.927119\t 366315.000000\t 5.679873\t NULL\t NULL\t NULL\t NULL\t NULL\t NULL\t 14053.000000\t 1009.000000\t 1\n", + " 263\t' hls0'\t 2\t 1247\t 20201201\t 0\t 45001\t 0\t 0\t 25\t 1\t 4\t 23\t 251\t 46\t 15\t 187\t NULL\t 20201130\t 210238\t 20201130\t 214455\t 12\t 2\t 0\t -5.702960\t 131.264297\t 0.000000\t 0.000000\t 1\t 0.770000\t 187\t 1\t 9995.666016\t 0\t 12\t 131584\t 0\t 0.000000\t 0.000000\t NULL\t -17.179775\t -16.547409\t 9.256155\t 9.256155\t 1.853304\t 1.855463\t 1.750565\t 210065744\t 0.926944\t 367419.000000\t 5.679873\t NULL\t NULL\t NULL\t NULL\t NULL\t NULL\t 14053.000000\t 757.000000\t 1\n", + " 263\t' hls0'\t 2\t 1247\t 20201201\t 0\t 45001\t 0\t 0\t 25\t 1\t 5\t 23\t 251\t 46\t 15\t 187\t NULL\t 20201130\t 210238\t 20201130\t 
214455\t 12\t 2\t 0\t -5.702040\t 131.259262\t 0.000000\t 0.000000\t 1\t 16.790001\t 187\t 1\t 11442.984375\t 0\t 12\t 131584\t 0\t 0.000000\t 0.000000\t NULL\t -3.046090\t -1.749628\t 5.420001\t 5.420001\t 1.909041\t 1.867477\t 1.750565\t 210065745\t 0.926944\t 368365.000000\t 5.679873\t NULL\t NULL\t NULL\t NULL\t NULL\t NULL\t 8432.000000\t 757.000000\t 1\n", + " 263\t' hls0'\t 2\t 1247\t 20201201\t 0\t 45001\t 0\t 0\t 25\t 1\t 6\t 23\t 251\t 46\t 15\t 187\t NULL\t 20201130\t 210238\t 20201130\t 214455\t 12\t 2\t 0\t -5.701120\t 131.254227\t 0.000000\t 0.000000\t 1\t 15.060000\t 187\t 1\t 13047.831055\t 0\t 12\t 131584\t 0\t 0.000000\t 0.000000\t NULL\t -1.049524\t 0.238602\t 7.846665\t 7.846665\t 1.912062\t 1.824771\t 1.750565\t 210065746\t 0.926770\t 369311.000000\t 5.679873\t NULL\t NULL\t NULL\t NULL\t NULL\t NULL\t 2811.000000\t 757.000000\t 1\n", + " 263\t' hls0'\t 2\t 1247\t 20201201\t 0\t 45001\t 0\t 0\t 25\t 1\t 7\t 23\t 251\t 46\t 15\t 187\t NULL\t 20201130\t 210238\t 20201130\t 214455\t 12\t 2\t 0\t -5.723190\t 131.234482\t 0.000000\t 0.000000\t 1\t -2.730000\t 187\t 1\t 18914.039062\t 0\t 12\t 131584\t 0\t 0.000000\t 0.000000\t NULL\t -11.962290\t -11.183959\t 24.755920\t 24.755920\t 1.733299\t 1.617335\t 1.750565\t 210065747\t 0.926595\t 372150.000000\t 5.679873\t NULL\t NULL\t NULL\t NULL\t NULL\t NULL\t 8431.000000\t 757.000000\t 1\n", + " 263\t' hls0'\t 2\t 1247\t 20201201\t 0\t 45001\t 0\t 0\t 25\t 1\t 8\t 23\t 251\t 46\t 15\t 187\t NULL\t 20201130\t 210238\t 20201130\t 214455\t 12\t 2\t 0\t -5.697460\t 131.234085\t 0.000000\t 0.000000\t 1\t 10.090000\t 187\t 1\t 21243.236328\t 0\t 12\t 131584\t 0\t 0.000000\t 0.000000\t NULL\t 1.059619\t 2.259655\t 8.975661\t 8.975661\t 1.543846\t 1.432204\t 1.750565\t 210065748\t 0.926421\t 373096.000000\t 5.679873\t NULL\t NULL\t NULL\t NULL\t NULL\t NULL\t 8432.000000\t 757.000000\t 0\n", + " 263\t' hls0'\t 2\t 1247\t 20201201\t 0\t 45001\t 0\t 0\t 25\t 1\t 9\t 23\t 251\t 46\t 15\t 187\t NULL\t 20201130\t 
210238\t 20201130\t 214455\t 12\t 2\t 0\t -5.721360\t 131.224396\t 0.000000\t 0.000000\t 1\t 25.500000\t 187\t 1\t 23785.876953\t 0\t 12\t 131584\t 0\t 0.000000\t 0.000000\t NULL\t 18.075672\t 19.681799\t 11.561797\t 11.561797\t 1.464802\t 1.366932\t 1.750565\t 210065749\t 0.926421\t 374042.000000\t 5.679873\t NULL\t NULL\t NULL\t NULL\t NULL\t NULL\t 2811.000000\t 757.000000\t 1\n" + ] + } + ], + "source": [ + "%%bash\n", + "module load odc\n", + "# select only analysis time 0\n", + "odc sql 'select * where antime=0' -i ../scratch/data/Aeolus/test20201201.odb -f ascii | head" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48ec9e3c-eb5f-45bb-bc9c-988621ea5996", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nwp 2023.1 - 3.10", + "language": "python", + "name": "nwp2023.1" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Python/README.md b/Python/README.md index 27a3bf23cd9dfc149a00ea66cab2907959193978..2a864f224347c7679a58a0d466fcf1a6f85488bc 100644 --- a/Python/README.md +++ b/Python/README.md @@ -135,6 +135,16 @@ You just need to add a configuration option and then you can open the Dashboard If you need to get a better understanding of you functions memory and execution time, try these profiling options. +## Q: How to read BUFR files with python? +[BUFR](QA-010-Reading-Bufr-Synop.ipynb) + +ECMWF created a python package that helps to read BUFR messages via ECCODES into pandas DataFrame. + +## Q: How to read ODB files with python? +[ODB](QA-011-Reading-ODB.ipynb) + +ECMWF created a python package that helps to read ODB messages via ODC or pure Python into pandas DataFrame. 
+ ## Q: How to ignore user site packages? diff --git a/Python/bufr2synop.py b/Python/bufr2synop.py deleted file mode 100644 index 7c955814831bf0c30e70cd700da2f0371220068c..0000000000000000000000000000000000000000 --- a/Python/bufr2synop.py +++ /dev/null @@ -1,15 +0,0 @@ - -import eccodes -import pdbufr - - keys = ['blockNumber', - 'stationNumber', -'latitude', -'longitude', -'airTemperature', -'dewpointTemperature', - 'windSpeed', -'windDirection', -] - -df = pdbufr.read_bufr('ISMD22_LOWM.010000', columns=keys) \ No newline at end of file diff --git a/SSH-VPN-VNC/SSH.md b/SSH-VPN-VNC/SSH.md index 3936fad49d1d9606e6ad54fd69e3c15aab18c476..19aedade1a9889d54f9fc694a7661d9440bf31c9 100644 --- a/SSH-VPN-VNC/SSH.md +++ b/SSH-VPN-VNC/SSH.md @@ -77,7 +77,7 @@ Host a?-* a??-* hpc-* hpc2020-* ecs-* and replacing `[USERNAME]` and `[u:account USERNAME]` with your usernames. Using such a file allows to connect like this `ssh srvx1` using the correct server adress and specified username. Copy this file as well on `login.univie.ac.at` and you can use commands like this: `ssh -t login ssh jet` to connect directly to `jet` via the `login` gateway. -Please note the special algorithms for ecaccess and of course ECMWF uses [teleport](../ECMWF.md#connecting-to-ecmwf-services) now. +Please note the special algorithms for ecaccess and of course ECMWF uses [teleport](../ECMWF/README.md#connecting-to-ecmwf-services) now. **From eduroam**: You should be able to log in as above. diff --git a/Servers/SRVX1.md b/Servers/SRVX1.md index c488141bf8c2ad5d62686f48fe06191b74f23454..35dd850bc83631a3b2a2b4855f2d0fd0b25bb00d 100644 --- a/Servers/SRVX1.md +++ b/Servers/SRVX1.md @@ -10,7 +10,7 @@ Steps: 1. Request access / will be done for you by your supervisor. 2. As Staff, access using SSH - [How to SSH / VNC / VPN](../SSH-VPN-VNC/README.md) -3. As Student, access using Teaching Hub - [How to connect using the TeachingHub](../TeachingHub.md) +3. 
As Student, access using Teaching Hub - [How to connect using the TeachingHub](../Students/TeachingHub.md) ## System information | Name | Value | @@ -39,7 +39,7 @@ Steps: ## Jupyterhub <img src="../mkdocs/img/jupyterhub-logo.svg" width="150px"> -SRVX1 serves a teaching [jupyterhub](https://jupyterhub.readthedocs.io/en/stable/) with a [jupyterlab](https://jupyterlab.readthedocs.io/en/stable/). It allows easy access for students and teachers. Access: [https://srvx1.img.univie.ac.at/hub](https://srvx1.img.univie.ac.at/hub) +SRVX1 serves a teaching [jupyterhub](https://jupyterhub.readthedocs.io/en/stable/) with a [jupyterlab](https://jupyterlab.readthedocs.io/en/stable/). It allows easy access for students and teachers. Access: [https://teachinghub.wolke.img.univie.ac.at](https://teachinghub.wolke.img.univie.ac.at) Signup is only granted by teachers and requires a srvx1 user account. A new password is needed and a TOTP (time base one-time password) will be created. diff --git a/Students/README.md b/Students/README.md index f247d3a8b6f6a2fd01e3aaefe2a1a3e73e90b6de..afcde42fa8dd71b390ca54760a4e12a080999c73 100644 --- a/Students/README.md +++ b/Students/README.md @@ -5,20 +5,36 @@ To make you life easier we have listed a few things here that you should know to ???+ warning "since winter semester 2023" There is the [old teachinghub](./TeachingHub.md) (will retire 31.3.2024) and the new teachinghub, which is handled via Moodle. Your lecturerer will give you access. +## Guidelines + +Access to computational resources at IMGW is granted to the user by employment at the university or by an IMGW sponsor. Access is disabled on the termination date without further notice. An extension of the allocation may be negotiated with the responsible Admin or IMGW sponsor. The user accepts the following responsibilities: + +- Computers and information systems must be used in an ethical and legal manner. 
+- The user agrees not to duplicate or use copyrighted or proprietary software without proper authorization. +- The user may not use computers and information systems in any manner for any business, professional, or other activity that is unrelated to the purpose of the resource allocation. +- The user is required to acknowledge the use of IMGW resources in any resulting publications. +- The user is responsible for protecting her/his access credentials and/or passwords. +- The user may not share her/his account privileges with anyone or knowingly permit any unauthorized access to a computer, computer privileges, systems, networks, or programs. The accounts of those involved will be disabled if sharing is detected. +- The user is responsible for backing up critical data to protect it against loss or corruption. The user is also responsible for understanding the usage and data retention policies for the file system and data archive resources used. +- The user agrees to report potential security breaches as soon as possible to the responsible Admin or IMGW sponsor. +- The user is responsible for ensuring that IMGW has her/his current contact information, including phone number, email address, and mailing address. If the user’s name, phone number, email address, or other information changes, the responsible Admin or IMGW sponsor must be promptly notified. + ## Master Students -Great idea to start a master @ IMGW. Your supervisor will ask for an account for you and you will retrieve login credentials. +Great idea to start a master @ IMGW. Your supervisor will request a server account and you will retrieve login credentials. Please take a look at the [guidelines](#guidelines) above and acknowledge them. 
-There are three options: +As an employee or master student you can get access to these resources: - access to [SRVX1](../Servers/SRVX1.md) - access to [JET](../Servers/JET.md) - access to [VSC](../VSC.md) + - access to [ECMWF](../ECMWF/README.md) Most people at the department use [gitlab](https://gitlab.phaidra.org) and [mattermost](https://discuss.phaidra.org), which your supervisor can arange for you to get an account. -It might be useful to familiarize yourself with [git](../Git/README.md) +It might be useful to familiarize yourself with [git](../Git/README.md) and create a project dedicated to your master thesis on gitlab. Your supervisor can help you. Writing your thesis can be fascilitated by using [Overleaf](https://www.overleaf.de) and a [template](https://www.overleaf.com/read/ptpskhdqmqpt#566d55). As student at the University of Vienna, you can login with your u:account. +More templates from our department can be found [here (wiki)](https://wiki.univie.ac.at/x/VBURC) ## Useful links