diff --git a/pangeo/load_data.ipynb b/pangeo/load_data.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..0375e485d6aec01e3f0f257f2443cd787655e36b
--- /dev/null
+++ b/pangeo/load_data.ipynb
@@ -0,0 +1,237 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# How to access and load TRACMIP data from the Pangeo cloud\n",
+    "\n",
+    "1. \"load\" the Tracmip collection\n",
+    "2. get some basic info on the Tracmip collection\n",
+    "3. load monthly mean precip for the aquaControl simulation\n",
+    "4. plot meridional zonal-mean time-mean profile for one model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from matplotlib import pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import xarray as xr\n",
+    "import zarr\n",
+    "import gcsfs\n",
+    "from intake import open_catalog\n",
+    "\n",
+    "xr.set_options(display_style='html')\n",
+    "%matplotlib inline\n",
+    "%config InlineBackend.figure_format = 'retina'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv('https://storage.googleapis.com/cmip6/tracmip.csv')\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_pr = df.query(\"frequency == 'Amon' & variable == 'pr' & experiment == 'aquaControl'\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. \"Load\" Tracmip collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get whole pangeo catalogue\n",
+    "cat = open_catalog(\"https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/climate.yaml\")\n",
+    "# select tracmip collection\n",
+    "col = cat.tracmip()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Basic info on the collection"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "print collection to screen: this shows that there are 3 output frequencies (monthly-mean, daily-mean, 3-hr snapshots),\n",
+    "11 experiments (6 are due to the CALTECH model with changed atmospheric opacity), and 47 variables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "col"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "print starting and end portion of the collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "col.df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "col.df.tail()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "print some further information on the collection (i.e., dataframe)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "col.df.columns.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "col.df.model.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "col.df.experiment.unique()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Now actually load the monthly-mean precip data for the aquaControl experiment, use a dictionary for this"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "note: the option \"zarr_kwargs={'consolidated': True}\" for to_dataset_dict does not seem necessary but is still included here"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds_dict = col.search(frequency=\"Amon\", experiment=\"aquaControl\",\n",
+    "                     variable=\"pr\").to_dataset_dict(zarr_kwargs={'consolidated': True})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Plot zonal-mean time-mean precip for last 20 years for CNRM-AM5 model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# key of the dataset inspected and plotted below\n",
+    "model_key = 'CNRM-AM5.aquaControl.Amon'\n",
+    "ds_model = ds_dict[model_key]\n",
+    "ds_model['pr']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# time indices 120-359 span 240 monthly steps, i.e. 20 years\n",
+    "pr_profile = ds_model['pr'].isel(time=slice(120, 360)).mean(['lon', 'time'])\n",
+    "\n",
+    "fig, ax = plt.subplots()\n",
+    "# NOTE(review): the factor 86400 (seconds per day) presumably converts kg m-2 s-1 to mm/day -- confirm the units of pr\n",
+    "ax.plot(ds_model.lat, pr_profile * 86400)\n",
+    "ax.set_xlabel('degree latitude')\n",
+    "ax.set_ylabel('precipitation (mm/day)')\n",
+    "ax.set_title(model_key);"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}