From df21827e2536126ded2a1617c32bc89506a0022c Mon Sep 17 00:00:00 2001
From: Aiko Voigt <aiko.voigt@univie.ac.at>
Date: Wed, 10 Mar 2021 15:50:43 +0100
Subject: [PATCH] Not working first example of loading data from Pangeo cloud

---
 pangeo/load_data.ipynb | 252 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 252 insertions(+)
 create mode 100644 pangeo/load_data.ipynb

diff --git a/pangeo/load_data.ipynb b/pangeo/load_data.ipynb
new file mode 100644
index 0000000..0375e48
--- /dev/null
+++ b/pangeo/load_data.ipynb
@@ -0,0 +1,252 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# How to access and load TRACMIP data from the Pangeo cloud\n",
+    "\n",
+    "1. \"load\" the Tracmip collection \n",
+    "2. get some basic info on Tracmip collection\n",
+    "3. load monthly mean precip for the aquaControl simulation\n",
+    "4. plot meridional zonal-mean time-mean profile for one model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from matplotlib import pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import xarray as xr\n",
+    "import zarr\n",
+    "import gcsfs\n",
+    "\n",
+    "xr.set_options(display_style='html')\n",
+    "%matplotlib inline\n",
+    "%config InlineBackend.figure_format = 'retina' "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv('https://storage.googleapis.com/cmip6/tracmip.csv')\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'df' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-3-36bdea2f3883>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_pr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"frequency == 'Amon' & variable == 'pr' & experiment == 'aquaControl'\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "df_pr = df.query(\"frequency == 'Amon' & variable == 'pr' & experiment == 'aquaControl'\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. \"Load\" Tracmip collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from intake import open_catalog\n",
+    "\n",
+    "# get whole pangeo catalogue\n",
+    "cat = open_catalog(\"https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/climate.yaml\")\n",
+    "# select tracmip collection\n",
+    "col = cat.tracmip()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Basic info on the collection"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "print collection to screen: this shows that there is 3 output frequencies (monthly-mean, daily-mean, 3-hr snapshots), \n",
+    "11 experiments (6 are due to the CALTECH model with changed atmosperic opacity), and 47 variables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "col"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "print starting and end portion of the collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "col.df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "col.df.tail()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "print some further information on the collection (i.e., dataframe)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "col.df.columns.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "col.df.model.unique()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "col.df.experiment.unique()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Now actually load the monthly-mean precip data for the aquaControl experiment, use a dictionary for this"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "note: the option \"zarr_kwargs={'consolidated': True}\" for to_dataset_dicts does not seem necessary but is still included here"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds_dict = col.search(frequency=\"Amon\", experiment=\"aquaControl\",\n",
+    "                     variable=\"pr\").to_dataset_dict(zarr_kwargs={'consolidated': True})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Plot zonal-mean time-mean precip for last 20 years for CNRM-AM5 model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds_dict['CNRM-AM5.aquaControl.Amon']['pr']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.plot(ds_dict['CNRM-AM5.aquaControl.Amon'].lat, \n",
+    "         ds_dict['CNRM-AM5.aquaControl.Amon']['pr'].isel(time=slice(120,360)).mean(['lon', 'time'])*86400)\n",
+    "plt.xlabel('degree latitude')\n",
+    "plt.ylabel('precipitation (mm/day)')\n",
+    "plt.title('CNRM-AM5.aquaControl.Amon')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
-- 
GitLab