From 3e01ae5ce72c02aa656e23769eac7b29049d1cd4 Mon Sep 17 00:00:00 2001 From: Tom White Date: Wed, 15 May 2024 12:33:36 +0100 Subject: [PATCH] Add notebooks for Pangeo examples at https://github.com/pangeo-data/distributed-array-examples --- .gitignore | 1 + examples/cubed.svg | 56 - examples/pangeo-1-vorticity.ipynb | 4140 ++++++++ examples/pangeo-2-quadratic-means.ipynb | 2463 +++++ examples/pangeo-3-tem.ipynb | 8318 +++++++++++++++++ .../pangeo-4-climatological-anomalies.ipynb | 2493 +++++ examples/pangeo-tem.ipynb | 2711 ------ examples/pangeo-vorticity.ipynb | 1318 --- examples/pangeo-vorticity.svg | 472 - 9 files changed, 17415 insertions(+), 4557 deletions(-) delete mode 100644 examples/cubed.svg create mode 100644 examples/pangeo-1-vorticity.ipynb create mode 100644 examples/pangeo-2-quadratic-means.ipynb create mode 100644 examples/pangeo-3-tem.ipynb create mode 100644 examples/pangeo-4-climatological-anomalies.ipynb delete mode 100644 examples/pangeo-tem.ipynb delete mode 100644 examples/pangeo-vorticity.ipynb delete mode 100644 examples/pangeo-vorticity.svg diff --git a/.gitignore b/.gitignore index edbbbd10..6e99b944 100644 --- a/.gitignore +++ b/.gitignore @@ -136,3 +136,4 @@ dmypy.json # Cubed .lithops_config +examples/cubed.svg diff --git a/examples/cubed.svg b/examples/cubed.svg deleted file mode 100644 index 313f6a51..00000000 --- a/examples/cubed.svg +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - - -num tasks: 4 -max projected memory: 100.0 MB - - -array-001 - - -array-001 -asarray - - - - - -array-004 - - -array-004 -add (bw) - - - - - -array-001->array-004 - - - - - -array-002 - - -array-002 -asarray - - - - - -array-002->array-004 - - - - - diff --git a/examples/pangeo-1-vorticity.ipynb b/examples/pangeo-1-vorticity.ipynb new file mode 100644 index 00000000..72bf2aa7 --- /dev/null +++ b/examples/pangeo-1-vorticity.ipynb @@ -0,0 +1,4140 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bdf4365e", + "metadata": {}, + "source": [ + "# Pangeo 
Vorticity Workload\n", + "\n", + "This is a notebook for exploring a simplified version of the example in https://github.com/pangeo-data/distributed-array-examples/issues/1.\n", + "\n", + "See also Tom Nicholas's [notebook](https://gist.github.com/TomNicholas/8366c917349b647d87860a20a257a3fb#file-benchmark-vorticity-ipynb) exploring this problem. Note that the code below for `diff` is based on the Dask version." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b8386400", + "metadata": {}, + "outputs": [], + "source": [ + "import cubed\n", + "import cubed.random\n", + "from cubed.extensions.rich import RichProgressBar\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "id": "9c1953a4", + "metadata": {}, + "source": [ + "Initialization parameters for the workload" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f4b61ed9", + "metadata": {}, + "outputs": [], + "source": [ + "t_length = 50\n", + "spec = cubed.Spec(allowed_mem=\"2GB\")" + ] + }, + { + "cell_type": "markdown", + "id": "48d462ed", + "metadata": {}, + "source": [ + "Create random data stored in Zarr." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "11782285", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "40a37f546c3d4752a92531e87aeb4692", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "u = cubed.random.random((t_length, 1, 987, 1920), chunks=(10, 1, -1, -1), spec=spec)\n", + "v = cubed.random.random((t_length, 1, 987, 1920), chunks=(10, 1, -1, -1), spec=spec)\n", + "dx = cubed.random.random((1, 987, 1920), chunks=(1, -1, -1), spec=spec)\n", + "dy = cubed.random.random((1, 987, 1920), chunks=(1, -1, -1), spec=spec)\n", + "arrays = [u, v, dx, dy]\n", + "paths = [\n", + " f\"{spec.work_dir}/u_{t_length}.zarr\",\n", + " f\"{spec.work_dir}/v_{t_length}.zarr\",\n", + " f\"{spec.work_dir}/dx_{t_length}.zarr\",\n", + " f\"{spec.work_dir}/dy_{t_length}.zarr\",\n", + "]\n", + "cubed.store(arrays, paths, compute_arrays_in_parallel=True, callbacks=[RichProgressBar()])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "58bbcfde", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 2GB\n",
+       "Dimensions:  (time: 50, face: 1, j: 987, i: 1920)\n",
+       "Dimensions without coordinates: time, face, j, i\n",
+       "Data variables:\n",
+       "    U        (time, face, j, i) float64 758MB cubed.Array<chunksize=(10, 1, 987, 1920)>\n",
+       "    V        (time, face, j, i) float64 758MB cubed.Array<chunksize=(10, 1, 987, 1920)>\n",
+       "    dx       (face, j, i) float64 15MB cubed.Array<chunksize=(1, 987, 1920)>\n",
+       "    dy       (face, j, i) float64 15MB cubed.Array<chunksize=(1, 987, 1920)>
" + ], + "text/plain": [ + " Size: 2GB\n", + "Dimensions: (time: 50, face: 1, j: 987, i: 1920)\n", + "Dimensions without coordinates: time, face, j, i\n", + "Data variables:\n", + " U (time, face, j, i) float64 758MB cubed.Array\n", + " V (time, face, j, i) float64 758MB cubed.Array\n", + " dx (face, j, i) float64 15MB cubed.Array\n", + " dy (face, j, i) float64 15MB cubed.Array" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u = cubed.from_zarr(paths[0], spec=spec)\n", + "v = cubed.from_zarr(paths[1], spec=spec)\n", + "dx = cubed.from_zarr(paths[2], spec=spec)\n", + "dy = cubed.from_zarr(paths[3], spec=spec)\n", + "ds = xr.Dataset(\n", + " dict(\n", + " U=([\"time\", \"face\", \"j\", \"i\"], u),\n", + " V=([\"time\", \"face\", \"j\", \"i\"], v),\n", + " dx=([\"face\", \"j\", \"i\"], dx),\n", + " dy=([\"face\", \"j\", \"i\"], dy),\n", + " )\n", + ")\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "df8b97e8", + "metadata": {}, + "outputs": [], + "source": [ + "def diff(da, dim):\n", + " \"\"\"\n", + " First order derivative along dim.\n", + " \n", + " Differencing removes one grid element, so pad to restore size.\n", + " \"\"\"\n", + " # Once https://github.com/cubed-dev/cubed/issues/475 is done we can use this simpler implementation:\n", + " # return da.diff(dim).pad({dim: (1, 0)}, mode='symmetric')\n", + " \n", + " ind = da.dims.index(dim)\n", + " d = da.diff(dim).data\n", + " pad_width = tuple((0, 0) if i != ind else (1, 0) for i in range(da.ndim))\n", + " chunks = da.data.chunks # make sure result after padding has original chunk size\n", + " p = cubed.pad(d, pad_width=pad_width, mode=\"symmetric\", chunks=chunks)\n", + " return xr.DataArray(p, dims=da.dims)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c382a626", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray (face: 1, j: 987, i: 1920)> Size: 15MB\n",
+       "cubed.Array<array-061, shape=(1, 987, 1920), dtype=float64, chunks=((1,), (987,), (1920,))>\n",
+       "Dimensions without coordinates: face, j, i
" + ], + "text/plain": [ + " Size: 15MB\n", + "cubed.Array\n", + "Dimensions without coordinates: face, j, i" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "omega = diff(ds.U, dim='j') * ds.dx - diff(ds.V, dim='i') * ds.dy\n", + "mean = omega.mean('time', skipna=False)\n", + "mean" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "403ae0b0", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "num tasks: 51\n", + "max projected memory: 909.6 MB\n", + "total nbytes written: 3.9 GB\n", + "optimized: True\n", + "\n", + "\n", + "op-017\n", + "\n", + "\n", + "op-017\n", + "from_zarr\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-017\n", + "\n", + "\n", + "array-017\n", + "u\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-017->array-017\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-027\n", + "\n", + "\n", + "op-027\n", + "__sub__\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-017->op-027\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-017->op-027\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-027\n", + "\n", + "\n", + "array-027\n", + "d\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-027->array-027\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-030\n", + "\n", + "\n", + "op-030\n", + "pad\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-027->op-030\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-033\n", + "\n", + "\n", + "op-033\n", + "pad\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-027->op-033\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-030\n", + "\n", + "\n", + "array-030\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-030->array-030\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-030->op-033\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-033\n", + "\n", + "\n", + "array-033\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"op-033->array-033\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-057\n", + "\n", + "\n", + "op-057\n", + "__sub__\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-033->op-057\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-019\n", + "\n", + "\n", + "op-019\n", + "from_zarr\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-019\n", + "\n", + "\n", + "array-019\n", + "dx\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-019->array-019\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-037\n", + "\n", + "\n", + "op-037\n", + "__getitem__\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-019->op-037\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-037\n", + "\n", + "\n", + "array-037\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-037->array-037\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-037->op-057\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-018\n", + "\n", + "\n", + "op-018\n", + "from_zarr\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-018\n", + "\n", + "\n", + "array-018\n", + "v\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-018->array-018\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-045\n", + "\n", + "\n", + "op-045\n", + "__sub__\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-018->op-045\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-018->op-045\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-045\n", + "\n", + "\n", + "array-045\n", + "d\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-045->array-045\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-048\n", + "\n", + "\n", + "op-048\n", + "pad\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-045->op-048\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-051\n", + "\n", + "\n", + "op-051\n", + "pad\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-045->op-051\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-048\n", + "\n", + "\n", + "array-048\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "op-048->array-048\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-048->op-051\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-051\n", + "\n", + "\n", + "array-051\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-051->array-051\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-051->op-057\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-020\n", + "\n", + "\n", + "op-020\n", + "from_zarr\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-020\n", + "\n", + "\n", + "array-020\n", + "dy\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-020->array-020\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-055\n", + "\n", + "\n", + "op-055\n", + "__getitem__\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-020->op-055\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-055\n", + "\n", + "\n", + "array-055\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-055->array-055\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-055->op-057\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-057\n", + "\n", + "\n", + "array-057\n", + "None\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-057->array-057\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-058\n", + "\n", + "\n", + "op-058\n", + "mean\n", + "tasks: 2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-057->op-058\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-058\n", + "\n", + "\n", + "array-058\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-058->array-058\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-061\n", + "\n", + "\n", + "op-061\n", + "mean\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-058->op-061\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-061\n", + "\n", + "\n", + "array-061\n", + "None\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-061->array-061\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays\n", + "\n", + "\n", + "create-arrays\n", + "tasks: 11\n", + "\n", + "\n", + "\n", + "\n", 
+ "\n", + "arrays\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays->arrays\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean.data.visualize()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7b8b6292", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ac32d115b19f4d9eb6e090a7cc9b6137", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray (face: 1, j: 987, i: 1920)> Size: 15MB\n",
+       "array([[[-0.00935039,  0.05180967,  0.02368589, ...,  0.09296773,\n",
+       "         -0.02251309,  0.00811106],\n",
+       "        [-0.03833913,  0.05900023,  0.01459681, ..., -0.01437545,\n",
+       "          0.01901828,  0.02181335],\n",
+       "        [ 0.00342973,  0.01948303,  0.01160908, ...,  0.02988825,\n",
+       "         -0.0061075 , -0.01061161],\n",
+       "        ...,\n",
+       "        [ 0.03900664,  0.04362473,  0.00623131, ..., -0.03877337,\n",
+       "          0.00551559, -0.0664945 ],\n",
+       "        [-0.01084767, -0.00746532,  0.04994078, ...,  0.00983424,\n",
+       "         -0.00747178, -0.01885188],\n",
+       "        [ 0.00544092,  0.05520928, -0.0255991 , ...,  0.02842916,\n",
+       "          0.0289831 ,  0.01773723]]])\n",
+       "Dimensions without coordinates: face, j, i
" + ], + "text/plain": [ + " Size: 15MB\n", + "array([[[-0.00935039, 0.05180967, 0.02368589, ..., 0.09296773,\n", + " -0.02251309, 0.00811106],\n", + " [-0.03833913, 0.05900023, 0.01459681, ..., -0.01437545,\n", + " 0.01901828, 0.02181335],\n", + " [ 0.00342973, 0.01948303, 0.01160908, ..., 0.02988825,\n", + " -0.0061075 , -0.01061161],\n", + " ...,\n", + " [ 0.03900664, 0.04362473, 0.00623131, ..., -0.03877337,\n", + " 0.00551559, -0.0664945 ],\n", + " [-0.01084767, -0.00746532, 0.04994078, ..., 0.00983424,\n", + " -0.00747178, -0.01885188],\n", + " [ 0.00544092, 0.05520928, -0.0255991 , ..., 0.02842916,\n", + " 0.0289831 , 0.01773723]]])\n", + "Dimensions without coordinates: face, j, i" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean.compute(callbacks=[RichProgressBar()])" + ] + }, + { + "cell_type": "markdown", + "id": "b9382d3b", + "metadata": {}, + "source": [ + "## A more efficient implementation\n", + "\n", + "In the above visualization we can see that the `diff` and `pad` operations are not fused together. This is something that could be improved in the future; see [#464](https://github.com/cubed-dev/cubed/issues/464) for what is needed.\n", + "\n", + "In the meantime, we can use Cubed's `map_overlap` to write a more efficient combined version of `diff` and `pad`, called `diff_pad`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "34f02045", + "metadata": {}, + "outputs": [], + "source": [ + "from cubed.backend_array_api import namespace as nxp\n", + "from cubed.overlap import map_overlap\n", + "from cubed.vendor.dask.array.utils import validate_axis" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e839c886", + "metadata": {}, + "outputs": [], + "source": [ + "def diff_pad(x, /, *, axis=-1):\n", + " axis = validate_axis(axis, x.ndim)\n", + " depth = tuple(1 if i == axis else 0 for i in range(x.ndim))\n", + " return map_overlap(\n", + " _diff_pad,\n", + " x,\n", + " dtype=x.dtype,\n", + " chunks=x.chunks,\n", + " depth=depth,\n", + " boundary=0, # actual value is ignored\n", + " trim=False,\n", + " axis=axis,\n", + " )\n", + "\n", + "\n", + "def _diff_pad(a, block_id=None, axis=None):\n", + " if block_id[axis] == 0: # pad first block along axis\n", + " sl = tuple(slice(1, -1) if i == axis else slice(None) for i in range(a.ndim))\n", + " val = nxp.diff(a[sl], axis=axis)\n", + " pad_width = tuple((1, 0) if i == axis else (0, 0) for i in range(a.ndim))\n", + " val = nxp.pad(val, pad_width=pad_width, mode=\"symmetric\")\n", + " return val\n", + " else:\n", + " sl = tuple(slice(None, -1) if i == axis else slice(None) for i in range(a.ndim))\n", + " return nxp.diff(a[sl], axis=axis)" + ] + }, + { + "cell_type": "markdown", + "id": "09bcf4eb", + "metadata": {}, + "source": [ + "Now we can write a more efficient version of `diff` from above" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c3fbcb1f", + "metadata": {}, + "outputs": [], + "source": [ + "def diff2(da, dim):\n", + " ind = da.dims.index(dim)\n", + " dp = diff_pad(da.data, axis=ind)\n", + " return xr.DataArray(dp, dims=da.dims)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "45f3ebc1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray (face: 1, j: 987, i: 1920)> Size: 15MB\n",
+       "cubed.Array<array-082, shape=(1, 987, 1920), dtype=float64, chunks=((1,), (987,), (1920,))>\n",
+       "Dimensions without coordinates: face, j, i
" + ], + "text/plain": [ + " Size: 15MB\n", + "cubed.Array\n", + "Dimensions without coordinates: face, j, i" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "omega = diff2(ds.U, dim='j') * ds.dx - diff2(ds.V, dim='i') * ds.dy\n", + "mean = omega.mean('time', skipna=False)\n", + "mean" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "236caaa8", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "num tasks: 27\n", + "max projected memory: 909.6 MB\n", + "total nbytes written: 2.4 GB\n", + "optimized: True\n", + "\n", + "\n", + "op-017\n", + "\n", + "\n", + "op-017\n", + "from_zarr\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-017\n", + "\n", + "\n", + "array-017\n", + "u\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-017->array-017\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-064\n", + "\n", + "\n", + "op-064\n", + "map_overlap\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-017->op-064\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-064\n", + "\n", + "\n", + "array-064\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-064->array-064\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-078\n", + "\n", + "\n", + "op-078\n", + "__sub__\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-064->op-078\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-019\n", + "\n", + "\n", + "op-019\n", + "from_zarr\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-019\n", + "\n", + "\n", + "array-019\n", + "dx\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-019->array-019\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-068\n", + "\n", + "\n", + "op-068\n", + "__getitem__\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-019->op-068\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-068\n", + "\n", + "\n", + "array-068\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"op-068->array-068\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-068->op-078\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-018\n", + "\n", + "\n", + "op-018\n", + "from_zarr\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-018\n", + "\n", + "\n", + "array-018\n", + "v\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-018->array-018\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-072\n", + "\n", + "\n", + "op-072\n", + "map_overlap\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-018->op-072\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-072\n", + "\n", + "\n", + "array-072\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-072->array-072\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-072->op-078\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-020\n", + "\n", + "\n", + "op-020\n", + "from_zarr\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-020\n", + "\n", + "\n", + "array-020\n", + "dy\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-020->array-020\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-076\n", + "\n", + "\n", + "op-076\n", + "__getitem__\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-020->op-076\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-076\n", + "\n", + "\n", + "array-076\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-076->array-076\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-076->op-078\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-078\n", + "\n", + "\n", + "array-078\n", + "None\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-078->array-078\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-079\n", + "\n", + "\n", + "op-079\n", + "mean\n", + "tasks: 2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-078->op-079\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-079\n", + "\n", + "\n", + "array-079\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-079->array-079\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-082\n", + "\n", + "\n", + 
"op-082\n", + "mean\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-079->op-082\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-082\n", + "\n", + "\n", + "array-082\n", + "None\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-082->array-082\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays\n", + "\n", + "\n", + "create-arrays\n", + "tasks: 7\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays->arrays\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean.data.visualize()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f64e2ea9", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ea93bbb1ab8144d2b51fdadceab4a5f7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray (face: 1, j: 987, i: 1920)> Size: 15MB\n",
+       "array([[[-0.00935039,  0.05180967,  0.02368589, ...,  0.09296773,\n",
+       "         -0.02251309,  0.00811106],\n",
+       "        [-0.03833913,  0.05900023,  0.01459681, ..., -0.01437545,\n",
+       "          0.01901828,  0.02181335],\n",
+       "        [ 0.00342973,  0.01948303,  0.01160908, ...,  0.02988825,\n",
+       "         -0.0061075 , -0.01061161],\n",
+       "        ...,\n",
+       "        [ 0.03900664,  0.04362473,  0.00623131, ..., -0.03877337,\n",
+       "          0.00551559, -0.0664945 ],\n",
+       "        [-0.01084767, -0.00746532,  0.04994078, ...,  0.00983424,\n",
+       "         -0.00747178, -0.01885188],\n",
+       "        [ 0.00544092,  0.05520928, -0.0255991 , ...,  0.02842916,\n",
+       "          0.0289831 ,  0.01773723]]])\n",
+       "Dimensions without coordinates: face, j, i
" + ], + "text/plain": [ + " Size: 15MB\n", + "array([[[-0.00935039, 0.05180967, 0.02368589, ..., 0.09296773,\n", + " -0.02251309, 0.00811106],\n", + " [-0.03833913, 0.05900023, 0.01459681, ..., -0.01437545,\n", + " 0.01901828, 0.02181335],\n", + " [ 0.00342973, 0.01948303, 0.01160908, ..., 0.02988825,\n", + " -0.0061075 , -0.01061161],\n", + " ...,\n", + " [ 0.03900664, 0.04362473, 0.00623131, ..., -0.03877337,\n", + " 0.00551559, -0.0664945 ],\n", + " [-0.01084767, -0.00746532, 0.04994078, ..., 0.00983424,\n", + " -0.00747178, -0.01885188],\n", + " [ 0.00544092, 0.05520928, -0.0255991 , ..., 0.02842916,\n", + " 0.0289831 , 0.01773723]]])\n", + "Dimensions without coordinates: face, j, i" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean.compute(callbacks=[RichProgressBar()])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9572a661", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/pangeo-2-quadratic-means.ipynb b/examples/pangeo-2-quadratic-means.ipynb new file mode 100644 index 00000000..727e65df --- /dev/null +++ b/examples/pangeo-2-quadratic-means.ipynb @@ -0,0 +1,2463 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f575fe3a-48fc-4643-808c-89d4cd5d1c5d", + "metadata": {}, + "source": [ + "# Pangeo Quadratic Means Workload\n", + "\n", + "This is a notebook for exploring the example in https://github.com/pangeo-data/distributed-array-examples/issues/2.\n", + "\n", + "See also Tom Nicholas's 
[notebook](https://gist.github.com/TomNicholas/8366c917349b647d87860a20a257a3fb#file-benchmark-quadratic-ipynb) and Tom White's blog post, [Optimizing Cubed](https://medium.com/pangeo/optimizing-cubed-7a0b8f65f5b7)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dba3adc2-d5b8-4c2d-9bc6-c434d4f573b9", + "metadata": {}, + "outputs": [], + "source": [ + "import cubed\n", + "import cubed.random\n", + "from cubed.extensions.rich import RichProgressBar\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "id": "f7238837-e6d7-4499-8df6-bd99b9ca26f6", + "metadata": {}, + "source": [ + "Initialization parameters for the workload" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1bb67eb5-e6ec-4254-b7e1-8a2318911038", + "metadata": {}, + "outputs": [], + "source": [ + "t_length = 50\n", + "spec = cubed.Spec(allowed_mem=\"2GB\")" + ] + }, + { + "cell_type": "markdown", + "id": "b51c5bd8-d27e-4699-8e3c-d650e5b66b72", + "metadata": {}, + "source": [ + "Create random data stored in Zarr." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f0a94b85-8146-4ffb-9d6c-4ccfc1da0575", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1a3d5ba8c0344fabb406b4a47484e68b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "u = cubed.random.random((t_length, 1, 987, 1920), chunks=(10, 1, -1, -1), spec=spec)\n", + "v = cubed.random.random((t_length, 1, 987, 1920), chunks=(10, 1, -1, -1), spec=spec)\n", + "arrays = [u, v]\n", + "paths = [f\"{spec.work_dir}/u_{t_length}.zarr\", f\"{spec.work_dir}/v_{t_length}.zarr\"]\n", + "cubed.store(arrays, paths, compute_arrays_in_parallel=True, callbacks=[RichProgressBar()])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "13c4e020-a968-4f4d-bfab-5d5d487dfb30", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 2GB\n",
+       "Dimensions:  (time: 50, face: 1, j: 987, i: 1920)\n",
+       "Dimensions without coordinates: time, face, j, i\n",
+       "Data variables:\n",
+       "    anom_u   (time, face, j, i) float64 758MB cubed.Array<chunksize=(10, 1, 987, 1920)>\n",
+       "    anom_v   (time, face, j, i) float64 758MB cubed.Array<chunksize=(10, 1, 987, 1920)>
" + ], + "text/plain": [ + " Size: 2GB\n", + "Dimensions: (time: 50, face: 1, j: 987, i: 1920)\n", + "Dimensions without coordinates: time, face, j, i\n", + "Data variables:\n", + " anom_u (time, face, j, i) float64 758MB cubed.Array\n", + " anom_v (time, face, j, i) float64 758MB cubed.Array" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u = cubed.from_zarr(paths[0], spec=spec)\n", + "v = cubed.from_zarr(paths[1], spec=spec)\n", + "ds = xr.Dataset(\n", + " dict(\n", + " anom_u=([\"time\", \"face\", \"j\", \"i\"], u),\n", + " anom_v=([\"time\", \"face\", \"j\", \"i\"], v),\n", + " )\n", + ")\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "283d3c71-5271-499d-8743-ff489e4c5b90", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 45MB\n",
+       "Dimensions:  (face: 1, j: 987, i: 1920)\n",
+       "Dimensions without coordinates: face, j, i\n",
+       "Data variables:\n",
+       "    anom_u   (face, j, i) float64 15MB cubed.Array<chunksize=(1, 987, 1920)>\n",
+       "    anom_v   (face, j, i) float64 15MB cubed.Array<chunksize=(1, 987, 1920)>\n",
+       "    uv       (face, j, i) float64 15MB cubed.Array<chunksize=(1, 987, 1920)>
" + ], + "text/plain": [ + " Size: 45MB\n", + "Dimensions: (face: 1, j: 987, i: 1920)\n", + "Dimensions without coordinates: face, j, i\n", + "Data variables:\n", + " anom_u (face, j, i) float64 15MB cubed.Array\n", + " anom_v (face, j, i) float64 15MB cubed.Array\n", + " uv (face, j, i) float64 15MB cubed.Array" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quad = ds**2\n", + "quad[\"uv\"] = ds.anom_u * ds.anom_v\n", + "result = quad.mean(\"time\", skipna=False)\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e97aa284", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "num tasks: 33\n", + "max projected memory: 909.6 MB\n", + "total nbytes written: 2.5 GB\n", + "optimized: True\n", + "\n", + "\n", + "op-009\n", + "\n", + "\n", + "op-009\n", + "from_zarr\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-009\n", + "\n", + "\n", + "array-009\n", + "u\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-009->array-009\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-012\n", + "\n", + "\n", + "op-012\n", + "__pow__\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-009->op-012\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-015\n", + "\n", + "\n", + "op-015\n", + "__mul__\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-009->op-015\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-011\n", + "\n", + "\n", + "op-011\n", + "__pow__\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-011\n", + "\n", + "\n", + "array-011\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-011->array-011\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-011->op-012\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-012\n", + "\n", + "\n", + "array-012\n", + "values\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-012->array-012\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-016\n", + "\n", + "\n", 
+ "op-016\n", + "mean\n", + "tasks: 2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-012->op-016\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-016\n", + "\n", + "\n", + "array-016\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-016->array-016\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-019\n", + "\n", + "\n", + "op-019\n", + "mean\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-016->op-019\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-019\n", + "\n", + "\n", + "array-019\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-019->array-019\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-010\n", + "\n", + "\n", + "op-010\n", + "from_zarr\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-010\n", + "\n", + "\n", + "array-010\n", + "v\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-010->array-010\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-014\n", + "\n", + "\n", + "op-014\n", + "__pow__\n", + "tasks: 5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-010->op-014\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-010->op-015\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-013\n", + "\n", + "\n", + "op-013\n", + "__pow__\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-013\n", + "\n", + "\n", + "array-013\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-013->array-013\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-013->op-014\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-014\n", + "\n", + "\n", + "array-014\n", + "values\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-014->array-014\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-020\n", + "\n", + "\n", + "op-020\n", + "mean\n", + "tasks: 2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-014->op-020\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-020\n", + "\n", + "\n", + "array-020\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-020->array-020\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-023\n", + "\n", + "\n", + 
"op-023\n", + "mean\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-020->op-023\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-023\n", + "\n", + "\n", + "array-023\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-023->array-023\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-015\n", + "\n", + "\n", + "array-015\n", + "values\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-015->array-015\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-024\n", + "\n", + "\n", + "op-024\n", + "mean\n", + "tasks: 2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-015->op-024\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-024\n", + "\n", + "\n", + "array-024\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-024->array-024\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-027\n", + "\n", + "\n", + "op-027\n", + "mean\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-024->op-027\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-027\n", + "\n", + "\n", + "array-027\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-027->array-027\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays\n", + "\n", + "\n", + "create-arrays\n", + "tasks: 9\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays->arrays\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cubed.visualize(\n", + " *(result[var].data for var in (\"anom_u\", \"anom_v\", \"uv\")),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1700ce7b", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ea28a388169c4e47a463759111801687", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": 
"display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 45MB\n",
+       "Dimensions:  (face: 1, j: 987, i: 1920)\n",
+       "Dimensions without coordinates: face, j, i\n",
+       "Data variables:\n",
+       "    anom_u   (face, j, i) float64 15MB 0.339 0.3298 0.3399 ... 0.3794 0.2628\n",
+       "    anom_v   (face, j, i) float64 15MB 0.3566 0.3384 0.3565 ... 0.3379 0.3464\n",
+       "    uv       (face, j, i) float64 15MB 0.2717 0.2679 0.281 ... 0.2727 0.2272
" + ], + "text/plain": [ + " Size: 45MB\n", + "Dimensions: (face: 1, j: 987, i: 1920)\n", + "Dimensions without coordinates: face, j, i\n", + "Data variables:\n", + " anom_u (face, j, i) float64 15MB 0.339 0.3298 0.3399 ... 0.3794 0.2628\n", + " anom_v (face, j, i) float64 15MB 0.3566 0.3384 0.3565 ... 0.3379 0.3464\n", + " uv (face, j, i) float64 15MB 0.2717 0.2679 0.281 ... 0.2727 0.2272" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result.compute(callbacks=[RichProgressBar()])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b345817e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/pangeo-3-tem.ipynb b/examples/pangeo-3-tem.ipynb new file mode 100644 index 00000000..7f011801 --- /dev/null +++ b/examples/pangeo-3-tem.ipynb @@ -0,0 +1,8318 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f575fe3a-48fc-4643-808c-89d4cd5d1c5d", + "metadata": {}, + "source": [ + "# Pangeo Transformed Eulerian Mean Workload\n", + "\n", + "This is a notebook for exploring the example in https://github.com/pangeo-data/distributed-array-examples/issues/3.\n", + "\n", + "See also https://github.com/dcherian/ncar-challenge-suite/blob/main/tem.ipynb for the original, and the Cubed tracking issue at https://github.com/tomwhite/cubed/issues/145" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dba3adc2-d5b8-4c2d-9bc6-c434d4f573b9", + "metadata": {}, + "outputs": [], + "source": [ + "import cubed\n", + "import 
cubed.random\n", + "from cubed.extensions.rich import RichProgressBar\n", + "import pandas as pd\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "id": "f7238837-e6d7-4499-8df6-bd99b9ca26f6", + "metadata": {}, + "source": [ + "Initialization parameters for the workload" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1bb67eb5-e6ec-4254-b7e1-8a2318911038", + "metadata": {}, + "outputs": [], + "source": [ + "dims = (\"time\", \"level\", \"lat\", \"lon\")\n", + "# nyears is number of years, adjust to make bigger,\n", + "# full dataset is 60-ish years.\n", + "nyears = 1\n", + "#shape = (nyears * 365 * 24, 37, 721, 1440)\n", + "shape = (nyears * 31 * 24, 37, 72, 144) # make smaller while exploring locally\n", + "chunks = (24, 1, -1, -1)\n", + "spec = cubed.Spec(allowed_mem=\"2GB\")" + ] + }, + { + "cell_type": "markdown", + "id": "b51c5bd8-d27e-4699-8e3c-d650e5b66b72", + "metadata": {}, + "source": [ + "Create random data stored in Zarr." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f0a94b85-8146-4ffb-9d6c-4ccfc1da0575", + "metadata": {}, + "outputs": [], + "source": [ + "u = cubed.random.random(shape, chunks=chunks, spec=spec)\n", + "v = cubed.random.random(shape, chunks=chunks, spec=spec)\n", + "w = cubed.random.random(shape, chunks=chunks, spec=spec)\n", + "t = cubed.random.random(shape, chunks=chunks, spec=spec)\n", + "arrays = [u, v, w, t]\n", + "paths = [\n", + " f\"{spec.work_dir}/u_{nyears}.zarr\",\n", + " f\"{spec.work_dir}/v_{nyears}.zarr\",\n", + " f\"{spec.work_dir}/w_{nyears}.zarr\",\n", + " f\"{spec.work_dir}/t_{nyears}.zarr\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "20d4b17b", + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment the line below (and the from_zarr cell that follows) to save then load Zarr from filesystem\n", + "# cubed.store(arrays, paths, compute_arrays_in_parallel=True, callbacks=[RichProgressBar()])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id":
"9ede34e3", + "metadata": {}, + "outputs": [], + "source": [ + "# u = cubed.from_zarr(paths[0], spec=spec)\n", + "# v = cubed.from_zarr(paths[1], spec=spec)\n", + "# w = cubed.from_zarr(paths[2], spec=spec)\n", + "# t = cubed.from_zarr(paths[3], spec=spec)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "13c4e020-a968-4f4d-bfab-5d5d487dfb30", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 9GB\n",
+       "Dimensions:  (time: 744, level: 37, lat: 72, lon: 144)\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 6kB 2001-01-01 ... 2001-01-31T23:00:00\n",
+       "Dimensions without coordinates: level, lat, lon\n",
+       "Data variables:\n",
+       "    U        (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n",
+       "    V        (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n",
+       "    W        (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n",
+       "    T        (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>
" + ], + "text/plain": [ + " Size: 9GB\n", + "Dimensions: (time: 744, level: 37, lat: 72, lon: 144)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 6kB 2001-01-01 ... 2001-01-31T23:00:00\n", + "Dimensions without coordinates: level, lat, lon\n", + "Data variables:\n", + " U (time, level, lat, lon) float64 2GB cubed.Array\n", + " V (time, level, lat, lon) float64 2GB cubed.Array\n", + " W (time, level, lat, lon) float64 2GB cubed.Array\n", + " T (time, level, lat, lon) float64 2GB cubed.Array" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = xr.Dataset(\n", + " {\n", + " \"U\": (dims, u),\n", + " \"V\": (dims, v),\n", + " \"W\": (dims, w),\n", + " \"T\": (dims, t),\n", + " },\n", + " coords={\"time\": pd.date_range(\"2001-01-01\", periods=shape[0], freq=\"H\")},\n", + ")\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "283d3c71-5271-499d-8743-ff489e4c5b90", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 63MB\n",
+       "Dimensions:  (time: 744, level: 37, lat: 72)\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 6kB 2001-01-01 ... 2001-01-31T23:00:00\n",
+       "Dimensions without coordinates: level, lat\n",
+       "Data variables:\n",
+       "    U        (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n",
+       "    V        (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n",
+       "    W        (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n",
+       "    T        (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>
" + ], + "text/plain": [ + " Size: 63MB\n", + "Dimensions: (time: 744, level: 37, lat: 72)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 6kB 2001-01-01 ... 2001-01-31T23:00:00\n", + "Dimensions without coordinates: level, lat\n", + "Data variables:\n", + " U (time, level, lat) float64 16MB cubed.Array\n", + " V (time, level, lat) float64 16MB cubed.Array\n", + " W (time, level, lat) float64 16MB cubed.Array\n", + " T (time, level, lat) float64 16MB cubed.Array" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Note: we actually want skipna=True, but this isn't implemented in xarray yet\n", + "# see https://github.com/pydata/xarray/issues/7243\n", + "zonal_means = ds.mean(\"lon\", skipna=False)\n", + "zonal_means" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "fcfc18e3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 16GB\n",
+       "Dimensions:  (time: 744, level: 37, lat: 72, lon: 144)\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 6kB 2001-01-01 ... 2001-01-31T23:00:00\n",
+       "Dimensions without coordinates: level, lat, lon\n",
+       "Data variables:\n",
+       "    U        (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n",
+       "    V        (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n",
+       "    W        (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n",
+       "    T        (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n",
+       "    uv       (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n",
+       "    vt       (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n",
+       "    uw       (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>
" + ], + "text/plain": [ + " Size: 16GB\n", + "Dimensions: (time: 744, level: 37, lat: 72, lon: 144)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 6kB 2001-01-01 ... 2001-01-31T23:00:00\n", + "Dimensions without coordinates: level, lat, lon\n", + "Data variables:\n", + " U (time, level, lat, lon) float64 2GB cubed.Array\n", + " V (time, level, lat, lon) float64 2GB cubed.Array\n", + " W (time, level, lat, lon) float64 2GB cubed.Array\n", + " T (time, level, lat, lon) float64 2GB cubed.Array\n", + " uv (time, level, lat, lon) float64 2GB cubed.Array\n", + " vt (time, level, lat, lon) float64 2GB cubed.Array\n", + " uw (time, level, lat, lon) float64 2GB cubed.Array" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anomaly = ds - zonal_means\n", + "anomaly['uv'] = anomaly.U*anomaly.V\n", + "anomaly['vt'] = anomaly.V*anomaly.T\n", + "anomaly['uw'] = anomaly.U*anomaly.W\n", + "anomaly" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cce3f02a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 111MB\n",
+       "Dimensions:  (time: 744, level: 37, lat: 72)\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 6kB 2001-01-01 ... 2001-01-31T23:00:00\n",
+       "Dimensions without coordinates: level, lat\n",
+       "Data variables:\n",
+       "    U        (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n",
+       "    V        (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n",
+       "    W        (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n",
+       "    T        (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n",
+       "    uv       (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n",
+       "    vt       (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n",
+       "    uw       (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>
" + ], + "text/plain": [ + " Size: 111MB\n", + "Dimensions: (time: 744, level: 37, lat: 72)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 6kB 2001-01-01 ... 2001-01-31T23:00:00\n", + "Dimensions without coordinates: level, lat\n", + "Data variables:\n", + " U (time, level, lat) float64 16MB cubed.Array\n", + " V (time, level, lat) float64 16MB cubed.Array\n", + " W (time, level, lat) float64 16MB cubed.Array\n", + " T (time, level, lat) float64 16MB cubed.Array\n", + " uv (time, level, lat) float64 16MB cubed.Array\n", + " vt (time, level, lat) float64 16MB cubed.Array\n", + " uw (time, level, lat) float64 16MB cubed.Array" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temdiags = zonal_means.merge(anomaly[['uv','vt','uw']].mean(\"lon\", skipna=False)) # actually want skipna=True\n", + "temdiags" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6734ec3f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 5MB\n",
+       "Dimensions:  (time: 31, level: 37, lat: 72)\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 248B 2001-01-01 2001-01-02 ... 2001-01-31\n",
+       "Dimensions without coordinates: level, lat\n",
+       "Data variables:\n",
+       "    U        (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>\n",
+       "    V        (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>\n",
+       "    W        (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>\n",
+       "    T        (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>\n",
+       "    uv       (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>\n",
+       "    vt       (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>\n",
+       "    uw       (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>
" + ], + "text/plain": [ + " Size: 5MB\n", + "Dimensions: (time: 31, level: 37, lat: 72)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 248B 2001-01-01 2001-01-02 ... 2001-01-31\n", + "Dimensions without coordinates: level, lat\n", + "Data variables:\n", + " U (time, level, lat) float64 661kB cubed.Array\n", + " V (time, level, lat) float64 661kB cubed.Array\n", + " W (time, level, lat) float64 661kB cubed.Array\n", + " T (time, level, lat) float64 661kB cubed.Array\n", + " uv (time, level, lat) float64 661kB cubed.Array\n", + " vt (time, level, lat) float64 661kB cubed.Array\n", + " uw (time, level, lat) float64 661kB cubed.Array" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# note method=\"blockwise\" uses flox\n", + "temdiags = temdiags.resample(time=\"D\").mean(method=\"blockwise\", skipna=False) # actually want skipna=True\n", + "temdiags" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e97aa284", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "num tasks: 11480\n", + "max projected memory: 11.9 MB\n", + "total nbytes written: 113.0 MB\n", + "optimized: True\n", + "\n", + "\n", + "op-015\n", + "\n", + "\n", + "op-015\n", + "mean\n", + "tasks: 1147\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-015\n", + "\n", + "\n", + "array-015\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-015->array-015\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-065\n", + "\n", + "\n", + "op-065\n", + "permute_dims\n", + "tasks: 1147\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-015->op-065\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-067\n", + "\n", + "\n", + "op-067\n", + "permute_dims\n", + "tasks: 1147\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-015->op-067\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-018\n", + "\n", + "\n", + "op-018\n", + "mean\n", + "tasks: 1147\n", + "\n", + "\n", + "\n", + "\n", 
+ "\n", + "array-018\n", + "\n", + "\n", + "array-018\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-018->array-018\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-018->op-065\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-066\n", + "\n", + "\n", + "op-066\n", + "permute_dims\n", + "tasks: 1147\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-018->op-066\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-065\n", + "\n", + "\n", + "array-065\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-065->array-065\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-093\n", + "\n", + "\n", + "op-093\n", + "permute_dims\n", + "tasks: 1147\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-065->op-093\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-093\n", + "\n", + "\n", + "array-093\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-093->array-093\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-024\n", + "\n", + "\n", + "op-024\n", + "mean\n", + "tasks: 1147\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-024\n", + "\n", + "\n", + "array-024\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-024->array-024\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-024->op-066\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-066\n", + "\n", + "\n", + "array-066\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-066->array-066\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-094\n", + "\n", + "\n", + "op-094\n", + "permute_dims\n", + "tasks: 1147\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-066->op-094\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-094\n", + "\n", + "\n", + "array-094\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-094->array-094\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-021\n", + "\n", + "\n", + "op-021\n", + "mean\n", + "tasks: 1147\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-021\n", + "\n", + "\n", + "array-021\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-021->array-021\n", + "\n", + "\n", + 
"\n", + "\n", + "\n", + "array-021->op-067\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-067\n", + "\n", + "\n", + "array-067\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-067->array-067\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-095\n", + "\n", + "\n", + "op-095\n", + "permute_dims\n", + "tasks: 1147\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-067->op-095\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-095\n", + "\n", + "\n", + "array-095\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-095->array-095\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays\n", + "\n", + "\n", + "create-arrays\n", + "tasks: 10\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays->arrays\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cubed.visualize(\n", + " *(temdiags[var].data for var in (\"uv\", \"vt\", \"uw\")),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1700ce7b", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f9e290547e7c47c8b5576dbfb40ce1e5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 5MB\n",
+       "Dimensions:  (time: 31, level: 37, lat: 72)\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 248B 2001-01-01 2001-01-02 ... 2001-01-31\n",
+       "Dimensions without coordinates: level, lat\n",
+       "Data variables:\n",
+       "    U        (time, level, lat) float64 661kB 0.502 0.5039 0.504 ... 0.5 0.4956\n",
+       "    V        (time, level, lat) float64 661kB 0.4953 0.4931 ... 0.4944 0.4976\n",
+       "    W        (time, level, lat) float64 661kB 0.5017 0.4984 ... 0.5083 0.5015\n",
+       "    T        (time, level, lat) float64 661kB 0.4949 0.503 ... 0.5024 0.487\n",
+       "    uv       (time, level, lat) float64 661kB 0.00129 0.0009764 ... 0.0003541\n",
+       "    vt       (time, level, lat) float64 661kB -0.002118 -0.001934 ... 0.001608\n",
+       "    uw       (time, level, lat) float64 661kB 0.0007928 -0.00245 ... -0.002159
" + ], + "text/plain": [ + " Size: 5MB\n", + "Dimensions: (time: 31, level: 37, lat: 72)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 248B 2001-01-01 2001-01-02 ... 2001-01-31\n", + "Dimensions without coordinates: level, lat\n", + "Data variables:\n", + " U (time, level, lat) float64 661kB 0.502 0.5039 0.504 ... 0.5 0.4956\n", + " V (time, level, lat) float64 661kB 0.4953 0.4931 ... 0.4944 0.4976\n", + " W (time, level, lat) float64 661kB 0.5017 0.4984 ... 0.5083 0.5015\n", + " T (time, level, lat) float64 661kB 0.4949 0.503 ... 0.5024 0.487\n", + " uv (time, level, lat) float64 661kB 0.00129 0.0009764 ... 0.0003541\n", + " vt (time, level, lat) float64 661kB -0.002118 -0.001934 ... 0.001608\n", + " uw (time, level, lat) float64 661kB 0.0007928 -0.00245 ... -0.002159" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temdiags.compute(callbacks=[RichProgressBar()])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b345817e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/pangeo-4-climatological-anomalies.ipynb b/examples/pangeo-4-climatological-anomalies.ipynb new file mode 100644 index 00000000..5978de07 --- /dev/null +++ b/examples/pangeo-4-climatological-anomalies.ipynb @@ -0,0 +1,2493 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f575fe3a-48fc-4643-808c-89d4cd5d1c5d", + "metadata": {}, + "source": [ + "# Pangeo Climatological Anomalies Workload\n", + "\n", + "This is a notebook for exploring the 
example in https://github.com/pangeo-data/distributed-array-examples/issues/4.\n", + "\n", + "Also includes code from https://github.com/cubed-dev/cubed/issues/223" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dba3adc2-d5b8-4c2d-9bc6-c434d4f573b9", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime, timedelta\n", + "import cubed\n", + "import cubed.random\n", + "from cubed.extensions.rich import RichProgressBar\n", + "import numpy as np\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "id": "f7238837-e6d7-4499-8df6-bd99b9ca26f6", + "metadata": {}, + "source": [ + "Initialization parameters for the workload" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1bb67eb5-e6ec-4254-b7e1-8a2318911038", + "metadata": {}, + "outputs": [], + "source": [ + "t_length = 372\n", + "time = np.arange(datetime(1979,1,1), datetime(2022,1,1), timedelta(hours=1)).astype('datetime64[ns]')\n", + "lat = np.linspace(-90.0, 90.0, 721)[::-1].astype(np.float32)\n", + "lon = np.linspace(0.0, 359.8, 1440).astype(np.float32)\n", + "spec = cubed.Spec(allowed_mem=\"3GB\") # TODO: can we reduce to 2GB?" + ] + }, + { + "cell_type": "markdown", + "id": "b51c5bd8-d27e-4699-8e3c-d650e5b66b72", + "metadata": {}, + "source": [ + "Create random data stored in Zarr." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f0a94b85-8146-4ffb-9d6c-4ccfc1da0575", + "metadata": {}, + "outputs": [], + "source": [ + "asn = cubed.array_api.astype(cubed.random.random((t_length, 721, 1440), chunks=(31, -1, -1), spec=spec), np.float32)\n", + "arrays = [asn]\n", + "paths = [\n", + " f\"{spec.work_dir}/asn_{t_length}.zarr\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "20d4b17b", + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment the line below (and the from_zarr cell that follows) to save then load Zarr from filesystem\n", + "# cubed.store(arrays, paths, compute_arrays_in_parallel=True, callbacks=[RichProgressBar()])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9ede34e3", + "metadata": {}, + "outputs": [], + "source": [ + "# asn = cubed.from_zarr(paths[0], spec=spec)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "13c4e020-a968-4f4d-bfab-5d5d487dfb30", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 2GB\n",
+       "Dimensions:    (time: 372, latitude: 721, longitude: 1440)\n",
+       "Coordinates:\n",
+       "  * time       (time) datetime64[ns] 3kB 1979-01-01 ... 1979-01-16T11:00:00\n",
+       "  * latitude   (latitude) float32 3kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0\n",
+       "  * longitude  (longitude) float32 6kB 0.0 0.25 0.5001 ... 359.3 359.5 359.8\n",
+       "Data variables:\n",
+       "    asn        (time, latitude, longitude) float32 2GB cubed.Array<chunksize=(31, 721, 1440)>
" + ], + "text/plain": [ + " Size: 2GB\n", + "Dimensions: (time: 372, latitude: 721, longitude: 1440)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 3kB 1979-01-01 ... 1979-01-16T11:00:00\n", + " * latitude (latitude) float32 3kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0\n", + " * longitude (longitude) float32 6kB 0.0 0.25 0.5001 ... 359.3 359.5 359.8\n", + "Data variables:\n", + " asn (time, latitude, longitude) float32 2GB cubed.Array" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = xr.Dataset(\n", + " {\n", + " \"asn\": (['time', 'latitude', 'longitude'], asn),\n", + " },\n", + " coords={'time': time[:t_length], 'latitude': lat, 'longitude': lon},\n", + ")\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1065a3d0", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "num tasks: 13\n", + "max projected memory: 1.0 GB\n", + "total nbytes written: 1.5 GB\n", + "optimized: True\n", + "\n", + "\n", + "op-001\n", + "\n", + "\n", + "op-001\n", + "random\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-001\n", + "\n", + "\n", + "array-001\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-001->array-001\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-004\n", + "\n", + "\n", + "op-004\n", + "astype\n", + "tasks: 12\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-001->op-004\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-002\n", + "\n", + "\n", + "op-002\n", + "random\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-002\n", + "\n", + "\n", + "array-002\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-002->array-002\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-002->op-004\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-004\n", + "\n", + "\n", + "array-004\n", + "asn\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-004->array-004\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays\n", + 
"\n", + "\n", + "create-arrays\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays->arrays\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.asn.data.visualize(show_hidden=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "283d3c71-5271-499d-8743-ff489e4c5b90", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 66MB\n",
+       "Dimensions:    (dayofyear: 16, latitude: 721, longitude: 1440)\n",
+       "Coordinates:\n",
+       "  * latitude   (latitude) float32 3kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0\n",
+       "  * longitude  (longitude) float32 6kB 0.0 0.25 0.5001 ... 359.3 359.5 359.8\n",
+       "  * dayofyear  (dayofyear) int64 128B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    asn        (dayofyear, latitude, longitude) float32 66MB cubed.Array<chunksize=(16, 721, 1440)>
" + ], + "text/plain": [ + " Size: 66MB\n", + "Dimensions: (dayofyear: 16, latitude: 721, longitude: 1440)\n", + "Coordinates:\n", + " * latitude (latitude) float32 3kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0\n", + " * longitude (longitude) float32 6kB 0.0 0.25 0.5001 ... 359.3 359.5 359.8\n", + " * dayofyear (dayofyear) int64 128B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n", + "Data variables:\n", + " asn (dayofyear, latitude, longitude) float32 66MB cubed.Array" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Note: we actually want skipna=True, but this isn't implemented in xarray yet\n", + "# see https://github.com/pydata/xarray/issues/7243\n", + "mean = ds.groupby(\"time.dayofyear\").mean(method=\"map-reduce\", skipna=False)\n", + "mean" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "fcfc18e3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 2GB\n",
+       "Dimensions:    (time: 372, latitude: 721, longitude: 1440)\n",
+       "Coordinates:\n",
+       "  * time       (time) datetime64[ns] 3kB 1979-01-01 ... 1979-01-16T11:00:00\n",
+       "  * latitude   (latitude) float32 3kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0\n",
+       "  * longitude  (longitude) float32 6kB 0.0 0.25 0.5001 ... 359.3 359.5 359.8\n",
+       "    dayofyear  (time) int64 3kB 1 1 1 1 1 1 1 1 1 ... 16 16 16 16 16 16 16 16 16\n",
+       "Data variables:\n",
+       "    asn        (time, latitude, longitude) float32 2GB cubed.Array<chunksize=(16, 721, 1440)>
" + ], + "text/plain": [ + " Size: 2GB\n", + "Dimensions: (time: 372, latitude: 721, longitude: 1440)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 3kB 1979-01-01 ... 1979-01-16T11:00:00\n", + " * latitude (latitude) float32 3kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0\n", + " * longitude (longitude) float32 6kB 0.0 0.25 0.5001 ... 359.3 359.5 359.8\n", + " dayofyear (time) int64 3kB 1 1 1 1 1 1 1 1 1 ... 16 16 16 16 16 16 16 16 16\n", + "Data variables:\n", + " asn (time, latitude, longitude) float32 2GB cubed.Array" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anomaly = ds - mean.sel(dayofyear=ds.time.dt.dayofyear)\n", + "anomaly" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "33e5b223", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "num tasks: 100\n", + "max projected memory: 3.0 GB\n", + "total nbytes written: 14.0 GB\n", + "optimized: True\n", + "\n", + "\n", + "op-004\n", + "\n", + "\n", + "op-004\n", + "astype\n", + "tasks: 12\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-004\n", + "\n", + "\n", + "array-004\n", + "asn\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-004->array-004\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-029\n", + "\n", + "\n", + "op-029\n", + "__sub__\n", + "tasks: 3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-004->op-029\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-029\n", + "\n", + "\n", + "array-029\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-029->array-029\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-030\n", + "\n", + "\n", + "op-030\n", + "__sub__\n", + "tasks: 24\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-029->op-030\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-012\n", + "\n", + "\n", + "op-012\n", + "groupby_reduction\n", + "tasks: 12\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-012\n", + "\n", + "\n", + "array-012\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "op-012->array-012\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-013\n", + "\n", + "\n", + "op-013\n", + "groupby_reduction\n", + "tasks: 6\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-012->op-013\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-013\n", + "\n", + "\n", + "array-013\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-013->array-013\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-014\n", + "\n", + "\n", + "op-014\n", + "groupby_reduction\n", + "tasks: 3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-013->op-014\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-014\n", + "\n", + "\n", + "array-014\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-014->array-014\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-015\n", + "\n", + "\n", + "op-015\n", + "groupby_reduction\n", + "tasks: 2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-014->op-015\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-015\n", + "\n", + "\n", + "array-015\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-015->array-015\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-019\n", + "\n", + "\n", + "op-019\n", + "permute_dims\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-015->op-019\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-019\n", + "\n", + "\n", + "array-019\n", + "value\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-019->array-019\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-022\n", + "\n", + "\n", + "op-022\n", + "__getitem__\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-019->op-022\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-022\n", + "\n", + "\n", + "array-022\n", + "value\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-022->array-022\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-025\n", + "\n", + "\n", + "op-025\n", + "__getitem__\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-022->op-025\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "array-025\n", + "\n", + "\n", + "array-025\n", + "value\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-025->array-025\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-028\n", + "\n", + "\n", + "op-028\n", + "__getitem__\n", + "tasks: 24\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-025->op-028\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-028\n", + "\n", + "\n", + "array-028\n", + "other_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-028->array-028\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-028->op-030\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-030\n", + "\n", + "\n", + "array-030\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-030->array-030\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays\n", + "\n", + "\n", + "create-arrays\n", + "tasks: 11\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays->arrays\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cubed.visualize(\n", + " *(anomaly[var].data for var in [\"asn\"]),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c7d3e127", + "metadata": {}, + "outputs": [], + "source": [ + "! rm -rf anomaly.zarr" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "b345817e", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b6babab972b4483aa5c88ee6c29fe709", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anomaly.to_zarr(\"anomaly.zarr\", chunkmanager_store_kwargs=dict(callbacks=[RichProgressBar()]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75dc8c67", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/pangeo-tem.ipynb b/examples/pangeo-tem.ipynb deleted file mode 100644 index 222b2f4d..00000000 --- a/examples/pangeo-tem.ipynb +++ /dev/null @@ -1,2711 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "67c8663d", - "metadata": {}, - "source": [ - "# Pangeo TEM Workload\n", - "\n", - "This is a notebook for exploring a simplified version of the example in https://github.com/dcherian/ncar-challenge-suite/blob/main/tem.ipynb.\n", - "\n", - "See https://github.com/tomwhite/cubed/issues/145" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "0f1e3c3c", - "metadata": {}, - "outputs": [], - "source": [ - "import cubed\n", - "import cubed.array_api as xp\n", - "import cubed.random\n", - "\n", - "import pandas as pd\n", - "import xarray as xr" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "8e1f099a", - "metadata": {}, - "outputs": [], - "source": [ - "dims = (\"time\", \"level\", \"lat\", \"lon\")\n", - "# 1 is number of years, adjust to make bigger, \n", - "# full dataset is 60-ish years.\n", - "# 
shape = (1 * 365 * 24, 37, 72, 1440)\n", - "shape = (1 * 31 * 24, 37, 72, 144) # make smaller while exploring\n", - "chunks = (24, 15, -1, -1)\n", - "\n", - "spec = cubed.Spec(max_mem=1_000_000_000)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "b4c7ff6a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:  (time: 744, level: 37, lat: 72, lon: 144)\n",
-       "Coordinates:\n",
-       "  * time     (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n",
-       "Dimensions without coordinates: level, lat, lon\n",
-       "Data variables:\n",
-       "    U        (time, level, lat, lon) float64 ...\n",
-       "    V        (time, level, lat, lon) float64 ...\n",
-       "    W        (time, level, lat, lon) float64 ...\n",
-       "    T        (time, level, lat, lon) float64 ...
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 744, level: 37, lat: 72, lon: 144)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n", - "Dimensions without coordinates: level, lat, lon\n", - "Data variables:\n", - " U (time, level, lat, lon) float64 ...\n", - " V (time, level, lat, lon) float64 ...\n", - " W (time, level, lat, lon) float64 ...\n", - " T (time, level, lat, lon) float64 ..." - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds = xr.Dataset(\n", - " {\n", - " \"U\": (dims, cubed.random.random(shape, chunks=chunks, spec=spec)),\n", - " \"V\": (dims, cubed.random.random(shape, chunks=chunks, spec=spec)),\n", - " \"W\": (dims, cubed.random.random(shape, chunks=chunks, spec=spec)),\n", - " \"T\": (dims, cubed.random.random(shape, chunks=chunks, spec=spec)),\n", - " },\n", - " coords={\"time\": pd.date_range(\"2001-01-01\", periods=shape[0], freq=\"H\")},\n", - ")\n", - "ds" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "dd80eff1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.DataArray 'U' (time: 744, level: 37, lat: 72, lon: 144)>\n",
-       "cubed.Array<array-004, shape=(744, 37, 72, 144), dtype=float64, chunks=((24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24), (15, 15, 7), (72,), (144,))>\n",
-       "Coordinates:\n",
-       "  * time     (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n",
-       "Dimensions without coordinates: level, lat, lon
" - ], - "text/plain": [ - "\n", - "cubed.Array\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n", - "Dimensions without coordinates: level, lat, lon" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds.U" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "f5023814", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:  (time: 744, level: 37, lat: 72)\n",
-       "Coordinates:\n",
-       "  * time     (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n",
-       "Dimensions without coordinates: level, lat\n",
-       "Data variables:\n",
-       "    U        (time, level, lat) float64 ...\n",
-       "    V        (time, level, lat) float64 ...\n",
-       "    W        (time, level, lat) float64 ...\n",
-       "    T        (time, level, lat) float64 ...
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 744, level: 37, lat: 72)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n", - "Dimensions without coordinates: level, lat\n", - "Data variables:\n", - " U (time, level, lat) float64 ...\n", - " V (time, level, lat) float64 ...\n", - " W (time, level, lat) float64 ...\n", - " T (time, level, lat) float64 ..." - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Note: we actually want skipna=True, but this isn't implemented in xarray yet\n", - "# see https://github.com/pydata/xarray/issues/7243\n", - "zonal_means = ds.mean(\"lon\", skipna=False)\n", - "zonal_means" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "02788a25", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:  (time: 744, level: 37, lat: 72, lon: 144)\n",
-       "Coordinates:\n",
-       "  * time     (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n",
-       "Dimensions without coordinates: level, lat, lon\n",
-       "Data variables:\n",
-       "    U        (time, level, lat, lon) float64 ...\n",
-       "    V        (time, level, lat, lon) float64 ...\n",
-       "    W        (time, level, lat, lon) float64 ...\n",
-       "    T        (time, level, lat, lon) float64 ...\n",
-       "    uv       (time, level, lat, lon) float64 ...\n",
-       "    vt       (time, level, lat, lon) float64 ...\n",
-       "    uw       (time, level, lat, lon) float64 ...
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 744, level: 37, lat: 72, lon: 144)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n", - "Dimensions without coordinates: level, lat, lon\n", - "Data variables:\n", - " U (time, level, lat, lon) float64 ...\n", - " V (time, level, lat, lon) float64 ...\n", - " W (time, level, lat, lon) float64 ...\n", - " T (time, level, lat, lon) float64 ...\n", - " uv (time, level, lat, lon) float64 ...\n", - " vt (time, level, lat, lon) float64 ...\n", - " uw (time, level, lat, lon) float64 ..." - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "anomaly = ds - zonal_means\n", - "anomaly['uv'] = anomaly.U*anomaly.V\n", - "anomaly['vt'] = anomaly.V*anomaly.T\n", - "anomaly['uw'] = anomaly.U*anomaly.W\n", - "anomaly" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "1716e1c4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:  (time: 744, level: 37, lat: 72)\n",
-       "Coordinates:\n",
-       "  * time     (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n",
-       "Dimensions without coordinates: level, lat\n",
-       "Data variables:\n",
-       "    U        (time, level, lat) float64 ...\n",
-       "    V        (time, level, lat) float64 ...\n",
-       "    W        (time, level, lat) float64 ...\n",
-       "    T        (time, level, lat) float64 ...\n",
-       "    uv       (time, level, lat) float64 ...\n",
-       "    vt       (time, level, lat) float64 ...\n",
-       "    uw       (time, level, lat) float64 ...
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 744, level: 37, lat: 72)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n", - "Dimensions without coordinates: level, lat\n", - "Data variables:\n", - " U (time, level, lat) float64 ...\n", - " V (time, level, lat) float64 ...\n", - " W (time, level, lat) float64 ...\n", - " T (time, level, lat) float64 ...\n", - " uv (time, level, lat) float64 ...\n", - " vt (time, level, lat) float64 ...\n", - " uw (time, level, lat) float64 ..." - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "temdiags = zonal_means.merge(anomaly[['uv','vt','uw']].mean(\"lon\", skipna=False)) # actually want skipna=True\n", - "temdiags" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "9250bbfd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:    (dayofyear: 31, level: 37, lat: 72)\n",
-       "Coordinates:\n",
-       "  * dayofyear  (dayofyear) int64 1 2 3 4 5 6 7 8 9 ... 24 25 26 27 28 29 30 31\n",
-       "Dimensions without coordinates: level, lat\n",
-       "Data variables:\n",
-       "    U          (dayofyear, level, lat) float64 ...\n",
-       "    V          (dayofyear, level, lat) float64 ...\n",
-       "    W          (dayofyear, level, lat) float64 ...\n",
-       "    T          (dayofyear, level, lat) float64 ...\n",
-       "    uv         (dayofyear, level, lat) float64 ...\n",
-       "    vt         (dayofyear, level, lat) float64 ...\n",
-       "    uw         (dayofyear, level, lat) float64 ...
" - ], - "text/plain": [ - "\n", - "Dimensions: (dayofyear: 31, level: 37, lat: 72)\n", - "Coordinates:\n", - " * dayofyear (dayofyear) int64 1 2 3 4 5 6 7 8 9 ... 24 25 26 27 28 29 30 31\n", - "Dimensions without coordinates: level, lat\n", - "Data variables:\n", - " U (dayofyear, level, lat) float64 ...\n", - " V (dayofyear, level, lat) float64 ...\n", - " W (dayofyear, level, lat) float64 ...\n", - " T (dayofyear, level, lat) float64 ...\n", - " uv (dayofyear, level, lat) float64 ...\n", - " vt (dayofyear, level, lat) float64 ...\n", - " uw (dayofyear, level, lat) float64 ..." - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "temdiags_mean = temdiags.groupby('time.dayofyear').mean(skipna=False)\n", - "temdiags_mean" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "8ed7be49", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[[ 1.92543754e-03, 8.62170647e-04, 1.67535136e-03, ...,\n", - " -2.50751881e-03, 2.46263741e-03, -4.70656500e-04],\n", - " [-9.69165572e-04, -9.19808903e-04, -8.64318054e-04, ...,\n", - " 9.10300914e-04, 7.68390560e-04, -9.60456448e-04],\n", - " [ 6.77793313e-04, 2.03099229e-03, 1.44811866e-03, ...,\n", - " 2.01528240e-03, -1.03220836e-03, 1.94811893e-03],\n", - " ...,\n", - " [ 2.14917057e-03, 1.47596143e-04, -7.10794437e-04, ...,\n", - " -2.50238921e-04, -1.38891106e-03, 2.17674787e-03],\n", - " [ 1.19509492e-03, -7.62831655e-04, -1.08836927e-03, ...,\n", - " -1.45508192e-03, 7.55372268e-05, 2.78225531e-04],\n", - " [-4.35496517e-03, 1.28230145e-03, 4.80595941e-04, ...,\n", - " 2.88439879e-04, 2.96332222e-03, 7.91863232e-04]],\n", - "\n", - " [[ 8.82986835e-04, 1.54489743e-03, -1.49333807e-03, ...,\n", - " 5.29418813e-04, 1.63187013e-03, -2.07224193e-04],\n", - " [ 9.55443785e-04, 2.54465546e-03, -9.19900311e-04, ...,\n", - " -1.39288060e-03, 2.21651069e-03, 1.47995069e-04],\n", - " [-7.01814903e-04, -1.68102991e-03, -1.29477818e-03, 
...,\n", - " 3.13384138e-04, -1.30865629e-03, 1.77824569e-03],\n", - " ...,\n", - " [-3.52397366e-03, -6.79305040e-04, -2.14349293e-03, ...,\n", - " 1.87527147e-03, -3.25520479e-05, -2.15899612e-05],\n", - " [ 1.63726090e-03, -5.77533746e-04, -7.36883128e-05, ...,\n", - " -1.39907308e-03, -1.52952078e-04, 8.81376198e-04],\n", - " [-2.35014413e-03, -5.04083084e-04, -4.77540340e-04, ...,\n", - " 1.23444797e-03, 5.71524836e-04, -1.06911029e-03]],\n", - "\n", - " [[-6.76046164e-04, 1.00822320e-03, -2.24455923e-03, ...,\n", - " -1.10717774e-03, -2.00245065e-03, -7.11825695e-04],\n", - " [ 6.60666716e-04, 8.47521507e-04, -2.09004663e-03, ...,\n", - " 1.82431769e-04, 2.98712804e-03, 1.81179336e-04],\n", - " [ 8.90949546e-04, 2.03368887e-04, -8.75401454e-04, ...,\n", - " 1.44188907e-03, 7.32184069e-04, -3.16988423e-04],\n", - " ...,\n", - " [ 7.10740482e-04, 1.48801481e-03, 1.15863589e-03, ...,\n", - " -2.00549727e-03, -3.77421384e-05, 9.23212283e-04],\n", - " [ 9.96007282e-04, -1.63505959e-05, 6.54791612e-04, ...,\n", - " 6.17042736e-04, 1.29283052e-03, -6.03389263e-04],\n", - " [ 8.24574209e-04, 1.88788382e-03, 3.95372441e-04, ...,\n", - " -9.24382835e-04, -1.27852521e-03, -2.25838836e-05]],\n", - "\n", - " ...,\n", - "\n", - " [[-1.72225771e-03, 6.66428002e-04, -2.13037730e-03, ...,\n", - " -2.67176501e-03, -4.02366613e-04, 7.35956183e-04],\n", - " [ 3.39723093e-04, -1.29779524e-03, -1.23399194e-03, ...,\n", - " -1.78263750e-03, 1.69794295e-03, -9.93540772e-05],\n", - " [-1.28624363e-03, 3.66594558e-04, -1.50847381e-03, ...,\n", - " -5.46152652e-05, -8.04215342e-04, -7.24984607e-04],\n", - " ...,\n", - " [ 4.10706264e-04, -4.17386006e-04, 2.31336418e-04, ...,\n", - " 6.55327474e-04, 4.14411516e-04, 3.24550774e-03],\n", - " [ 1.95525139e-03, -2.35227380e-03, -9.99084542e-04, ...,\n", - " 1.62829132e-03, 9.69095656e-04, -1.49439057e-03],\n", - " [ 1.82066333e-03, 6.53274368e-04, -1.86011761e-03, ...,\n", - " 1.25957309e-03, -6.88163769e-04, 3.47627300e-04]],\n", - "\n", - 
" [[-2.32699809e-04, -2.00950469e-03, 1.18200895e-03, ...,\n", - " 2.91200625e-03, -1.35686054e-03, -1.54041751e-03],\n", - " [-4.38585671e-04, -1.71412560e-04, 1.43469667e-03, ...,\n", - " -8.00626033e-05, 7.56622863e-04, 1.20081303e-03],\n", - " [-1.53841374e-03, -2.13838395e-04, -2.68519284e-03, ...,\n", - " 4.40948980e-05, 1.47521752e-03, 2.74396257e-04],\n", - " ...,\n", - " [-6.75613724e-04, 7.10640075e-05, -8.35952594e-04, ...,\n", - " -9.02927942e-04, -2.46258759e-03, -9.94494523e-04],\n", - " [ 2.02874360e-03, -1.89183963e-03, 5.88172912e-05, ...,\n", - " -1.08450790e-03, -7.61052577e-04, 1.69481076e-03],\n", - " [ 3.03819565e-04, 1.49770731e-03, 1.46578274e-03, ...,\n", - " 1.05137475e-03, 2.67330224e-03, -1.74462858e-03]],\n", - "\n", - " [[-1.09142934e-03, 2.22897568e-03, 1.65366229e-03, ...,\n", - " 5.56856096e-04, 1.43595299e-03, 3.46458755e-04],\n", - " [-1.87899240e-03, -9.73849683e-04, -1.16765643e-03, ...,\n", - " 1.14138265e-03, 2.20218452e-04, -3.09016418e-06],\n", - " [ 2.04292351e-03, -5.14150022e-04, 7.48065866e-05, ...,\n", - " 1.31778954e-03, 1.27937819e-03, -1.78836143e-03],\n", - " ...,\n", - " [ 4.24524791e-04, -2.19092597e-03, 1.05686194e-03, ...,\n", - " 1.92356567e-03, 1.24714194e-04, -1.83935611e-03],\n", - " [ 1.35446115e-03, -1.86572888e-05, 8.90174177e-05, ...,\n", - " -7.56328452e-04, 7.03265920e-04, -7.90998972e-04],\n", - " [-1.30668621e-03, -3.58709712e-04, 8.04285067e-04, ...,\n", - " -9.88911246e-04, -4.51279163e-04, -9.39181905e-04]]])" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "temdiags_mean[\"uv\"].values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6e152ba3", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - 
"file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/pangeo-vorticity.ipynb b/examples/pangeo-vorticity.ipynb deleted file mode 100644 index bd776418..00000000 --- a/examples/pangeo-vorticity.ipynb +++ /dev/null @@ -1,1318 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "f5a954d5", - "metadata": {}, - "source": [ - "# Pangeo Vorticity Workload\n", - "\n", - "This is a notebook for exploring a simplified version of the example in https://github.com/pangeo-data/distributed-array-examples/issues/1.\n", - "\n", - "In particular, it is the code from https://github.com/coiled/coiled-runtime/issues/174" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "8d745a7b", - "metadata": {}, - "outputs": [], - "source": [ - "import cubed.array_api as xp\n", - "import cubed.random\n", - "from cubed.extensions.tqdm import TqdmProgressBar" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "1bff01e3", - "metadata": {}, - "outputs": [], - "source": [ - "a = cubed.random.random((1000, 900, 800), chunks=100)\n", - "b = cubed.random.random((1000, 900, 800), chunks=100)\n", - "x = cubed.random.random((900, 800), chunks=100)\n", - "y = cubed.random.random((900, 800), chunks=100)\n", - "\n", - "result = a[1:] * x + b[1:] * y\n", - "result = xp.mean(result)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "f8ca9389", - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "num tasks: 5192\n", - "max required memory: 100.0 MB\n", - "\n", - "\n", - "array-017\n", - "\n", - "\n", - "array-017\n", - "__getitem__ \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-020\n", - "\n", - "\n", - "array-020\n", - "__getitem__ (bw)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-017->array-020\n", - "\n", - 
"\n", - "\n", - "\n", - "\n", - "array-018\n", - "\n", - "\n", - "array-018\n", - "__getitem__ \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-019\n", - "\n", - "\n", - "array-019\n", - "__getitem__ (rc)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-018->array-019\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-019->array-020\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-001\n", - "\n", - "\n", - "array-001\n", - "random \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-004\n", - "\n", - "\n", - "array-004\n", - "random (bw)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-001->array-004\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-002\n", - "\n", - "\n", - "array-002\n", - "random \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-003\n", - "\n", - "\n", - "array-003\n", - "random (rc)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-002->array-003\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-003->array-004\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-004->array-020\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-021\n", - "\n", - "\n", - "array-021\n", - "__mul__ (bw)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-020->array-021\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-009\n", - "\n", - "\n", - "array-009\n", - "random \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-012\n", - "\n", - "\n", - "array-012\n", - "random (bw)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-009->array-012\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-010\n", - "\n", - "\n", - "array-010\n", - "random \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-011\n", - "\n", - "\n", - "array-011\n", - "random (rc)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-010->array-011\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-011->array-012\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-012->array-021\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-028\n", - "\n", - 
"\n", - "array-028\n", - "mean (bw)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-021->array-028\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-022\n", - "\n", - "\n", - "array-022\n", - "__getitem__ \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-025\n", - "\n", - "\n", - "array-025\n", - "__getitem__ (bw)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-022->array-025\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-023\n", - "\n", - "\n", - "array-023\n", - "__getitem__ \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-024\n", - "\n", - "\n", - "array-024\n", - "__getitem__ (rc)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-023->array-024\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-024->array-025\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-005\n", - "\n", - "\n", - "array-005\n", - "random \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-008\n", - "\n", - "\n", - "array-008\n", - "random (bw)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-005->array-008\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-006\n", - "\n", - "\n", - "array-006\n", - "random \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-007\n", - "\n", - "\n", - "array-007\n", - "random (rc)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-006->array-007\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-007->array-008\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-008->array-025\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-026\n", - "\n", - "\n", - "array-026\n", - "__mul__ (bw)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-025->array-026\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-013\n", - "\n", - "\n", - "array-013\n", - "random \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-016\n", - "\n", - "\n", - "array-016\n", - "random (bw)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-013->array-016\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-014\n", - "\n", - "\n", - 
"array-014\n", - "random \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-015\n", - "\n", - "\n", - "array-015\n", - "random (rc)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-014->array-015\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-015->array-016\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-016->array-026\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-026->array-028\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-029\n", - "\n", - "\n", - "array-029\n", - "mean (rc)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-028->array-029\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-032\n", - "\n", - "\n", - "array-032\n", - "mean (bw)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "array-029->array-032\n", - "\n", - "\n", - "\n", - "\n", - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "result.visualize(\"pangeo-vorticity\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "0b9b8905", - "metadata": {}, - "outputs": [ - { - "data": { - "application/json": { - "ascii": false, - "bar_format": null, - "colour": null, - "elapsed": 0.00949406623840332, - "initial": 0, - "n": 0, - "ncols": null, - "nrows": 47, - "postfix": null, - "prefix": "array-019", - "rate": null, - "total": 1, - "unit": "it", - "unit_divisor": 1000, - "unit_scale": false - }, - "application/vnd.jupyter.widget-view+json": { - "model_id": "8f268522a58240a180abd2d8f534e021", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "array-019: 0%| | 0/1 [00:00 - - - - - - -num tasks: 5192 -max required memory: 100.0 MB - - -array-017 - - -array-017 -__getitem__ - - - - - -array-020 - - -array-020 -__getitem__ (bw) - - - - - -array-017->array-020 - - - - - -array-018 - - -array-018 -__getitem__ - - - - - -array-019 - - -array-019 -__getitem__ (rc) - - - - - -array-018->array-019 - - - - - -array-019->array-020 - - - - - -array-001 - - 
-array-001 -random - - - - - -array-004 - - -array-004 -random (bw) - - - - - -array-001->array-004 - - - - - -array-002 - - -array-002 -random - - - - - -array-003 - - -array-003 -random (rc) - - - - - -array-002->array-003 - - - - - -array-003->array-004 - - - - - -array-004->array-020 - - - - - -array-021 - - -array-021 -__mul__ (bw) - - - - - -array-020->array-021 - - - - - -array-009 - - -array-009 -random - - - - - -array-012 - - -array-012 -random (bw) - - - - - -array-009->array-012 - - - - - -array-010 - - -array-010 -random - - - - - -array-011 - - -array-011 -random (rc) - - - - - -array-010->array-011 - - - - - -array-011->array-012 - - - - - -array-012->array-021 - - - - - -array-028 - - -array-028 -mean (bw) - - - - - -array-021->array-028 - - - - - -array-022 - - -array-022 -__getitem__ - - - - - -array-025 - - -array-025 -__getitem__ (bw) - - - - - -array-022->array-025 - - - - - -array-023 - - -array-023 -__getitem__ - - - - - -array-024 - - -array-024 -__getitem__ (rc) - - - - - -array-023->array-024 - - - - - -array-024->array-025 - - - - - -array-005 - - -array-005 -random - - - - - -array-008 - - -array-008 -random (bw) - - - - - -array-005->array-008 - - - - - -array-006 - - -array-006 -random - - - - - -array-007 - - -array-007 -random (rc) - - - - - -array-006->array-007 - - - - - -array-007->array-008 - - - - - -array-008->array-025 - - - - - -array-026 - - -array-026 -__mul__ (bw) - - - - - -array-025->array-026 - - - - - -array-013 - - -array-013 -random - - - - - -array-016 - - -array-016 -random (bw) - - - - - -array-013->array-016 - - - - - -array-014 - - -array-014 -random - - - - - -array-015 - - -array-015 -random (rc) - - - - - -array-014->array-015 - - - - - -array-015->array-016 - - - - - -array-016->array-026 - - - - - -array-026->array-028 - - - - - -array-029 - - -array-029 -mean (rc) - - - - - -array-028->array-029 - - - - - -array-032 - - -array-032 -mean (bw) - - - - - -array-029->array-032 - - - - -