diff --git a/.gitignore b/.gitignore index edbbbd10f..6e99b9442 100644 --- a/.gitignore +++ b/.gitignore @@ -136,3 +136,4 @@ dmypy.json # Cubed .lithops_config +examples/cubed.svg diff --git a/cubed/extensions/rich.py b/cubed/extensions/rich.py index 320663913..231b34feb 100644 --- a/cubed/extensions/rich.py +++ b/cubed/extensions/rich.py @@ -58,7 +58,9 @@ def on_operation_start(self, event): self.progress.start_task(self.progress_tasks[event.name]) def on_task_end(self, event): - self.progress.update(self.progress_tasks[event.name], advance=event.num_tasks) + self.progress.update( + self.progress_tasks[event.name], advance=event.num_tasks, refresh=True + ) class SpinnerWhenRunningColumn(SpinnerColumn): diff --git a/examples/cubed.svg b/examples/cubed.svg deleted file mode 100644 index 313f6a51f..000000000 --- a/examples/cubed.svg +++ /dev/null @@ -1,56 +0,0 @@ - - - - - diff --git a/examples/pangeo-1-vorticity.ipynb b/examples/pangeo-1-vorticity.ipynb new file mode 100644 index 000000000..72bf2aa7f --- /dev/null +++ b/examples/pangeo-1-vorticity.ipynb @@ -0,0 +1,4140 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bdf4365e", + "metadata": {}, + "source": [ + "# Pangeo Vorticity Workload\n", + "\n", + "This is a notebook for exploring a simplified version of the example in https://github.com/pangeo-data/distributed-array-examples/issues/1.\n", + "\n", + "See also Tom Nicholas's [notebook](https://gist.github.com/TomNicholas/8366c917349b647d87860a20a257a3fb#file-benchmark-vorticity-ipynb) exploring this problem. Note that the code below for `diff` is based on the Dask version." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b8386400", + "metadata": {}, + "outputs": [], + "source": [ + "import cubed\n", + "import cubed.random\n", + "from cubed.extensions.rich import RichProgressBar\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "id": "9c1953a4", + "metadata": {}, + "source": [ + "Initialization parameters for the workload" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f4b61ed9", + "metadata": {}, + "outputs": [], + "source": [ + "t_length = 50\n", + "spec = cubed.Spec(allowed_mem=\"2GB\")" + ] + }, + { + "cell_type": "markdown", + "id": "48d462ed", + "metadata": {}, + "source": [ + "Create random data stored in Zarr." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "11782285", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "40a37f546c3d4752a92531e87aeb4692", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n" + ], + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "u = cubed.random.random((t_length, 1, 987, 1920), chunks=(10, 1, -1, -1), spec=spec)\n", + "v = cubed.random.random((t_length, 1, 987, 1920), chunks=(10, 1, -1, -1), spec=spec)\n", + "dx = cubed.random.random((1, 987, 1920), chunks=(1, -1, -1), spec=spec)\n", + "dy = cubed.random.random((1, 987, 1920), chunks=(1, -1, -1), spec=spec)\n", + "arrays = [u, v, dx, dy]\n", + "paths = [\n", + " f\"{spec.work_dir}/u_{t_length}.zarr\",\n", + " f\"{spec.work_dir}/v_{t_length}.zarr\",\n", + " f\"{spec.work_dir}/dx_{t_length}.zarr\",\n", + " f\"{spec.work_dir}/dy_{t_length}.zarr\",\n", + "]\n", + "cubed.store(arrays, paths, compute_arrays_in_parallel=True, callbacks=[RichProgressBar()])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "58bbcfde", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
<xarray.Dataset> Size: 2GB\n", + "Dimensions: (time: 50, face: 1, j: 987, i: 1920)\n", + "Dimensions without coordinates: time, face, j, i\n", + "Data variables:\n", + " U (time, face, j, i) float64 758MB cubed.Array<chunksize=(10, 1, 987, 1920)>\n", + " V (time, face, j, i) float64 758MB cubed.Array<chunksize=(10, 1, 987, 1920)>\n", + " dx (face, j, i) float64 15MB cubed.Array<chunksize=(1, 987, 1920)>\n", + " dy (face, j, i) float64 15MB cubed.Array<chunksize=(1, 987, 1920)>
<xarray.DataArray (face: 1, j: 987, i: 1920)> Size: 15MB\n", + "cubed.Array<array-061, shape=(1, 987, 1920), dtype=float64, chunks=((1,), (987,), (1920,))>\n", + "Dimensions without coordinates: face, j, i
\n", + "\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
<xarray.DataArray (face: 1, j: 987, i: 1920)> Size: 15MB\n", + "array([[[-0.00935039, 0.05180967, 0.02368589, ..., 0.09296773,\n", + " -0.02251309, 0.00811106],\n", + " [-0.03833913, 0.05900023, 0.01459681, ..., -0.01437545,\n", + " 0.01901828, 0.02181335],\n", + " [ 0.00342973, 0.01948303, 0.01160908, ..., 0.02988825,\n", + " -0.0061075 , -0.01061161],\n", + " ...,\n", + " [ 0.03900664, 0.04362473, 0.00623131, ..., -0.03877337,\n", + " 0.00551559, -0.0664945 ],\n", + " [-0.01084767, -0.00746532, 0.04994078, ..., 0.00983424,\n", + " -0.00747178, -0.01885188],\n", + " [ 0.00544092, 0.05520928, -0.0255991 , ..., 0.02842916,\n", + " 0.0289831 , 0.01773723]]])\n", + "Dimensions without coordinates: face, j, i
<xarray.DataArray (face: 1, j: 987, i: 1920)> Size: 15MB\n", + "cubed.Array<array-082, shape=(1, 987, 1920), dtype=float64, chunks=((1,), (987,), (1920,))>\n", + "Dimensions without coordinates: face, j, i
\n", + "\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
<xarray.DataArray (face: 1, j: 987, i: 1920)> Size: 15MB\n", + "array([[[-0.00935039, 0.05180967, 0.02368589, ..., 0.09296773,\n", + " -0.02251309, 0.00811106],\n", + " [-0.03833913, 0.05900023, 0.01459681, ..., -0.01437545,\n", + " 0.01901828, 0.02181335],\n", + " [ 0.00342973, 0.01948303, 0.01160908, ..., 0.02988825,\n", + " -0.0061075 , -0.01061161],\n", + " ...,\n", + " [ 0.03900664, 0.04362473, 0.00623131, ..., -0.03877337,\n", + " 0.00551559, -0.0664945 ],\n", + " [-0.01084767, -0.00746532, 0.04994078, ..., 0.00983424,\n", + " -0.00747178, -0.01885188],\n", + " [ 0.00544092, 0.05520928, -0.0255991 , ..., 0.02842916,\n", + " 0.0289831 , 0.01773723]]])\n", + "Dimensions without coordinates: face, j, i
\n", + "\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "u = cubed.random.random((t_length, 1, 987, 1920), chunks=(10, 1, -1, -1), spec=spec)\n", + "v = cubed.random.random((t_length, 1, 987, 1920), chunks=(10, 1, -1, -1), spec=spec)\n", + "arrays = [u, v]\n", + "paths = [f\"{spec.work_dir}/u_{t_length}.zarr\", f\"{spec.work_dir}/v_{t_length}.zarr\"]\n", + "cubed.store(arrays, paths, compute_arrays_in_parallel=True, callbacks=[RichProgressBar()])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "13c4e020-a968-4f4d-bfab-5d5d487dfb30", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
<xarray.Dataset> Size: 2GB\n", + "Dimensions: (time: 50, face: 1, j: 987, i: 1920)\n", + "Dimensions without coordinates: time, face, j, i\n", + "Data variables:\n", + " anom_u (time, face, j, i) float64 758MB cubed.Array<chunksize=(10, 1, 987, 1920)>\n", + " anom_v (time, face, j, i) float64 758MB cubed.Array<chunksize=(10, 1, 987, 1920)>
<xarray.Dataset> Size: 45MB\n", + "Dimensions: (face: 1, j: 987, i: 1920)\n", + "Dimensions without coordinates: face, j, i\n", + "Data variables:\n", + " anom_u (face, j, i) float64 15MB cubed.Array<chunksize=(1, 987, 1920)>\n", + " anom_v (face, j, i) float64 15MB cubed.Array<chunksize=(1, 987, 1920)>\n", + " uv (face, j, i) float64 15MB cubed.Array<chunksize=(1, 987, 1920)>
\n", + "\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
<xarray.Dataset> Size: 45MB\n", + "Dimensions: (face: 1, j: 987, i: 1920)\n", + "Dimensions without coordinates: face, j, i\n", + "Data variables:\n", + " anom_u (face, j, i) float64 15MB 0.339 0.3298 0.3399 ... 0.3794 0.2628\n", + " anom_v (face, j, i) float64 15MB 0.3566 0.3384 0.3565 ... 0.3379 0.3464\n", + " uv (face, j, i) float64 15MB 0.2717 0.2679 0.281 ... 0.2727 0.2272
<xarray.Dataset> Size: 9GB\n", + "Dimensions: (time: 744, level: 37, lat: 72, lon: 144)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 6kB 2001-01-01 ... 2001-01-31T23:00:00\n", + "Dimensions without coordinates: level, lat, lon\n", + "Data variables:\n", + " U (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n", + " V (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n", + " W (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n", + " T (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>
<xarray.Dataset> Size: 63MB\n", + "Dimensions: (time: 744, level: 37, lat: 72)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 6kB 2001-01-01 ... 2001-01-31T23:00:00\n", + "Dimensions without coordinates: level, lat\n", + "Data variables:\n", + " U (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n", + " V (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n", + " W (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n", + " T (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>
<xarray.Dataset> Size: 16GB\n", + "Dimensions: (time: 744, level: 37, lat: 72, lon: 144)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 6kB 2001-01-01 ... 2001-01-31T23:00:00\n", + "Dimensions without coordinates: level, lat, lon\n", + "Data variables:\n", + " U (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n", + " V (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n", + " W (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n", + " T (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n", + " uv (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n", + " vt (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>\n", + " uw (time, level, lat, lon) float64 2GB cubed.Array<chunksize=(24, 1, 72, 144)>
<xarray.Dataset> Size: 111MB\n", + "Dimensions: (time: 744, level: 37, lat: 72)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 6kB 2001-01-01 ... 2001-01-31T23:00:00\n", + "Dimensions without coordinates: level, lat\n", + "Data variables:\n", + " U (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n", + " V (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n", + " W (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n", + " T (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n", + " uv (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n", + " vt (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>\n", + " uw (time, level, lat) float64 16MB cubed.Array<chunksize=(24, 1, 72)>
<xarray.Dataset> Size: 5MB\n", + "Dimensions: (time: 31, level: 37, lat: 72)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 248B 2001-01-01 2001-01-02 ... 2001-01-31\n", + "Dimensions without coordinates: level, lat\n", + "Data variables:\n", + " U (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>\n", + " V (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>\n", + " W (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>\n", + " T (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>\n", + " uv (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>\n", + " vt (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>\n", + " uw (time, level, lat) float64 661kB cubed.Array<chunksize=(1, 1, 72)>
\n", + "\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
<xarray.Dataset> Size: 5MB\n", + "Dimensions: (time: 31, level: 37, lat: 72)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 248B 2001-01-01 2001-01-02 ... 2001-01-31\n", + "Dimensions without coordinates: level, lat\n", + "Data variables:\n", + " U (time, level, lat) float64 661kB 0.502 0.5039 0.504 ... 0.5 0.4956\n", + " V (time, level, lat) float64 661kB 0.4953 0.4931 ... 0.4944 0.4976\n", + " W (time, level, lat) float64 661kB 0.5017 0.4984 ... 0.5083 0.5015\n", + " T (time, level, lat) float64 661kB 0.4949 0.503 ... 0.5024 0.487\n", + " uv (time, level, lat) float64 661kB 0.00129 0.0009764 ... 0.0003541\n", + " vt (time, level, lat) float64 661kB -0.002118 -0.001934 ... 0.001608\n", + " uw (time, level, lat) float64 661kB 0.0007928 -0.00245 ... -0.002159
<xarray.Dataset> Size: 2GB\n", + "Dimensions: (time: 372, latitude: 721, longitude: 1440)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 3kB 1979-01-01 ... 1979-01-16T11:00:00\n", + " * latitude (latitude) float32 3kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0\n", + " * longitude (longitude) float32 6kB 0.0 0.25 0.5001 ... 359.3 359.5 359.8\n", + "Data variables:\n", + " asn (time, latitude, longitude) float32 2GB cubed.Array<chunksize=(31, 721, 1440)>
<xarray.Dataset> Size: 66MB\n", + "Dimensions: (dayofyear: 16, latitude: 721, longitude: 1440)\n", + "Coordinates:\n", + " * latitude (latitude) float32 3kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0\n", + " * longitude (longitude) float32 6kB 0.0 0.25 0.5001 ... 359.3 359.5 359.8\n", + " * dayofyear (dayofyear) int64 128B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n", + "Data variables:\n", + " asn (dayofyear, latitude, longitude) float32 66MB cubed.Array<chunksize=(16, 721, 1440)>
<xarray.Dataset> Size: 2GB\n", + "Dimensions: (time: 372, latitude: 721, longitude: 1440)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 3kB 1979-01-01 ... 1979-01-16T11:00:00\n", + " * latitude (latitude) float32 3kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0\n", + " * longitude (longitude) float32 6kB 0.0 0.25 0.5001 ... 359.3 359.5 359.8\n", + " dayofyear (time) int64 3kB 1 1 1 1 1 1 1 1 1 ... 16 16 16 16 16 16 16 16 16\n", + "Data variables:\n", + " asn (time, latitude, longitude) float32 2GB cubed.Array<chunksize=(16, 721, 1440)>
\n", + "\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
<xarray.Dataset>\n", - "Dimensions: (time: 744, level: 37, lat: 72, lon: 144)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n", - "Dimensions without coordinates: level, lat, lon\n", - "Data variables:\n", - " U (time, level, lat, lon) float64 ...\n", - " V (time, level, lat, lon) float64 ...\n", - " W (time, level, lat, lon) float64 ...\n", - " T (time, level, lat, lon) float64 ...
<xarray.DataArray 'U' (time: 744, level: 37, lat: 72, lon: 144)>\n", - "cubed.Array<array-004, shape=(744, 37, 72, 144), dtype=float64, chunks=((24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24), (15, 15, 7), (72,), (144,))>\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n", - "Dimensions without coordinates: level, lat, lon
<xarray.Dataset>\n", - "Dimensions: (time: 744, level: 37, lat: 72)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n", - "Dimensions without coordinates: level, lat\n", - "Data variables:\n", - " U (time, level, lat) float64 ...\n", - " V (time, level, lat) float64 ...\n", - " W (time, level, lat) float64 ...\n", - " T (time, level, lat) float64 ...
<xarray.Dataset>\n", - "Dimensions: (time: 744, level: 37, lat: 72, lon: 144)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n", - "Dimensions without coordinates: level, lat, lon\n", - "Data variables:\n", - " U (time, level, lat, lon) float64 ...\n", - " V (time, level, lat, lon) float64 ...\n", - " W (time, level, lat, lon) float64 ...\n", - " T (time, level, lat, lon) float64 ...\n", - " uv (time, level, lat, lon) float64 ...\n", - " vt (time, level, lat, lon) float64 ...\n", - " uw (time, level, lat, lon) float64 ...
<xarray.Dataset>\n", - "Dimensions: (time: 744, level: 37, lat: 72)\n", - "Coordinates:\n", - " * time (time) datetime64[ns] 2001-01-01 ... 2001-01-31T23:00:00\n", - "Dimensions without coordinates: level, lat\n", - "Data variables:\n", - " U (time, level, lat) float64 ...\n", - " V (time, level, lat) float64 ...\n", - " W (time, level, lat) float64 ...\n", - " T (time, level, lat) float64 ...\n", - " uv (time, level, lat) float64 ...\n", - " vt (time, level, lat) float64 ...\n", - " uw (time, level, lat) float64 ...
<xarray.Dataset>\n", - "Dimensions: (dayofyear: 31, level: 37, lat: 72)\n", - "Coordinates:\n", - " * dayofyear (dayofyear) int64 1 2 3 4 5 6 7 8 9 ... 24 25 26 27 28 29 30 31\n", - "Dimensions without coordinates: level, lat\n", - "Data variables:\n", - " U (dayofyear, level, lat) float64 ...\n", - " V (dayofyear, level, lat) float64 ...\n", - " W (dayofyear, level, lat) float64 ...\n", - " T (dayofyear, level, lat) float64 ...\n", - " uv (dayofyear, level, lat) float64 ...\n", - " vt (dayofyear, level, lat) float64 ...\n", - " uw (dayofyear, level, lat) float64 ...