{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Dask example\n", "\n", "*This material is adapted from the [Dask tutorial documentation](https://tutorial.dask.org/00_overview.html) and the [Earth and Environmental Data Science](https://earth-env-data-science.github.io/intro.html) website.*\n", "\n", "\n", ":::{note}\n", "A **Dask array** looks and feels a lot like a `NumPy` array.\n", ":::\n", "\n", "However, a dask array doesn't directly hold any data. Instead, it *symbolically* represents the computations needed to generate the data.\n", "\n", "Nothing is actually computed until the actual numerical values are needed. This mode of operation is called **\"lazy\"**; it allows one to build up complex, large calculations symbolically before turning them over to the scheduler for execution.\n", "\n", "If we want to create a `NumPy` array of all ones, we do it like this:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1., 1., 1., ..., 1., 1., 1.],\n", " [1., 1., 1., ..., 1., 1., 1.],\n", " [1., 1., 1., ..., 1., 1., 1.],\n", " ...,\n", " [1., 1., 1., ..., 1., 1., 1.],\n", " [1., 1., 1., ..., 1., 1., 1.],\n", " [1., 1., 1., ..., 1., 1., 1.]])" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "shape = (1000, 4000)\n", "ones_np = np.ones(shape)\n", "ones_np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This array contains exactly 32 MB of data:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "32.0" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ones_np.nbytes / 1e6" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now let's create the same array using dask's array interface." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"Client\n", "
| \n",
"\n",
"Cluster\n", "
| \n",
"
\n",
"
| \n",
"\n", "\n", " | \n", "
<xarray.Dataset>\n", "Dimensions: (lat: 720, lon: 1440, nv: 2, time: 1)\n", "Coordinates:\n", " * time (time) datetime64[ns] 2015-01-01\n", " * lat (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n", " * lon (lon) float32 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9\n", " * nv (nv) int32 0 1\n", "Data variables:\n", " lat_bnds (lat, nv) float32 -89.75 -90.0 -89.5 -89.75 ... 89.5 90.0 89.75\n", " lon_bnds (lon, nv) float32 0.0 0.25 0.25 0.5 ... 359.5 359.8 359.8 360.0\n", " crs int32 -2147483647\n", " adt (time, lat, lon) float64 ...\n", "Attributes: (12/43)\n", " Conventions: CF-1.6\n", " Metadata_Conventions: Unidata Dataset Discovery v1.0\n", " cdm_data_type: Grid\n", " comment: Absolute Dynamic Topography\n", " contact: aviso@altimetry.fr\n", " creator_email: aviso@altimetry.fr\n", " ... ...\n", " summary: Delayed-Time Level-4 sea surface height ...\n", " time_coverage_duration: P1D\n", " time_coverage_end: 2015-01-01T12:00:00Z\n", " time_coverage_resolution: P1D\n", " time_coverage_start: 2014-12-31T12:00:00Z\n", " title: DT merged all satellites Global Ocean Gr...
array(['2015-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
array([-89.875, -89.625, -89.375, ..., 89.375, 89.625, 89.875],\n", " dtype=float32)
array([1.25000e-01, 3.75000e-01, 6.25000e-01, ..., 3.59375e+02, 3.59625e+02,\n", " 3.59875e+02], dtype=float32)
array([0, 1], dtype=int32)
array([[-89.75, -90. ],\n", " [-89.5 , -89.75],\n", " [-89.25, -89.5 ],\n", " ...,\n", " [ 89.5 , 89.25],\n", " [ 89.75, 89.5 ],\n", " [ 90. , 89.75]], dtype=float32)
array([[0.0000e+00, 2.5000e-01],\n", " [2.5000e-01, 5.0000e-01],\n", " [5.0000e-01, 7.5000e-01],\n", " ...,\n", " [3.5925e+02, 3.5950e+02],\n", " [3.5950e+02, 3.5975e+02],\n", " [3.5975e+02, 3.6000e+02]], dtype=float32)
array(-2147483647, dtype=int32)
[1036800 values with dtype=float64]
<xarray.Dataset>\n", "Dimensions: (lat: 720, lon: 1440, nv: 2, time: 365)\n", "Coordinates:\n", " * time (time) datetime64[ns] 2015-01-01 2015-01-02 ... 2015-12-31\n", " * lat (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n", " * lon (lon) float32 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9\n", " * nv (nv) int32 0 1\n", "Data variables:\n", " lat_bnds (time, lat, nv) float32 dask.array<chunksize=(1, 720, 2), meta=np.ndarray>\n", " lon_bnds (time, lon, nv) float32 dask.array<chunksize=(1, 1440, 2), meta=np.ndarray>\n", " crs (time) int32 -2147483647 -2147483647 ... -2147483647 -2147483647\n", " adt (time, lat, lon) float64 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", "Attributes: (12/43)\n", " Conventions: CF-1.6\n", " Metadata_Conventions: Unidata Dataset Discovery v1.0\n", " cdm_data_type: Grid\n", " comment: Absolute Dynamic Topography\n", " contact: aviso@altimetry.fr\n", " creator_email: aviso@altimetry.fr\n", " ... ...\n", " summary: Delayed-Time Level-4 sea surface height ...\n", " time_coverage_duration: P1D\n", " time_coverage_end: 2015-01-01T12:00:00Z\n", " time_coverage_resolution: P1D\n", " time_coverage_start: 2014-12-31T12:00:00Z\n", " title: DT merged all satellites Global Ocean Gr...
array(['2015-01-01T00:00:00.000000000', '2015-01-02T00:00:00.000000000',\n", " '2015-01-03T00:00:00.000000000', ..., '2015-12-29T00:00:00.000000000',\n", " '2015-12-30T00:00:00.000000000', '2015-12-31T00:00:00.000000000'],\n", " dtype='datetime64[ns]')
array([-89.875, -89.625, -89.375, ..., 89.375, 89.625, 89.875],\n", " dtype=float32)
array([1.25000e-01, 3.75000e-01, 6.25000e-01, ..., 3.59375e+02, 3.59625e+02,\n", " 3.59875e+02], dtype=float32)
array([0, 1], dtype=int32)
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
array([-2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", "...\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, 
-2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647,\n", " -2147483647, -2147483647, -2147483647, -2147483647, -2147483647],\n", " dtype=int32)
\n",
"
| \n",
"\n", "\n", " | \n", "
<xarray.DataArray 'adt' (time: 365, lat: 720, lon: 1440)>\n", "dask.array<concatenate, shape=(365, 720, 1440), dtype=float64, chunksize=(1, 720, 1440), chunktype=numpy.ndarray>\n", "Coordinates:\n", " * time (time) datetime64[ns] 2015-01-01 2015-01-02 ... 2015-12-31\n", " * lat (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n", " * lon (lon) float32 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9\n", "Attributes:\n", " grid_mapping: crs\n", " long_name: Absolute Dynamic Topography\n", " standard_name: sea_surface_height_above_geoid\n", " units: m
\n",
"
| \n",
"\n", "\n", " | \n", "
array(['2015-01-01T00:00:00.000000000', '2015-01-02T00:00:00.000000000',\n", " '2015-01-03T00:00:00.000000000', ..., '2015-12-29T00:00:00.000000000',\n", " '2015-12-30T00:00:00.000000000', '2015-12-31T00:00:00.000000000'],\n", " dtype='datetime64[ns]')
array([-89.875, -89.625, -89.375, ..., 89.375, 89.625, 89.875],\n", " dtype=float32)
array([1.25000e-01, 3.75000e-01, 6.25000e-01, ..., 3.59375e+02, 3.59625e+02,\n", " 3.59875e+02], dtype=float32)
<xarray.DataArray 'adt' (lat: 720, lon: 1440)>\n", "array([[nan, nan, nan, ..., nan, nan, nan],\n", " [nan, nan, nan, ..., nan, nan, nan],\n", " [nan, nan, nan, ..., nan, nan, nan],\n", " ...,\n", " [nan, nan, nan, ..., nan, nan, nan],\n", " [nan, nan, nan, ..., nan, nan, nan],\n", " [nan, nan, nan, ..., nan, nan, nan]])\n", "Coordinates:\n", " * lat (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n", " * lon (lon) float32 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9
array([[nan, nan, nan, ..., nan, nan, nan],\n", " [nan, nan, nan, ..., nan, nan, nan],\n", " [nan, nan, nan, ..., nan, nan, nan],\n", " ...,\n", " [nan, nan, nan, ..., nan, nan, nan],\n", " [nan, nan, nan, ..., nan, nan, nan],\n", " [nan, nan, nan, ..., nan, nan, nan]])
array([-89.875, -89.625, -89.375, ..., 89.375, 89.625, 89.875],\n", " dtype=float32)
array([1.25000e-01, 3.75000e-01, 6.25000e-01, ..., 3.59375e+02, 3.59625e+02,\n", " 3.59875e+02], dtype=float32)