diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 00000000000..db0b6fd46f5 --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,1891 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:  (sample: 32, time: 1223, x: 64, y: 64)\n",
+       "Coordinates:\n",
+       "  * time     (time) float64 0.0 0.04909 0.09817 0.1473 ... 59.89 59.94 59.98\n",
+       "  * x        (x) float32 0.049087387 0.14726216 ... 6.1359234 6.234098\n",
+       "  * y        (y) float32 0.049087387 0.14726216 ... 6.1359234 6.234098\n",
+       "Dimensions without coordinates: sample\n",
+       "Data variables:\n",
+       "    u        (sample, time, x, y) float32 dask.array<chunksize=(1, 64, 64, 64), meta=np.ndarray>\n",
+       "    v        (sample, time, x, y) float32 dask.array<chunksize=(1, 64, 64, 64), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    constant_force_magnitude:   0.05\n",
+       "    constant_force_wavenumber:  3\n",
+       "    density:                    1.0\n",
+       "    filename_prefix:            20200313_v0\n",
+       "    init_cfl_safety_factor:     0.5\n",
+       "    init_peak_wavenumber:       3.0\n",
+       "    linear_force_coefficient:   -0.02\n",
+       "    maximum_velocity:           1.0\n",
+       "    ndim:                       2\n",
+       "    num_trajectories:           16\n",
+       "    output_dir:                 /namespace/gas/primary/whirl/datasets/forced_...\n",
+       "    save_dt:                    0.04908738657832146\n",
+       "    save_grid_size:             64\n",
+       "    seed:                       0\n",
+       "    simulation_grid_size:       512\n",
+       "    simulation_time:            60.0\n",
+       "    viscosity:                  0.0\n",
+       "    warmup_grid_size:           512\n",
+       "    warmup_time:                60.0\n",
+       "    xm_experiment_id:           11889045\n",
+       "    xm_work_unit_id:            4
" + ], + "text/plain": [ + "\n", + "Dimensions: (sample: 32, time: 1223, x: 64, y: 64)\n", + "Coordinates:\n", + " * time (time) float64 0.0 0.04909 0.09817 0.1473 ... 59.89 59.94 59.98\n", + " * x (x) float32 0.049087387 0.14726216 ... 6.1359234 6.234098\n", + " * y (y) float32 0.049087387 0.14726216 ... 6.1359234 6.234098\n", + "Dimensions without coordinates: sample\n", + "Data variables:\n", + " u (sample, time, x, y) float32 dask.array\n", + " v (sample, time, x, y) float32 dask.array\n", + "Attributes:\n", + " constant_force_magnitude: 0.05\n", + " constant_force_wavenumber: 3\n", + " density: 1.0\n", + " filename_prefix: 20200313_v0\n", + " init_cfl_safety_factor: 0.5\n", + " init_peak_wavenumber: 3.0\n", + " linear_force_coefficient: -0.02\n", + " maximum_velocity: 1.0\n", + " ndim: 2\n", + " num_trajectories: 16\n", + " output_dir: /namespace/gas/primary/whirl/datasets/forced_...\n", + " save_dt: 0.04908738657832146\n", + " save_grid_size: 64\n", + " seed: 0\n", + " simulation_grid_size: 512\n", + " simulation_time: 60.0\n", + " viscosity: 0.0\n", + " warmup_grid_size: 512\n", + " warmup_time: 60.0\n", + " xm_experiment_id: 11889045\n", + " xm_work_unit_id: 4" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import gcsfs\n", + "import xarray\n", + "\n", + "gcs = gcsfs.GCSFileSystem(token={\n", + " \"type\": \"service_account\",\n", + " \"project_id\": \"gas-pde-superresolution\",\n", + " \"private_key_id\": \"3eb9a314d0038a37f14c7ec5f8904d6b2dc6471b\",\n", + " \"private_key\": \"-----BEGIN PRIVATE 
KEY-----\\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCeg7KEJLL0ODqY\\nSFV2ZxOPYlkPu7UX9PhDFNXq3p1IGeRDUGWV4/V/Hc7i6ZRsy6sdRg/G0/OGuF2O\\nkkxlRS+3+JI+nRkjdnVt69bVT0j0pMfQhfIbGq3JRiK5IVIWJNiWcVCMdc3kdf7O\\n8Rl7Q97OXjn+P3a89KjIl20CET5+wPz3OIRncTYgMp3HAwrh2SR89arTsFtj9/oD\\nJsCRchGncCbr+rSoBl6zW0EO5dJQjRmRyDrcxyOfIuWCefw8M3zPGlNL7Oz9FRv6\\nZy+8ccuqOkK2RXV1ywQW6I/Di+DpQjp0WLnDHsZj5iYyYx/Wop7YgTRoguL83WiX\\nwNBFioE/AgMBAAECggEAFbjsfMlesLMH7Jh1Bb3IY2zR0aMihP7wCplPW1lYamoJ\\n6uOYmDreCKZ4FIZ37ZTXhTJ1tQSUqdCYGZKY34PkryxkNipnKIHyQ7b/AMpVsBjo\\nvP2uGOLGEAAJgB891YUFHVC3/heiuH0nBwrpOAgr/1HQVrQxvGE2xA5kRNVCIDJw\\nxzCCKgpvC8f1GaXfGAkZYMkEOmh7HNcQsJm/Z5nrnEyYdv7GIACqm8CsPVYRNsb1\\n5fVeRvGCYkaS7gCTb01nSM2jALMzjCxILAc56TwCCp1SUfZsIrQQmQRxqUYtTvMv\\ngJJ7NOIvDbPg/k6+V52ol5WWVL27txwXeUKMGFVD8QKBgQDcrJYlaoQW+YfOUIEz\\nXgt44qsavLBDhKnQrJE4+xqblEYe2jBp88WgmYeNa0CT5RuaakpBMqlI4/UU/dq+\\nbHJinv/kOGudmHNqIJraoZ2QO533NdQO5dCGJpsdRBZakAnLw1WVsmtarPXWHIQy\\ncNQsg46GVnBvcJu2pZes15/FBwKBgQC348C/NDVK/CqvffKdDfW1IGTt4u5DVSq3\\nEp3aU4R+FBIJkDraZvuZ+Tz4MU4BBnJN1/TjyucsWJUqmtAOsmYvMJ5b2r8hLSqf\\nABjXL8+MrlYlnGnQy6lUqWHAcAOh2hNkWecR9FPgBEYQOtH/CCPlxMQ4R7dH04YP\\noW9dGP3MCQKBgQCM8x+kHnjoQ9HekcU0jik9uWoY1dSwC5h71P2dK6Y/IWH4atzp\\nj+73OZk/VvYUgQ40fxeWtUkzaYctNUf2wUX+gBhqKS9zpeByNQk0BG6jpbCsKhGc\\nwmWCij4hX0/34HChWc7cou23KSqM7AH0ReT/2be7TpLaW07aRsKuf7tpGQKBgCfx\\n1UCzepAbTrpoRXdQPPtnWgVCsjb2HR0/cgKm+bTZb0Gcjuw6SpUbyt6u9x7J47J9\\nIrkIrka3x09F6cQHhKLjkMKrNbFjOkrnnn2/6PnKNPReka3FT7cQRCelLWTPFj9K\\nOCNyS3Xx97Y20L7xabrTjzwad/mmPQzQKHy0nQzhAoGAT3fO0Bq89N6T/PakpAyc\\n8InOLNPvWnmdvyqrgZrdoettaPrbtkCr5Y/W1FlwDadooZT9pnOJCxwCloBdQ1Si\\nsovjdi29ndLWyIcvi883+pp/Ibdu32+YQPS8cz65Sdafpty/icbhBfxLumdSld5Z\\nckajAqFLrZ4WwAHIml0FyEY=\\n-----END PRIVATE KEY-----\\n\",\n", + " \"client_email\": \"storage-view-only@gas-pde-superresolution.iam.gserviceaccount.com\",\n", + " \"client_id\": \"104301135988419863840\",\n", + " \"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\",\n", + " \"token_uri\": 
\"https://oauth2.googleapis.com/token\",\n", + " \"auth_provider_x509_cert_url\": \"https://www.googleapis.com/oauth2/v1/certs\",\n", + " \"client_x509_cert_url\": \"https://www.googleapis.com/robot/v1/metadata/x509/storage-view-only%40gas-pde-superresolution.iam.gserviceaccount.com\"\n", + "})\n", + "mapper = gcs.get_mapper('gs://gas-whirl/datasets/forced_turbulence_2d_test.zarr')\n", + "ds = xarray.open_zarr(mapper, consolidated=True)\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'u' (sample: 32, time: 1223, x: 64, y: 64)>\n",
+       "dask.array<zarr, shape=(32, 1223, 64, 64), dtype=float32, chunksize=(1, 64, 64, 64), chunktype=numpy.ndarray>\n",
+       "Coordinates:\n",
+       "  * time     (time) float64 0.0 0.04909 0.09817 0.1473 ... 59.89 59.94 59.98\n",
+       "  * x        (x) float32 0.049087387 0.14726216 ... 6.1359234 6.234098\n",
+       "  * y        (y) float32 0.049087387 0.14726216 ... 6.1359234 6.234098\n",
+       "Dimensions without coordinates: sample\n",
+       "Attributes:\n",
+       "    offset:   [1.0, 0.5]
" + ], + "text/plain": [ + "\n", + "dask.array\n", + "Coordinates:\n", + " * time (time) float64 0.0 0.04909 0.09817 0.1473 ... 59.89 59.94 59.98\n", + " * x (x) float32 0.049087387 0.14726216 ... 6.1359234 6.234098\n", + " * y (y) float32 0.049087387 0.14726216 ... 6.1359234 6.234098\n", + "Dimensions without coordinates: sample\n", + "Attributes:\n", + " offset: [1.0, 0.5]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.u" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'u' (sample: 32, time: 1223, x: 64, y: 64)>\n",
+       "dask.array<zarr, shape=(32, 1223, 64, 64), dtype=float32, chunksize=(1, 64, 64, 64), chunktype=numpy.ndarray>\n",
+       "Coordinates:\n",
+       "  * time     (time) float64 0.0 0.04909 0.09817 0.1473 ... 59.89 59.94 59.98\n",
+       "  * x        (x) float32 0.049087387 0.14726216 ... 6.1359234 6.234098\n",
+       "  * y        (y) float32 0.049087387 0.14726216 ... 6.1359234 6.234098\n",
+       "Dimensions without coordinates: sample\n",
+       "Attributes:\n",
+       "    offset:   [1.0, 0.5]
" + ], + "text/plain": [ + "\n", + "dask.array\n", + "Coordinates:\n", + " * time (time) float64 0.0 0.04909 0.09817 0.1473 ... 59.89 59.94 59.98\n", + " * x (x) float32 0.049087387 0.14726216 ... 6.1359234 6.234098\n", + " * y (y) float32 0.049087387 0.14726216 ... 6.1359234 6.234098\n", + "Dimensions without coordinates: sample\n", + "Attributes:\n", + " offset: [1.0, 0.5]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.u" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Untitled1.ipynb b/Untitled1.ipynb new file mode 100644 index 00000000000..00665e46cc3 --- /dev/null +++ b/Untitled1.ipynb @@ -0,0 +1,83 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "def fail():\n", + " raise ValueError('something very long ' * 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something 
very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long ", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mfail\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m\u001b[0m in 
\u001b[0;36mfail\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfail\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'something very long '\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long 
something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long " + ] + } + ], + "source": [ + "fail()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long 
something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long something very long \n" + ] + } + ], + "source": [ + "print('something very long ' * 100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/a.nc b/a.nc new file mode 100644 index 00000000000..621e546ab5c Binary files /dev/null and b/a.nc differ diff --git a/deplistv1.txt b/deplistv1.txt new file mode 100644 index 00000000000..e13fbca9e8c --- /dev/null +++ b/deplistv1.txt @@ -0,0 +1,226 @@ +_libgcc_mutex 0.1 conda_forge conda-forge +_openmp_mutex 4.5 1_llvm conda-forge +affine 2.3.0 py_0 conda-forge +alabaster 0.7.12 py_0 conda-forge +antlr-python-runtime 4.7.2 py38_1001 conda-forge +asciitree 0.3.3 py_2 conda-forge +attrs 19.3.0 py_0 conda-forge +babel 2.8.0 py_0 conda-forge +backcall 0.1.0 py_0 conda-forge +bleach 3.1.5 pyh9f0ad1d_0 
conda-forge +bokeh 2.0.1 py38h32f6830_0 conda-forge +boost-cpp 1.72.0 h8e57a91_0 conda-forge +bottleneck 1.3.2 py38h8790de6_1 conda-forge +brotlipy 0.7.0 py38h1e0a361_1000 conda-forge +bzip2 1.0.8 h516909a_2 conda-forge +ca-certificates 2020.1.1 0 +cairo 1.16.0 hcf35c78_1003 conda-forge +cartopy 0.18.0 py38h172510d_0 conda-forge +certifi 2020.4.5.1 py38_0 +cf-units 2.1.4 py38h8790de6_0 conda-forge +cffi 1.14.0 py38hd463f26_0 conda-forge +cfgrib 0.9.8.1 py_0 conda-forge +cfitsio 3.470 h3eac812_5 conda-forge +cftime 1.1.2 py38h8790de6_0 conda-forge +chardet 3.0.4 py38h32f6830_1006 conda-forge +click 7.1.2 pyh9f0ad1d_0 conda-forge +click-plugins 1.1.1 py_0 conda-forge +cligj 0.5.0 py_0 conda-forge +cloudpickle 1.4.1 py_0 conda-forge +commonmark 0.9.1 pypi_0 pypi +cryptography 2.9.2 py38h766eaa4_0 conda-forge +curl 7.69.1 h33f0ec9_0 conda-forge +cycler 0.10.0 py_2 conda-forge +cytoolz 0.10.1 py38h516909a_0 conda-forge +dask 2.15.0 py_0 conda-forge +dask-core 2.15.0 py_0 conda-forge +decorator 4.4.2 py_0 conda-forge +defusedxml 0.6.0 py_0 conda-forge +distributed 2.15.2 py38h32f6830_0 conda-forge +docutils 0.16 py38h32f6830_1 conda-forge +eccodes 2.17.0 h59f7be3_1 conda-forge +entrypoints 0.3 py38h32f6830_1001 conda-forge +expat 2.2.9 he1b5a44_2 conda-forge +fasteners 0.14.1 py_3 conda-forge +fontconfig 2.13.1 h86ecdb6_1001 conda-forge +freetype 2.10.1 he06d7ca_0 conda-forge +freexl 1.0.5 h14c3975_1002 conda-forge +fsspec 0.7.3 py_0 conda-forge +gdal 3.0.4 py38h172510d_6 conda-forge +geos 3.8.1 he1b5a44_0 conda-forge +geotiff 1.5.1 h05acad5_10 conda-forge +gettext 0.19.8.1 hc5be6a0_1002 conda-forge +giflib 5.2.1 h516909a_2 conda-forge +glib 2.64.2 h6f030ca_0 conda-forge +h5netcdf 0.8.0 py_0 conda-forge +h5py 2.10.0 nompi_py38h513d04c_102 conda-forge +hdf4 4.2.13 hf30be14_1003 conda-forge +hdf5 1.10.5 nompi_h3c11f04_1104 conda-forge +heapdict 1.0.1 py_0 conda-forge +icu 64.2 he1b5a44_1 conda-forge +idna 2.9 py_1 conda-forge +imagesize 1.2.0 py_0 conda-forge 
+importlib-metadata 1.6.0 py38h32f6830_0 conda-forge +importlib_metadata 1.6.0 0 conda-forge +ipykernel 5.2.1 py38h23f93f0_0 conda-forge +ipython 7.14.0 py38h32f6830_0 conda-forge +ipython_genutils 0.2.0 py_1 conda-forge +iris 2.4.0 py38_0 conda-forge +jasper 1.900.1 h07fcdf6_1006 conda-forge +jedi 0.17.0 py38h32f6830_0 conda-forge +jinja2 2.11.2 pyh9f0ad1d_0 conda-forge +jpeg 9c h14c3975_1001 conda-forge +json-c 0.13.1 h14c3975_1001 conda-forge +jsonschema 3.2.0 py38h32f6830_1 conda-forge +jupyter_client 6.1.3 py_0 conda-forge +jupyter_core 4.6.3 py38h32f6830_1 conda-forge +kealib 1.4.13 hec59c27_0 conda-forge +kiwisolver 1.2.0 py38hbf85e49_0 conda-forge +krb5 1.17.1 h2fd8d38_0 conda-forge +ld_impl_linux-64 2.34 h53a641e_0 conda-forge +libaec 1.0.4 he1b5a44_1 conda-forge +libblas 3.8.0 16_openblas conda-forge +libcblas 3.8.0 16_openblas conda-forge +libcurl 7.69.1 hf7181ac_0 conda-forge +libdap4 3.20.6 h1d1bd15_0 conda-forge +libedit 3.1.20170329 hf8c457e_1001 conda-forge +libffi 3.2.1 he1b5a44_1007 conda-forge +libgcc-ng 9.2.0 h24d8f2e_2 conda-forge +libgdal 3.0.4 h3dfc09a_6 conda-forge +libgfortran-ng 7.3.0 hdf63c60_5 conda-forge +libiconv 1.15 h516909a_1006 conda-forge +libkml 1.3.0 hb574062_1011 conda-forge +liblapack 3.8.0 16_openblas conda-forge +libllvm8 8.0.1 hc9558a2_0 conda-forge +libnetcdf 4.7.4 nompi_h9f9fd6a_101 conda-forge +libopenblas 0.3.9 h5ec1e0e_0 conda-forge +libpng 1.6.37 hed695b0_1 conda-forge +libpq 12.2 h5513abc_1 conda-forge +libsodium 1.0.17 h516909a_0 conda-forge +libspatialite 4.3.0a h2482549_1038 conda-forge +libssh2 1.8.2 h22169c7_2 conda-forge +libstdcxx-ng 9.2.0 hdf63c60_2 conda-forge +libtiff 4.1.0 hc7e4089_6 conda-forge +libuuid 2.32.1 h14c3975_1000 conda-forge +libwebp-base 1.1.0 h516909a_3 conda-forge +libxcb 1.13 h14c3975_1002 conda-forge +libxml2 2.9.10 hee79883_0 conda-forge +llvm-openmp 10.0.0 hc9558a2_0 conda-forge +llvmlite 0.31.0 py38h4f45e52_1 conda-forge +locket 0.2.0 py_2 conda-forge +lz4-c 1.9.2 he1b5a44_1 conda-forge 
+markupsafe 1.1.1 py38h1e0a361_1 conda-forge +matplotlib-base 3.2.1 py38h2af1d28_0 conda-forge +mistune 0.8.4 py38h1e0a361_1001 conda-forge +mock 4.0.2 py_0 +monotonic 1.5 py_0 conda-forge +msgpack-python 1.0.0 py38hbf85e49_1 conda-forge +nbconvert 5.6.1 py38h32f6830_1 conda-forge +nbformat 5.0.6 py_0 conda-forge +nbsphinx 0.6.1 pyh9f0ad1d_0 conda-forge +ncurses 6.1 hf484d3e_1002 conda-forge +netcdf4 1.5.3 nompi_py38heb6102f_103 conda-forge +numba 0.48.0 py38hb3f55d8_0 conda-forge +numcodecs 0.6.4 py38he1b5a44_0 conda-forge +numpy 1.18.4 py38h8854b6b_0 conda-forge +numpydoc 0.9.2 py_0 conda-forge +olefile 0.46 py_0 conda-forge +openjpeg 2.3.1 h981e76c_3 conda-forge +openssl 1.1.1g h7b6447c_0 +owslib 0.19.2 py_1 conda-forge +packaging 20.1 py_0 conda-forge +pandas 1.0.3 py38hcb8c335_1 conda-forge +pandoc 2.9.2.1 0 conda-forge +pandocfilters 1.4.2 py_1 conda-forge +parso 0.7.0 pyh9f0ad1d_0 conda-forge +partd 1.1.0 py_0 conda-forge +patsy 0.5.1 py_0 conda-forge +pcre 8.44 he1b5a44_0 conda-forge +pexpect 4.8.0 py38h32f6830_1 conda-forge +pickleshare 0.7.5 py38h32f6830_1001 conda-forge +pillow 7.1.2 py38hb39fc2d_0 +pip 20.1 pyh9f0ad1d_0 conda-forge +pixman 0.38.0 h516909a_1003 conda-forge +poppler 0.67.0 h14e79db_8 conda-forge +poppler-data 0.4.9 1 conda-forge +postgresql 12.2 h8573dbc_1 conda-forge +proj 7.0.0 h966b41f_3 conda-forge +prompt-toolkit 3.0.5 py_0 conda-forge +psutil 5.7.0 py38h1e0a361_1 conda-forge +pthread-stubs 0.4 h14c3975_1001 conda-forge +ptyprocess 0.6.0 py_1001 conda-forge +pycparser 2.20 py_0 conda-forge +pyepsg 0.4.0 py_0 conda-forge +pygments 2.6.1 py_0 conda-forge +pyke 1.1.1 py38h32f6830_1002 conda-forge +pyopenssl 19.1.0 py_1 conda-forge +pyparsing 2.4.7 pyh9f0ad1d_0 conda-forge +pyproj 2.6.1.post1 py38h7521cb9_0 conda-forge +pyrsistent 0.16.0 py38h1e0a361_0 conda-forge +pyshp 2.1.0 py_0 conda-forge +pysocks 1.7.1 py38h32f6830_1 conda-forge +python 3.8.2 he5300dc_7_cpython conda-forge +python-dateutil 2.8.1 py_0 conda-forge +python_abi 3.8 
1_cp38 conda-forge +pytz 2020.1 pyh9f0ad1d_0 conda-forge +pyyaml 5.3.1 py38h1e0a361_0 conda-forge +pyzmq 19.0.0 py38ha71036d_1 conda-forge +rasterio 1.1.3 py38h900e953_0 conda-forge +readline 8.0 hf8c457e_0 conda-forge +readthedocs-sphinx-ext 1.0.3 pypi_0 pypi +recommonmark 0.6.0 pypi_0 pypi +requests 2.23.0 pyh8c360ce_2 conda-forge +scipy 1.4.1 py38h18bccfc_3 conda-forge +seaborn 0.10.1 py_0 conda-forge +setuptools 46.1.3 py38h32f6830_0 conda-forge +shapely 1.7.0 py38hd168ffb_3 conda-forge +six 1.14.0 py_1 conda-forge +snowballstemmer 2.0.0 py_0 conda-forge +snuggs 1.4.7 py_0 conda-forge +sortedcontainers 2.1.0 py_0 conda-forge +sphinx 3.0.3 py_0 +sphinx_rtd_theme 0.4.3 py_0 +sphinxcontrib-applehelp 1.0.2 py_0 conda-forge +sphinxcontrib-devhelp 1.0.2 py_0 conda-forge +sphinxcontrib-htmlhelp 1.0.3 py_0 conda-forge +sphinxcontrib-jsmath 1.0.1 py_0 conda-forge +sphinxcontrib-qthelp 1.0.3 py_0 conda-forge +sphinxcontrib-serializinghtml 1.1.4 py_0 conda-forge +sqlite 3.30.1 hcee41ef_0 conda-forge +statsmodels 0.11.1 py38h1e0a361_1 conda-forge +tbb 2018.0.5 h2d50403_0 conda-forge +tblib 1.6.0 py_0 conda-forge +testpath 0.4.4 py_0 conda-forge +tiledb 1.7.7 h8efa9f0_1 conda-forge +tk 8.6.10 hed695b0_0 conda-forge +toolz 0.10.0 py_0 conda-forge +tornado 6.0.4 py38h1e0a361_1 conda-forge +traitlets 4.3.3 py38h32f6830_1 conda-forge +typing_extensions 3.7.4.2 py_0 conda-forge +tzcode 2020a h516909a_0 conda-forge +udunits2 2.2.27.6 h4e0c4b3_1001 conda-forge +urllib3 1.25.9 py_0 conda-forge +wcwidth 0.1.9 pyh9f0ad1d_0 conda-forge +webencodings 0.5.1 py_1 conda-forge +wheel 0.34.2 py_1 conda-forge +xarray 0.15.1 py_0 conda-forge +xerces-c 3.2.2 h8412b87_1004 conda-forge +xorg-kbproto 1.0.7 h14c3975_1002 conda-forge +xorg-libice 1.0.10 h516909a_0 conda-forge +xorg-libsm 1.2.3 h84519dc_1000 conda-forge +xorg-libx11 1.6.9 h516909a_0 conda-forge +xorg-libxau 1.0.9 h14c3975_0 conda-forge +xorg-libxdmcp 1.1.3 h516909a_0 conda-forge +xorg-libxext 1.3.4 h516909a_0 conda-forge 
+xorg-libxrender 0.9.10 h516909a_1002 conda-forge +xorg-renderproto 0.11.1 h14c3975_1002 conda-forge +xorg-xextproto 7.3.0 h14c3975_1002 conda-forge +xorg-xproto 7.0.31 h14c3975_1007 conda-forge +xz 5.2.5 h516909a_0 conda-forge +yaml 0.2.4 h516909a_0 conda-forge +zarr 2.4.0 py_0 conda-forge +zeromq 4.3.2 he1b5a44_2 conda-forge +zict 2.0.0 py_0 conda-forge +zipp 3.1.0 py_0 conda-forge +zlib 1.2.11 h516909a_1006 conda-forge +zstd 1.4.4 h6597ccf_3 conda-forge diff --git a/deplistv2.txt b/deplistv2.txt new file mode 100644 index 00000000000..2f51e1bcae4 --- /dev/null +++ b/deplistv2.txt @@ -0,0 +1,226 @@ +_libgcc_mutex 0.1 conda_forge conda-forge +_openmp_mutex 4.5 1_llvm conda-forge +affine 2.3.0 py_0 conda-forge +alabaster 0.7.12 py_0 conda-forge +antlr-python-runtime 4.7.2 py38_1001 conda-forge +asciitree 0.3.3 py_2 conda-forge +attrs 19.3.0 py_0 conda-forge +babel 2.8.0 py_0 conda-forge +backcall 0.1.0 py_0 conda-forge +bleach 3.1.4 pyh9f0ad1d_0 conda-forge +bokeh 2.0.1 py38h32f6830_0 conda-forge +boost-cpp 1.72.0 h8e57a91_0 conda-forge +bottleneck 1.3.2 py38h8790de6_1 conda-forge +brotlipy 0.7.0 py38h1e0a361_1000 conda-forge +bzip2 1.0.8 h516909a_2 conda-forge +ca-certificates 2020.1.1 0 +cairo 1.16.0 hcf35c78_1003 conda-forge +cartopy 0.17.0 py38h9cf8511_1015 conda-forge +certifi 2020.4.5.1 py38_0 +cf-units 2.1.4 py38h8790de6_0 conda-forge +cffi 1.14.0 py38hd463f26_0 conda-forge +cfgrib 0.9.8.1 py_0 conda-forge +cfitsio 3.470 h3eac812_5 conda-forge +cftime 1.1.1.2 py38h8790de6_0 conda-forge +chardet 3.0.4 py38h32f6830_1006 conda-forge +click 7.1.2 pyh9f0ad1d_0 conda-forge +click-plugins 1.1.1 py_0 conda-forge +cligj 0.5.0 py_0 conda-forge +cloudpickle 1.4.1 py_0 conda-forge +commonmark 0.9.1 pypi_0 pypi +cryptography 2.9.2 py38h766eaa4_0 conda-forge +curl 7.69.1 h33f0ec9_0 conda-forge +cycler 0.10.0 py_2 conda-forge +cytoolz 0.10.1 py38h516909a_0 conda-forge +dask 2.15.0 py_0 conda-forge +dask-core 2.15.0 py_0 conda-forge +decorator 4.4.2 py_0 conda-forge 
+defusedxml 0.6.0 py_0 conda-forge +distributed 2.15.2 py38h32f6830_0 conda-forge +docutils 0.16 py38h32f6830_1 conda-forge +eccodes 2.17.0 h59f7be3_1 conda-forge +entrypoints 0.3 py38h32f6830_1001 conda-forge +expat 2.2.9 he1b5a44_2 conda-forge +fasteners 0.14.1 py_3 conda-forge +fontconfig 2.13.1 h86ecdb6_1001 conda-forge +freetype 2.10.1 he06d7ca_0 conda-forge +freexl 1.0.5 h14c3975_1002 conda-forge +fsspec 0.7.3 py_0 conda-forge +geos 3.8.1 he1b5a44_0 conda-forge +geotiff 1.5.1 h05acad5_10 conda-forge +gettext 0.19.8.1 hc5be6a0_1002 conda-forge +giflib 5.2.1 h516909a_2 conda-forge +glib 2.64.2 h6f030ca_0 conda-forge +h5netcdf 0.8.0 py_0 conda-forge +h5py 2.10.0 nompi_py38h513d04c_102 conda-forge +hdf4 4.2.13 hf30be14_1003 conda-forge +hdf5 1.10.5 nompi_h3c11f04_1104 conda-forge +heapdict 1.0.1 py_0 conda-forge +icu 64.2 he1b5a44_1 conda-forge +idna 2.9 py_1 conda-forge +imagesize 1.2.0 py_0 conda-forge +importlib-metadata 1.6.0 py38h32f6830_0 conda-forge +importlib_metadata 1.6.0 0 conda-forge +ipykernel 5.2.1 py38h23f93f0_0 conda-forge +ipython 7.14.0 py38h32f6830_0 conda-forge +ipython_genutils 0.2.0 py_1 conda-forge +iris 2.4.0 py38_0 conda-forge +jasper 1.900.1 h07fcdf6_1006 conda-forge +jedi 0.17.0 py38h32f6830_0 conda-forge +jinja2 2.11.2 pyh9f0ad1d_0 conda-forge +jpeg 9c h14c3975_1001 conda-forge +json-c 0.13.1 h14c3975_1001 conda-forge +jsonschema 3.2.0 py38h32f6830_1 conda-forge +jupyter_client 6.1.3 py_0 conda-forge +jupyter_core 4.6.3 py38h32f6830_1 conda-forge +kealib 1.4.13 hec59c27_0 conda-forge +kiwisolver 1.2.0 py38hbf85e49_0 conda-forge +krb5 1.17.1 h2fd8d38_0 conda-forge +ld_impl_linux-64 2.34 h53a641e_0 conda-forge +libaec 1.0.4 he1b5a44_1 conda-forge +libblas 3.8.0 16_openblas conda-forge +libcblas 3.8.0 16_openblas conda-forge +libcurl 7.69.1 hf7181ac_0 conda-forge +libdap4 3.20.6 h1d1bd15_0 conda-forge +libedit 3.1.20170329 hf8c457e_1001 conda-forge +libffi 3.2.1 he1b5a44_1007 conda-forge +libgcc-ng 9.2.0 h24d8f2e_2 conda-forge +libgdal 
3.0.4 h3dfc09a_6 conda-forge +libgfortran-ng 7.3.0 hdf63c60_5 conda-forge +libiconv 1.15 h516909a_1006 conda-forge +libkml 1.3.0 hb574062_1011 conda-forge +liblapack 3.8.0 16_openblas conda-forge +libllvm8 8.0.1 hc9558a2_0 conda-forge +libnetcdf 4.7.4 nompi_h9f9fd6a_101 conda-forge +libopenblas 0.3.9 h5ec1e0e_0 conda-forge +libpng 1.6.37 hed695b0_1 conda-forge +libpq 12.2 h5513abc_1 conda-forge +libsodium 1.0.17 h516909a_0 conda-forge +libspatialite 4.3.0a h2482549_1038 conda-forge +libssh2 1.8.2 h22169c7_2 conda-forge +libstdcxx-ng 9.2.0 hdf63c60_2 conda-forge +libtiff 4.1.0 hc7e4089_6 conda-forge +libuuid 2.32.1 h14c3975_1000 conda-forge +libwebp-base 1.1.0 h516909a_3 conda-forge +libxcb 1.13 h14c3975_1002 conda-forge +libxml2 2.9.10 hee79883_0 conda-forge +llvm-openmp 10.0.0 hc9558a2_0 conda-forge +llvmlite 0.31.0 py38h4f45e52_1 conda-forge +locket 0.2.0 py_2 conda-forge +lz4-c 1.9.2 he1b5a44_0 conda-forge +markupsafe 1.1.1 py38h1e0a361_1 conda-forge +matplotlib-base 3.2.1 py38h2af1d28_0 conda-forge +mistune 0.8.4 py38h1e0a361_1001 conda-forge +mock 4.0.2 py_0 +monotonic 1.5 py_0 conda-forge +msgpack-python 1.0.0 py38hbf85e49_1 conda-forge +nbconvert 5.6.1 py38h32f6830_1 conda-forge +nbformat 5.0.6 py_0 conda-forge +nbsphinx 0.6.1 pyh9f0ad1d_0 conda-forge +ncurses 6.1 hf484d3e_1002 conda-forge +netcdf4 1.5.3 nompi_py38heb6102f_103 conda-forge +numba 0.48.0 py38hb3f55d8_0 conda-forge +numcodecs 0.6.4 py38he1b5a44_0 conda-forge +numpy 1.18.1 py38h8854b6b_1 conda-forge +numpydoc 0.9.2 py_0 conda-forge +olefile 0.46 py_0 conda-forge +openjpeg 2.3.1 h981e76c_3 conda-forge +openssl 1.1.1g h7b6447c_0 +owslib 0.19.2 py_1 conda-forge +packaging 20.1 py_0 conda-forge +pandas 1.0.3 py38hcb8c335_1 conda-forge +pandoc 2.9.2.1 0 conda-forge +pandocfilters 1.4.2 py_1 conda-forge +parso 0.7.0 pyh9f0ad1d_0 conda-forge +partd 1.1.0 py_0 conda-forge +patsy 0.5.1 py_0 conda-forge +pcre 8.44 he1b5a44_0 conda-forge +pexpect 4.8.0 py38h32f6830_1 conda-forge +pickleshare 0.7.5 
py38h32f6830_1001 conda-forge +pillow 7.1.2 py38hb39fc2d_0 +pip 20.1 pyh9f0ad1d_0 conda-forge +pixman 0.38.0 h516909a_1003 conda-forge +poppler 0.67.0 h14e79db_8 conda-forge +poppler-data 0.4.9 1 conda-forge +postgresql 12.2 h8573dbc_1 conda-forge +proj 7.0.0 h966b41f_3 conda-forge +prompt-toolkit 3.0.5 py_0 conda-forge +psutil 5.7.0 py38h1e0a361_1 conda-forge +pthread-stubs 0.4 h14c3975_1001 conda-forge +ptyprocess 0.6.0 py_1001 conda-forge +pycparser 2.20 py_0 conda-forge +pyepsg 0.4.0 py_0 conda-forge +pygments 2.6.1 py_0 conda-forge +pykdtree 1.3.1 py38h8790de6_1003 conda-forge +pyke 1.1.1 py38h32f6830_1002 conda-forge +pyopenssl 19.1.0 py_1 conda-forge +pyparsing 2.4.7 pyh9f0ad1d_0 conda-forge +pyproj 2.6.0 py38h7521cb9_1 conda-forge +pyrsistent 0.16.0 py38h1e0a361_0 conda-forge +pyshp 2.1.0 py_0 conda-forge +pysocks 1.7.1 py38h32f6830_1 conda-forge +python 3.8.2 he5300dc_7_cpython conda-forge +python-dateutil 2.8.1 py_0 conda-forge +python_abi 3.8 1_cp38 conda-forge +pytz 2020.1 pyh9f0ad1d_0 conda-forge +pyyaml 5.3.1 py38h1e0a361_0 conda-forge +pyzmq 19.0.0 py38ha71036d_1 conda-forge +rasterio 1.1.3 py38h900e953_0 conda-forge +readline 8.0 hf8c457e_0 conda-forge +readthedocs-sphinx-ext 1.0.3 pypi_0 pypi +recommonmark 0.6.0 pypi_0 pypi +requests 2.23.0 pyh8c360ce_2 conda-forge +scipy 1.4.1 py38h18bccfc_3 conda-forge +seaborn 0.10.1 py_0 conda-forge +setuptools 46.1.3 py38h32f6830_0 conda-forge +shapely 1.7.0 py38hd168ffb_3 conda-forge +six 1.14.0 py_1 conda-forge +snowballstemmer 2.0.0 py_0 conda-forge +snuggs 1.4.7 py_0 conda-forge +sortedcontainers 2.1.0 py_0 conda-forge +sphinx 3.0.3 py_0 +sphinx_rtd_theme 0.4.3 py_0 +sphinxcontrib-applehelp 1.0.2 py_0 conda-forge +sphinxcontrib-devhelp 1.0.2 py_0 conda-forge +sphinxcontrib-htmlhelp 1.0.3 py_0 conda-forge +sphinxcontrib-jsmath 1.0.1 py_0 conda-forge +sphinxcontrib-qthelp 1.0.3 py_0 conda-forge +sphinxcontrib-serializinghtml 1.1.4 py_0 conda-forge +sqlite 3.30.1 hcee41ef_0 conda-forge +statsmodels 0.11.1 
py38h1e0a361_1 conda-forge +tbb 2018.0.5 h2d50403_0 conda-forge +tblib 1.6.0 py_0 conda-forge +testpath 0.4.4 py_0 conda-forge +tiledb 1.7.7 h8efa9f0_1 conda-forge +tk 8.6.10 hed695b0_0 conda-forge +toolz 0.10.0 py_0 conda-forge +tornado 6.0.4 py38h1e0a361_1 conda-forge +traitlets 4.3.3 py38h32f6830_1 conda-forge +typing_extensions 3.7.4.2 py_0 conda-forge +tzcode 2020a h516909a_0 conda-forge +udunits2 2.2.27.6 h4e0c4b3_1001 conda-forge +urllib3 1.25.9 py_0 conda-forge +wcwidth 0.1.9 pyh9f0ad1d_0 conda-forge +webencodings 0.5.1 py_1 conda-forge +wheel 0.34.2 py_1 conda-forge +xarray 0.15.1 py_0 conda-forge +xerces-c 3.2.2 h8412b87_1004 conda-forge +xorg-kbproto 1.0.7 h14c3975_1002 conda-forge +xorg-libice 1.0.10 h516909a_0 conda-forge +xorg-libsm 1.2.3 h84519dc_1000 conda-forge +xorg-libx11 1.6.9 h516909a_0 conda-forge +xorg-libxau 1.0.9 h14c3975_0 conda-forge +xorg-libxdmcp 1.1.3 h516909a_0 conda-forge +xorg-libxext 1.3.4 h516909a_0 conda-forge +xorg-libxrender 0.9.10 h516909a_1002 conda-forge +xorg-renderproto 0.11.1 h14c3975_1002 conda-forge +xorg-xextproto 7.3.0 h14c3975_1002 conda-forge +xorg-xproto 7.0.31 h14c3975_1007 conda-forge +xz 5.2.5 h516909a_0 conda-forge +yaml 0.2.4 h516909a_0 conda-forge +zarr 2.4.0 py_0 conda-forge +zeromq 4.3.2 he1b5a44_2 conda-forge +zict 2.0.0 py_0 conda-forge +zipp 3.1.0 py_0 conda-forge +zlib 1.2.11 h516909a_1006 conda-forge +zstd 1.4.4 h6597ccf_3 conda-forge diff --git a/doc/path/to/directory.zarr/.zattrs b/doc/path/to/directory.zarr/.zattrs new file mode 100644 index 00000000000..9e26dfeeb6e --- /dev/null +++ b/doc/path/to/directory.zarr/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/doc/path/to/directory.zarr/.zgroup b/doc/path/to/directory.zarr/.zgroup new file mode 100644 index 00000000000..3b7daf227c1 --- /dev/null +++ b/doc/path/to/directory.zarr/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/doc/path/to/directory.zarr/foo/.zarray 
b/doc/path/to/directory.zarr/foo/.zarray new file mode 100644 index 00000000000..14262acaa3f --- /dev/null +++ b/doc/path/to/directory.zarr/foo/.zarray @@ -0,0 +1,24 @@ +{ + "chunks": [ + 4, + 5, + 2 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": "`_. - Better dtype consistency for ``rolling.mean()``. (:issue:`7062`, :pull:`7063`) By `Sam Levang `_. +- Explicitly opening a file multiple times (e.g., after modifying it on disk) + now reopens the file from scratch, rather than reusing a cached version + (:issue:`4240`, :issue:`4862`). + By `Stephan Hoyer `_. - Allow writing NetCDF files including only dimensionless variables using the distributed or multiprocessing scheduler (:issue:`7013`, :pull:`7040`). By `Francesco Nattino `_. @@ -1234,10 +1238,6 @@ New Features Bug fixes ~~~~~~~~~ -- Explicitly opening a file multiple times (e.g., after modifying it on disk) - now reopens the file from scratch, rather than reusing a cached version - (:issue:`4240`, :issue:`4862`). - By `Stephan Hoyer `_. - :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` do not trigger computations anymore if :py:meth:`Dataset.weighted` or :py:meth:`DataArray.weighted` are applied (:issue:`4625`, :pull:`4668`). By `Julius Busecke `_. - :py:func:`merge` with ``combine_attrs='override'`` makes a copy of the attrs (:issue:`4627`). 
- By default, when possible, xarray will now always use values of type ``int64`` when encoding diff --git a/fast_groupby_bins_agg.ipynb b/fast_groupby_bins_agg.ipynb new file mode 100644 index 00000000000..f5e60c9068b --- /dev/null +++ b/fast_groupby_bins_agg.ipynb @@ -0,0 +1,2214 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "pRDJrkjEC3Re" + }, + "outputs": [], + "source": [ + "# Copyright 2020 Google LLC.\n", + "# SPDX-License-Identifier: Apache-2.0\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import xarray\n", + "import numpy_groupies\n", + "\n", + "def _binned_agg(\n", + " array: np.ndarray,\n", + " indices: np.ndarray,\n", + " num_bins: int,\n", + " *,\n", + " func,\n", + " fill_value,\n", + " dtype,\n", + ") -> np.ndarray:\n", + " \"\"\"NumPy helper function for aggregating over bins.\"\"\"\n", + " mask = np.logical_not(np.isnan(indices))\n", + " int_indices = indices[mask].astype(int)\n", + " shape = array.shape[:-indices.ndim] + (num_bins,)\n", + " result = numpy_groupies.aggregate(\n", + " int_indices, array[..., mask],\n", + " func=func,\n", + " size=num_bins,\n", + " fill_value=fill_value,\n", + " dtype=dtype,\n", + " axis=-1,\n", + " )\n", + " return result\n", + "\n", + "def groupby_bins_agg(\n", + " array: xarray.DataArray,\n", + " group: xarray.DataArray,\n", + " bins,\n", + " func='sum',\n", + " fill_value=0,\n", + " dtype=None,\n", + " **cut_kwargs,\n", + ") -> xarray.DataArray:\n", + " \"\"\"Faster equivalent of Xarray's groupby_bins(...).sum().\"\"\"\n", + " # TODO: implement this upstream in xarray:\n", + " # https://github.com/pydata/xarray/issues/4473\n", + " binned = pd.cut(np.ravel(group), bins, **cut_kwargs)\n", + " new_dim_name = group.name + \"_bins\"\n", + " indices = group.copy(data=binned.codes.reshape(group.shape))\n", + "\n", + " result = xarray.apply_ufunc(\n", + " _binned_agg, array, indices,\n", + " input_core_dims=[indices.dims, indices.dims],\n", + " 
output_core_dims=[[new_dim_name]],\n", + " output_dtypes=[array.dtype],\n", + " dask_gufunc_kwargs=dict(\n", + " output_sizes={new_dim_name: binned.categories.size},\n", + " ),\n", + " kwargs={\n", + " 'num_bins': binned.categories.size,\n", + " 'func': func,\n", + " 'fill_value': fill_value,\n", + " 'dtype': dtype,\n", + " },\n", + " dask='parallelized',\n", + " )\n", + " result.coords[new_dim_name] = binned.categories\n", + " return result\n", + "\n", + "def make_test_data(t, x, y, seed=0):\n", + " signal = xarray.DataArray(\n", + " np.random.RandomState(seed).rand(t, x, y),\n", + " dims=['time', 'y', 'x'],\n", + " coords={\n", + " 'time': np.arange(t),\n", + " 'y': np.arange(x),\n", + " 'x': np.arange(y),\n", + " },\n", + " name='signal')\n", + " distance = ((signal.x ** 2 + signal.y ** 2) ** 0.5).rename('distance')\n", + " return signal, distance" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r0Jp6XRmIoNU" + }, + "source": [ + "# unit test" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "id": "Qa6NH3WGEFyC" + }, + "outputs": [], + "source": [ + "signal, distance = make_test_data(t=2, x=50, y=50)\n", + "bins = 10" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 219 + }, + "id": "hz1ikpo3Ik-j", + "outputId": "f88ae860-c056-424c-94b4-c52608951631" + }, + "outputs": [], + "source": [ + "actual = groupby_bins_agg(signal, distance, bins, func='mean')\n", + "expected = signal.groupby_bins(distance, bins=10).mean()\n", + "xarray.testing.assert_allclose(actual, expected)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 219 + }, + "id": "6FIRcPRzImDV", + "outputId": "dc0741f1-44b4-437c-b577-d8e4785c54b8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray (time: 2, distance_bins: 10)>\n",
+       "array([[0.51498271, 0.46370372, 0.48996133, 0.51069211, 0.5302821 ,\n",
+       "        0.50000696, 0.48753868, 0.52151072, 0.48915714, 0.51292164],\n",
+       "       [0.53229943, 0.52167522, 0.45915308, 0.5293949 , 0.47742068,\n",
+       "        0.48900111, 0.48465034, 0.47729889, 0.5059115 , 0.50237199]])\n",
+       "Coordinates:\n",
+       "  * time           (time) int64 0 1\n",
+       "  * distance_bins  (distance_bins) object (-0.0693, 6.93] ... (62.367, 69.296]
" + ], + "text/plain": [ + "\n", + "array([[0.51498271, 0.46370372, 0.48996133, 0.51069211, 0.5302821 ,\n", + " 0.50000696, 0.48753868, 0.52151072, 0.48915714, 0.51292164],\n", + " [0.53229943, 0.52167522, 0.45915308, 0.5293949 , 0.47742068,\n", + " 0.48900111, 0.48465034, 0.47729889, 0.5059115 , 0.50237199]])\n", + "Coordinates:\n", + " * time (time) int64 0 1\n", + " * distance_bins (distance_bins) object (-0.0693, 6.93] ... (62.367, 69.296]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "actual" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FP84FGufIrJV" + }, + "source": [ + "# Speed tests" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "id": "AoEU13x4IqzO" + }, + "outputs": [], + "source": [ + "signal, distance = make_test_data(t=20, x=1000, y=1000)\n", + "bins = 50" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "160.0" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "signal.nbytes / 1e6" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OpDWpcNcJpG_" + }, + "source": [ + "## numpy speed test" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ya0e9MVDE5NS", + "outputId": "21a7dbc0-0692-4e8f-98f9-f1e1dd1252b9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 8.52 s, sys: 674 ms, total: 9.19 s\n", + "Wall time: 10.3 s\n" + ] + } + ], + "source": [ + "%time _ = signal.groupby_bins(distance, bins).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-ywMYaSCGaYl", + "outputId": "e5e61066-6819-40b9-a061-74badb248001" + }, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "CPU times: user 909 ms, sys: 290 ms, total: 1.2 s\n", + "Wall time: 1.3 s\n" + ] + } + ], + "source": [ + "%time _ = groupby_bins_agg(signal, distance, bins, func='mean')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fzrbi2JQJqjH" + }, + "source": [ + "## dask speed test" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uJkGMSk_JCcU", + "outputId": "15989c46-55dc-45db-a24b-159fff0b3f12" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'signal' (time: 20, y: 1000, x: 1000)>\n",
+       "dask.array<xarray-<this-array>, shape=(20, 1000, 1000), dtype=float64, chunksize=(1, 1000, 1000), chunktype=numpy.ndarray>\n",
+       "Coordinates:\n",
+       "  * time     (time) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19\n",
+       "  * y        (y) int64 0 1 2 3 4 5 6 7 8 ... 991 992 993 994 995 996 997 998 999\n",
+       "  * x        (x) int64 0 1 2 3 4 5 6 7 8 ... 991 992 993 994 995 996 997 998 999
" + ], + "text/plain": [ + "\n", + "dask.array, shape=(20, 1000, 1000), dtype=float64, chunksize=(1, 1000, 1000), chunktype=numpy.ndarray>\n", + "Coordinates:\n", + " * time (time) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19\n", + " * y (y) int64 0 1 2 3 4 5 6 7 8 ... 991 992 993 994 995 996 997 998 999\n", + " * x (x) int64 0 1 2 3 4 5 6 7 8 ... 991 992 993 994 995 996 997 998 999" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import dask\n", + "dask_signal = signal.chunk({'time': 1})\n", + "dask.config.set(num_workers=4)\n", + "dask_signal" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ibm73W1IJXmE", + "outputId": "287be907-ec7d-4f77-de0e-c60043c1087f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 8.13 s, sys: 365 ms, total: 8.49 s\n", + "Wall time: 8.87 s\n", + "CPU times: user 1.12 s, sys: 332 ms, total: 1.45 s\n", + "Wall time: 967 ms\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'signal' (time: 20, distance_bins: 50)>\n",
+       "dask.array<transpose, shape=(20, 50), dtype=float64, chunksize=(1, 1), chunktype=numpy.ndarray>\n",
+       "Coordinates:\n",
+       "  * distance_bins  (distance_bins) object (-1.413, 28.256] ... (1384.543, 141...\n",
+       "  * time           (time) int64 0 1 2 3 4 5 6 7 8 ... 11 12 13 14 15 16 17 18 19
" + ], + "text/plain": [ + "\n", + "dask.array\n", + "Coordinates:\n", + " * distance_bins (distance_bins) object (-1.413, 28.256] ... (1384.543, 141...\n", + " * time (time) int64 0 1 2 3 4 5 6 7 8 ... 11 12 13 14 15 16 17 18 19" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%time result = dask_signal.groupby_bins(distance, bins).mean()\n", + "%time result.compute()\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kEOl2RPNJU0v", + "outputId": "0118c6ac-5d56-4f2e-b9f7-8dd8db7d1b32" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 54.8 ms, sys: 7.46 ms, total: 62.2 ms\n", + "Wall time: 61.3 ms\n", + "CPU times: user 884 ms, sys: 191 ms, total: 1.08 s\n", + "Wall time: 484 ms\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray (time: 20, distance_bins: 50)>\n",
+       "dask.array<transpose, shape=(20, 50), dtype=float64, chunksize=(1, 50), chunktype=numpy.ndarray>\n",
+       "Coordinates:\n",
+       "  * time           (time) int64 0 1 2 3 4 5 6 7 8 ... 11 12 13 14 15 16 17 18 19\n",
+       "  * distance_bins  (distance_bins) object (-1.413, 28.256] ... (1384.543, 141...
" + ], + "text/plain": [ + "\n", + "dask.array\n", + "Coordinates:\n", + " * time (time) int64 0 1 2 3 4 5 6 7 8 ... 11 12 13 14 15 16 17 18 19\n", + " * distance_bins (distance_bins) object (-1.413, 28.256] ... (1384.543, 141..." + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%time result = groupby_bins_agg(dask_signal, distance, bins, func='mean')\n", + "%time result.compute()\n", + "result" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "fast groupby agg.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/myfile.h5 b/myfile.h5 new file mode 100644 index 00000000000..7797f6f5db4 Binary files /dev/null and b/myfile.h5 differ diff --git a/properties/test_isnull.py b/properties/test_isnull.py new file mode 100644 index 00000000000..6a49f781dd3 --- /dev/null +++ b/properties/test_isnull.py @@ -0,0 +1,28 @@ +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st +import numpy as np +import pandas as pd +from hypothesis import given, settings + +import xarray as xr + +# Run for a while - arrays are a bigger search space than usual +settings.register_profile("ci", deadline=None) +settings.load_profile("ci") + + +an_array = npst.arrays( + dtype=npst.nested_dtypes(), + shape=npst.array_shapes(max_side=3), # max_side specified for performance +) + + +@given(st.data(), an_array) +def test_isnull_consistency(data, array): + actual = xr.core.duck_array_ops.isnull(array) + if array.dtype.kind == "V": + # not supported by pandas + expected = np.zeros_like(array, dtype=bool) + 
else: + expected = pd.isnull(array) + np.testing.assert_equal(actual, expected) diff --git a/rasm.zarr/.zattrs b/rasm.zarr/.zattrs new file mode 100644 index 00000000000..f45e4c7b8c1 --- /dev/null +++ b/rasm.zarr/.zattrs @@ -0,0 +1,13 @@ +{ + "NCO": "netCDF Operators version 4.7.9 (Homepage = http://nco.sf.net, Code = http://github.com/nco/nco)", + "comment": "Output from the Variable Infiltration Capacity (VIC) model.", + "convention": "CF-1.4", + "history": "Fri Aug 7 17:57:38 2020: ncatted -a bounds,,d,, rasm.nc\nTue Dec 27 14:15:22 2016: ncatted -a dimensions,,d,, rasm.nc rasm.nc\nTue Dec 27 13:38:40 2016: ncks -3 rasm.nc rasm.nc\nhistory deleted for brevity", + "institution": "U.W.", + "nco_openmp_thread_number": 1, + "output_frequency": "daily", + "output_mode": "averaged", + "references": "Based on the initial model of Liang et al., 1994, JGR, 99, 14,415- 14,429.", + "source": "RACM R1002RBRxaaa01a", + "title": "/workspace/jhamman/processed/R1002RBRxaaa01a/lnd/temp/R1002RBRxaaa01a.vic.ha.1979-09-01.nc" +} \ No newline at end of file diff --git a/rasm.zarr/.zgroup b/rasm.zarr/.zgroup new file mode 100644 index 00000000000..3b7daf227c1 --- /dev/null +++ b/rasm.zarr/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/rasm.zarr/Tair/.zarray b/rasm.zarr/Tair/.zarray new file mode 100644 index 00000000000..49a65e5da52 --- /dev/null +++ b/rasm.zarr/Tair/.zarray @@ -0,0 +1,24 @@ +{ + "chunks": [ + 9, + 52, + 138 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": "= (3, 10): + from typing import TypeGuard + else: + from typing_extensions import TypeGuard + except ImportError: +- def is_scalar(value: Any, include_0d: bool = True) -> bool: +- """Whether to treat a value as a scalar. ++ if TYPE_CHECKING: ++ raise ++ else: ++ ++ def is_scalar(value: Any, include_0d: bool = True) -> bool: ++ """Whether to treat a value as a scalar. 
++ ++ Any non-iterable, string, or 0-D array ++ """ ++ return _is_scalar(value, include_0d) ++ + +- Any non-iterable, string, or 0-D array +- """ +- return _is_scalar(value, include_0d) + else: ++ + def is_scalar(value: Any, include_0d: bool = True) -> TypeGuard[Hashable]: + """Whether to treat a value as a scalar. + + diff --git a/testing.nc b/testing.nc new file mode 100644 index 00000000000..fd0dbbcf09a Binary files /dev/null and b/testing.nc differ diff --git a/testing.zarr/.zattrs b/testing.zarr/.zattrs new file mode 100644 index 00000000000..e368282ca7b --- /dev/null +++ b/testing.zarr/.zattrs @@ -0,0 +1,7 @@ +{ + "Conventions": "COARDS", + "description": "Data is from NMC initialized reanalysis\n(4x/day). These are the 0.9950 sigma level values.", + "platform": "Model", + "references": "http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.html", + "title": "4x daily NMC reanalysis (1948)" +} \ No newline at end of file diff --git a/testing.zarr/.zgroup b/testing.zarr/.zgroup new file mode 100644 index 00000000000..3b7daf227c1 --- /dev/null +++ b/testing.zarr/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/testing.zarr/air/.zarray b/testing.zarr/air/.zarray new file mode 100644 index 00000000000..7c28a8431c8 --- /dev/null +++ b/testing.zarr/air/.zarray @@ -0,0 +1,24 @@ +{ + "chunks": [ + 730, + 13, + 27 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": " int: + return len(self.shape) + + @property + def size(self) -> int: + out = 1 + for s in self.shape: + out *= s + return out + + def __len__(self) -> int: + try: + return self.shape[0] + except IndexError: + raise TypeError("len() of unsized object") from None + + def __getitem_explicit__(self, indexer: "ExplicitIndexer") -> "Array": + raise NotImplementedError + + def __setitem_explicit__(self, indexer: "ExplicitIndexer", value: DuckArray): + raise NotImplementedError + + def 
transpose(self, order): + return TransposeArray(self, order) + + +class ExplicitIndexer: + """Base class for explicit indexer objects. + + ExplicitIndexer objects wrap a tuple of values given by their ``tuple`` + property. These tuples should always have length equal to the number of + dimensions on the indexed array. + + Do not instantiate BaseIndexer objects directly: instead, use one of the + sub-classes BasicIndexer, OuterIndexer or VectorizedIndexer. + """ + + __slots__ = ("_value",) + + def __init__(self, key): + if type(self) is ExplicitIndexer: + raise TypeError("cannot instantiate base ExplicitIndexer objects") + self._value = tuple(key) + + @property + def value(self): + return self._value + + def __repr__(self): + return f"{type(self).__name__}({self.value})" + + +def _as_integer_or_none(value): + return None if value is None else operator.index(value) + + +def as_integer_slice(value): + start = _as_integer_or_none(value.start) + stop = _as_integer_or_none(value.stop) + step = _as_integer_or_none(value.step) + return slice(start, stop, step) + + +class BasicIndexer(ExplicitIndexer): + """Tuple for basic indexing. + + All elements should be int or slice objects. Indexing follows NumPy's + rules for basic indexing: each axis is independently sliced and axes + indexed with an integer are dropped from the result. + """ + + __slots__ = () + + def __init__(self, key): + if not isinstance(key, tuple): + raise TypeError(f"key must be a tuple: {key!r}") + + new_key = [] + for k in key: + if isinstance(k, integer_types): + k = int(k) + elif isinstance(k, slice): + k = as_integer_slice(k) + else: + raise TypeError( + f"unexpected indexer type for {type(self).__name__}: {k!r}" + ) + new_key.append(k) + + super().__init__(new_key) + + +class OuterIndexer(ExplicitIndexer): + """Tuple for outer/orthogonal indexing. + + All elements should be int, slice or 1-dimensional np.ndarray objects with + an integer dtype. 
Indexing is applied independently along each axis, and + axes indexed with an integer are dropped from the result. This type of + indexing works like MATLAB/Fortran. + """ + + __slots__ = () + + def __init__(self, key): + if not isinstance(key, tuple): + raise TypeError(f"key must be a tuple: {key!r}") + + found_ndarray = False + + new_key = [] + for k in key: + if isinstance(k, integer_types): + k = int(k) + elif isinstance(k, slice): + k = as_integer_slice(k) + elif isinstance(k, np.ndarray): + found_ndarray = True + if not np.issubdtype(k.dtype, np.integer): + raise TypeError( + f"invalid indexer array, does not have integer dtype: {k!r}" + ) + if k.ndim != 1: + raise TypeError( + f"invalid indexer array for {type(self).__name__}; must have " + f"exactly 1 dimension: {k!r}" + ) + k = np.asarray(k, dtype=np.int64) + else: + raise TypeError( + f"unexpected indexer type for {type(self).__name__}: {k!r}" + ) + new_key.append(k) + + if not found_ndarray: + raise ValueError("no ndarray key found: lower to BasicIndexer instead") + + super().__init__(new_key) + + +class VectorizedIndexer(ExplicitIndexer): + """Tuple for vectorized indexing. + + All elements should be slice or N-dimensional np.ndarray objects with an + integer dtype and the same number of dimensions. 
Indexing follows proposed + rules for np.ndarray.vindex, which matches NumPy's advanced indexing rules + (including broadcasting) except sliced axes are always moved to the end: + https://github.com/numpy/numpy/pull/6256 + """ + + __slots__ = () + + def __init__(self, key): + if not isinstance(key, tuple): + raise TypeError(f"key must be a tuple: {key!r}") + + new_key = [] + ndim = None + for k in key: + if isinstance(k, slice): + k = as_integer_slice(k) + elif isinstance(k, np.ndarray): + if not np.issubdtype(k.dtype, np.integer): + raise TypeError( + f"invalid indexer array, does not have integer dtype: {k!r}" + ) + if ndim is None: + ndim = k.ndim + elif ndim != k.ndim: + ndims = [k.ndim for k in key if isinstance(k, np.ndarray)] + raise ValueError( + "invalid indexer key: ndarray arguments " + f"have different numbers of dimensions: {ndims}" + ) + k = np.asarray(k, dtype=np.int64) + else: + raise TypeError( + f"unexpected indexer type for {type(self).__name__}: {k!r}" + ) + new_key.append(k) + + if ndim is None: + raise ValueError("no ndarray key found: lower to BasicIndexer instead") + + super().__init__(new_key) + + @property + def ndarray_shape(self) -> Tuple[int, ...]: + arrays = [k for k in self.value if isinstance(k, np.ndarray)] + return np.broadcast(*arrays).shape + + +@dataclass(frozen=True) +class TransposeArray(Array): + array: Array + order: Tuple[int, ...] 
+ + @property + def dtype(self): + return self.array.dtype + + @property + def shape(self): + return tuple(self.array.shape[axis] for axis in self.order) + + def _updated_key_and_order(self, key): + from .indexing import arrayize_vectorized_indexer + + if isinstance(key, (BasicIndexer, OuterIndexer)): + new_key = type(key)(tuple(key.value[axis] for axis in self.order)) + new_order = tuple( + axis + for axis, k in zip(self.order, new_key.value) + if not isinstance(k, int) + ) + else: + assert isinstance(key, VectorizedIndexer) + key = arrayize_vectorized_indexer(key, self.shape) + new_key = type(key)( + tuple(key.value[axis].transpose(self.order) for axis in self.order) + ) + new_order = None # check this! + + return new_key, new_order + + def __getitem_explicit__(self, indexer: ExplicitIndexer): + new_indexer, new_order = self._updated_key_and_order(indexer) + new_array = self.array.__getitem_explicit__(new_indexer) + if new_order is None: + return new_array + else: + return type(self)(new_array, new_order) + + def __setitem_explicit__(self, indexer: ExplicitIndexer, value): + new_indexer, new_order = self._updated_key_and_order(indexer) + if new_order is not None: + value = np.transpose(value, new_order) + self.array.__setitem_explicit__(new_indexer, value) diff --git a/xarray/lazyindex/indexing.py b/xarray/lazyindex/indexing.py new file mode 100644 index 00000000000..8aea0bd781a --- /dev/null +++ b/xarray/lazyindex/indexing.py @@ -0,0 +1,266 @@ +import numpy as np +from typing import cast, Tuple, Union +from dataclasses import dataclass + +from .core import ( + Array, + DuckArray, + BasicIndexer, + ExplicitIndexer, + OuterIndexer, + VectorizedIndexer, +) + + +def expanded_indexer(key, ndim): + """Given a key for indexing an ndarray, return an equivalent key which is a + tuple with length equal to the number of dimensions. 
+ + The expansion is done by replacing all `Ellipsis` items with the right + number of full slices and then padding the key with full slices so that it + reaches the appropriate dimensionality. + """ + if not isinstance(key, tuple): + # numpy treats non-tuple keys equivalent to tuples of length 1 + key = (key,) + new_key = [] + # handling Ellipsis right is a little tricky, see: + # http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing + found_ellipsis = False + for k in key: + if k is Ellipsis: + if not found_ellipsis: + new_key.extend((ndim + 1 - len(key)) * [slice(None)]) + found_ellipsis = True + else: + new_key.append(slice(None)) + else: + new_key.append(k) + if len(new_key) > ndim: + raise IndexError("too many indices") + new_key.extend((ndim - len(new_key)) * [slice(None)]) + return tuple(new_key) + + +def _normalize_slice(sl, size): + """Ensure that given slice only contains positive start and stop values + (stop can be -1 for full-size slices with negative steps, e.g. [-10::-1])""" + return slice(*sl.indices(size)) + + +def _slice_slice(old_slice: slice, applied_slice: slice, size: int) -> slice: + """Given a slice and the size of the dimension to which it will be applied, + index it with another slice to return a new slice equivalent to applying + the slices sequentially + """ + old_slice = _normalize_slice(old_slice, size) + + size_after_old_slice = len(range(old_slice.start, old_slice.stop, old_slice.step)) + if size_after_old_slice == 0: + # nothing left after applying first slice + return slice(0) + + applied_slice = _normalize_slice(applied_slice, size_after_old_slice) + + start = old_slice.start + applied_slice.start * old_slice.step + if start < 0: + # nothing left after applying second slice + # (can only happen for old_slice.step < 0, e.g. 
def _expand_slice(slice_, size):
    """Return the positions selected by ``slice_`` on an axis of length ``size``.

    Parameters
    ----------
    slice_ : slice
        Slice to materialise.
    size : int
        Length of the axis the slice is applied to.

    Returns
    -------
    np.ndarray
        1-D integer array built from ``slice_.indices(size)``.
    """
    return np.arange(*slice_.indices(size))


def _as_builtin_int(index):
    """Return NumPy integer scalars as builtin ``int``; pass anything else through.

    ``np.arange(...)[i]`` and ``ndarray[i]`` produce ``np.integer`` scalars,
    which are not instances of ``int``. Normalising them lets the
    ``isinstance(k, int)`` checks below recognise every scalar index.
    """
    return int(index) if isinstance(index, np.integer) else index


def _index_indexer_1d(old_indexer, applied_indexer, size):
    """Compose two indexers that act on the same axis.

    Parameters
    ----------
    old_indexer : slice or np.ndarray
        Indexer already recorded for the axis.
    applied_indexer : int, slice or np.ndarray
        Indexer applied on top of ``old_indexer``.
    size : int
        Length of the underlying axis.

    Returns
    -------
    slice, np.ndarray or NumPy integer scalar
        A single indexer equivalent to applying ``old_indexer`` and then
        ``applied_indexer``. Note that indexing with an integer yields a
        NumPy scalar, not a builtin ``int``.
    """
    assert isinstance(applied_indexer, integer_types + (slice, np.ndarray))
    if isinstance(applied_indexer, slice) and applied_indexer == slice(None):
        # shortcut for the usual case
        return old_indexer
    if isinstance(old_indexer, slice):
        if isinstance(applied_indexer, slice):
            # Slice of a slice is delegated to _slice_slice.
            indexer = _slice_slice(old_indexer, applied_indexer, size)
        else:
            # Materialise the slice so it can be indexed by an int or array.
            indexer = _expand_slice(old_indexer, size)[applied_indexer]
    else:
        indexer = old_indexer[applied_indexer]
    return indexer


BasicOrOuterIndexer = Union[BasicIndexer, OuterIndexer]


@dataclass(frozen=True)
class OIndexArray(Array):
    """Deferred basic/outer indexing of ``array`` by ``key``.

    Indexing an instance does not index ``array``; it returns a new wrapper
    whose key is the composition of the stored key and the new one.
    Vectorized indexing is handed over to ``VIndexArray``.
    """

    # Wrapped array; only its ``shape``/``dtype`` are read until assignment.
    array: DuckArray
    # One entry per axis of ``array``: an integer (axis dropped), a slice or
    # an integer ndarray.
    key: BasicOrOuterIndexer

    @property
    def dtype(self):
        """Dtype of the wrapped array."""
        return self.array.dtype

    @property
    def shape(self):
        """Shape after applying ``key``; integer entries contribute no axis."""
        shape = []
        for size, k in zip(self.array.shape, self.key.value):
            if isinstance(k, slice):
                shape.append(len(range(*k.indices(size))))
            elif isinstance(k, np.ndarray):
                shape.extend(k.shape)
        return tuple(shape)

    def _updated_key(self, new_key: BasicOrOuterIndexer) -> BasicOrOuterIndexer:
        """Compose ``new_key`` (relative to this view) with the stored key.

        Returns a ``BasicIndexer`` when every composed entry is an integer or
        a slice, otherwise an ``OuterIndexer``. Both are expressed against the
        axes of ``self.array``.
        """
        iter_new_key = iter(expanded_indexer(new_key.value, self.ndim))
        full_key = []
        for size, k in zip(self.array.shape, self.key.value):
            # Keys may carry NumPy integer scalars; treat them as plain ints
            # so the axis is recognised as already dropped.
            k = _as_builtin_int(k)
            if isinstance(k, int):
                # A dropped axis is invisible in this view, so it must not
                # consume an entry of new_key.
                full_key.append(k)
            else:
                composed = _index_indexer_1d(k, next(iter_new_key), size)
                # Integer indexing yields np.integer; store a builtin int so
                # later compositions and the check below see a scalar.
                full_key.append(_as_builtin_int(composed))
        full_key = tuple(full_key)

        if all(isinstance(k, (int, slice)) for k in full_key):
            return BasicIndexer(full_key)
        return OuterIndexer(full_key)

    def _getitem_explicit_(self, indexer: ExplicitIndexer) -> Array:
        """Return a new deferred view for ``indexer`` without touching ``array``."""
        if isinstance(indexer, VectorizedIndexer):
            return VIndexArray(self, indexer)
        indexer = cast(BasicOrOuterIndexer, indexer)
        return type(self)(self.array, self._updated_key(indexer))

    def _setitem_explicit_(self, indexer: ExplicitIndexer, value):
        """Assign ``value`` into the wrapped array at the composed key.

        Raises
        ------
        NotImplementedError
            If ``indexer`` is a ``VectorizedIndexer``.
        """
        if isinstance(indexer, VectorizedIndexer):
            raise NotImplementedError(
                "Lazy item assignment with the vectorized indexer is not yet "
                "implemented. Load your data first by .load() or compute()."
            )
        indexer = cast(BasicOrOuterIndexer, indexer)
        full_key = self._updated_key(indexer)
        self.array[full_key] = value


@dataclass(frozen=True)
class VIndexArray(Array):
    """Deferred vectorized indexing of ``array`` by ``key``.

    Indexing an instance composes the new indexer with the stored one and
    returns another ``VIndexArray``; item assignment is not supported.
    """

    # Wrapped array; only its ``shape``/``dtype`` are read here.
    array: DuckArray
    # Entries are slices or ndarrays, one per axis of ``array``.
    key: VectorizedIndexer

    @property
    def dtype(self):
        """Dtype of the wrapped array."""
        return self.array.dtype

    @property
    def shape(self):
        """``key.ndarray_shape`` followed by the length of every slice entry."""
        slice_shape = []
        for size, k in zip(self.array.shape, self.key.value):
            if isinstance(k, slice):
                slice_shape.append(len(range(*k.indices(size))))

        return self.key.ndarray_shape + tuple(slice_shape)

    def _updated_key(self, new_key):
        """Compose ``new_key`` (relative to this view) with the stored key.

        Both keys are converted to pure-array vectorized indexers; the stored
        arrays are broadcast together and then indexed by the new key.
        """
        if isinstance(new_key, VectorizedIndexer):
            # TODO: use slicing and transposing rather than converting into
            # ndarrays
            new_key = arrayize_vectorized_indexer(new_key, self.shape)
        else:
            new_key = _outer_to_vectorized_indexer(new_key, self.shape)
        # TODO: handle slices rather than converting entirely into ndarrays.
        old_key = arrayize_vectorized_indexer(self.key, self.array.shape)
        return VectorizedIndexer(
            tuple(o[new_key.value] for o in np.broadcast_arrays(*old_key.value))
        )

    def _getitem_explicit_(self, indexer: ExplicitIndexer):
        """Return a new deferred view for ``indexer`` without touching ``array``."""
        # TODO: lower into OIndexArray when possible
        return type(self)(self.array, self._updated_key(indexer))

    def _setitem_explicit_(self, indexer: ExplicitIndexer, value):
        """Always raise ``NotImplementedError``: assignment is unsupported."""
        raise NotImplementedError(
            "Lazy item assignment with the vectorized indexer is not yet "
            "implemented. Load your data first by .load() or compute()."
        )


def arrayize_vectorized_indexer(
    indexer: VectorizedIndexer, shape: Tuple[int, ...]
) -> VectorizedIndexer:
    """Return an identical vindex but slices are replaced by arrays.

    Parameters
    ----------
    indexer : VectorizedIndexer
        An indexer to convert.
    shape : tuple
        Shape of the array subject to the indexing.

    Returns
    -------
    VectorizedIndexer
        Tuple suitable for use to index a NumPy array with vectorized indexing.
        Each element is an array: broadcasting them together gives the shape
        of the result. ``indexer`` itself is returned when it holds no slice.
    """
    slices = [v for v in indexer.value if isinstance(v, slice)]
    if not slices:
        return indexer

    arrays = [v for v in indexer.value if isinstance(v, np.ndarray)]
    # Number of leading axes occupied by the array indexers; taken from the
    # first array (assumes all array entries share one ndim -- TODO confirm).
    n_dim = arrays[0].ndim if arrays else 0
    i_dim = 0
    new_key = []
    for v, size in zip(indexer.value, shape):
        if isinstance(v, np.ndarray):
            # Append one trailing length-1 axis per slice so the array
            # broadcasts against the expanded slices.
            new_key.append(np.reshape(v, v.shape + (1,) * len(slices)))
        else:
            v = cast(slice, v)
            # The i-th slice varies along axis ``n_dim + i`` only.
            index_shape = (
                (1,) * (n_dim + i_dim) + (-1,) + (1,) * (len(slices) - i_dim - 1)
            )
            new_key.append(np.arange(*v.indices(size)).reshape(index_shape))
            i_dim += 1
    return VectorizedIndexer(tuple(new_key))


def _outer_to_vectorized_indexer(
    key: BasicOrOuterIndexer, shape: Tuple[int, ...]
) -> VectorizedIndexer:
    """Convert an OuterIndexer into a vectorized indexer.

    Parameters
    ----------
    key : Outer/Basic Indexer
        An indexer to convert.
    shape : tuple
        Shape of the array subject to the indexing.

    Returns
    -------
    VectorizedIndexer
        Tuple suitable for use to index a NumPy array with vectorized indexing.
        Each element is an array: broadcasting them together gives the shape
        of the result.
    """
    # NumPy integer scalars must count as scalars, not as array axes,
    # otherwise n_dim (and every broadcast shape below) is off by one.
    values = tuple(_as_builtin_int(k) for k in key.value)
    n_dim = sum(not isinstance(k, int) for k in values)
    i_dim = 0
    new_key = []
    for k, size in zip(values, shape):
        if isinstance(k, int):
            # Scalars broadcast along every result axis.
            new_key.append(np.array(k).reshape((1,) * n_dim))
        else:  # np.ndarray or slice
            if isinstance(k, slice):
                k = np.arange(*k.indices(size))
            # Signed and unsigned integer arrays are both valid indices.
            assert k.dtype.kind in {"i", "u"}
            # The i-th non-scalar entry varies along result axis ``i`` only.
            index_shape = (1,) * i_dim + (k.size,) + (1,) * (n_dim - i_dim - 1)
            new_key.append(k.reshape(index_shape))
            i_dim += 1
    return VectorizedIndexer(tuple(new_key))