From 88fc6ba996c9b45ae8e6ea796e1be7a7ed9771d0 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 30 Jun 2024 22:39:39 -0600 Subject: [PATCH 01/21] Add encode_cf, decode_cf --- doc/source/io.ipynb | 122 ++++++++++++++++++++++++++------------------ xvec/accessor.py | 53 +++++++++++++++++++ 2 files changed, 124 insertions(+), 51 deletions(-) diff --git a/doc/source/io.ipynb b/doc/source/io.ipynb index 77b4a93..0d9d8c8 100644 --- a/doc/source/io.ipynb +++ b/doc/source/io.ipynb @@ -411,38 +411,48 @@ " divorce (county, year) float64 99kB 1.859 2.62 3.747 ... 4.782 7.415\n", " age (county, year) float64 99kB 28.8 30.5 34.5 ... 28.97 35.33\n", "Indexes:\n", - " county GeometryIndex (crs=EPSG:4326)
    • year
      PandasIndex
      PandasIndex(Index([1960, 1970, 1980, 1990], dtype='int64', name='year'))
    • county
      GeometryIndex (crs=EPSG:4326)
      GeometryIndex(\n",
      +       "    [<POLYGON ((-95.343 48.547, -95.341 48.715, -95.094 48.717, -95.095 48.912, -...>\n",
      +       "     <POLYGON ((-118.851 47.95, -118.847 48.478, -118.869 48.479, -118.87 48.647,...>\n",
      +       "     <POLYGON ((-117.438 48.044, -117.541 48.043, -117.536 47.79, -117.607 47.798...>\n",
      +       "     <POLYGON ((-118.971 47.939, -118.973 47.96, -118.953 47.989, -118.953 48.02,...>\n",
      +       "     ...\n",
      +       "     <POLYGON ((-107.195 34.584, -107.718 34.584, -108.227 34.583, -109.048 34.59...>\n",
      +       "     <POLYGON ((-76.396 37.108, -76.403 37.091, -76.434 37.089, -76.464 37.103, -...>\n",
      +       "     <POLYGON ((-77.532 38.565, -77.721 38.84, -77.707 38.877, -77.678 38.885, -7...>\n",
      +       "     <POLYGON ((-111.372 44.745, -111.368 45.35, -111.349 45.349, -111.349 45.635...>],\n",
      +       "    crs=EPSG:4326)
  • " ], "text/plain": [ " Size: 420kB\n", @@ -485,22 +495,13 @@ "source": [ "## CF conventions and netCDF, Zarr\n", "\n", - "Use `cf_xarray.geometry.encode_geometries` and `cf_xarray.geometry.decode_geometries` to encode geometry arrays with CF conventions to a form that is compatible with any array storage format (e.g. netCDF, Zarr).\n" + "Use `.xvec.encode_cf()` to encode geometry arrays with CF conventions to a form that is compatible with any array storage format (e.g. netCDF, Zarr). This function uses `cf_xarray.geometry.encode_geometries` and `cf_xarray.geometry.decode_geometries` under the hood, so `cf_xarray` must be installed.\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], - "source": [ - "import cf_xarray as cfxr" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, "outputs": [ { "data": { @@ -871,6 +872,7 @@ "
    <xarray.Dataset> Size: 2MB\n",
            "Dimensions:             (county: 3085, part: 3172, node: 80563, year: 4)\n",
            "Coordinates:\n",
    +       "    spatial_ref         int64 8B 0\n",
            "  * year                (year) int64 32B 1960 1970 1980 1990\n",
            "Dimensions without coordinates: county, part, node\n",
            "Data variables:\n",
    @@ -880,45 +882,46 @@
            "    geometry_container  float64 8B nan\n",
            "    x                   (node) float64 645kB -95.34 -95.34 ... -111.3 -111.4\n",
            "    y                   (node) float64 645kB 48.55 48.72 48.72 ... 44.73 44.75\n",
    -       "    crd_x               (county) float64 25kB -95.34 -118.9 ... -77.53 -111.4\n",
    -       "    crd_y               (county) float64 25kB 48.55 47.95 48.04 ... 38.57 44.75\n",
    +       "    lon                 (county) float64 25kB -95.34 -118.9 ... -77.53 -111.4\n",
    +       "    lat                 (county) float64 25kB 48.55 47.95 48.04 ... 38.57 44.75\n",
            "    population          (county, year) int64 99kB 4304 3987 3764 ... 55800 65077\n",
            "    unemployment        (county, year) float64 99kB 7.9 9.0 ... 7.018 5.489\n",
            "    divorce             (county, year) float64 99kB 1.859 2.62 ... 4.782 7.415\n",
    -       "    age                 (county, year) float64 99kB 28.8 30.5 ... 28.97 35.33
    • year
      PandasIndex
      PandasIndex(Index([1960, 1970, 1980, 1990], dtype='int64', name='year'))
  • " ], "text/plain": [ " Size: 2MB\n", "Dimensions: (county: 3085, part: 3172, node: 80563, year: 4)\n", "Coordinates:\n", + " spatial_ref int64 8B 0\n", " * year (year) int64 32B 1960 1970 1980 1990\n", "Dimensions without coordinates: county, part, node\n", "Data variables:\n", @@ -928,21 +931,21 @@ " geometry_container float64 8B nan\n", " x (node) float64 645kB -95.34 -95.34 ... -111.3 -111.4\n", " y (node) float64 645kB 48.55 48.72 48.72 ... 44.73 44.75\n", - " crd_x (county) float64 25kB -95.34 -118.9 ... -77.53 -111.4\n", - " crd_y (county) float64 25kB 48.55 47.95 48.04 ... 38.57 44.75\n", + " lon (county) float64 25kB -95.34 -118.9 ... -77.53 -111.4\n", + " lat (county) float64 25kB 48.55 47.95 48.04 ... 38.57 44.75\n", " population (county, year) int64 99kB 4304 3987 3764 ... 55800 65077\n", " unemployment (county, year) float64 99kB 7.9 9.0 ... 7.018 5.489\n", " divorce (county, year) float64 99kB 1.859 2.62 ... 4.782 7.415\n", " age (county, year) float64 99kB 28.8 30.5 ... 28.97 35.33" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "encoded = cfxr.geometry.encode_geometries(cube)\n", + "encoded = cube.xvec.encode_cf()\n", "encoded" ] }, @@ -955,16 +958,16 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -978,12 +981,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "On open, use `cf_xarray.decode_geometries` to recover the array" + "On open, use `.xvec.decode_cf` to recover the array. This function uses `cf_xarray.decode_geometries` so again `cf_xarray` must be installed." 
] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -1363,14 +1366,14 @@ " population (county, year) int64 99kB dask.array<chunksize=(3085, 4), meta=np.ndarray>\n", " unemployment (county, year) float64 99kB dask.array<chunksize=(3085, 4), meta=np.ndarray>\n", "Indexes:\n", - " county GeometryIndex (crs=EPSG:4326)