From a6758893d9538103a989a70b79386c9adc1f4153 Mon Sep 17 00:00:00 2001
From: Kevin Schwarzwald
Date: Tue, 13 Feb 2024 18:58:12 -0500
Subject: [PATCH] update docs to 0.3.2.0

---
 docs/api_user.rst             |  2 +-
 docs/conf.py                  |  4 ++--
 docs/index.rst                |  6 +++---
 docs/installation.rst         | 21 +++++++++++++++++----
 docs/intro.rst                | 20 +++++++++-----------
 docs/notebooks/base_run.ipynb |  2 +-
 docs/notebooks/full_run.ipynb |  4 ++--
 docs/tips.rst                 | 14 +++++++++++---
 xagg/classes.py               |  8 ++++----
 xagg/diag.py                  |  1 -
 10 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/docs/api_user.rst b/docs/api_user.rst
index ff27359..4cd445a 100644
--- a/docs/api_user.rst
+++ b/docs/api_user.rst
@@ -23,7 +23,7 @@ Primary (wrapper) functions
 Auxiliary functions
 =================================
 
-.. automodule:: xagg.aux
+.. automodule:: xagg.auxfuncs
    :members:
 
diff --git a/docs/conf.py b/docs/conf.py
index 70fb4d9..b9aaf37 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -19,11 +19,11 @@
 # -- Project information -----------------------------------------------------
 
 project = 'xagg'
-copyright = '2021-2023, Kevin Schwarzwald'
+copyright = '2021-2024, Kevin Schwarzwald'
 author = 'Kevin Schwarzwald'
 
 # The full version, including alpha/beta/rc tags
-release = '3.1'
+release = '0.3.2.0'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/docs/index.rst b/docs/index.rst
index 2654fbe..8ee3246 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -6,9 +6,9 @@ xagg
 =======================================
 
-A package to aggregate gridded data in ``xarray`` to polygons in ``geopandas`` using area-weighting from the relative area overlaps between pixels and polygons.
+A package to aggregate gridded data in :py:mod:`xarray` to polygons in :py:mod:`geopandas` using area-weighting from the relative area overlaps between pixels and polygons.
 
-The simplest code run, involving raster data in an ``xarray`` Dataset ``ds`` and polygons in a ``geopandas`` GeoDataFrame ``gpd``, is::
+The simplest code run, involving raster data in an :py:mod:`xarray` :py:class:`~xarray.Dataset` ``ds`` and polygons in a :py:mod:`geopandas` :py:class:`~geopandas.GeoDataFrame` ``gpd``, is::
 
    import xagg as xa
 
@@ -19,7 +19,7 @@ The simplest code run, involving raster data in an ``xarray`` Dataset ``ds`` and
 
    aggregated = xa.aggregate(ds,weightmap)
 
-``aggregated`` can then be turned into an ``xarray`` Dataset, a ``geopandas`` GeoDataFrame, or directly exported to a CSV (for use in, e.g., STATA), NetCDF, or Shapefile.
+``aggregated`` can then be turned into an :py:mod:`xarray` :py:class:`~xarray.Dataset`, a :py:mod:`geopandas` :py:class:`~geopandas.GeoDataFrame`, or directly exported to a CSV (for use in, e.g., STATA), NetCDF, or Shapefile.
 
 .. toctree::
    :caption: Contents
diff --git a/docs/installation.rst b/docs/installation.rst
index 7177088..8e6e83d 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -1,14 +1,27 @@
 Installation
 =======================================
-The easiest way to install ``xagg`` is through ``conda`` or ``mamba``::
+The easiest and recommended way to install :py:mod:`xagg` is through ``conda`` or ``mamba``::
 
    # Mamba
-   mamba install -c conda-forge xagg
+   mamba install -c conda-forge xagg==0.3.2.0
 
    # Conda
-   conda install -c conda-forge xagg
+   conda install -c conda-forge xagg==0.3.2.0
 
 ``xagg`` can also be installed through ``pip``::
 
-   pip install xagg
\ No newline at end of file
+   pip install xagg
+
+though certain dependencies may not be available through ``pip``.
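+
+To confirm which version was installed, a quick check is the following (a minimal sketch using only the standard-library :py:mod:`importlib.metadata`; the only assumption is that the distribution is named ``xagg``)::
+
+   from importlib.metadata import version
+
+   # Prints the installed xagg version, e.g., 0.3.2.0
+   print(version("xagg"))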
+
+Optional dependencies
+----------------------------------------
+If using ``weights`` grids to add another weight layer to the aggregation (e.g., weighting raster data additionally by population density), we recommend installing :py:mod:`xesmf`, which is required for :py:mod:`xagg` to perform regridding if the weight and raster grids are not equal. :py:mod:`xesmf` must be `installed manually <https://xesmf.readthedocs.io/en/stable/installation.html>`_, since its dependencies are not available through ``pip`` (and ``conda`` does not support installing optional dependencies).
+
+
+If you want to create diagnostic figures using :py:meth:`weightmap.diag_fig()`, then :py:mod:`matplotlib`, :py:mod:`cartopy`, and :py:mod:`cmocean` are additionally required. These can be installed using ``pip`` ::
+
+   pip install xagg[plots]
+
+
diff --git a/docs/intro.rst b/docs/intro.rst
index 9afc06b..a351755 100644
--- a/docs/intro.rst
+++ b/docs/intro.rst
@@ -5,13 +5,13 @@ Overview
 =======================================
 Science often happens on grids - gridded weather products, interpolated pollution data, night time lights, remote sensing all approximate the continuous real world for reasons of data resolution, processing time, or ease of calculation.
 
-However, living things don't live on grids, and rarely play, act, or observe data on grids either. Instead, humans tend to work on the county, state, township, okrug, or city level; birds tend to fly along complex migratory corridors; and rain- and watersheds follow valleys and mountains.
+However, living things don't live on grids, and rarely play, act, or observe data on grids either. Instead, humans tend to work on the county, state, township, Bezirk, or city level; birds tend to fly along complex migratory corridors; and rain- and watersheds follow valleys and mountains.
 
-So, whenever we need to work with both gridded and geographic data products, we need ways of getting them to match up. We may be interested for example what the average temperature over a county is, or the average rainfall rate over a watershed.
+So, whenever we need to work with both gridded and geographic data products, we need ways of getting them to match up. We may be interested, for example, in the average temperature over a county, or the average rainfall rate over a watershed.
 
-Enter ``xagg``.
+Enter :py:mod:`xagg`.
 
-``xagg`` provides an easy-to-use (2 lines!), standardized way of aggregating raster data to polygons. All you need is some gridded data in an :class:`xarray.Dataset` or :class:`xarray.DataArray` and some polygon data in a :class:`geopandas.GeoDataFrame`. Both of these are easy to use for the purposes of ``xagg`` - for example, all you need to use a shapefile is to open it::
+:py:mod:`xagg` provides an easy-to-use (2 lines!), standardized way of aggregating raster data to polygons. All you need is some gridded data in an :class:`xarray.Dataset` or :class:`xarray.DataArray` and some polygon data in a :class:`geopandas.GeoDataFrame`. Both of these are easy to use for the purposes of :py:mod:`xagg` - for example, all you need to use a shapefile is to open it::
 
    import xarray as xr
    import geopandas as gpd
@@ -23,7 +23,7 @@ Enter ``xagg``.
 
    ds = xr.open_dataset('file.nc')
 
-   gdf = gpd.open_dataset('file.shp')
+   gdf = gpd.read_file('file.shp')
 
-``xagg`` will then figure out the geographic grid (lat/lon) in ``ds``, create polygons for each pixel, and then generate intersects between every polygon in the shapefile and every pixel. For each polygon in the shapefile, the relative area of each covering pixel is calculated - so, for example, if a polygon (say, a US county) is the size and shape of a grid pixel, but is split halfway between two pixels, the weight for each pixel will be 0.5, and the value of the gridded variables on that polygon will just be the average of both.
+:py:mod:`xagg` will then figure out the geographic grid (lat/lon) in ``ds``, create polygons for each pixel, and then generate the intersections between every polygon in the shapefile and every pixel. For each polygon in the shapefile, the relative area of each covering pixel is calculated - so, for example, if a polygon (say, a US county) is the size and shape of a grid pixel, but is split halfway between two pixels, the weight for each pixel will be 0.5, and the value of the gridded variables on that polygon will just be the average of both.
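+
+As a concrete illustration of that weighting (a hypothetical two-pixel polygon; the numbers are made up for this sketch)::
+
+   # Two pixels, each covering half the polygon's area
+   weights = [0.5, 0.5]
+   values = [10.0, 14.0]
+
+   # Area-weighted polygon mean: (0.5*10.0 + 0.5*14.0) / (0.5 + 0.5) = 12.0
+   poly_mean = sum(w * v for w, v in zip(weights, values)) / sum(weights)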
 
 The two lines mentioned before? ::
 
@@ -35,20 +35,18 @@ The two lines mentioned before? ::
 
    # Get overlap between pixels and polygons
    weightmap = xa.pixel_overlaps(ds,gdf)
 
    # Aggregate data in [ds] onto polygons
    aggregated = xa.aggregate(ds,weightmap)
 
-   # aggregated can now be converted into an xarray dataset (using aggregated.to_dataset()),
-   # or a geopandas geodataframe (using aggregated.to_dataframe()), or directly exported
-   # to netcdf, csv, or shp files using aggregated.to_csv()/.to_netcdf()/.to_shp()
+The ``aggregated`` object can now be converted into an :py:mod:`xarray` :py:class:`~xarray.Dataset` (using :py:meth:`aggregated.to_dataset()`) or a :py:mod:`geopandas` :py:class:`~geopandas.GeoDataFrame` (using :py:meth:`aggregated.to_dataframe()`), or directly exported to NetCDF, CSV, or shapefile files using :py:meth:`aggregated.to_csv()`/:py:meth:`.to_netcdf()`/:py:meth:`.to_shp()`.
 
-Researchers often need to weight your data by more than just its relative area overlap with a polygon (for example, do you want to weight pixels with more population more?). ``xagg`` has a built-in support for adding an additional weight grid (another `xarray` DataArray) into :func:`xagg.wrappers.pixel_overlaps`.
+Researchers often need to weight their data by more than just its relative area overlap with a polygon (for example, weighting pixels with higher population more heavily). :py:mod:`xagg` has built-in support for adding an additional weight grid (another :py:mod:`xarray` :py:class:`~xarray.DataArray`) into :py:func:`pixel_overlaps`; see the short sketch at the end of this section.
 
-Finally, ``xagg`` allows for direct exporting of the aggregated data in several commonly used data formats (please open issues if you'd like support for something else!):
+Finally, :py:mod:`xagg` allows for direct exporting of the aggregated data in several commonly used data formats (please open issues if you'd like support for something else!):
 
 - NetCDF
 - CSV for STATA, R
 - Shapefile for QGIS, further spatial processing
 
-Best of all, ``xagg`` is flexible. Multiple variables in your dataset? ``xagg`` will aggregate them all, as long as they have at least ``lat/lon`` dimensions. Fields in your shapefile that you'd like to keep? ``xagg`` keeps all fields (for example FIPS codes from county datasets) all the way through the final export. Weird dimension names? ``xagg`` is trained to recognize all versions of "lat", "Latitude", "Y", "nav_lat", "Latitude_1"... etc. that the author has run into over the years of working with climate data; and this list is easily expandable as a keyword argument if needed.
+Best of all, :py:mod:`xagg` is flexible. Multiple variables in your dataset? :py:mod:`xagg` will aggregate them all, as long as they have at least ``lat/lon`` dimensions. Fields in your shapefile that you'd like to keep? :py:mod:`xagg` keeps all fields (for example FIPS codes from county datasets) all the way through the final export. Weird dimension names? :py:mod:`xagg` is trained to recognize all versions of "lat", "Latitude", "Y", "nav_lat", "Latitude_1"... etc. that the author has run into over the years of working with climate data; and this list is easily expandable via a keyword argument if needed.
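+
+A short sketch of that additional weighting (``pop`` is a hypothetical population :py:class:`~xarray.DataArray`; the file and variable names here are placeholders, and if the weight grid differs from the grid of ``ds``, :py:mod:`xesmf` is needed for the regridding step)::
+
+   import xagg as xa
+   import xarray as xr
+
+   # Hypothetical gridded population data, loaded as a DataArray
+   pop = xr.open_dataset('population.nc').population
+
+   # Pixel weights now combine relative area overlap with population
+   weightmap = xa.pixel_overlaps(ds,gdf,weights=pop)
+   aggregated = xa.aggregate(ds,weightmap)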
 
 Use cases
 =======================================
 
@@ -59,4 +57,4 @@ Many climate econometrics studies use societal data (mortality, crop yields, etc
 
 Area-weighting of pixels onto polygons ensures that aggregating weather and climate data onto polygons occurs in a robust way. Consider a (somewhat contrived) example: an administrative region is in a relatively flat lowlands, but a pixel that slightly overlaps the polygon primarily covers a wholly different climate (mountainous, desert, etc.). Using a simple mask would weight that pixel the same, though its information is not necessarily relevant to the climate of the region. Population-weighting may not always be sufficient either; consider Los Angeles, which has multiple significantly different climates, all with high densities.
 
-``xagg`` allows a simple population *and* area-averaging, in addition to export functions that will turn the aggregated data into output easily used in STATA or R for further calculations.
+:py:mod:`xagg` allows simple population *and* area-averaging, in addition to export functions that will turn the aggregated data into output easily used in STATA or R for further calculations.
diff --git a/docs/notebooks/base_run.ipynb b/docs/notebooks/base_run.ipynb
index 590654f..4d7c404 100644
--- a/docs/notebooks/base_run.ipynb
+++ b/docs/notebooks/base_run.ipynb
@@ -881,7 +881,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.17"
+   "version": "3.12.1"
   }
  },
  "nbformat": 4,
diff --git a/docs/notebooks/full_run.ipynb b/docs/notebooks/full_run.ipynb
index 41c32a9..331e3ba 100644
--- a/docs/notebooks/full_run.ipynb
+++ b/docs/notebooks/full_run.ipynb
@@ -11,7 +11,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 1,
+    "execution_count": null,
     "id": "broken-labor",
     "metadata": {},
     "outputs": [],
@@ -1078,7 +1078,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.17"
+   "version": "3.12.1"
   }
  },
  "nbformat": 4,
diff --git a/docs/tips.rst b/docs/tips.rst
index 8666ad3..48a35b3 100644
--- a/docs/tips.rst
+++ b/docs/tips.rst
@@ -30,13 +30,13 @@ If calculating weights from rasters is taking a substantial amount of time (e.g
 
    # Continue as usual...
    aggregated = xa.aggregate(ds,weightmap)
 
-Note that ``weightmap.to_file(fn)`` creates and populates a separate _directory_ named ``fn`` to be able to store all the relevant components of the ``weightmap`` class, including shapefiles with the geometry of the input polygons, the dataframe with the pixel overlap data, the source grid, and any additional weight grids.
+Note that :py:meth:`weightmap.to_file(fn)` creates and populates a separate *directory* named ``fn`` to store all the relevant components of the :py:class:`weightmap` class, including shapefiles with the geometry of the input polygons, the dataframe with the pixel overlap data, the source grid, and any additional weight grids.
 
 This feature is still slightly experimental, so please let us know your experiences!
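+
+For reference, the full round trip looks like the following (a minimal sketch; ``wm_dir`` is a placeholder directory name, and :py:func:`read_wm` is assumed to be the loader used above)::
+
+   import xagg as xa
+
+   # Calculate the weights once, then save them to a directory
+   weightmap = xa.pixel_overlaps(ds,gdf)
+   weightmap.to_file('wm_dir')
+
+   # In a later session, reload the saved weights instead of recalculating them
+   weightmap = xa.read_wm('wm_dir')
+   aggregated = xa.aggregate(ds,weightmap)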
 
 Speed up overlap calculation
 ---------------------------------------
-At the expense of increased memory issue, processing may be sped up using an alternate calculation method (``impl='dot_product'``) ::
+At the expense of increased memory usage, processing may be sped up using an alternate calculation method (``impl='dot_product'``) ::
 
    # Get overlap between pixels and polygons
    weightmap = xa.pixel_overlaps(ds,gdf,impl='dot_product')
 
@@ -44,7 +44,15 @@ At the expense of increased memory issue, processing may be sped up using an alt
 
    # Aggregate data in [ds] onto polygons
    aggregated = xa.aggregate(ds,weightmap,impl='dot_product')
 
-This feature is still slightly experimental, so please let us know your experiences!
+Create diagnostic figure to inspect raster/polygon overlaps
+------------------------------------------------------------
+Once you have created a :py:class:`weightmap`, the following code will create a diagnostic figure showing a particular polygon (or group of polygons) and the grid cells that overlap it, colored by the relative overlap of each grid cell with the polygon ::
+
+   # Querying the polygon by a column of the polygon `gdf`
+   weightmap.diag_fig({'name':'Alaska'},ds)
+
+   # Plotting the first polygon in the polygon `gdf`
+   weightmap.diag_fig(0,ds)
diff --git a/xagg/classes.py b/xagg/classes.py
index 5ad1898..351fa36 100644
--- a/xagg/classes.py
+++ b/xagg/classes.py
@@ -13,10 +13,10 @@
 except ImportError:
     no_plotting = True
 
-try:
-    import tables
-except ImportError:
-    no_hd5_output = True
+#try:
+#    import tables
+#except ImportError:
+#    no_hd5_output = True
 
 
 # POSSIBLE CHANGE: I'm not quite sure how python deals with memory
diff --git a/xagg/diag.py b/xagg/diag.py
index 343a4e0..9071811 100644
--- a/xagg/diag.py
+++ b/xagg/diag.py
@@ -2,7 +2,6 @@
 import xarray as xr
 import numpy as np
 import geopandas as gpd
-import xagg as xa
 import warnings
 from cartopy import crs as ccrs
 from matplotlib import pyplot as plt