jgrss/network args #29

Merged Oct 2, 2022 · 34 commits

Commits
85b39b4
import dims function
jgrss Sep 8, 2022
826a1e1
remove redundant pathlib join
jgrss Sep 8, 2022
be4f1e6
create method to get time/band dimensions
jgrss Sep 8, 2022
8349dfb
update arguments
jgrss Sep 8, 2022
72e8616
update predictor
jgrss Sep 8, 2022
7b4ee7a
upgrade versions
jgrss Sep 8, 2022
8ac2801
fixed import
jgrss Sep 16, 2022
0fb0402
removed file
jgrss Sep 16, 2022
2392306
Merge branch 'main' into jgrss/network_args
jgrss Sep 16, 2022
483006d
added tests for augmentation reshaping
jgrss Sep 16, 2022
87ffc1c
Merge branch 'jgrss/network_args' of github.com:jgrss/cultionet into …
jgrss Sep 16, 2022
abca260
formatting
jgrss Sep 19, 2022
50c6d20
added check for grid attribute
jgrss Sep 19, 2022
423532c
removed trace
jgrss Sep 19, 2022
f709574
allow inference on a CPU
jgrss Sep 28, 2022
57972a7
remove torch requirements
jgrss Sep 28, 2022
73871ce
upgrade geowombat
jgrss Sep 30, 2022
9689268
add load_model() method to base import
jgrss Oct 1, 2022
3cd6615
moved zscore to its own method
jgrss Oct 1, 2022
1608916
formatting
jgrss Oct 1, 2022
5f79a86
created a separate method to load a model
jgrss Oct 1, 2022
af89bef
added checks for data batch
jgrss Oct 1, 2022
b42be79
added checks for data batch
jgrss Oct 1, 2022
96cd6bd
formatting
jgrss Oct 1, 2022
e7a707e
added checks for data batch
jgrss Oct 1, 2022
be7f44a
formatting
jgrss Oct 1, 2022
4161020
added config flags
jgrss Oct 1, 2022
97f5500
added better time series checks and concurrent predictions
jgrss Oct 1, 2022
028552a
formatting
jgrss Oct 1, 2022
eec829f
added more install details
jgrss Oct 1, 2022
ef9bae4
updated dependencies and added ray
jgrss Oct 1, 2022
113d931
v1.2.1
jgrss Oct 1, 2022
0466651
re-added missing test data
jgrss Oct 2, 2022
f62f0bb
updated test window chunk size
jgrss Oct 2, 2022
18 changes: 11 additions & 7 deletions .github/workflows/ci.yml
@@ -26,9 +26,9 @@ jobs:
run: |
# Install GDAL
sudo apt update && sudo apt upgrade -y && sudo apt install -y
sudo apt install software-properties-common -y
sudo add-apt-repository ppa:ubuntugis/ppa
sudo apt install libmysqlclient-dev default-libmysqlclient-dev -y
sudo apt install gdal-bin libgdal-dev libgl1 libspatialindex-dev g++ -y
export CPLUS_INCLUDE_PATH=/usr/include/gdal
export C_INCLUDE_PATH=/usr/include/gdal
@@ -42,13 +42,17 @@ jobs:
pip install GDAL==$GDAL_VERSION --no-binary=gdal
- name: Install PyTorch
run: |
pip install --upgrade --no-cache-dir "setuptools<=58.*"
pip install torch torchvision torchaudio
pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric torch-geometric-temporal
TORCH_CPU="https://download.pytorch.org/whl/cpu"
PYG_TORCH_CPU="https://data.pyg.org/whl/torch-1.12.0+cpu.html"
pip install --upgrade --no-cache-dir setuptools>=0.59.0
pip install torch torchvision torchaudio --extra-index-url $TORCH_CPU
pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f $PYG_TORCH_CPU
pip install torch-geometric-temporal>=0.54.0
pip install torchmetrics>=0.7.0
if: steps.cnetenv.outputs.cache-hit != 'true'
- name: Install cultionet
run: |
pip install .
if: steps.cnetenv.outputs.cache-hit != 'true'
- name: Pytest
run: |
pip install pytest
143 changes: 79 additions & 64 deletions README.md
@@ -2,7 +2,7 @@
[![GitHub version](https://badge.fury.io/gh/jgrss%2Fcultionet.svg)](https://badge.fury.io/gh/jgrss%2Fcultionet)
[![](https://github.com/jgrss/cultionet/actions/workflows/ci.yml/badge.svg)](https://github.com/jgrss/cultionet/actions?query=workflow%3ACI)

**cultionet** is a library for semantic segmentation of cultivated land using a neural network.

The library is built on **[PyTorch Geometric](https://pytorch-geometric.readthedocs.io)** and **[PyTorch Lightning](https://www.pytorchlightning.ai/)**. The segmentation objectives (class targets and losses) were designed following the work by [Waldner _et al._](https://www.sciencedirect.com/science/article/abs/pii/S0034425720301115). However, there are two key differences between this library and the paper above:

@@ -36,23 +36,23 @@ x = [[r_w1, ..., r_w25, g_w1, ..., g_wN, b_w1, ..., b_wN, n_w1, ..., n_wN]]
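
The hunk context above shows the per-pixel feature layout the README describes: all red time steps first, then green, blue, and NIR. A minimal NumPy sketch of producing that band-major ordering from a `(bands, time, height, width)` stack — the variable names are illustrative, not part of the cultionet API, but the `(nfeas, height, width)` result matches the shape used in the inference example further down:

```python
import numpy as np

nbands, ntime, height, width = 4, 25, 64, 64
cube = np.random.random((nbands, ntime, height, width))  # fake (band, time, y, x) stack

# Collapse band and time so each band's full time series is contiguous:
# [r_w1..r_wN, g_w1..g_wN, b_w1..b_wN, n_w1..n_wN] per pixel.
features = cube.reshape(nbands * ntime, height, width)

# Per-pixel feature vectors (rows = pixels, columns = band/time features).
x = features.reshape(nbands * ntime, -1).T
print(x.shape)  # (4096, 100)
```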

### Create the training data

Training data should consist of two files per grid/year. One file is a polygon vector file (stored as a GeoPandas-compatible
format like GeoPackage) of the training grid for a region. The other is a polygon vector file (stored in the same format)
of the training labels for a grid.

**What is a grid?**
> A grid defines an area to be labeled. For example, a grid could be 1 km x 1 km. A grid should be small enough to be combined
> with other grids in batches in GPU memory. Thus, 1 km x 1 km is a good size with, say, Sentinel-2 imagery at 10 m spatial
> resolution.

> **Note:** grids across a study area should all be of equal dimensions

**What is a training label?**
> Training labels are __polygons__ of delineated crop (i.e., crop fields). The training labels will be clipped to the
> training grid (described above). Thus, it is important to exhaustively digitize all crop fields within a grid.

**Configuration file**
> The configuration file (`cultionet/scripts/config.yml`) is used to create training datasets. This file is only meant
> to be a template. For each project, copy this template and modify it accordingly.

* image_vis
@@ -61,14 +61,14 @@ of the training labels for a grid.
  * The start and end range of the training regions to use in the dataset.
* years
  * A list of years to use in the training dataset. Image years correspond to the _end_ period of the time series. Thus, 2021 would align with a 2020-2021 time series.
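
A minimal sketch of copying and adapting the template, as recommended above. Only `image_vis` and `years` are keys named in this section; the VI names are taken from the time-series example below, the template path assumes a repository checkout, and the remaining fields (e.g., the region range) should be taken from the template itself:

```python
import shutil
import yaml  # PyYAML is already a cultionet dependency

# Copy the shipped template and modify the per-project settings.
shutil.copy("cultionet/scripts/config.yml", "project_config.yml")

with open("project_config.yml") as f:
    config = yaml.safe_load(f)

# Illustrative values only -- e.g., three VIs and two end years.
config["image_vis"] = ["evi2", "gcvi", "kndvi"]
config["years"] = [2020, 2021]

with open("project_config.yml", "w") as f:
    yaml.safe_dump(config, f)
```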

**Training data requirements**
> The polygon vector file should have a field named 'class', with values for crop fields set equal to 1. For grids with
> all null data (i.e., non-crop), simply create an empty polygon or a polygon matching the grid extent with 'class'
> value equal to 0.
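
A minimal GeoPandas sketch of preparing a label file that meets the requirement above (the file name follows the naming convention described next; the polygons themselves come from your own digitizing):

```python
import geopandas as gpd

# Digitized crop-field polygons for one training grid.
labels = gpd.read_file("000001_poly_2020.gpkg")

# Every crop polygon gets class = 1. A non-crop grid would instead carry a
# single polygon (empty or matching the grid extent) with class = 0.
labels["class"] = 1
labels.to_file("000001_poly_2020.gpkg", driver="GPKG")
```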

**Training name requirements**
> The polygon/grid pairs should be named **{region}_{poly}_{year}.gpkg**. The region name should be an integer padded to
> six characters (e.g., the region might correspond to grid 1 and be named '000001_poly_2020.gpkg').
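
A short sketch of building compliant names. The `_poly_` suffix is the one shown above; the companion grid file name is an assumption and should be checked against the directory example below:

```python
region_id = 1   # training grid / region number
year = 2020     # end year of the matching time series

poly_name = f"{region_id:06d}_poly_{year}.gpkg"   # '000001_poly_2020.gpkg'
grid_name = f"{region_id:06d}_grid_{year}.gpkg"   # assumed companion grid name
```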

Example directory structure for training data.
@@ -82,19 +82,19 @@ project_dir:

### Create the image time series

This must be done outside of `cultionet`. Essentially, a directory with band or VI time series must be generated before
using `cultionet`.

> **Note:** it is expected that the time series have length greater than 1

- The raster files should be stored as GeoTiffs with names that follow the `yyyyddd.tif` format.
- There is no maximum requirement on the temporal frequency (i.e., daily, weekly, bi-weekly, monthly, etc.).
  - Just note that a higher frequency will result in larger memory footprints for the GPU plus slower training and inference.
- While there is no requirement for the time series frequency, time series _must_ have different start and end years.
  - For example, a northern hemisphere time series might consist of (1 Jan 2020 to 1 Jan 2021) whereas a southern hemisphere time series might range from (1 July 2020 to 1 July 2021). In either case, note that something like (1 Jan 2020 to 1 Dec 2020) will not work.
- The years in the directories must align with the training data files. More specifically, the training data year (year in the polygon/grid pairs) should correspond to the time series end year.
  - For example, a file named `000001_poly_2020.gpkg` should be trained on 2019-2020 imagery, while `000001_poly_2022.gpkg` would match a 2021-2022 time series.
- The image time series footprints (bounding box) can be of any size, but should encompass the training data bounds. During data creation (next step below), only the relevant bounds of the image are extracted and matched with the training data using the training grid bounds.
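
A small sketch of the `yyyyddd.tif` naming convention described above (`%j` is the zero-padded day of year):

```python
from datetime import date

d = date(2020, 7, 15)
filename = f"{d.strftime('%Y%j')}.tif"
print(filename)  # 2020197.tif
```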

**Example time series directory with bi-weekly cadence for three VIs (i.e., evi2, gcvi, kndvi)**
```yaml
@@ -130,31 +130,33 @@ pyenv venv venv.cultionet
(venv.cultionet) cultionet create --project-path /project_dir --grid-size 100 100 --config-file config.yml
```

The output .pt data files will be stored in `/project_dir/data/train/processed`. Each .pt data file will consist of
all the information needed to train the segmentation model.
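
A quick, purely illustrative way to confirm that the create step produced batches (the path assumes the project layout described above):

```python
from pathlib import Path

processed = Path("/project_dir/data/train/processed")
print(f"{len(list(processed.glob('*.pt')))} .pt training files found")
```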

## Training a model

To train the model, you will need to create the train dataset object and pass it to the `cultionet` fit method. A script
To train the model, you will need to create the train dataset object and pass it to `cultionet` fit method. A script
is provided to help ease this process. To train a model on a dataset, use (as an example):

```commandline
(venv.cultionet) cultionet train --project-path /project_dir --val-frac 0.2 --random-seed 500 --batch-size 4 --epochs 30 --filters 32 --device gpu --patience 5 --learning-rate 0.001 --reset-model
```

For more CLI options, see:

```commandline
(venv.cultionet) cultionet train -h
```

### Example usage of the cultionet API

In the examples below, we use the project path of the setup examples above to train a model using cultionet. Note that
In the examples below, we use the project path of the setup examples above to train a model using cultionet. Note that
this is similar to using the CLI example above. The point here is simply to demonstrate the use of the Python API.

#### Fit a model using cultionet

The example below illustrates what `cultionet train` does.

```python
import cultionet
from cultionet.data.datasets import EdgeDataset
@@ -200,59 +202,72 @@ cultionet.fit(

After a model has been fit, the last checkpoint file can be found at `/project_dir/ckpt/last.ckpt`.
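
PyTorch Lightning checkpoints are ordinary torch-serialized dictionaries, so the last checkpoint can be inspected without rebuilding the model. A small sketch, not part of the cultionet API; the printed keys are typical Lightning fields:

```python
import torch

ckpt = torch.load("/project_dir/ckpt/last.ckpt", map_location="cpu")
print(sorted(ckpt.keys()))  # typically includes 'state_dict', 'epoch', 'global_step', ...
print(ckpt.get("epoch"))
```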

#### Apply model inference to a single .pt data object

```python
import cultionet
from cultionet.data.utils import create_network_data, create_network_dataset
from cultionet.utils.project_paths import setup_paths
from cultionet.utils import model_preprocessing
import numpy as np
from rasterio.windows import Window
import torch

# The random seed|state used to split the data
random_seed = 42

# This is a helper function to manage paths
ppaths = setup_paths('project_dir')

# Create the network data
nvars = 3    # number of variables (VIs) used to train the model
ntime = 25   # number of samples in 1 time series
nfeas = nvars * ntime  # total number of model features
height = 64  # number of image rows
width = 64   # number of image columns
# Create fake data as an example of the required dimensions
xvars = np.random.random((nfeas, height, width))
# Create the network data
data = create_network_data(xvars, ntime, nvars)
# Load the z-score norm values
data_values = torch.load(ppaths.norm_file)
# Create the temporary dataset
predict_ds = create_network_dataset(data, ppaths.predict_path, data_values)

# Apply inference
stack, lit_model = cultionet.predict(
    predict_ds=predict_ds,
    ckpt_file=ppaths.ckpt_file,
    filters=32,
    device='gpu',
    w=Window(row_off=0, col_off=0, height=height, width=width),
    w_pad=Window(row_off=0, col_off=0, height=height, width=width)
)
```

## Predicting on an image with a trained model

```commandline
(venv.cultionet) cultionet predict --project-path /project_dir --out-path predictions.tif --grid-id 1 --window-size 100 --config-file project_config.yml --device cpu --processes 4
```

## Installation

### (Option 1) Build Docker images

If using a GPU with CUDA 11.3, see the `cultionet` [Dockerfile](https://github.com/jgrss/cultionet/blob/main/Dockerfile)
and [dockerfiles/README.md](https://github.com/jgrss/cultionet/blob/main/dockerfiles/README.md) to build a Docker image.

If installing from scratch locally, see the instructions below.

### (Option 2) Install with Conda Mamba on a CPU

#### 1) Create a Conda `environment.yml` file with:

```yaml
name: venv.cnet
channels:
  - defaults
dependencies:
  - python=3.8.12
  - libgcc
  - libspatialindex
  - libgdal=3.4.1
  - gdal=3.4.1
  - numpy>=1.22.0
  - pip
```

#### 2) Install Python packages

```commandline
conda install -c conda-forge mamba
mamba env create --file environment.yml
conda activate venv.cnet
(venv.cnet) mamba install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 -c pytorch
(venv.cnet) mamba install pyg -c pyg
(venv.cnet) pip install -U pip setuptools wheel
(venv.cnet) pip install cultionet@git+https://github.com/jgrss/cultionet.git@jgrss/network_args
(venv.cnet) pip install -U threadpoolctl
```

### (Option 3) Install with pip on a CPU

This section assumes you have the necessary system libraries installed, such as GDAL. If not, see the next installation section.

#### Install Python packages

```commandline
pyenv virtualenv 3.8.12 venv.cnet
pyenv activate venv.cnet
(venv.cnet) pip install -U pip setuptools wheel numpy cython
(venv.cnet) pip install gdal==$(gdal-config --version | awk -F'[.]' '{print $1"."$2"."$3}') --no-binary=gdal
(venv.cnet) pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
(venv.cnet) TORCH_VERSION=$(python -c "import torch;print(torch.__version__)")
(venv.cnet) pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-${TORCH_VERSION}.html
(venv.cnet) pip install cultionet@git+https://github.com/jgrss/cultionet.git@jgrss/network_args
```

### (Option 4) Install CUDA and build GPU packages

1. Install NVIDIA driver
1. Install NVIDIA driver (skip if using the CPU)

```commandline
sudo add-apt-repository ppa:graphics-drivers/ppa
@@ -264,7 +279,7 @@ sudo apt install nvidia-driver-465

`reboot machine`

2. Install CUDA toolkit
2. Install CUDA toolkit (skip if using the CPU)
> See https://developer.nvidia.com/cuda-11.3.0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=20.04&target_type=deb_local

`reboot machine`
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,7 +1,7 @@
[build-system]
requires = [
'setuptools==59.5.0',
'setuptools>=59.5.0',
'wheel',
'numpy>=1.21.0',
'numpy>=1.22.0',
'Cython',
]
Empty file removed: requirements.txt
37 changes: 18 additions & 19 deletions setup.cfg
Expand Up @@ -15,42 +15,41 @@ classifiers =
Topic :: Scientific :: Neural Network
Topic :: Scientific :: Segmentation
Programming Language :: Cython
Programming Language :: Python :: 3.7 :: 3.8
Programming Language :: Python :: 3.8

[options]
package_dir=
=src
packages=find:
include_package_data = True
setup_requires =
cython>=0.29.*
cython>=0.29.0
install_requires =
attrs>=21.*
frozendict>=2.2.*
frozenlist>=1.3.*
attrs>=21.0
frozendict>=2.2.0
frozenlist>=1.3.0
numpy>=1.22.0
scipy>=1.2.*
pandas<=1.3.5
geopandas>=0.10.*
scipy>=1.5.0
pandas>=1.*,<=1.3.5
geopandas>=0.10.0
rasterio
shapely>=1.8.*
scikit-image>=0.19.*
xarray>=0.21.*
opencv-python>=4.5.5.*
torch
pytorch_lightning>=1.5.9
torchmetrics>=0.7.0
torch-geometric>=2.0.2
torch-geometric-temporal>=0.40
shapely>=1.8.0
scikit-image>=0.19.0
xarray>=2022.6.0
opencv-python>=4.5.5.0
decorator==4.4.2
rtree>=0.9.7
graphviz>=0.19.*
graphviz>=0.19.0
tqdm>=4.62.*
pyDeprecate==0.3.1
future>=0.17.1
tensorboard>=2.2.0
PyYAML>=5.1
setuptools==59.5.0;python_version>='3.7.12'
pytorch_lightning>=1.7.6
ray>=2.0.0
geowombat@git+https://github.com/jgrss/[email protected]
tsaug@git+https://github.com/jgrss/tsaug.git
setuptools>=59.5.0;python_version>='3.8.12'

[options.extras_require]
docs = numpydoc
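
A side note on the specifier changes above: forms like `cython>=0.29.*` combine a wildcard with an ordered comparison, which PEP 440 only allows for `==` and `!=`, and newer releases of the `packaging` library reject them — presumably why the pins were rewritten as `>=0.29.0`. A small sketch to check a specifier (behaviour on very old `packaging` versions may differ):

```python
from packaging.specifiers import InvalidSpecifier, SpecifierSet

for spec in (">=0.29.*", ">=0.29.0"):
    try:
        SpecifierSet(spec)
        print(f"{spec!r}: valid PEP 440 specifier")
    except InvalidSpecifier:
        print(f"{spec!r}: rejected (wildcards are only allowed with == and !=)")
```
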
6 changes: 3 additions & 3 deletions src/cultionet/__init__.py
@@ -1,5 +1,5 @@
__path__: str = __import__('pkgutil').extend_path(__path__, __name__)
__version__ = '1.1.1'
from .model import fit, predict
__version__ = '1.2.1'
from .model import fit, load_model, predict

__all__ = ['fit', 'predict']
__all__ = ['fit', 'load_model', 'predict']
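
With this change, the new `load_model` export can be reached from the package root. The call below is only a hypothetical sketch — the real signature is defined in `cultionet.model`, and the `ckpt_file` argument is assumed by analogy with `cultionet.predict()`:

```python
import cultionet

# Hypothetical usage; check cultionet.model.load_model for the actual parameters.
lit_model = cultionet.load_model(ckpt_file="/project_dir/ckpt/last.ckpt")
```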