Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multi roi multi period area estimate #414

Merged
merged 10 commits into from
Nov 7, 2024
205 changes: 205 additions & 0 deletions notebooks/multi_roi_area_astimate.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!git clone https://github.com/nasaharvest/crop-mask.git"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Create a Config file with these steps\n",
"\n",
"1. Create a file `config.yaml`\n",
"2. Follow this template to provide RoI names, and necessary file paths in your config file.\n",
"\n",
"```\n",
"rois: \n",
" - roi_name: AlGedaref\n",
" roi_boundary_path: '../data/roi/alqadarif_boundary.shp'\n",
" samples_and_cropmap:\n",
" - year: 2022\n",
" set1_labels_path: '../data/ceo/rechecked_processed_Feb-2022---Feb-2023_ceo_samples.csv'\n",
" set2_labels_path: '../data/ceo/rechecked_processed_Feb-2022---Feb-2023_ceo_samples.csv'\n",
" crop_mask_path: '../data/rasters/AlQadarif_AlJazirah_2022_cropmask_epsg32636_v5_pp.tif'\n",
" - year: 2023\n",
" set1_labels_path: '../data/ceo/rechecked_processed_Feb-2023---Feb-2024_ceo_samples.csv'\n",
" set2_labels_path: '../data/ceo/rechecked_processed_Feb-2023---Feb-2024_ceo_samples.csv'\n",
" crop_mask_path: '../data/rasters/AlQadarif_AlJazirah_2023_cropmask_epsg32636_v5_pp.tif'\n",
"\n",
" - roi_name: AlJazirah\n",
" roi_boundary_path: '../data/roi/aljazirah_boundary.shp'\n",
" samples_and_cropmap:\n",
" - year: 2022\n",
" set1_labels_path: '../data/ceo/rechecked_processed_Feb-2022---Feb-2023_ceo_samples.csv'\n",
" set2_labels_path: '../data/ceo/rechecked_processed_Feb-2022---Feb-2023_ceo_samples.csv'\n",
" crop_mask_path: '../data/rasters/AlQadarif_AlJazirah_2022_cropmask_epsg32636_v5_pp.tif'\n",
" - year: 2023\n",
" set1_labels_path: '../data/ceo/rechecked_processed_Feb-2023---Feb-2024_ceo_samples.csv'\n",
" set2_labels_path: '../data/ceo/rechecked_processed_Feb-2023---Feb-2024_ceo_samples.csv'\n",
" crop_mask_path: '../data/rasters/AlQadarif_AlJazirah_2023_cropmask_epsg32636_v5_pp.tif'\n",
" \n",
"output_path: '../results.csv' \n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install required packages\n",
"# Skip this step if you have already installed the packages in your local environment\n",
"!pip install geopandas -q\n",
"!pip install seaborn -q\n",
"!pip install rasterio -q\n",
"!pip install cartopy -q"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import yaml"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"module_path = os.path.abspath(os.path.join('..'))\n",
"if module_path not in sys.path:\n",
" sys.path.append(module_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%cd crop-mask"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from src.area_utils import (\n",
" run_area_estimates_config\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"config_paths = \"../config.yaml\"\n",
"with open(config_paths, 'r') as file:\n",
" config = yaml.safe_load(file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"annual_estimates = run_area_estimates_config(config, show_output_charts=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"def plot_crop_area_with_confidence(annual_estimates):\n",
" \"\"\"\n",
" This function takes in a dictionary with crop area data and plots a bar chart \n",
" with 95% confidence intervals for each region and year.\n",
"\n",
" Parameters:\n",
" data (dict): A dictionary where keys are region names and values contain \n",
" crop area data, confidence intervals, and years.\n",
" \"\"\"\n",
" regions = list(annual_estimates.keys())\n",
" years = [2022, 2023]\n",
"\n",
" crop_areas = np.array([annual_estimates[region]['crop area'] for region in regions])\n",
" crop_ci = np.array([annual_estimates[region]['95%CI crop'] for region in regions])\n",
"\n",
" fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
" bar_width = 0.35\n",
" index = np.arange(len(regions))\n",
"\n",
" for i, year in enumerate(years):\n",
" ax.bar(index + i * bar_width, crop_areas[:, i], bar_width,\n",
" yerr=crop_ci[:, i], label=f'Year {year}', capsize=5)\n",
"\n",
" ax.set_xlabel('Region')\n",
" ax.set_ylabel('Crop Area (hectares)')\n",
" ax.set_title('Crop Area by Region with 95% Confidence Intervals (2022 vs 2023)')\n",
" ax.set_xticks(index + bar_width / 2)\n",
" ax.set_xticklabels(regions)\n",
" ax.legend()\n",
"\n",
" ax.set_axisbelow(True)\n",
" ax.grid(True, which='both', axis='y', linestyle='--', linewidth=0.7)\n",
"\n",
" plt.tight_layout()\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot_crop_area_with_confidence(annual_estimates)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "cropmask",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
178 changes: 178 additions & 0 deletions src/area_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import csv
import json
import os
from typing import List, Optional, Tuple, Union
Expand All @@ -8,6 +9,7 @@
import numpy as np
import pandas as pd
import rasterio as rio
import yaml
from osgeo import gdal
from rasterio import transform
from rasterio.mask import mask
Expand Down Expand Up @@ -741,3 +743,179 @@ def plot_area(summary: pd.DataFrame) -> None:

plt.tight_layout()
plt.show()


def area_estimate_pipeline(
aoi_path: str, map_path: str, ceo_set_1: str, ceo_set_2: str, visual_outputs: bool
) -> dict:
"""Returns area estimate for a particular region of interest (ROI) using cropland raster and labels.

Args:
aoi_path (str): The file path to the Area of Interest (AOI) ShapeFile/GeoJSON file, which defines the region for which the area estimates are computed.
map_path (str): The file path to the cropland raster file (.tif) that contains the land cover map of the area of interest.
ceo_set_1 (str): The file path to the first CEO (Collect Earth Online) set, which includes ground truth labels.
ceo_set_2 (str): The file path to the second CEO set, which includes additional ground truth labels for further validation or comparison.
visual_outputs (bool): A boolean flag indicating whether to generate visual outputs (plots, maps, etc.) to visualize the results.

Returns:
dict: A dictionary containing area estimates and accuracy metrics for each class in the confusion matrix.

The dictionary has the following keys:
- 'user_accuracy': A list of user's accuracy values for each class.
- 'producer_accuracy': A list of producer's accuracy values for each class.
- 'area_estimates': A nested dictionary containing area estimates and 95% confidence intervals in different units:
- 'pr': Proportional area estimates and confidence intervals.
- 'px': Area estimates and confidence intervals in pixels.
- 'ha': Area estimates and confidence intervals in hectares.
- 'confidence_interval': The 95% confidence interval for each class.

"""
aoi = gpd.read_file(aoi_path)
roi = aoi.dissolve()

map_array, map_meta = load_raster(map_path, roi)

if map_array.data.dtype == "uint8":
binary_map = map_array.data
else:
binary_map = binarize(map_array, map_meta)

crop_area_px, noncrop_area_px = cal_map_area_class(binary_map, unit="pixels")
crop_area_ha, noncrop_area_ha = cal_map_area_class(binary_map, unit="ha")
crop_area_frac, noncrop_area_frac = cal_map_area_class(binary_map, unit="fraction")

ceo_geom = reference_sample_agree(binary_map, map_meta, ceo_set_1, ceo_set_2)
ceo_geom = ceo_geom[ceo_geom["Mapped class"] != 255]

cm = compute_confusion_matrix(ceo_geom)

a_j = np.array([noncrop_area_px, crop_area_px], dtype=np.int64)

px_size = map_meta["transform"][0]

estimates = compute_area_estimate(cm, a_j, px_size)

if visual_outputs:
plt.imshow(binary_map, cmap="YlGn", vmin=0, vmax=1)
plt.show()
a_ha, err_ha = estimates["area"]["ha"]
u_j, err_u_j = estimates["user"]
p_i, err_p_i = estimates["producer"]
summary = create_area_estimate_summary(
a_ha, err_ha, u_j, err_u_j, p_i, err_p_i, columns=["Non-Crop", "Crop"]
)

return estimates


def write_area_estimate_results_to_csv(data: dict, output_file: str) -> bool:
"""
Writes area estimate results to a CSV file in an easy-to-interpret format.

Args:
data (dict): A nested dictionary containing the area estimates and their respective confidence intervals
for different regions of interest (ROIs) and years. The dictionary structure is expected to be:
{
'ROI_name_1': {
'year': [list of years],
'crop area': [list of crop area estimates],
'95%CI crop': [list of 95% confidence intervals for crop area],
'Non-crop area': [list of non-crop area estimates],
'95%CI noncrop': [list of 95% confidence intervals for non-crop area]
},
'ROI_name_2': {...},
...
}
output_file (str): The file path where the output CSV will be saved.

Returns:
bool: Returns True when the CSV file is successfully written.

"""

# The header based on the RoI results names(keys)
header = ["Year"]
for roi in data.keys():
header.extend([f"{roi} crop area", "95% CI", f"{roi} Non-crop area", "95% CI"])

with open(output_file, "w", newline="") as file:

writer = csv.writer(file)
writer.writerow(header)

years = data[list(data.keys())[0]]["year"]

for i, year in enumerate(years):
row = [year]
for roi in data.keys():
row.append(data[roi]["crop area"][i])
row.append(data[roi]["95%CI crop"][i])
row.append(data[roi]["Non-crop area"][i])
row.append(data[roi]["95%CI noncrop"][i])
writer.writerow(row)

return True


def run_area_estimates_config(config: dict, show_output_charts: bool = False) -> dict:
"""
Runs area estimates for multiple regions of interest (ROIs) and compiles the results.

Args:
config (dict): A dictionary containing configuration information for the ROIs and their associated data.
It includes details such as the names of the ROIs, paths to boundary files, crop mask files,
CEO datasets, and the output path for saving the CSV file.
show_output_charts (bool): A boolean flag indicating whether to display visual output charts during processing.

Returns:
dict: A dictionary containing area estimates and confidence intervals for each ROI and year.

Raises:
Exception: If there is an error writing the results to the CSV file.

"""

all_results = {}
for roi in config["rois"]:
all_results[roi["roi_name"]] = {
"year": [],
"crop area": [],
"95%CI crop": [],
"Non-crop area": [],
"95%CI noncrop": [],
}
roi_results = all_results[roi["roi_name"]]

for yearly_data in roi["samples_and_cropmap"]:

print("\n" * 3 + f"_____ Running for {roi['roi_name']}, {yearly_data['year']} _____")

# Call the function for yearly estimate
estimates = area_estimate_pipeline(
aoi_path=roi["roi_boundary_path"],
map_path=yearly_data["crop_mask_path"],
ceo_set_1=yearly_data["set1_labels_path"],
ceo_set_2=yearly_data["set2_labels_path"],
visual_outputs=show_output_charts,
)

a_ha, err_ha = estimates["area"]["ha"]
a_ha, err_ha = a_ha.round(2), err_ha.round(2)

non_crop_area = a_ha[0]
crop_area = a_ha[1]
non_crop_area_err = err_ha[0]
crop_area_err = err_ha[1]

# Keep yearly results
roi_results["year"].append(yearly_data["year"])
roi_results["crop area"].append(crop_area)
roi_results["95%CI crop"].append(crop_area_err)
roi_results["Non-crop area"].append(non_crop_area)
roi_results["95%CI noncrop"].append(non_crop_area_err)

if write_area_estimate_results_to_csv(all_results, config["output_path"]):
return all_results

else:
raise Exception("Error Writing Estimates to CSV")
Loading