fix(bundlelib): bundle the lib itself with the new pandas version

hotosm · Nov 15, 2024 · 04856ad
1 parent 0f803ea
commit 04856ad
Show file tree

Hide file tree

Showing 8 changed files with 45 additions and 20 deletions.
diff --git a/hot_fair_utilities/inference/evaluate.py b/hot_fair_utilities/inference/evaluate.py
@@ -1,12 +1,21 @@
 # Patched from ramp-code.scripts.calculate_accuracy.iou created for ramp project by [email protected]
 
+# Standard library imports
 from pathlib import Path
+
+# Third party imports
 import geopandas as gpd
 
-from ramp.utils.eval_utils import get_iou_accuracy_metrics
+try:
+    # Third party imports
+    from ramp.utils.eval_utils import get_iou_accuracy_metrics
+except ImportError:
+    print("Ramp eval metrics are not available, Possibly ramp is not installed")
 
 
-def evaluate(test_path, truth_path, filter_area_m2=None, iou_threshold=0.5, verbose=False):
+def evaluate(
+    test_path, truth_path, filter_area_m2=None, iou_threshold=0.5, verbose=False
+):
     """
     Calculate precision/recall/F1-score based on intersection-over-union accuracy evaluation protocol defined by RAMP.
 
@@ -29,9 +38,9 @@ def evaluate(test_path, truth_path, filter_area_m2=None, iou_threshold=0.5, verb
     truth_df, test_df = gpd.read_file(str(truth_path)), gpd.read_file(str(test_path))
     metrics = get_iou_accuracy_metrics(test_df, truth_df, filter_area_m2, iou_threshold)
 
-    n_detections = metrics['n_detections']
+    n_detections = metrics["n_detections"]
     n_truth = metrics["n_truth"]
-    n_truepos = metrics['true_pos']
+    n_truepos = metrics["true_pos"]
     n_falsepos = n_detections - n_truepos
     n_falseneg = n_truth - n_truepos
     agg_precision = n_truepos / n_detections

diff --git a/hot_fair_utilities/postprocessing/building_footprint.py b/hot_fair_utilities/postprocessing/building_footprint.py
@@ -1,5 +1,7 @@
+# Standard library imports
 import collections
 
+# Third party imports
 from geopandas import GeoSeries
 from shapely.geometry import MultiPolygon, Polygon
 from shapely.ops import unary_union
@@ -75,4 +77,4 @@ def extract(self, tile, mask):
                 self.features.append(feature)
 
     def save(self, out):
-        GeoSeries(self.features).set_crs(CRS).to_file(out)
+        GeoSeries(self.features).set_crs(CRS).to_file(out, driver="GeoJSON")
diff --git a/hot_fair_utilities/postprocessing/merge_polygons.py b/hot_fair_utilities/postprocessing/merge_polygons.py
@@ -71,4 +71,4 @@ def unbuffered(shape):
             features.append(feature)
 
     gs = GeoSeries(features).set_crs(SOURCE_CRS)
-    gs.simplify(TOLERANCE).to_file(new_polygons_path)
+    gs.simplify(TOLERANCE).to_file(new_polygons_path, driver="GeoJSON")
diff --git a/hot_fair_utilities/postprocessing/vectorize.py b/hot_fair_utilities/postprocessing/vectorize.py
@@ -15,7 +15,9 @@
 AREA_THRESHOLD = 5
 
 
-def vectorize(input_path: str, output_path: str , tolerance: float = 0.5, area_threshold: float = 5) -> None:
+def vectorize(
+    input_path: str, output_path: str, tolerance: float = 0.5, area_threshold: float = 5
+) -> None:
     """Polygonize raster tiles from the input path.
 
     Note that as input, we are expecting GeoTIF images with EPSG:3857 as
@@ -52,15 +54,14 @@ def vectorize(input_path: str, output_path: str , tolerance: float = 0.5, area_t
     polygons = [
         Polygon(poly.exterior.coords)
         for poly in polygons
-        if poly.area != max_area
-        and poly.area / median_area > area_threshold
+        if poly.area != max_area and poly.area / median_area > area_threshold
     ]
 
     gs = gpd.GeoSeries(polygons, crs=kwargs["crs"]).simplify(tolerance)
     gs = remove_overlapping_polygons(gs)
     if gs.empty:
         raise ValueError("No Features Found")
-    gs.to_crs("EPSG:4326").to_file(output_path)
+    gs.to_crs("EPSG:4326").to_file(output_path, driver="GeoJSON")
 
 
 def remove_overlapping_polygons(gs: gpd.GeoSeries) -> gpd.GeoSeries:
@@ -79,4 +80,4 @@ def remove_overlapping_polygons(gs: gpd.GeoSeries) -> gpd.GeoSeries:
                 else:
                     to_remove.add(j)
 
-    return gs.drop(list(to_remove))
+    return gs.drop(list(to_remove))
diff --git a/hot_fair_utilities/preprocessing/clip_labels.py b/hot_fair_utilities/preprocessing/clip_labels.py
@@ -3,6 +3,7 @@
 from glob import glob
 from pathlib import Path
 
+# Third party imports
 # Third-party imports
 import geopandas
 from osgeo import gdal
@@ -13,7 +14,12 @@
 
 
 def clip_labels(
-    input_path: str, output_path: str, rasterize=False, rasterize_options=None,all_geojson_file=None,epsg=3857
+    input_path: str,
+    output_path: str,
+    rasterize=False,
+    rasterize_options=None,
+    all_geojson_file=None,
+    epsg=3857,
 ) -> None:
     """Clip and rasterize the GeoJSON labels for each aerial image.
 
@@ -72,13 +78,13 @@ def clip_labels(
     ):
         filename = Path(path).stem
         if all_geojson_file:
-            geojson_file_all_labels=all_geojson_file
-        else : 
+            geojson_file_all_labels = all_geojson_file
+        else:
             geojson_file_all_labels = f"{output_path}/labels_epsg3857.geojson"
         clipped_geojson_file = f"{output_geojson_path}/{filename}.geojson"
 
         # Bounding box as a tuple
-        x_min, y_min, x_max, y_max = get_bounding_box(filename,epsg=epsg)
+        x_min, y_min, x_max, y_max = get_bounding_box(filename, epsg=epsg)
         # Bounding box as a polygon
         bounding_box_polygon = box(x_min, y_min, x_max, y_max)
 
@@ -87,11 +93,13 @@ def clip_labels(
         gdf_all_labels = geopandas.read_file(geojson_file_all_labels)
         gdf_clipped = gdf_all_labels.clip(bounding_box_polygon)
         if len(gdf_clipped) > 0:
-            gdf_clipped.to_file(clipped_geojson_file)
+            gdf_clipped.to_file(clipped_geojson_file, driver="GeoJSON")
         else:
             schema = {"geometry": "Polygon", "properties": {"id": "int"}}
             crs = f"EPSG:{epsg}"
-            gdf_clipped.to_file(clipped_geojson_file, schema=schema, crs=crs)
+            gdf_clipped.to_file(
+                clipped_geojson_file, schema=schema, crs=crs, driver="GeoJSON"
+            )
 
         # Rasterizing
         if rasterize:

diff --git a/hot_fair_utilities/preprocessing/fix_labels.py b/hot_fair_utilities/preprocessing/fix_labels.py
@@ -33,4 +33,4 @@ def fix_labels(input_path: str, output_path: str) -> None:
     if gdf.empty:
         raise ValueError("Error: gdf is empty, No Labels found : Check your labels")
     gdf["geometry"] = gdf.apply(remove_self_intersection, axis=1)
-    gdf.to_file(output_path)
+    gdf.to_file(output_path, driver="GeoJSON")
diff --git a/hot_fair_utilities/preprocessing/reproject_labels.py b/hot_fair_utilities/preprocessing/reproject_labels.py
@@ -1,4 +1,5 @@
 # Third-party imports
+# Third party imports
 import geopandas
 
 
@@ -13,4 +14,4 @@ def reproject_labels_to_epsg3857(input_path: str, output_path: str) -> None:
         output_path: Path to the GeoJSON file where the output data will go.
     """
     labels_gdf = geopandas.read_file(input_path).set_crs("EPSG:4326")
-    labels_gdf.to_crs("EPSG:3857").to_file(output_path)
+    labels_gdf.to_crs("EPSG:3857").to_file(output_path, driver="GeoJSON")
diff --git a/pyproject.toml b/pyproject.toml
@@ -2,6 +2,10 @@
 requires      = ["setuptools>=61.0.0", "wheel"]
 build-backend = "setuptools.build_meta"
 
+[tool.setuptools]
+packages = ["hot_fair_utilities"]
+
+
 [project]
 name = "hot-fair-utilities"
 version = "2.0.0"
@@ -18,7 +22,7 @@ keywords = [
     "postprocessing", "stitching","training"
 ]
 dependencies = [
-    "shapely==1.8.0", "GDAL", "numpy", 
+    "shapely==1.8.0","GDAL", "numpy", 
     "Pillow==9.1", "geopandas<=0.14.5","pandas==2.2.0", 
     "rasterio", "mercantile==1.2.1", "tqdm==4.64.0", 
     "rtree","opencv-python-headless<=4.9.0.80",