Implement detectree #1608

Open · wants to merge 1 commit into master
1 change: 1 addition & 0 deletions README.md
@@ -455,6 +455,7 @@ If you would like to try on your computer:
| [<img src="image_segmentation/grounded_sam/output.png" width=128px>](image_segmentation/grounded_sam/) | [grounded_sam](/image_segmentation/grounded_sam/) | [Grounded-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything/tree/main) | Pytorch | 1.2.16 and later |
| [<img src="image_segmentation/segment-anything-2/output.png" width=128px>](image_segmentation/segment-anything-2/) | [segment-anything-2](/image_segmentation/segment-anything-2/) | [Segment Anything 2](https://github.com/facebookresearch/segment-anything-2) | Pytorch | 1.2.16 and later |
| [<img src="image_segmentation/fast_sam/output.png" width=128px>](image_segmentation/fast_sam/) | [fast_sam](/image_segmentation/fast_sam/) | [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) | Pytorch | 1.2.14 and later |
| [<img src="image_segmentation/detectree/output.png" width=128px>](image_segmentation/detectree/) | [detectree](/image_segmentation/detectree/) | [detectree](https://github.com/martibosch/detectree) | scikit-learn | |

## Large Language Model

674 changes: 674 additions & 0 deletions image_segmentation/detectree/LICENSE

Large diffs are not rendered by default.

61 changes: 61 additions & 0 deletions image_segmentation/detectree/README.md
@@ -0,0 +1,61 @@
# detectree

## Input

<img src="tile.png" width="600px">

(Image from [https://wms.geo.admin.ch/?SERVICE=WMS&REQUEST=GetMap&VERSION=1.3.0&FORMAT=image/png&LAYERS=ch.swisstopo.images-swissimage&CRS=EPSG:2056&BBOX=2532980,1152150,2533380,1152450&WIDTH=800&HEIGHT=600](https://wms.geo.admin.ch/?SERVICE=WMS&REQUEST=GetMap&VERSION=1.3.0&FORMAT=image/png&LAYERS=ch.swisstopo.images-swissimage&CRS=EPSG:2056&BBOX=2532980,1152150,2533380,1152450&WIDTH=800&HEIGHT=600))

## Output

<img src="output.png" width="600px">

## Requirements
This model requires additional modules.

```
pip3 install -r requirements.txt
```

## Usage
The onnx and prototxt files are downloaded automatically on the first run.
An Internet connection is required during the download.

For the sample image,
```
$ python3 detectree.py
```

If you want to run in onnx mode (with ONNX Runtime), specify the `--onnx` option as below.
```
$ python3 detectree.py --onnx
```

If you want to specify the input image, put the image path after the `--input` option.
You can use the `--savepath` option to change the name of the output file.
```
$ python3 detectree.py --input IMAGE_PATH --savepath SAVE_IMAGE_PATH
```

By adding the `--video` option, you can run inference on a video.
If you pass `0` as VIDEO_PATH, the webcam input is used instead of a video file.
```
$ python3 detectree.py --video VIDEO_PATH
```
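
The exported model can also be run directly from Python with ONNX Runtime. The following is a minimal sketch, not an official API of this repository: it assumes `detectree.onnx` has already been downloaded, that the feature matrix dtype matches the model's input, and that the model's two outputs are the hard labels and the class probabilities, as `post_process` in `detectree.py` suggests.

```
import numpy as np
import onnxruntime
import rasterio as rio

from detectree_utils import pixel_features

# load the image as an (H, W, 3) array, as in detectree.py
with rio.open('tile.png') as src:
    img = np.rollaxis(src.read()[:3], 0, 3)

# per-pixel feature matrix, shape (H * W, num_features)
features = pixel_features.PixelFeaturesBuilder().build_features_from_arr(img)

session = onnxruntime.InferenceSession('detectree.onnx')
input_name = session.get_inputs()[0].name
labels, probas = session.run(None, {input_name: features})

# reshape the hard labels back to the image grid: 255 = tree, 0 = non-tree
mask = (labels.reshape(img.shape[:2]) * 255).astype(np.uint8)
```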

## Reference

- [detectree](https://github.com/martibosch/detectree)
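
The upstream library can also produce a tree/non-tree mask directly with its pre-trained scikit-learn classifier; a minimal sketch, assuming detectree is installed from PyPI:

```
import detectree as dtr

# segment a tile into tree / non-tree pixels with the pre-trained classifier
y_pred = dtr.Classifier().predict_img('tile.png')
```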

## Framework

scikit-learn

## Model Format

ONNX opset=17

## Netron

[detectree.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/detectree/detectree.onnx.prototxt)

208 changes: 208 additions & 0 deletions image_segmentation/detectree/detectree.py
@@ -0,0 +1,208 @@
import os
import sys
import time

import numpy as np
import cv2
import rasterio as rio
import maxflow as mf

import ailia

from detectree_utils import pixel_features

# import original modules
sys.path.append('../../util')
from arg_utils import get_base_parser, update_parser, get_savepath # noqa: E402
from model_utils import check_and_download_models # noqa: E402
from webcamera_utils import get_capture, get_writer # noqa: E402

# logger
from logging import getLogger # noqa: E402
logger = getLogger(__name__)

# ======================
# Parameters 1
# ======================
IMAGE_PATH = 'tile.png'
SAVE_IMAGE_PATH = 'output.png'

TREE_VAL = 255
NONTREE_VAL = 0
REFINE = True
REFINE_BETA = 50
REFINE_INT_RESCALE = 10000
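# 8-connected (Moore) neighborhood structure used for the pairwise
# graph-cut edges in post_process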
MOORE_NEIGHBORHOOD_ARR = np.array([[0, 0, 0], [0, 0, 1], [1, 1, 1]])

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'detectree', IMAGE_PATH, SAVE_IMAGE_PATH
)
parser.add_argument(
    '-o', '--onnx', action='store_true',
    help="Use ONNX Runtime instead of the ailia runtime."
)
args = update_parser(parser)

# ======================
# Parameters 2
# ======================
WEIGHT_PATH = 'detectree.onnx'
MODEL_PATH = 'detectree.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/detectree/'

# ======================
# Utils
# ======================
def load_image(input_path):
    # read with rasterio and convert from (bands, H, W) to (H, W, 3),
    # keeping the first three bands
    with rio.open(input_path) as src:
        arr = src.read()
    return np.rollaxis(arr[:3], 0, 3)

def preprocess(img):
    # build the (num_pixels, num_features) per-pixel feature matrix expected by the model
    return pixel_features.PixelFeaturesBuilder().build_features_from_arr(img)

def post_process(output, img_shape):
    if not REFINE:
        # use the hard per-pixel labels directly
        y_pred = output[0].reshape(img_shape).astype(int) * TREE_VAL
    else:
        # refine the per-pixel probabilities with a graph cut (as in detectree):
        # unary terms come from the log class probabilities, pairwise smoothing
        # from REFINE_BETA over the Moore neighborhood
        p_nontree, p_tree = np.hsplit(output[1], 2)
        g = mf.Graph[int]()
        node_ids = g.add_grid_nodes(img_shape)
        P_nontree = p_nontree.reshape(img_shape)
        P_tree = p_tree.reshape(img_shape)

        # rescale the log-probabilities to integers for the maxflow solver
        D_tree = (REFINE_INT_RESCALE * np.log(P_nontree)).astype(int)
        D_nontree = (REFINE_INT_RESCALE * np.log(P_tree)).astype(int)
        g.add_grid_edges(node_ids, REFINE_BETA, structure=MOORE_NEIGHBORHOOD_ARR)
        g.add_grid_tedges(node_ids, D_tree, D_nontree)
        g.maxflow()
        y_pred = np.full(img_shape, NONTREE_VAL)
        y_pred[g.get_grid_segments(node_ids)] = TREE_VAL
    return y_pred

def segment_image(img, net):
    img_shape = img.shape[:2]
    img = preprocess(img)

    if args.onnx:
        input_name = net.get_inputs()[0].name
        output = net.run(None, {input_name: img})
    else:
        output = net.predict(img)

    out = post_process(output, img_shape)
    out = out.astype(np.uint8)
    return out

# ======================
# Main functions
# ======================
def recognize_from_image():
    # net initialize
    if args.onnx:
        import onnxruntime
        net = onnxruntime.InferenceSession(WEIGHT_PATH)
    else:
        logger.info(f'env_id: {args.env_id}')
        net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        img = load_image(image_path)
        logger.debug(f'input image shape: {img.shape}')

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            total_time_estimation = 0
            for i in range(args.benchmark_count):
                start = int(round(time.time() * 1000))
                model_out = segment_image(img, net)
                end = int(round(time.time() * 1000))
                estimation_time = (end - start)

                logger.info(f'\tailia processing estimation time {estimation_time} ms')
                if i != 0:
                    total_time_estimation = total_time_estimation + estimation_time

            logger.info(f'\taverage time estimation {total_time_estimation / (args.benchmark_count - 1)} ms')
        else:
            model_out = segment_image(img, net)

        # save result
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, model_out)
    logger.info('Script finished successfully.')


def recognize_from_video():
    # net initialize
    if args.onnx:
        import onnxruntime
        net = onnxruntime.InferenceSession(WEIGHT_PATH)
    else:
        logger.info(f'env_id: {args.env_id}')
        net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    capture = get_capture(args.video)
    video_height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))

    if args.savepath != SAVE_IMAGE_PATH:
        writer = get_writer(args.savepath, video_height, video_width)
    else:
        writer = None

    frame_shown = False
    while True:
        ret, img = capture.read()

        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break
        if frame_shown and cv2.getWindowProperty('frame', cv2.WND_PROP_VISIBLE) == 0:
            break

        model_out = segment_image(img, net)

        cv2.imshow('frame', model_out)
        frame_shown = True

        if writer is not None:
            # convert the single-channel mask to BGR so the color writer accepts it
            writer.write(cv2.cvtColor(model_out, cv2.COLOR_GRAY2BGR))

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')


def main():
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    if args.video is not None:
        # video mode
        recognize_from_video()
    else:
        # image mode
        recognize_from_image()


if __name__ == '__main__':
    main()
63 changes: 63 additions & 0 deletions image_segmentation/detectree/detectree_utils/filters.py
@@ -0,0 +1,63 @@
"""Utilities to produce filters."""

import numpy as np

def _gaussian_kernel1d(sigma, order, radius):
    """
    Compute a 1-D Gaussian convolution kernel.

    From https://github.com/scipy/scipy/blob/v1.9.2/scipy/ndimage/_filters.py#L179-L207
    Copying it here since it is not part of scipy's public API.
    See https://github.com/martibosch/detectree/issues/12
    """
    if order < 0:
        raise ValueError("order must be non-negative")
    exponent_range = np.arange(order + 1)
    sigma2 = sigma * sigma
    x = np.arange(-radius, radius + 1)
    phi_x = np.exp(-0.5 / sigma2 * x**2)
    phi_x = phi_x / phi_x.sum()

    if order == 0:
        return phi_x
    else:
        q = np.zeros(order + 1)
        q[0] = 1
        D = np.diag(exponent_range[1:], 1)  # D @ q(x) = q'(x)
        P = np.diag(np.ones(order) / -sigma2, -1)  # P @ q(x) = q(x) * p'(x)
        Q_deriv = D + P
        for _ in range(order):
            q = Q_deriv.dot(q)
        q = (x[:, None] ** exponent_range).dot(q)
        return q * phi_x


def _get_gaussian_kernel1d(sigma, *, order=0, truncate=4.0):
    """Based on scipy.ndimage.filters.gaussian_filter1d."""
    sd = float(sigma)
    # make the radius of the filter equal to truncate standard deviations
    lw = int(truncate * sd + 0.5)
    # Since we are calling correlate, not convolve, we could revert the kernel:
    # weights = _gaussian_kernel1d(sigma, order, lw)[::-1]
    weights = _gaussian_kernel1d(sigma, order, lw)
    return weights


def get_texture_kernel(sigma):
    """
    Get a texture kernel based on Yang et al. (2009).

    Parameters
    ----------
    sigma : numeric
        Scale parameter to build a texture kernel, based on a Gaussian on the
        X dimension and a second-derivative Gaussian in the Y dimension.

    Returns
    -------
    texture_kernel : array-like
    """
    g0_kernel_arr = _get_gaussian_kernel1d(sigma, order=0)
    g2_kernel_arr = _get_gaussian_kernel1d(sigma, order=2)

    return np.dot(g2_kernel_arr.reshape(1, -1).T, g0_kernel_arr.reshape(1, -1))
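
A minimal sketch of how `get_texture_kernel` might be used, assuming OpenCV is available and `detectree_utils` is on the import path (the filename `tile.png` is just the sample input from this PR):

```
import cv2
import numpy as np

from detectree_utils.filters import get_texture_kernel

# texture kernel: Gaussian in X, second-derivative Gaussian in Y
kernel = get_texture_kernel(sigma=1.0)

# convolve a grayscale image to obtain a texture response map
img = cv2.imread('tile.png', cv2.IMREAD_GRAYSCALE).astype(np.float32)
response = cv2.filter2D(img, -1, kernel)
```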