eEcoLiDAR · fnattino · Jan 29, 2020 · Jan 29, 2020 · Jan 29, 2020 · Feb 4, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## 0.3.3 - 2020-
+- Changed:
+    - Vectorized sigma_z feature extractor
+
 ## 0.3.2 - 2019-12-12
 ## Added
 - Features added:

diff --git a/laserchicken/feature_extractor/sigma_z_feature_extractor.py b/laserchicken/feature_extractor/sigma_z_feature_extractor.py
@@ -7,11 +7,12 @@
 from numpy.linalg import LinAlgError
 
 from laserchicken.feature_extractor.base_feature_extractor import FeatureExtractor
-from laserchicken.utils import get_point, fit_plane
 
+from laserchicken.utils import get_xyz_per_neighborhood
 
 class SigmaZFeatureExtractor(FeatureExtractor):
     """Height percentiles feature extractor class."""
+    is_vectorized = True
 
     @classmethod
     def requires(cls):
@@ -38,7 +39,7 @@ def provides(cls):
         """
         return ['sigma_z']
 
-    def extract(self, source_point_cloud, neighborhood, target_point_cloud, target_index, volume_description):
+    def extract(self, source_point_cloud, neighborhoods, target_point_cloud, target_index, volume_description):
         """
         Extract the feature value(s) of the point cloud at location of the target.
 
@@ -49,14 +50,49 @@ def extract(self, source_point_cloud, neighborhood, target_point_cloud, target_i
         :param volume_description: volume object describing the containing volume of the neighborhood
         :return:
         """
-        x, y, z = get_point(source_point_cloud, neighborhood)
+        if not (isinstance(neighborhoods[0], list) or isinstance(neighborhoods[0], range)):
+            neighborhoods = [neighborhoods]
+
+        xyz_grp = get_xyz_per_neighborhood(source_point_cloud, neighborhoods)
+        len_ngbrs = [len(ngbr) for ngbr in neighborhoods]
+
         try:
-            plane_estimator = fit_plane(x, y, z)
-            normalized = z - plane_estimator(x, y)
-            return np.std(normalized)
+            residuals = self._get_sum_of_residuals_from_fitted_planes(xyz_grp)
+            return np.sqrt(np.divide(residuals, len_ngbrs))
         except LinAlgError:
             return 0
 
+    @staticmethod
+    def _get_sum_of_residuals_from_fitted_planes(xyz):
+        """
+        Fit planes to each of the neighborhoods and returns the corresponding sum of residuals
+        :param xyz: 3D masked array with (x,y,z) of points in neighboroods.
+        :return: array with residuals
+        """
+        # setup coefficient matrices and ordinate values
+        matrix = np.ones_like(xyz)
+        matrix[:, 1:, :] = xyz[:, 0:2, :]
+        matrix[xyz.mask] = 0.
+        matrix = np.transpose(matrix, (0, 2, 1))
+        a = np.zeros((xyz.shape[0], xyz.shape[2]))
+        a[:, :] = xyz[:, 2, :]
+
+        # SVD decomposition of matrices to construct pseudo-inverse
+        u, s, v = np.linalg.svd(matrix, full_matrices=False)
+
+        # find matrices with zero-singular values, and solve linear problems
+        zero_sing_val_mask = np.any(np.isclose(s, 0.), axis=1)
+        inv_s = np.zeros_like(s)
+        inv_s[~zero_sing_val_mask, :] = 1 / s[~zero_sing_val_mask, :]
+        parameters = np.einsum('ijk,ij->ik', v,
+                               inv_s * np.einsum('ijk,ij->ik', u, a))
+
+        # determine residuals for non-singular matrices, set others to zero
+        a_val = np.einsum('ijk,ik->ij', matrix, parameters)
+        residuals = np.sum(np.power(a - a_val, 2), axis=1)
+        residuals[zero_sing_val_mask] = 0.
+        return residuals
+
     def get_params(self):
         """
         Return a tuple of parameters involved in the current feature extractor object.

diff --git a/laserchicken/feature_extractor/test_sigma_z_feature_extractor.py b/laserchicken/feature_extractor/test_sigma_z_feature_extractor.py
@@ -1,12 +1,17 @@
+import os
+import time
 import unittest
 
 import numpy as np
+from numpy.linalg import LinAlgError
 
-from laserchicken import keys
+from laserchicken import compute_neighbors, keys, read_las
 from laserchicken.feature_extractor import compute_features
+from laserchicken.feature_extractor.base_feature_extractor import FeatureExtractor
 from laserchicken.test_tools import create_point_cloud, create_points_in_xy_grid
+from laserchicken.utils import get_point, fit_plane, copy_point_cloud
 from laserchicken.volume_specification import InfiniteCylinder
-
+from .sigma_z_feature_extractor import SigmaZFeatureExtractor as SigmaZVectorizedFeatureExtractor
 
 class TestExtractSigmaZ(unittest.TestCase):
     def test_constantValues_result0(self):
@@ -42,5 +47,68 @@ def assert_std_for_z_function_in_xy_grid(z_checkered, expected):
     np.testing.assert_almost_equal(
         targets[keys.point]['sigma_z']['data'][0], expected)
 
-
-
+class TestExtractSigmaZComparison(unittest.TestCase):
+    point_cloud = None
+
+    def test_sigma_z_multiple_neighborhoods(self):
+        """
+        Test and compare the serial and vectorized sigma_z implementations.
+
+        sigma_z is computed for a list of neighborhoods in real data. A vectorized implementation and a serial
+        implementation are compared and timed. Any difference in result between the two methods is definitely
+        unexpected.
+        """
+        # vectorized version
+        t0 = time.time()
+        extract_vect = SigmaZVectorizedFeatureExtractor()
+        sigma_z_vec = extract_vect.extract(self.point_cloud, self.neigh, None, None, None)
+        print('Timing Vectorize : {}'.format((time.time() - t0)))
+
+        # serial version
+        sigma_z = []
+        t0 = time.time()
+        for n in self.neigh:
+            extract = SigmaZSerial()
+            sigma_z.append(extract.extract(self.point_cloud, n, None, None, None))
+        print('Timing Serial : {}'.format((time.time() - t0)))
+        sigma_z = np.array(sigma_z)
+        np.testing.assert_allclose(sigma_z_vec, sigma_z, atol=1.e-7)
+
+    def setUp(self):
+        """
+        Set up the test.
+
+        Load in a bunch of real data from AHN3.
+        """
+        np.random.seed(1234)
+
+        _TEST_FILE_NAME = 'AHN3.las'
+        _TEST_DATA_SOURCE = 'testdata'
+
+        _CYLINDER = InfiniteCylinder(4)
+        _PC_260807 = read_las.read(os.path.join(_TEST_DATA_SOURCE, _TEST_FILE_NAME))
+        _PC_1000 = copy_point_cloud(_PC_260807, array_mask=(
+            np.random.choice(range(len(_PC_260807[keys.point]['x']['data'])), size=1000, replace=False)))
+        _1000_NEIGHBORHOODS_IN_260807 = next(compute_neighbors.compute_neighborhoods(_PC_260807, _PC_1000, _CYLINDER))
+
+        self.point_cloud = _PC_260807
+        self.neigh = _1000_NEIGHBORHOODS_IN_260807
+
+class SigmaZSerial(FeatureExtractor):
+    """Serial implementation. Used to test the current (vectorized) implementation."""
+
+    @classmethod
+    def requires(cls):
+        return []
+
+    @classmethod
+    def provides(cls):
+        return ["sigma_z"]
+
+    def extract(self, sourcepc, neighborhood, targetpc, targetindex, volume):
+        x, y, z = get_point(sourcepc, neighborhood)
+        try:
+            plane_estimator, residuals = fit_plane(x, y, z)
+            return np.sqrt(np.divide(residuals, x.size))
+        except LinAlgError:
+            return 0
diff --git a/laserchicken/test_utils.py b/laserchicken/test_utils.py
@@ -108,8 +108,9 @@ def test_leastsqr(self):
         #     z = 5 + i % np.sqrt(n_points)
         #     points[i] = np.array(((i % np.sqrt(n_points)), (np.floor(i / np.sqrt(n_points))), z))
         #t0 = time()
-        f = fit_plane(self.points[:, 0], self.points[:, 1], self.points[:, 2])
+        f, res = fit_plane(self.points[:, 0], self.points[:, 1], self.points[:, 2])
         #print('LSQR : %f' %(time()-t0))
+        np.testing.assert_almost_equal(res, 0.)
         estimates = f(self.points[:, 0], self.points[:, 1])
         np.testing.assert_allclose(estimates, self.points[:, 2])
 

diff --git a/laserchicken/utils.py b/laserchicken/utils.py
@@ -1,7 +1,6 @@
 import datetime
 
 import numpy as np
-
 from laserchicken import keys, _version
 
 
@@ -24,20 +23,27 @@ def get_xyz_per_neighborhood(sourcepc, neighborhoods):
     :param neighborhoods:
     :return: 3d tensor as a masked array
     """
-    max_length = max(map(lambda x: len(x), neighborhoods))
+    lengths = np.array([len(x) for x in neighborhoods], dtype=int)
+    max_length = lengths.max()
 
-    xyz_grp = np.zeros((len(neighborhoods), 3, max_length))
-    mask = np.zeros((len(neighborhoods), 3, max_length))
-    for i, neighborhood in enumerate(neighborhoods):
-        n_neighbors = len(neighborhood)
-        if n_neighbors is 0:
-            continue
-        x, y, z = get_point(sourcepc, neighborhood)
-        xyz_grp[i, 0, :n_neighbors] = x
-        xyz_grp[i, 1, :n_neighbors] = y
-        xyz_grp[i, 2, :n_neighbors] = z
-        mask[i, :, :n_neighbors] = 1
-    return np.ma.MaskedArray(xyz_grp, mask == 0)
+    xyz = np.column_stack((sourcepc[keys.point]["x"]["data"],
+                           sourcepc[keys.point]["y"]["data"],
+                           sourcepc[keys.point]["z"]["data"]))
+
+    indices = np.zeros((len(neighborhoods), max_length), dtype=int)
+    indices[:] = np.arange(max_length, dtype=int)
+    indices_mask = indices < lengths[:, None]
+    indices[indices_mask] = np.concatenate(neighborhoods)
+    xyz_grp = xyz.take(indices, axis=0).transpose((0,2,1))
+
+    # construct mask for 3d tensor (true for invalid data)
+    xyz_mask = np.zeros((3, len(neighborhoods), max_length), dtype=bool)
+    xyz_mask[:] = ~indices_mask
+    xyz_mask = np.transpose(xyz_mask, (1,0,2))
+
+    # set invalid coordinates to zero
+    xyz_grp[xyz_mask] = 0.
+    return np.ma.MaskedArray(xyz_grp, xyz_mask)
 
 
 def get_attributes_per_neighborhood(point_cloud, neighborhoods, attribute_names):
@@ -179,7 +185,7 @@ def fit_plane_svd(xpts, ypts, zpts):
 
 def fit_plane(x, y, a):
     """
-    Fit a plane and return a function that returns a for every given x and y.
+    Fit a plane and return a function that returns a for every given x and y and the sum of the residuals.
 
     Solves Ax = b where A is the matrix of (x,y) combinations, x are the plane parameters, and b the values.
     Example:
@@ -192,7 +198,9 @@ def fit_plane(x, y, a):
     :param y: y coordinates
     :param a: value (for instance height)
     :return: function that returns a for every given x and y
+    :return: sum of the residuals
     """
     matrix = np.column_stack((np.ones(x.size), x, y))
-    parameters, _, _, _ = np.linalg.lstsq(matrix, a)
-    return lambda x_in, y_in: np.stack((np.ones(len(x)), x_in, y_in)).T.dot(parameters)
+    parameters, residuals, _, _ = np.linalg.lstsq(matrix, a, rcond=None)
+    return (lambda x_in, y_in: np.stack((np.ones(len(x)), x_in, y_in)).T.dot(parameters),
+            residuals.item() if residuals.size > 0 else 0.)