From d44ca33cc6a8c19b0204ac66dfeddd2d58d82635 Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Wed, 29 Jan 2020 16:32:50 +0100 Subject: [PATCH 1/8] fit_plane function now returns sum of residuals No need to recompute them from the plane parameters and the input points. --- .../feature_extractor/sigma_z_feature_extractor.py | 5 ++--- laserchicken/utils.py | 8 +++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/laserchicken/feature_extractor/sigma_z_feature_extractor.py b/laserchicken/feature_extractor/sigma_z_feature_extractor.py index a6e5627..e2ab9e1 100644 --- a/laserchicken/feature_extractor/sigma_z_feature_extractor.py +++ b/laserchicken/feature_extractor/sigma_z_feature_extractor.py @@ -51,9 +51,8 @@ def extract(self, source_point_cloud, neighborhood, target_point_cloud, target_i """ x, y, z = get_point(source_point_cloud, neighborhood) try: - plane_estimator = fit_plane(x, y, z) - normalized = z - plane_estimator(x, y) - return np.std(normalized) + plane_estimator, residuals = fit_plane(x, y, z) + return np.sqrt(np.divide(residuals, x.size)) except LinAlgError: return 0 diff --git a/laserchicken/utils.py b/laserchicken/utils.py index 2aef6ce..4fd3343 100644 --- a/laserchicken/utils.py +++ b/laserchicken/utils.py @@ -179,7 +179,7 @@ def fit_plane_svd(xpts, ypts, zpts): def fit_plane(x, y, a): """ - Fit a plane and return a function that returns a for every given x and y. + Fit a plane and return a function that returns a for every given x and y and the sum of the residuals. Solves Ax = b where A is the matrix of (x,y) combinations, x are the plane parameters, and b the values. Example: @@ -192,7 +192,9 @@ def fit_plane(x, y, a): :param y: y coordinates :param a: value (for instance height) :return: function that returns a for every given x and y + :return: sum of the residuals """ matrix = np.column_stack((np.ones(x.size), x, y)) - parameters, _, _, _ = np.linalg.lstsq(matrix, a) - return lambda x_in, y_in: np.stack((np.ones(len(x)), x_in, y_in)).T.dot(parameters) + parameters, residuals, _, _ = np.linalg.lstsq(matrix, a) + return (lambda x_in, y_in: np.stack((np.ones(len(x)), x_in, y_in)).T.dot(parameters), + residuals.item() if residuals.size > 0 else np.nan) From 8048fcb053d23254f0a9482c1d18e005a376cb05 Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Wed, 29 Jan 2020 16:41:13 +0100 Subject: [PATCH 2/8] test updated fit_plane now returns plane function and sum of residuals. Test on value of the residuals introduced. --- laserchicken/test_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/laserchicken/test_utils.py b/laserchicken/test_utils.py index b691ef8..4bfd37f 100644 --- a/laserchicken/test_utils.py +++ b/laserchicken/test_utils.py @@ -108,8 +108,9 @@ def test_leastsqr(self): # z = 5 + i % np.sqrt(n_points) # points[i] = np.array(((i % np.sqrt(n_points)), (np.floor(i / np.sqrt(n_points))), z)) #t0 = time() - f = fit_plane(self.points[:, 0], self.points[:, 1], self.points[:, 2]) + f, res = fit_plane(self.points[:, 0], self.points[:, 1], self.points[:, 2]) #print('LSQR : %f' %(time()-t0)) + np.testing.assert_almost_equal(res, 0.) estimates = f(self.points[:, 0], self.points[:, 1]) np.testing.assert_allclose(estimates, self.points[:, 2]) From 66faca5fd4b016e1df6568d8e86b1622d526b943 Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Wed, 29 Jan 2020 16:49:03 +0100 Subject: [PATCH 3/8] Parameter added in call to np.linalg.lstsq Suppress warning due to change in behavior from np 1.14.0 with rcond. --- laserchicken/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/laserchicken/utils.py b/laserchicken/utils.py index 4fd3343..edecd0f 100644 --- a/laserchicken/utils.py +++ b/laserchicken/utils.py @@ -195,6 +195,6 @@ def fit_plane(x, y, a): :return: sum of the residuals """ matrix = np.column_stack((np.ones(x.size), x, y)) - parameters, residuals, _, _ = np.linalg.lstsq(matrix, a) + parameters, residuals, _, _ = np.linalg.lstsq(matrix, a, rcond=None) return (lambda x_in, y_in: np.stack((np.ones(len(x)), x_in, y_in)).T.dot(parameters), residuals.item() if residuals.size > 0 else np.nan) From 0e2a79af66722eec72dce0fa12f01125c2d5c8a4 Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Tue, 4 Feb 2020 10:46:09 +0100 Subject: [PATCH 4/8] Fixed bug introduced on residuals For singula matrices, the sum of residuals should be zero, not nan. --- laserchicken/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/laserchicken/utils.py b/laserchicken/utils.py index edecd0f..8bea33e 100644 --- a/laserchicken/utils.py +++ b/laserchicken/utils.py @@ -1,7 +1,6 @@ import datetime import numpy as np - from laserchicken import keys, _version @@ -197,4 +196,4 @@ def fit_plane(x, y, a): matrix = np.column_stack((np.ones(x.size), x, y)) parameters, residuals, _, _ = np.linalg.lstsq(matrix, a, rcond=None) return (lambda x_in, y_in: np.stack((np.ones(len(x)), x_in, y_in)).T.dot(parameters), - residuals.item() if residuals.size > 0 else np.nan) + residuals.item() if residuals.size > 0 else 0.) \ No newline at end of file From c15e5e34c0efad0326c2855168518b04eb188e47 Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Tue, 4 Feb 2020 11:05:38 +0100 Subject: [PATCH 5/8] sigma_z feature extractor vectorized Use SVD decomposition explicitely, since numpy lstsq does not work on stack of matrices. --- .../sigma_z_feature_extractor.py | 47 +++++++++++++++++-- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/laserchicken/feature_extractor/sigma_z_feature_extractor.py b/laserchicken/feature_extractor/sigma_z_feature_extractor.py index e2ab9e1..fc1b4ce 100644 --- a/laserchicken/feature_extractor/sigma_z_feature_extractor.py +++ b/laserchicken/feature_extractor/sigma_z_feature_extractor.py @@ -7,11 +7,12 @@ from numpy.linalg import LinAlgError from laserchicken.feature_extractor.base_feature_extractor import FeatureExtractor -from laserchicken.utils import get_point, fit_plane +from laserchicken.utils import get_xyz_per_neighborhood class SigmaZFeatureExtractor(FeatureExtractor): """Height percentiles feature extractor class.""" + is_vectorized = True @classmethod def requires(cls): @@ -38,7 +39,7 @@ def provides(cls): """ return ['sigma_z'] - def extract(self, source_point_cloud, neighborhood, target_point_cloud, target_index, volume_description): + def extract(self, source_point_cloud, neighborhoods, target_point_cloud, target_index, volume_description): """ Extract the feature value(s) of the point cloud at location of the target. @@ -49,13 +50,49 @@ def extract(self, source_point_cloud, neighborhood, target_point_cloud, target_i :param volume_description: volume object describing the containing volume of the neighborhood :return: """ - x, y, z = get_point(source_point_cloud, neighborhood) + if not (isinstance(neighborhoods[0], list) or isinstance(neighborhoods[0], range)): + neighborhoods = [neighborhoods] + + xyz_grp = get_xyz_per_neighborhood(source_point_cloud, neighborhoods) + len_ngbrs = [len(ngbr) for ngbr in neighborhoods] + try: - plane_estimator, residuals = fit_plane(x, y, z) - return np.sqrt(np.divide(residuals, x.size)) + residuals = self._get_sum_of_residuals_from_fitted_planes(xyz_grp) + return np.sqrt(np.divide(residuals, len_ngbrs)) except LinAlgError: return 0 + @staticmethod + def _get_sum_of_residuals_from_fitted_planes(xyz): + """ + Fit planes to each of the neighborhoods and returns the corresponding sum of residuals + :param xyz: 3D masked array with (x,y,z) of points in neighboroods. + :return: array with residuals + """ + # setup coefficient matrices and ordinate values + matrix = np.ones_like(xyz) + matrix[:, 1:, :] = xyz[:, 0:2, :] + matrix[xyz.mask] = 0. + matrix = np.transpose(matrix, (0, 2, 1)) + a = np.zeros((xyz.shape[0], xyz.shape[2])) + a[:, :] = xyz[:, 2, :] + + # SVD decomposition of matrices to construct pseudo-inverse + u, s, v = np.linalg.svd(matrix, full_matrices=False) + + # find matrices with zero-singular values, and solve linear problems + zero_sing_val_mask = np.any(np.isclose(s, 0.), axis=1) + inv_s = np.zeros_like(s) + inv_s[~zero_sing_val_mask, :] = 1 / s[~zero_sing_val_mask, :] + parameters = np.einsum('ijk,ij->ik', v, + inv_s * np.einsum('ijk,ij->ik', u, a)) + + # determine residuals for non-singular matrices, set others to zero + a_val = np.einsum('ijk,ik->ij', matrix, parameters) + residuals = np.sum(np.power(a - a_val, 2), axis=1) + residuals[zero_sing_val_mask] = 0. + return residuals + def get_params(self): """ Return a tuple of parameters involved in the current feature extractor object. From 5ded3ff3e04d8cce754a48754a6e1950db358c4e Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Tue, 4 Feb 2020 16:31:46 +0100 Subject: [PATCH 6/8] coords extraction for multiple neighborhoods vectorized --- laserchicken/utils.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/laserchicken/utils.py b/laserchicken/utils.py index 8bea33e..13fe4f9 100644 --- a/laserchicken/utils.py +++ b/laserchicken/utils.py @@ -23,20 +23,27 @@ def get_xyz_per_neighborhood(sourcepc, neighborhoods): :param neighborhoods: :return: 3d tensor as a masked array """ - max_length = max(map(lambda x: len(x), neighborhoods)) - - xyz_grp = np.zeros((len(neighborhoods), 3, max_length)) - mask = np.zeros((len(neighborhoods), 3, max_length)) - for i, neighborhood in enumerate(neighborhoods): - n_neighbors = len(neighborhood) - if n_neighbors is 0: - continue - x, y, z = get_point(sourcepc, neighborhood) - xyz_grp[i, 0, :n_neighbors] = x - xyz_grp[i, 1, :n_neighbors] = y - xyz_grp[i, 2, :n_neighbors] = z - mask[i, :, :n_neighbors] = 1 - return np.ma.MaskedArray(xyz_grp, mask == 0) + lengths = np.array([len(x) for x in neighborhoods], dtype=int) + max_length = lengths.max() + + xyz = np.column_stack((sourcepc[keys.point]["x"]["data"], + sourcepc[keys.point]["y"]["data"], + sourcepc[keys.point]["z"]["data"])) + + indices = np.zeros((len(neighborhoods), max_length), dtype=int) + indices[:] = np.arange(max_length, dtype=int) + indices_mask = indices < lengths[:, None] + indices[indices_mask] = np.concatenate(neighborhoods) + xyz_grp = xyz.take(indices, axis=0).transpose((0,2,1)) + + # construct mask for 3d tensor (true for invalid data) + xyz_mask = np.zeros((3, len(neighborhoods), max_length), dtype=bool) + xyz_mask[:] = ~indices_mask + xyz_mask = np.transpose(xyz_mask, (1,0,2)) + + # set invalid coordinates to zero + xyz_grp[xyz_mask] = 0. + return np.ma.MaskedArray(xyz_grp, xyz_mask) def get_attributes_per_neighborhood(point_cloud, neighborhoods, attribute_names): From 8cbf152111260a4820dc5629415313d1a0942b7b Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Tue, 4 Feb 2020 16:39:05 +0100 Subject: [PATCH 7/8] added test to sigma_z extractor new vectorized implementation compared agains previous serial version. --- .../test_sigma_z_feature_extractor.py | 76 ++++++++++++++++++- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/laserchicken/feature_extractor/test_sigma_z_feature_extractor.py b/laserchicken/feature_extractor/test_sigma_z_feature_extractor.py index 4762e14..a408e71 100644 --- a/laserchicken/feature_extractor/test_sigma_z_feature_extractor.py +++ b/laserchicken/feature_extractor/test_sigma_z_feature_extractor.py @@ -1,12 +1,17 @@ +import os +import time import unittest import numpy as np +from numpy.linalg import LinAlgError -from laserchicken import keys +from laserchicken import compute_neighbors, keys, read_las from laserchicken.feature_extractor import compute_features +from laserchicken.feature_extractor.base_feature_extractor import FeatureExtractor from laserchicken.test_tools import create_point_cloud, create_points_in_xy_grid +from laserchicken.utils import get_point, fit_plane, copy_point_cloud from laserchicken.volume_specification import InfiniteCylinder - +from .sigma_z_feature_extractor import SigmaZFeatureExtractor as SigmaZVectorizedFeatureExtractor class TestExtractSigmaZ(unittest.TestCase): def test_constantValues_result0(self): @@ -42,5 +47,68 @@ def assert_std_for_z_function_in_xy_grid(z_checkered, expected): np.testing.assert_almost_equal( targets[keys.point]['sigma_z']['data'][0], expected) - - +class TestExtractSigmaZComparison(unittest.TestCase): + point_cloud = None + + def test_sigma_z_multiple_neighborhoods(self): + """ + Test and compare the serial and vectorized sigma_z implementations. + + sigma_z is computed for a list of neighborhoods in real data. A vectorized implementation and a serial + implementation are compared and timed. Any difference in result between the two methods is definitely + unexpected. + """ + # vectorized version + t0 = time.time() + extract_vect = SigmaZVectorizedFeatureExtractor() + sigma_z_vec = extract_vect.extract(self.point_cloud, self.neigh, None, None, None) + print('Timing Vectorize : {}'.format((time.time() - t0))) + + # serial version + sigma_z = [] + t0 = time.time() + for n in self.neigh: + extract = SigmaZSerial() + sigma_z.append(extract.extract(self.point_cloud, n, None, None, None)) + print('Timing Serial : {}'.format((time.time() - t0))) + sigma_z = np.array(sigma_z) + np.testing.assert_allclose(sigma_z_vec, sigma_z, atol=1.e-7) + + def setUp(self): + """ + Set up the test. + + Load in a bunch of real data from AHN3. + """ + np.random.seed(1234) + + _TEST_FILE_NAME = 'AHN3.las' + _TEST_DATA_SOURCE = 'testdata' + + _CYLINDER = InfiniteCylinder(4) + _PC_260807 = read_las.read(os.path.join(_TEST_DATA_SOURCE, _TEST_FILE_NAME)) + _PC_1000 = copy_point_cloud(_PC_260807, array_mask=( + np.random.choice(range(len(_PC_260807[keys.point]['x']['data'])), size=1000, replace=False))) + _1000_NEIGHBORHOODS_IN_260807 = next(compute_neighbors.compute_neighborhoods(_PC_260807, _PC_1000, _CYLINDER)) + + self.point_cloud = _PC_260807 + self.neigh = _1000_NEIGHBORHOODS_IN_260807 + +class SigmaZSerial(FeatureExtractor): + """Serial implementation. Used to test the current (vectorized) implementation.""" + + @classmethod + def requires(cls): + return [] + + @classmethod + def provides(cls): + return ["sigma_z"] + + def extract(self, sourcepc, neighborhood, targetpc, targetindex, volume): + x, y, z = get_point(sourcepc, neighborhood) + try: + plane_estimator, residuals = fit_plane(x, y, z) + return np.sqrt(np.divide(residuals, x.size)) + except LinAlgError: + return 0 From 0f2aca13dd2eeba20f7caa9ac7a3f5a8bffa96a2 Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Fri, 7 Feb 2020 14:57:17 +0100 Subject: [PATCH 8/8] CHANGELOG.md file updated --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f1aa4d3..a4fd18c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 0.3.3 - 2020- +- Changed: + - Vectorized sigma_z feature extractor + ## 0.3.2 - 2019-12-12 ## Added - Features added: