diff --git a/README.md b/README.md index 13613f20..81e14f52 100644 --- a/README.md +++ b/README.md @@ -177,26 +177,25 @@ y = np.random.RandomState(20011012).rand(1000) * 100. model = gs.Exponential(dim=2, var=2, len_scale=8) srf = gs.SRF(model, mean=0, seed=19970221) field = srf((x, y)) -# estimate the variogram of the field with 40 bins -bins = np.arange(40) -bin_center, gamma = gs.vario_estimate((x, y), field, bins) +# estimate the variogram of the field +bin_center, gamma = gs.vario_estimate((x, y), field) # fit the variogram with a stable model. (no nugget fitted) fit_model = gs.Stable(dim=2) fit_model.fit_variogram(bin_center, gamma, nugget=False) # output -ax = fit_model.plot(x_max=40) -ax.plot(bin_center, gamma) +ax = fit_model.plot(x_max=bin_center[-1]) +ax.scatter(bin_center, gamma) print(fit_model) ``` Which gives: ```python -Stable(dim=2, var=1.92, len_scale=8.15, nugget=0.0, anis=[1.], angles=[0.], alpha=1.05) +Stable(dim=2, var=1.85, len_scale=7.42, nugget=0.0, anis=[1.0], angles=[0.0], alpha=1.09) ```

-Variogram +Variogram

@@ -325,6 +324,7 @@ in memory for immediate 3D plotting in Python. - [hankel >= 1.0.2](https://github.com/steven-murray/hankel) - [emcee >= 3.0.0](https://github.com/dfm/emcee) - [pyevtk >= 1.1.1](https://github.com/pyscience-projects/pyevtk) +- [meshio>=4.0.3, <5.0](https://github.com/nschloe/meshio) ### Optional @@ -339,7 +339,7 @@ You can contact us via . ## License -[LGPLv3][license_link] © 2018-2020 +[LGPLv3][license_link] © 2018-2021 [pip_link]: https://pypi.org/project/gstools [conda_link]: https://docs.conda.io/en/latest/miniconda.html diff --git a/docs/source/index.rst b/docs/source/index.rst index 296de012..aa57dc94 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -205,24 +205,23 @@ model again. model = gs.Exponential(dim=2, var=2, len_scale=8) srf = gs.SRF(model, mean=0, seed=19970221) field = srf((x, y)) - # estimate the variogram of the field with 40 bins - bins = np.arange(40) - bin_center, gamma = gs.vario_estimate((x, y), field, bins) + # estimate the variogram of the field + bin_center, gamma = gs.vario_estimate((x, y), field) # fit the variogram with a stable model. (no nugget fitted) fit_model = gs.Stable(dim=2) fit_model.fit_variogram(bin_center, gamma, nugget=False) # output - ax = fit_model.plot(x_max=40) - ax.plot(bin_center, gamma) + ax = fit_model.plot(x_max=bin_center[-1]) + ax.scatter(bin_center, gamma) print(fit_model) Which gives: .. code-block:: python - Stable(dim=2, var=1.92, len_scale=8.15, nugget=0.0, anis=[1.], angles=[0.], alpha=1.05) + Stable(dim=2, var=1.85, len_scale=7.42, nugget=0.0, anis=[1.0], angles=[0.0], alpha=1.09) -.. image:: https://raw.githubusercontent.com/GeoStat-Framework/GSTools/master/docs/source/pics/exp_vario_fit.png +.. image:: https://raw.githubusercontent.com/GeoStat-Framework/GeoStat-Framework.github.io/master/img/GS_vario_est.png :width: 400px :align: center @@ -361,6 +360,7 @@ Requirements - `hankel >= 1.0.2 `_ - `emcee >= 3.0.0 `_ - `pyevtk >= 1.1.1 `_ +- `meshio>=4.0.3, <5.0 `_ Optional diff --git a/examples/00_misc/04_herten.py b/examples/00_misc/04_herten.py index 2e81820f..4f27ed56 100644 --- a/examples/00_misc/04_herten.py +++ b/examples/00_misc/04_herten.py @@ -145,7 +145,7 @@ def generate_transmissivity(): # results reproducible, we can also set a seed. -bins = np.linspace(0, 10, 50) +bins = gs.standard_bins(pos=(x_u, y_u), max_dist=10) bin_center, gamma = gs.vario_estimate( (x_u, y_u), herten_log_trans.reshape(-1), diff --git a/examples/03_variogram/grid_dim_origin_spacing.txt b/examples/00_misc/grid_dim_origin_spacing.txt similarity index 100% rename from examples/03_variogram/grid_dim_origin_spacing.txt rename to examples/00_misc/grid_dim_origin_spacing.txt diff --git a/examples/03_variogram/herten_transmissivity.gz b/examples/00_misc/herten_transmissivity.gz similarity index 100% rename from examples/03_variogram/herten_transmissivity.gz rename to examples/00_misc/herten_transmissivity.gz diff --git a/examples/03_variogram/05_auto_fit_variogram.py b/examples/03_variogram/05_auto_fit_variogram.py new file mode 100644 index 00000000..9a796314 --- /dev/null +++ b/examples/03_variogram/05_auto_fit_variogram.py @@ -0,0 +1,35 @@ +""" +Fit Variogram with automatic binning +------------------------------------ +""" +import numpy as np +import gstools as gs + +############################################################################### +# Generate a synthetic field with an exponential model. + +x = np.random.RandomState(19970221).rand(1000) * 100.0 +y = np.random.RandomState(20011012).rand(1000) * 100.0 +model = gs.Exponential(dim=2, var=2, len_scale=8) +srf = gs.SRF(model, mean=0, seed=19970221) +field = srf((x, y)) +print(field.var()) +############################################################################### +# Estimate the variogram of the field with automatic binning. + +bin_center, gamma = gs.vario_estimate((x, y), field) +print("estimated bin number:", len(bin_center)) +print("maximal bin distance:", bin_center[-1]) + +############################################################################### +# Fit the variogram with a stable model (no nugget fitted). + +fit_model = gs.Stable(dim=2) +fit_model.fit_variogram(bin_center, gamma, nugget=False) +print(fit_model) + +############################################################################### +# Plot the fitting result. + +ax = fit_model.plot(x_max=bin_center[-1]) +ax.scatter(bin_center, gamma) diff --git a/examples/03_variogram/06_auto_bin_latlon.py b/examples/03_variogram/06_auto_bin_latlon.py new file mode 100644 index 00000000..4ca02925 --- /dev/null +++ b/examples/03_variogram/06_auto_bin_latlon.py @@ -0,0 +1,76 @@ +""" +Automatic binning with lat-lon data +----------------------------------- + +In this example we demonstrate automatic binning for a tiny data set +containing temperature records from Germany +(See the detailed DWD example for more information on the data). + +We use a data set from 20 meteo-stations choosen randomly. +""" +import numpy as np +import gstools as gs + +# lat, lon, temperature +data = np.array( + [ + [52.9336, 8.237, 15.7], + [48.6159, 13.0506, 13.9], + [52.4853, 7.9126, 15.1], + [50.7446, 9.345, 17.0], + [52.9437, 12.8518, 21.9], + [53.8633, 8.1275, 11.9], + [47.8342, 10.8667, 11.4], + [51.0881, 12.9326, 17.2], + [48.406, 11.3117, 12.9], + [49.7273, 8.1164, 17.2], + [49.4691, 11.8546, 13.4], + [48.0197, 12.2925, 13.9], + [50.4237, 7.4202, 18.1], + [53.0316, 13.9908, 21.3], + [53.8412, 13.6846, 21.3], + [54.6792, 13.4343, 17.4], + [49.9694, 9.9114, 18.6], + [51.3745, 11.292, 20.2], + [47.8774, 11.3643, 12.7], + [50.5908, 12.7139, 15.8], + ] +) +pos = data.T[:2] # lat, lon +field = data.T[2] # temperature + +############################################################################### +# Since the overall range of these meteo-stations is too low, we can use the +# data-variance as additional information during the fit of the variogram. + +emp_v = gs.vario_estimate(pos, field, latlon=True) +sph = gs.Spherical(latlon=True, rescale=gs.EARTH_RADIUS) +sph.fit_variogram(*emp_v, sill=np.var(field)) +ax = sph.plot(x_max=2 * np.max(emp_v[0])) +ax.scatter(*emp_v, label="Empirical variogram") +ax.legend() +print(sph) + +############################################################################### +# As we can see, the variogram fitting was successful and providing the data +# variance helped finding the right length-scale. +# +# Now, we'll use this covariance model to interpolate the given data with +# ordinary kriging. + +# enclosing box for data points +grid_lat = np.linspace(np.min(pos[0]), np.max(pos[0])) +grid_lon = np.linspace(np.min(pos[1]), np.max(pos[1])) +# ordinary kriging +krige = gs.krige.Ordinary(sph, pos, field) +krige((grid_lat, grid_lon), mesh_type="structured") +ax = krige.plot() +# plotting lat on y-axis and lon on x-axis +ax.scatter(pos[1], pos[0], 50, c=field, edgecolors="k", label="input") +ax.legend() + +############################################################################### +# Looks good, doesn't it? +# +# This example shows, that setting up variogram estimation and kriging routines +# is straight forward with GSTools. ;-) diff --git a/examples/08_geo_coordinates/00_field_generation.py b/examples/08_geo_coordinates/00_field_generation.py index a44c7735..b5685c7d 100755 --- a/examples/08_geo_coordinates/00_field_generation.py +++ b/examples/08_geo_coordinates/00_field_generation.py @@ -19,7 +19,7 @@ model = gs.Gaussian(latlon=True, var=1, len_scale=777, rescale=gs.EARTH_RADIUS) lat = lon = range(-80, 81) -srf = gs.SRF(model, seed=12345) +srf = gs.SRF(model, seed=1234) field = srf.structured((lat, lon)) srf.plot() @@ -53,7 +53,7 @@ # .. note:: # # Note, that the estimated variogram coincides with the yadrenko variogram, -# which means it depends on the great-circle distance. +# which means it depends on the great-circle distance given in radians. # # Keep that in mind when defining bins: The range is at most # :math:`\pi\approx 3.14`, which corresponds to the half globe. diff --git a/examples/08_geo_coordinates/01_dwd_krige.py b/examples/08_geo_coordinates/01_dwd_krige.py index 8b3aacd5..3d4dd138 100755 --- a/examples/08_geo_coordinates/01_dwd_krige.py +++ b/examples/08_geo_coordinates/01_dwd_krige.py @@ -79,8 +79,7 @@ def get_dwd_temperature(): # As the maximal bin distance we choose 8 degrees, which corresponds to a # chordal length of about 900 km. -bin_max = np.deg2rad(8) -bins = np.linspace(0, bin_max, 20) +bins = gs.standard_bins((lat, lon), max_dist=np.deg2rad(8), latlon=True) bin_c, vario = gs.vario_estimate((lat, lon), temp, bins, latlon=True) ############################################################################### @@ -100,7 +99,7 @@ def get_dwd_temperature(): model = gs.Spherical(latlon=True, rescale=gs.EARTH_RADIUS) model.fit_variogram(bin_c, vario, nugget=False) -ax = model.plot("vario_yadrenko", x_max=bin_max) +ax = model.plot("vario_yadrenko", x_max=bins[-1]) ax.scatter(bin_c, vario) print(model) diff --git a/gstools/__init__.py b/gstools/__init__.py index 695dd92c..8ac6b421 100644 --- a/gstools/__init__.py +++ b/gstools/__init__.py @@ -94,13 +94,14 @@ Variogram Estimation ^^^^^^^^^^^^^^^^^^^^ -Estimate the variogram of a given field +Estimate the variogram of a given field with these routines .. currentmodule:: gstools.variogram .. autosummary:: vario_estimate vario_estimate_axis + standard_bins Misc ==== @@ -129,6 +130,7 @@ vario_estimate_axis, vario_estimate_structured, vario_estimate_unstructured, + standard_bins, ) from gstools.covmodel import ( CovModel, @@ -184,6 +186,7 @@ "vario_estimate_axis", "vario_estimate_structured", "vario_estimate_unstructured", + "standard_bins", ] __all__ += [ diff --git a/gstools/tools/geometric.py b/gstools/tools/geometric.py index d34565c5..555fea46 100644 --- a/gstools/tools/geometric.py +++ b/gstools/tools/geometric.py @@ -26,6 +26,7 @@ ang2dir latlon2pos pos2latlon + chordal_to_great_circle """ # pylint: disable=C0103 @@ -51,6 +52,7 @@ "ang2dir", "latlon2pos", "pos2latlon", + "chordal_to_great_circle", ] @@ -641,3 +643,24 @@ def pos2latlon(pos, radius=1.0, dtype=np.double): lat = np.arcsin(np.maximum(np.minimum(pos[2] / radius, 1.0), -1.0)) lon = np.arctan2(pos[1], pos[0]) return np.rad2deg((lat, lon), dtype=dtype) + + +def chordal_to_great_circle(dist): + """ + Calculate great circle distance corresponding to given chordal distance. + + Parameters + ---------- + dist : array_like + Chordal distance of two points on the unit-sphere. + + Returns + ------- + :class:`numpy.ndarray` + Great circle distance corresponding to given chordal distance. + + Notes + ----- + If given values are not in [0, 1], they will be truncated. + """ + return 2 * np.arcsin(np.maximum(np.minimum(np.divide(dist, 2), 1), 0)) diff --git a/gstools/variogram/__init__.py b/gstools/variogram/__init__.py index 8fb0444d..22a362b1 100644 --- a/gstools/variogram/__init__.py +++ b/gstools/variogram/__init__.py @@ -11,6 +11,12 @@ vario_estimate vario_estimate_axis +Binning +^^^^^^^ + +.. autosummary:: + standard_bins + ---- """ @@ -20,10 +26,12 @@ vario_estimate_structured, vario_estimate_unstructured, ) +from gstools.variogram.binning import standard_bins __all__ = [ "vario_estimate", "vario_estimate_axis", "vario_estimate_unstructured", "vario_estimate_structured", + "standard_bins", ] diff --git a/gstools/variogram/binning.py b/gstools/variogram/binning.py new file mode 100644 index 00000000..d12b09d2 --- /dev/null +++ b/gstools/variogram/binning.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +""" +GStools subpackage providing binning routines. + +.. currentmodule:: gstools.variogram.binning + +The following functions are provided + +.. autosummary:: + standard_bins +""" +# pylint: disable=C0103 + +import numpy as np + +from gstools.tools.geometric import ( + gen_mesh, + format_struct_pos_dim, + latlon2pos, + chordal_to_great_circle, +) + +__all__ = ["standard_bins"] + + +def _sturges(pnt_cnt): + return int(np.ceil(2 * np.log2(pnt_cnt) + 1)) + + +def standard_bins( + pos=None, + dim=2, + latlon=False, + mesh_type="unstructured", + bin_no=None, + max_dist=None, +): + r""" + Get standard binning. + + Parameters + ---------- + pos : :class:`list`, optional + the position tuple, containing either the point coordinates (x, y, ...) + or the axes descriptions (for mesh_type='structured') + dim : :class:`int`, optional + Field dimension. + latlon : :class:`bool`, optional + Whether the data is representing 2D fields on earths surface described + by latitude and longitude. When using this, the estimator will + use great-circle distance for variogram estimation. + Note, that only an isotropic variogram can be estimated and a + ValueError will be raised, if a direction was specified. + Bin edges need to be given in radians in this case. + Default: False + mesh_type : :class:`str`, optional + 'structured' / 'unstructured', indicates whether the pos tuple + describes the axis or the point coordinates. + Default: `'unstructured'` + bin_no: :class:`int`, optional + number of bins to create. If None is given, will be determined by + Sturges' rule from the number of points. + Default: None + max_dist: :class:`float`, optional + Cut of length for the bins. If None is given, it will be set to one + third of the box-diameter from the given points. + Default: None + + Returns + ------- + :class:`numpy.ndarray` + The generated bin edges. + + Notes + ----- + Internally uses double precision and also returns doubles. + """ + dim = 2 if latlon else int(dim) + if bin_no is None or max_dist is None: + if pos is None: + raise ValueError("standard_bins: no pos tuple given.") + if mesh_type != "unstructured": + pos = gen_mesh(format_struct_pos_dim(pos, dim)[0]) + else: + pos = np.array(pos, dtype=np.double).reshape(dim, -1) + pos = latlon2pos(pos) if latlon else pos + pnt_cnt = len(pos[0]) + box = [] + for axis in pos: + box.append([np.min(axis), np.max(axis)]) + box = np.array(box) + diam = np.linalg.norm(box[:, 0] - box[:, 1]) + # convert diameter to great-circle distance if using latlon + diam = chordal_to_great_circle(diam) if latlon else diam + bin_no = _sturges(pnt_cnt) if bin_no is None else int(bin_no) + max_dist = diam / 3 if max_dist is None else float(max_dist) + return np.linspace(0, max_dist, num=bin_no + 1, dtype=np.double) diff --git a/gstools/variogram/variogram.py b/gstools/variogram/variogram.py index ffef99b6..ddf9835c 100644 --- a/gstools/variogram/variogram.py +++ b/gstools/variogram/variogram.py @@ -26,6 +26,7 @@ ma_structured, directional, ) +from gstools.variogram.binning import standard_bins __all__ = [ "vario_estimate", @@ -67,7 +68,7 @@ def _separate_dirs_test(direction, angles_tol): def vario_estimate( pos, field, - bin_edges, + bin_edges=None, sampling_size=None, sampling_seed=None, estimator="matheron", @@ -208,14 +209,16 @@ def vario_estimate( ----- Internally uses double precision and also returns doubles. """ - bin_edges = np.array(bin_edges, ndmin=1, dtype=np.double) - bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2.0 + if bin_edges is not None: + bin_edges = np.array(bin_edges, ndmin=1, dtype=np.double) + bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2.0 # allow multiple fields at same positions (ndmin=2: first axis -> field ID) # need to convert to ma.array, since list of ma.array is not recognised field = np.ma.array(field, ndmin=2, dtype=np.double) masked = np.ma.is_masked(field) or np.any(mask) # catch special case if everything is masked if masked and np.all(mask): + bin_centres = np.empty(0) if bin_edges is None else bin_centres estimates = np.zeros_like(bin_centres) if return_counts: return bin_centres, estimates, np.zeros_like(estimates, dtype=int) @@ -289,6 +292,10 @@ def vario_estimate( ) field = field[:, sampled_idx] pos = pos[:, sampled_idx] + # create bining if not given + if bin_edges is None: + bin_edges = standard_bins(pos, dim, latlon) + bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2.0 # select variogram estimator cython_estimator = _set_estimator(estimator) # run diff --git a/tests/test_variogram_unstructured.py b/tests/test_variogram_unstructured.py index 42eac2ca..bf026454 100644 --- a/tests/test_variogram_unstructured.py +++ b/tests/test_variogram_unstructured.py @@ -297,6 +297,15 @@ def test_direction_assertion(self): self.assertRaises( # wrong dimension of angles ValueError, gs.vario_estimate, pos, fld, bns, angles=[[1, 1]] ) + self.assertRaises( # direction on latlon + ValueError, + gs.vario_estimate, + pos, + fld, + bns, + direction=[1, 0], + latlon=True, + ) def test_mask_no_data(self): pos = [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]] @@ -357,6 +366,44 @@ def test_fit_directional(self): with self.assertRaises(ValueError): model.fit_variogram(bin_center, emp_vario[:2]) + def test_auto_binning(self): + # structured mesh + bin_center, emp_vario = gs.vario_estimate( + self.pos, + self.field, + mesh_type="structured", + ) + self.assertEqual(len(bin_center), 21) + self.assertTrue(np.all(bin_center[1:] > bin_center[:-1])) + self.assertTrue(np.all(bin_center > 0)) + # unstructured mesh + bin_center, emp_vario = gs.vario_estimate( + self.pos, + self.field[:, 0, 0], + ) + self.assertEqual(len(bin_center), 8) + self.assertTrue(np.all(bin_center[1:] > bin_center[:-1])) + self.assertTrue(np.all(bin_center > 0)) + # latlon coords + bin_center, emp_vario = gs.vario_estimate( + self.pos[:2], + self.field[..., 0], + mesh_type="structured", + latlon=True, + ) + self.assertEqual(len(bin_center), 15) + self.assertTrue(np.all(bin_center[1:] > bin_center[:-1])) + self.assertTrue(np.all(bin_center > 0)) + + def test_standard_bins(self): + # structured mesh + bins = gs.standard_bins(self.pos, dim=3, mesh_type="structured") + self.assertEqual(len(bins), 22) + self.assertTrue(np.all(bins[1:] > bins[:-1])) + self.assertTrue(np.all(bins[1:] > 0)) + # no pos given + self.assertRaises(ValueError, gs.standard_bins) + if __name__ == "__main__": unittest.main()