"""
Variogram class
"""
import copy
import os
import warnings
import numpy as np
from pandas import DataFrame
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.spatial.distance import pdist, squareform
from sklearn.isotonic import IsotonicRegression
from skgstat import estimators, models, binning
class Variogram(object):
"""Variogram Class
Calculates a variogram of the separating distances in the given
coordinates and relates them to one of the semi-variance measures of
the given dependent values.
"""
def __init__(self,
coordinates=None,
values=None,
estimator='matheron',
model='spherical',
dist_func='euclidean',
bin_func='even',
normalize=False,
fit_method='trf',
fit_sigma=None,
use_nugget=False,
maxlag=None,
n_lags=10,
verbose=False
):
r"""Variogram Class
Note: The directional variogram estimation is not re-implemented yet.
Therefore, the parameters is-directional, azimuth and tolerance are
ignored at the moment and may be subject to change.
Parameters
----------
coordinates : numpy.ndarray
Array of shape (m, n). Will be used as m observation points of
n-dimensions. This variogram can be calculated on 1 - n
dimensional coordinates. In case a 1-dimensional array is passed,
a second array of same length containing only zeros will be
stacked to the passed one.
values : numpy.ndarray
Array of values observed at the given coordinates. The length of
the values array has to match the m dimension of the coordinates
array. Will be used to calculate the dependent variable of the
variogram.
estimator : str, callable
String identifying the semi-variance estimator to be used.
Defaults to the Matheron estimator. Possible values are:
* matheron [Matheron, default]
* cressie [Cressie-Hawkins]
* dowd [Dowd-Estimator]
* genton [Genton]
* minmax [MinMax Scaler]
* entropy [Shannon Entropy]
If a callable is passed, it has to accept an array of absolute
differences, aligned to the 1D distance matrix (flattened upper
triangle), and return a scalar that converges towards small
values for similarity (high covariance).
model : str
String identifying the theoretical variogram function to be used
to describe the experimental variogram. Can be one of:
* spherical [Spherical, default]
* exponential [Exponential]
* gaussian [Gaussian]
* cubic [Cubic]
* stable [Stable model]
* matern [Matérn model]
* nugget [nugget effect variogram]
dist_func : str
String identifying the distance function. Defaults to
'euclidean'. Can be any metric accepted by
scipy.spatial.distance.pdist. Additional parameters are not (yet)
passed through to pdist. These are accepted by pdist for some of
the metrics. In these cases the default values are used.
bin_func : str
String identifying the binning function used to find lag class
edges. At the moment there are two possible values: 'even'
(default) or 'uniform'. 'even' will find n_lags bins of equal width
in the interval [0, maxlag[. 'uniform' will identify n_lags bins on
the same interval, but with varying edges so that all bins contain
the same number of observations.
normalize : bool
Defaults to False. If True, the independent and dependent
variable will be normalized to the range [0,1].
fit_method : str
String identifying the method to be used for fitting the
theoretical variogram function to the experimental. More info is
given in the Variogram.fit docs. Can be one of:
* 'lm': Levenberg-Marquardt algorithm for unconstrained
problems. This is the faster algorithm, yet fitting a
variogram is not an unconstrained problem.
* 'trf': Trust Region Reflective function for non-linear
constrained problems. The class will set the boundaries
itself. This is the default function.
fit_sigma : numpy.ndarray, str
Defaults to None. The sigma is used as measure of uncertainty
during variogram fit. If fit_sigma is an array, it has to hold
n_lags elements, giving the uncertainty for all lag classes. If
fit_sigma is None (default), no weight is given to any lag.
Higher values indicate higher uncertainty and will lower the
influence of the corresponding lag class on the fit.
If fit_sigma is a string, a pre-defined function of separating
distance will be used to fill the array. Can be one of:
* 'linear': Linear loss with distance. Small bins will have
higher impact.
* 'exp': The weights decrease by an exponential function of distance.
* 'sqrt': The weights decrease by the square root of distance.
* 'sq': The weights decrease by the squared distance.
More info is given in the Variogram.fit_sigma documentation.
use_nugget : bool
Defaults to False. If True, a nugget effect will be added to all
Variogram.models as a third (or fourth) fitting parameter. A
nugget is essentially the y-axis intercept of the theoretical
variogram function.
maxlag : float, str
Can specify the maximum lag distance directly by giving a value
larger than 1. The binning function will not find any lag class
with an edge larger than maxlag. If 0 < maxlag < 1, then maxlag
is relative and maxlag * max(Variogram.distance) will be used.
In case maxlag is a string it has to be one of 'median', 'mean'.
Then the median or mean of all Variogram.distance will be used.
Note that maxlag=0.5 will use half the maximum separating distance;
this is not the same as 'median', which is the median of all
separating distances.
n_lags : int
Specify the number of lag classes to be defined by the binning
function.
verbose : bool
Set the verbosity of the class. Not implemented yet.
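Examples
--------
A minimal usage sketch with made-up random demo data; the coordinates,
values and parameter choices below are illustrative only and assume the
class is exposed at the package level:
>>> import numpy as np
>>> from skgstat import Variogram
>>> np.random.seed(42)
>>> coords = np.random.rand(50, 2) * 100  # 50 observation points in 2D
>>> vals = np.random.rand(50)             # one observed value per point
>>> V = Variogram(coords, vals, model='spherical', n_lags=10)
>>> len(V.bins)
10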
"""
# Set coordinates
self._X = np.asarray(coordinates)
# pairwise differences
self._diff = None
# set verbosity
self.verbose = verbose
# set values
self._values = None
# calc_diff = False here, because it will be calculated by fit() later
self.set_values(values=values, calc_diff=False)
# distance matrix
self._dist = None
# set distance calculation function
self._dist_func = None
self.set_dist_function(func=dist_func)
# lags and max lag
self._n_lags = None
self.n_lags = n_lags
self._maxlag = None
self.maxlag = maxlag
# harmonize model placeholder
self._harmonize = False
# estimator can be a function or a string
self._estimator = None
self.set_estimator(estimator_name=estimator)
# model can be a function or a string
self._model = None
self.set_model(model_name=model)
# the binning settings
self._bin_func = None
self._groups = None
self._bins = None
self.set_bin_func(bin_func=bin_func)
# specify if the lag should be given absolute or relative to the maxlag
self._normalized = normalize
# set if nugget effect shall be used
self._use_nugget = None
self.use_nugget = use_nugget
# set the fitting method and sigma array
self.fit_method = fit_method
self._fit_sigma = None
self.fit_sigma = fit_sigma
# set attributes to be filled during calculation
self.cov = None
self.cof = None
# settings, not reachable by init (not yet)
self._cache_experimental = False
# do the preprocessing and fitting upon initialization
# Note that fit() calls preprocessing
self.fit(force=True)
@property
def coordinates(self):
"""Coordinates property
Array of observation locations the variogram is built for. This
property has no setter. If you want to change the coordinates,
use a new Variogram instance.
Returns
-------
coordinates : numpy.array
"""
return self._X
@property
def values(self):
"""Values property
Array of observations the variogram is built for. The setter of this
property utilizes the Variogram.set_values function for setting new
arrays.
Returns
-------
values : numpy.ndarray
See Also
--------
Variogram.set_values
"""
return self._values
@values.setter
def values(self, values):
self.set_values(values=values)
@property
def value_matrix(self):
"""Value matrix
Returns a matrix of pairwise differences in absolute values. The
matrix will have the shape (m, m) with m = len(Variogram.values).
Note that Variogram.values holds the values themselves, while the
value_matrix consists of their pairwise differences.
Returns
-------
values : numpy.matrix
Matrix of pairwise absolute differences of the values.
See Also
--------
Variogram._diff
"""
return squareform(self._diff)
def set_values(self, values, calc_diff=True):
"""Set new values
Will set the passed array as the new value array. This array has to be
of the same length as the first axis of the coordinates array. The
Variogram class only accepts one-dimensional arrays.
On success, all fitting parameters are deleted and the pairwise
differences are recalculated.
Raises a :py:class:`ValueError` on shape mismatches and a Warning if
all input values are identical.
Parameters
----------
values : numpy.ndarray
Returns
-------
void
Raises
------
ValueError : raised if the values array shape does not match the
coordinates array, or if more than one dimension is given
Warning : raised if all input values are the same
See Also
--------
Variogram.values
"""
# check dimensions
if not len(values) == len(self._X):
raise ValueError('The length of the values array has to match ' +
'the length of coordinates')
# use an array
_y = np.asarray(values)
if not _y.ndim == 1:
raise ValueError('The values shall be a 1-D array. ' +
'Multi-dimensional values are not supported yet.')
# check if all input values are the same
if len(set(_y)) < 2:
raise Warning('All input values are the same.')
# reset fitting parameter
self.cof, self.cov = None, None
self._diff = None
# set new values
self._values = np.asarray(values)
# recalculate the pairwise differences
if calc_diff:
self._calc_diff(force=True)
@property
def bin_func(self):
"""Binning function
Returns an instance of the function used for binning the separating
distances into the given number of bins. Both functions use the same
signature of func(distances, n, maxlag).
The setter of this property utilizes the Variogram.set_bin_func to
set a new function.
Returns
-------
binning_function : function
See Also
--------
Variogram.set_bin_func
"""
return self._bin_func
@bin_func.setter
def bin_func(self, bin_func):
self.set_bin_func(bin_func=bin_func)
def set_bin_func(self, bin_func):
"""Set binning function
Sets a new binning function to be used. The new binning method is set
by a string identifying the new function to be used. Can be one of:
['even', 'uniform'].
Parameters
----------
bin_func : str
Can be one of:
* **'even'**: Use skgstat.binning.even_width_lags for using
n_lags lags of equal width up to maxlag.
* **'uniform'**: Use skgstat.binning.uniform_count_lags for using
n_lags lags up to maxlag in which the pairwise differences
follow a uniform distribution.
Returns
-------
void
See Also
--------
Variogram.bin_func
skgstat.binning.uniform_count_lags
skgstat.binning.even_width_lags
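Examples
--------
A short sketch of switching the binning method; the instance ``V`` is
built from made-up random demo data for illustration only:
>>> import numpy as np
>>> from skgstat import Variogram
>>> np.random.seed(42)
>>> V = Variogram(np.random.rand(30, 2), np.random.rand(30))
>>> V.set_bin_func('uniform')   # same pair count in every lag class
>>> V.bin_func = 'even'         # property setter; equal-width classes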
"""
# switch the input
if bin_func.lower() == 'even':
self._bin_func = binning.even_width_lags
elif bin_func.lower() == 'uniform':
self._bin_func = binning.uniform_count_lags
else:
raise ValueError('%s binning method is not known' % bin_func)
# reset groups and bins
self._groups = None
self._bins = None
self.cof, self.cov = None, None
@property
def normalized(self):
return self._normalized
@normalized.setter
def normalized(self, status):
# set the new value
self._normalized = status
@property
def bins(self):
if self._bins is None:
self._bins = self.bin_func(self.distance, self.n_lags, self.maxlag)
return self._bins.copy()
@bins.setter
def bins(self, bins):
# set the new bins
self._bins = bins
# clean the groups as they are not valid anymore
self._groups = None
self.cov = None
self.cof = None
@property
def n_lags(self):
"""Number of lag bins
Pass the number of lag bins to be used on
this Variogram instance. This will reset
the grouping index and fitting parameters.
"""
return self._n_lags
@n_lags.setter
def n_lags(self, n):
# TODO: here accept strings and implement some optimum methods
# string are not implemented yet
if isinstance(n, str):
raise NotImplementedError('n_lags string values not implemented')
# n_lags is int
elif isinstance(n, int):
if n < 1:
raise ValueError('n_lags has to be a positive integer')
# set parameter
self._n_lags = n
# reset the bins
self._bins = None
# else
else:
raise ValueError('n_lags has to be a positive integer')
# reset the fitting
self.cof = None
self.cov = None
@property
def estimator(self):
return self._estimator
@estimator.setter
def estimator(self, value):
self.set_estimator(estimator_name=value)
def set_estimator(self, estimator_name):
# reset the fitting
self.cof, self.cov = None, None
if isinstance(estimator_name, str):
if estimator_name.lower() == 'matheron':
self._estimator = estimators.matheron
elif estimator_name.lower() == 'cressie':
self._estimator = estimators.cressie
elif estimator_name.lower() == 'dowd':
self._estimator = estimators.dowd
elif estimator_name.lower() == 'genton':
self._estimator = estimators.genton
elif estimator_name.lower() == 'minmax':
self._estimator = estimators.minmax
elif estimator_name.lower() == 'percentile':
self._estimator = estimators.percentile
elif estimator_name.lower() == 'entropy':
self._estimator = estimators.entropy
else:
raise ValueError(
('Variogram estimator %s is not understood, please ' +
'provide the function.') % estimator_name
)
elif callable(estimator_name):
self._estimator = estimator_name
else:
raise ValueError('The estimator has to be a string or callable.')
@property
def model(self):
return self._model
@model.setter
def model(self, value):
self.set_model(model_name=value)
def set_model(self, model_name):
"""
Set model as the new theoretical variogram function.
"""
# reset the fitting
self.cof, self.cov = None, None
if isinstance(model_name, str):
# at first reset harmonize
self._harmonize = False
if model_name.lower() == 'spherical':
self._model = models.spherical
elif model_name.lower() == 'exponential':
self._model = models.exponential
elif model_name.lower() == 'gaussian':
self._model = models.gaussian
elif model_name.lower() == 'cubic':
self._model = models.cubic
elif model_name.lower() == 'stable':
self._model = models.stable
elif model_name.lower() == 'matern':
self._model = models.matern
elif model_name.lower() == 'harmonize':
self._harmonize = True
self._model = self._build_harmonized_model()
else:
raise ValueError(
'The theoretical Variogram function %s is not understood, please provide the function' % model_name)
else:
self._model = model_name
def _build_harmonized_model(self):
x = self.bins
y = self.experimental
_x = x[~np.isnan(y)]
_y = y[~np.isnan(y)]
regr = IsotonicRegression(increasing=True).fit(_x, _y)
# create the model function
def harmonize(x):
"""Monotonized Variogram
Return the isotonic harmonized experimental variogram.
This means the experimental variogram is monotonic after harmonization.
The harmonization is done following Hinterding (2003), using
the PAVA algorithm (Barlow and Bartholomew, 1972).
Returns
-------
gamma : numpy.ndarray
monotonized experimental variogram
References
----------
Barlow, R., D. Bartholomew, et al. (1972): Statistical Inference Under Order Restrictions.
John Wiley and Sons, New York.
Hinterding, A. (2003): Entwicklung hybrider Interpolationsverfahren für den automatisierten Betrieb am
Beispiel meteorologischer Größen. Dissertation, Institut für Geoinformatik, Westfälische
Wilhelms-Universität Münster, IfGIprints, Münster. ISBN: 3-936616-12-4
"""
if isinstance(x, (list, tuple, np.ndarray)):
return regr.transform(x)
else:
return regr.transform([x])
return harmonize
@property
def use_nugget(self):
return self._use_nugget
@use_nugget.setter
def use_nugget(self, nugget):
if not isinstance(nugget, bool):
raise ValueError('use_nugget has to be of type bool.')
# set new value
self._use_nugget = nugget
@property
def dist_function(self):
return self._dist_func
def _dist_func_wrapper(self, x):
if callable(self._dist_func):
return self._dist_func(x)
else:
return pdist(X=x, metric=self._dist_func)
@dist_function.setter
def dist_function(self, func):
self.set_dist_function(func=func)
def set_dist_function(self, func):
"""Set distance function
Set the function used for distance calculation. func can either be a
callable or a string. The ranked distance function is not implemented
yet. Strings will be forwarded to the scipy.spatial.distance.pdist
function as the metric argument.
If func is a callable, it has to return the upper triangle of the
distance matrix as a flat array (like the pdist function).
Parameters
----------
func : string, callable
Returns
-------
numpy.array
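Examples
--------
A sketch of passing a custom metric; ``manhattan`` is a hypothetical
helper defined here for illustration and, like pdist, it has to return
the flattened upper triangle of the distance matrix:
>>> import numpy as np
>>> from scipy.spatial.distance import pdist
>>> from skgstat import Variogram
>>> np.random.seed(42)
>>> V = Variogram(np.random.rand(30, 2), np.random.rand(30))
>>> def manhattan(X):
...     return pdist(X, metric='cityblock')
>>> V.set_dist_function(manhattan)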
"""
# reset the distances and fitting
self._dist = None
self.cof, self.cov = None, None
if isinstance(func, str):
if func.lower() == 'rank':
raise NotImplementedError
else:
# if not ranks, it has to be a scipy metric
self._dist_func = func
elif callable(func):
self._dist_func = func
else:
raise ValueError('Input not supported. Pass a string or callable.')
# re-calculate distances
self._calc_distances()
@property
def distance(self):
if self._dist is None:
self._calc_distances()
return self._dist
@distance.setter
def distance(self, dist_array):
self._dist = dist_array
@property
def distance_matrix(self):
return squareform(self.distance)
@property
def maxlag(self):
return self._maxlag
@maxlag.setter
def maxlag(self, value):
# reset fitting
self.cof, self.cov = None, None
# remove bins
self._bins = None
self._groups = None
# set new maxlag
if value is None:
self._maxlag = None
elif isinstance(value, str):
if value == 'median':
self._maxlag = np.median(self.distance)
elif value == 'mean':
self._maxlag = np.mean(self.distance)
elif value < 1:
self._maxlag = value * np.max(self.distance)
else:
self._maxlag = value
@property
def fit_sigma(self):
r"""Fitting Uncertainty
Set or calculate an array of observation uncertainties aligned to the
Variogram.bins. These will be used to weight the observations in the
cost function, which divides the residuals by their uncertainty.
When setting fit_sigma, the array of uncertainties itself can be
given, or one of the strings: ['linear', 'exp', 'sqrt', 'sq']. The
parameters described below refer to the setter of this property.
Parameters
----------
sigma : string, array
Sigma can either be an array of discrete uncertainty values,
which have to align to the Variogram.bins, or of type string.
Then, the weights for fitting are calculated as a function of
(lag) distance.
* **sigma='linear'**: The residuals get weighted by the lag
distance normalized to the maximum lag distance, denoted as
:math:`w_n`
* **sigma='exp'**: The residuals get weighted by the function:
:math:`w = e^{1 / w_n}`
* **sigma='sqrt'**: The residuals get weighted by the function:
:math:`w = \sqrt{w_n}`
* **sigma='sq'**: The residuals get weighted by the function:
:math:`w = w_n^2`
Returns
-------
void
Notes
-----
The cost function is defined as:
.. math::
\chi^2 = \sum_i \left(\frac{r_i}{\sigma_i}\right)^2
where :math:`r_i` are the residuals between the experimental variogram
and the modeled values for the same lag, and :math:`\sigma_i` is the
uncertainty of that lag class. Following this function, a small sigma
increases the influence of the corresponding residual, while a very
large sigma causes the observation to be effectively ignored.
See Also
--------
scipy.optimize.curve_fit
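Examples
--------
A sketch of the two accepted forms, using made-up random demo data and
the default of 10 lag classes; the exact numbers are illustrative only:
>>> import numpy as np
>>> from skgstat import Variogram
>>> np.random.seed(42)
>>> V = Variogram(np.random.rand(30, 2), np.random.rand(30))
>>> V.fit_sigma = 'linear'                    # sigma grows with lag distance
>>> V.fit_sigma = np.linspace(0.1, 1.0, 10)   # explicit per-bin uncertainties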
"""
# unweighted
if self._fit_sigma is None:
return None
# discrete uncertainties
elif isinstance(self._fit_sigma, (list, tuple, np.ndarray)):
if len(self._fit_sigma) == len(self._bins):
return self._fit_sigma
else:
raise AttributeError('fit_sigma and bins need the same length.')
# linear function of distance
elif self._fit_sigma == 'linear':
return self.bins / np.max(self.bins)
# e function of distance
elif self._fit_sigma == 'exp':
return 1. / np.exp(1. / (self.bins / np.max(self.bins)))
# sqrt function of distance
elif self._fit_sigma == 'sqrt':
return np.sqrt(self.bins / np.max(self.bins))
# squared function of distance
elif self._fit_sigma == 'sq':
return (self.bins / np.max(self.bins)) ** 2
else:
raise ValueError("fit_sigma is not understood. It has to be an " +
"array or one of ['linear', 'exp', 'sqrt', 'sq'].")
@fit_sigma.setter
def fit_sigma(self, sigma):
self._fit_sigma = sigma
# remove fitting parameters
self.cof = None
self.cov = None
def lag_groups(self):
"""Lag class groups
Returns an array with as many elements as self._diff has,
identifying the lag class group of each pairwise difference. It can be
used to extract all pairwise values within the same lag bin.
Returns
-------
numpy.ndarray
See Also
--------
Variogram.lag_classes
"""
if self._groups is None:
self._calc_groups()
return self._groups
def lag_classes(self):
"""Iterate over the lag classes
Generates an iterator over all lag classes. Can be zipped with
Variogram.bins to identify the lag.
Returns
-------
iterable
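Examples
--------
A sketch of computing the mean absolute pairwise difference per lag
class on made-up random demo data; the printed values depend on the
random data and are therefore skipped in the doctest:
>>> import numpy as np
>>> from skgstat import Variogram
>>> np.random.seed(42)
>>> V = Variogram(np.random.rand(30, 2), np.random.rand(30))
>>> for lag, group in zip(V.bins, V.lag_classes()):
...     print(round(lag, 2), round(np.mean(group), 2))  # doctest: +SKIP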
"""
# yield all groups
for i in np.unique(self.lag_groups()):
if i < 0:
continue
else:
yield self._diff[np.where(self.lag_groups() == i)]
def preprocessing(self, force=False):
"""Preprocessing function
Prepares all input data for the fit and transform functions. Namely,
the distances and the pairwise value differences are calculated. Then
the binning is set up and the bin edges are calculated. If any of the
listed subsets are already prepared, their processing is skipped. This
behaviour can be changed with the force parameter, which will cause a
clean preprocessing.
Parameters
----------
force : bool
If set to True, all preprocessing data sets will be deleted. Use
it in case you need a clean preprocessing.
Returns
-------
void
"""
# call the _calc functions
self._calc_distances(force=force)
self._calc_diff(force=force)
self._calc_groups(force=force)
def fit(self, force=False, method=None, sigma=None, **kwargs):
"""Fit the variogram
The fit function will fit the theoretical variogram function to the
experimental one. The preprocessed distance matrix, pairwise differences
and binning will not be recalculated if already done. This can be
forced by setting the force parameter to True.
In case you call the fit function directly with method or sigma,
the parameters set on Variogram object instantiation will be
overwritten. All other keyword arguments will be passed to the
scipy.optimize.curve_fit function.
Parameters
----------
force : bool
If set to True, a clean preprocessing of the distance matrix,
pairwise differences and the binning will be forced. Default is
False.
method : string
A string identifying one of the implemented fitting procedures.
Can be one of ['lm', 'trf']:
* lm: Levenberg-Marquardt algorithm implemented in the
scipy.optimize.leastsq function.
* trf: Trust Region Reflective algorithm implemented in
scipy.optimize.least_squares(method='trf')
sigma : string, array
Uncertainty array for the bins. Has to have the same dimension as
self.bins. Refer to Variogram.fit_sigma for more information.
Returns
-------
void
See Also
--------
scipy.optimize
scipy.optimize.curve_fit
scipy.optimize.leastsq
scipy.optimize.least_squares
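Examples
--------
A sketch of re-fitting an existing instance with an explicit method and
weighting scheme; the demo data is made-up and illustrative only:
>>> import numpy as np
>>> from skgstat import Variogram
>>> np.random.seed(42)
>>> V = Variogram(np.random.rand(30, 2), np.random.rand(30))
>>> V.fit(method='trf', sigma='linear')
>>> V.cof is not None
True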
"""
# TODO: the kwargs need to be preserved somehow
# delete the last cov and cof
self.cof = None
self.cov = None
# if force, force a clean preprocessing
self.preprocessing(force=force)
# load the data
x = self.bins
y = self.experimental
# overwrite fit setting if new params are given
if method is not None:
self.fit_method = method
if sigma is not None:
self.fit_sigma = sigma
# remove nans
_x = x[~np.isnan(y)]
_y = y[~np.isnan(y)]
# handle harmonized models
if self._harmonize:
_x = np.linspace(0, np.max(_x), 100)
_y = self._model(_x)
# get the params
s = 0.95 * np.nanmax(_y)
with warnings.catch_warnings():
warnings.simplefilter('ignore')
r = np.argwhere(_y >= s)[0][0]
n = _y[0] if self.use_nugget else 0.0
# set the params
self.cof = [r, s, n]
return
# Switch the method
# Trust Region Reflective
if self.fit_method == 'trf':
bounds = (0, self.__get_fit_bounds(x, y))
self.cof, self.cov = curve_fit(
self._model,
_x, _y,
method='trf',
sigma=self.fit_sigma,
p0=bounds[1],
bounds=bounds,
**kwargs
)
# Levenberg-Marquardt
elif self.fit_method == 'lm':
self.cof, self.cov = curve_fit(
self.model,
_x, _y,
method='lm',
sigma=self.fit_sigma,
**kwargs
)
else:
raise ValueError("fit method has to be one of ['trf', 'lm']")
def transform(self, x):
"""Transform
Transform a given set of lag values to the theoretical variogram
function using the current fitting and preprocessing parameters of
this Variogram instance.
Parameters
----------
x : numpy.array
Array of lag values to be used as model input for the fitted
theoretical variogram model
Returns
-------
numpy.array
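Examples
--------
A sketch evaluating the fitted model on a regular set of lags up to the
largest bin edge; the demo data is made-up and illustrative only:
>>> import numpy as np
>>> from skgstat import Variogram
>>> np.random.seed(42)
>>> V = Variogram(np.random.rand(30, 2), np.random.rand(30))
>>> lags = np.linspace(0, V.bins[-1], 100)
>>> gamma = V.transform(lags)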
"""
self.preprocessing()
# if instance is not fitted, fit it
if self.cof is None:
self.fit(force=True)
# return the result
return self.fitted_model(x)
@property
def fitted_model(self):
"""Fitted Model
Returns a callable that takes a distance value and returns a
semivariance. This model is fitted to the current Variogram
parameters. The function is created at return time, with the fitted
parameters bound into it.
Returns
-------
model : callable
The current semivariance model fitted to the current Variogram
model parameters.
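Examples
--------
A sketch showing that the returned callable can be stored and reused
without touching the instance again; the demo data is made-up and
illustrative only:
>>> import numpy as np
>>> from skgstat import Variogram
>>> np.random.seed(42)
>>> V = Variogram(np.random.rand(30, 2), np.random.rand(30))
>>> model = V.fitted_model            # callable with parameters bound
>>> gamma = model(np.linspace(0, V.bins[-1], 50))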
"""
if self.cof is None:
self.fit(force=True)
# get the pars
cof = self.cof
# get the function
func = self._model