diff --git a/MANIFEST.in b/MANIFEST.in index ebe9984..8b9a556 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,4 +7,5 @@ include TODO include .coveragerc include Dockerfile include Dockerfile.legacy -graft skgstat/data/rf \ No newline at end of file +graft skgstat/data/rf +graft skgstat/data/samples \ No newline at end of file diff --git a/docs/changelog.rst b/docs/changelog.rst index 92b6e1f..9a293c9 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,6 +7,7 @@ Version 0.6.3-rc - [interfaces] If any of the gstools interfaces are used, the Variogram will call :func:`fit ` without forcing a full preprocessing cycle. This fixes edge cases, where a parameter was mutated, but the fitting not performed before the instance was exported. This should only have happended in very rare occasions. +- [data] added the meuse dataset from the R-package ``'sp'`` Version 0.6.2 ============= diff --git a/skgstat/data/__init__.py b/skgstat/data/__init__.py index af4d0af..49641c6 100644 --- a/skgstat/data/__init__.py +++ b/skgstat/data/__init__.py @@ -1,3 +1,5 @@ +import pandas as pd + from skgstat.data import _loader from skgstat.data._loader import field_names @@ -14,6 +16,19 @@ field was generated using gstools. Copyright Mirko Mälicke, 2020. If you use this data, cite SciKit-GStat: https://doi.org/10.5281/zenodo.1345584 + """, + meuse="""Sample dataset of real measurements of heavy metal pollutions + in the topsoil on a 15x15 meter plot along the river Meuse. + The data is distributed along with the R-package sp. + IMPORTANT: If you use this data, cite Pebesma and Bivand (2005) + and Bivand et al (2013): + + Pebesma EJ, Bivand RS (2005). “Classes and methods for spatial + data in R.” R News, 5(2), 9–13. https://CRAN.R-project.org/doc/Rnews/. + + Bivand RS, Pebesma E, Gomez-Rubio V (2013). Applied spatial data + analysis with R, Second edition. Springer, NY. https://asdar-book.org/.
+ """ ) @@ -212,3 +227,64 @@ def aniso_field(): sample=sample, origin=origins.get('aniso') ) + + +def meuse(variable='lead'): + """ + Returns one of the samples of the well-known Meuse dataset. + You can specify which heavy metal data you want to load. + + Parameters + ---------- + variable : str + Name of the heavy metal to load. Has to be one of + 'cadmium', 'copper', 'lead' or 'zinc'. Defaults to 'lead'. + + Returns + ------- + result : dict + Dictionary of the sample and a citation information. + + Raises + ------ + AttributeError + If ``variable`` is not one of the supported heavy metals. + + Notes + ----- + The example data was taken from the R package 'sp' + as published on CRAN: https://cran.r-project.org/package=sp + The package is licensed under GPL-3, which applies + to the sample if used somewhere else. + If you use this sample, please cite the original sources + [502]_, [503]_ and not SciKit-GStat. + + References + ---------- + .. [502] Pebesma EJ, Bivand RS (2005). “Classes and methods for spatial + data in R.” R News, 5(2), 9–13. https://CRAN.R-project.org/doc/Rnews/. + + .. [503] Bivand RS, Pebesma E, Gomez-Rubio V (2013). Applied spatial data + analysis with R, Second edition. Springer, NY. https://asdar-book.org/.
+ + """ + # check variable + if variable not in ('cadmium', 'copper', 'lead', 'zinc'): + raise AttributeError( + "variable has to be in ['cadmium', 'copper', 'lead', 'zinc']" + ) + + # get the data + df = _loader.read_sample_file('meuse.txt') + + # get the coordinates + coords = df[['x', 'y']].values + + # get the correct variable + values = df[[variable]].values + + # return + return dict( + sample=(coords, values, ), + origin=origins.get('meuse') + ) diff --git a/skgstat/data/_loader.py b/skgstat/data/_loader.py index 8103da8..91d4166 100644 --- a/skgstat/data/_loader.py +++ b/skgstat/data/_loader.py @@ -3,6 +3,7 @@ import imageio from glob import glob import numpy as np +import pandas as pd PATH = os.path.abspath(os.path.dirname(__file__)) @@ -107,3 +108,24 @@ def get_sample( values = np.asarray([img[c[0], c[1]] for c in coordinates]) return coordinates, values + + +def read_sample_file(fname) -> pd.DataFrame: + """ + Return a sample from a sample-file as a + pandas DataFrame + + Parameters + ---------- + fname : str + Name of the file inside the ``samples`` directory of this package. + + Returns + ------- + df : pandas.DataFrame + The file content + + """ + # build the path + path = os.path.join(PATH, 'samples', fname) + return pd.read_csv(path) diff --git a/skgstat/data/samples/README.md b/skgstat/data/samples/README.md new file mode 100644 index 0000000..abc72f5 --- /dev/null +++ b/skgstat/data/samples/README.md @@ -0,0 +1,11 @@ +# Meuse dataset + +Please note that the `./meuse.txt` is distributed under a GPL 3.0 license. +It is originally from https://cran.r-project.org/package=sp and if you +reuse the data, cite: + +Pebesma EJ, Bivand RS (2005). “Classes and methods for spatial + data in R.” R News, 5(2), 9–13. https://CRAN.R-project.org/doc/Rnews/. + +Bivand RS, Pebesma E, Gomez-Rubio V (2013). Applied spatial data + analysis with R, Second edition. Springer, NY. https://asdar-book.org/.
\ No newline at end of file diff --git a/skgstat/data/samples/meuse.txt b/skgstat/data/samples/meuse.txt new file mode 100644 index 0000000..59621a1 --- /dev/null +++ b/skgstat/data/samples/meuse.txt @@ -0,0 +1,156 @@ +"x","y","cadmium","copper","lead","zinc","elev","dist","om","ffreq","soil","lime","landuse","dist.m" +181072,333611,11.7,85,299,1022,7.909,0.00135803,13.6,"1","1","1","Ah",50 +181025,333558,8.6,81,277,1141,6.983,0.0122243,14,"1","1","1","Ah",30 +181165,333537,6.5,68,199,640,7.8,0.103029,13,"1","1","1","Ah",150 +181298,333484,2.6,81,116,257,7.655,0.190094,8,"1","2","0","Ga",270 +181307,333330,2.8,48,117,269,7.48,0.27709,8.7,"1","2","0","Ah",380 +181390,333260,3,61,137,281,7.791,0.364067,7.8,"1","2","0","Ga",470 +181165,333370,3.2,31,132,346,8.217,0.190094,9.2,"1","2","0","Ah",240 +181027,333363,2.8,29,150,406,8.49,0.0921516,9.5,"1","1","0","Ab",120 +181060,333231,2.4,37,133,347,8.668,0.184614,10.6,"1","1","0","Ab",240 +181232,333168,1.6,24,80,183,9.049,0.309702,6.3,"1","2","0","W",420 +181191,333115,1.4,25,86,189,9.015,0.315116,6.4,"1","2","0","Fh",400 +181032,333031,1.8,25,97,251,9.073,0.228123,9,"1","1","0","Ag",300 +180874,333339,11.2,93,285,1096,7.32,0,15.4,"1","1","1","W",20 +180969,333252,2.5,31,183,504,8.815,0.113932,8.4,"1","1","0","Ah",130 +181011,333161,2,27,130,326,8.937,0.168336,9.1,"1","1","0","Ah",220 +180830,333246,9.5,86,240,1032,7.702,0,16.2,"1","1","1","W",10 +180763,333104,7,74,133,606,7.16,0.0122243,16,"1","1","1","W",10 +180694,332972,7.1,69,148,711,7.1,0.0122243,16,"1","1","1","W",10 +180625,332847,8.7,69,207,735,7.02,0,13.7,"1","1","1","W",10 +180555,332707,12.9,95,284,1052,6.86,0,14.8,"1","1","1",NA,10 +180642,332708,5.5,53,194,673,8.908,0.0703468,10.2,"1","1","1","Am",80 +180704,332717,2.8,35,123,402,8.99,0.0975136,7.2,"1","1","1","Am",140 +180704,332664,2.9,35,110,343,8.83,0.113932,7.2,"1","1","1","Ag",160 +181153,332925,1.7,24,85,218,9.02,0.342321,7,"1","2","0","Ah",440 
+181147,332823,1.4,26,75,200,8.976,0.385804,6.9,"1","2","0","W",490 +181167,332778,1.5,22,76,194,8.973,0.429289,6.3,"1","2","0","W",530 +181008,332777,1.3,27,73,207,8.507,0.315116,5.6,"1","2","0","Ab",400 +180973,332687,1.3,24,67,180,8.743,0.320574,4.4,"1","2","0","Ag",400 +180916,332753,1.8,22,87,240,8.973,0.249863,5.3,"1","2","0","Ah",330 +181352,332946,1.5,21,65,180,9.043,0.489064,4.8,"1","2","0","Ag",630 +181133,332570,1.3,29,78,208,8.688,0.472778,2.6,"1","2","0","B",570 +180878,332489,1.3,21,64,198,8.727,0.287957,1,"1","2","0","Ag",390 +180829,332450,2.1,27,77,250,8.328,0.271622,2.4,"1","2","0","Ah",360 +180954,332399,1.2,26,80,192,7.971,0.385807,1.9,"1","2","0","B",500 +180956,332318,1.6,27,82,213,7.809,0.418417,3.1,"1","2","0","B",550 +180710,332330,3,32,97,321,6.986,0.244474,1.6,"1","2","0","Ab",340 +180632,332445,5.8,50,166,569,7.756,0.135709,3.5,"1","2","0","Ab",210 +180530,332538,7.9,67,217,833,7.784,0.0484965,8.1,"1","1","1","Am",60 +180478,332578,8.1,77,219,906,7,0,7.9,"1","1","1","W",10 +180383,332476,14.1,108,405,1454,6.92,0.00135803,9.5,"1","1","1","W",20 +180494,332330,2.4,32,102,298,7.516,0.135709,1.4,"1","2","0","Am",170 +180561,332193,1.2,21,48,167,8.18,0.26622,NA,"1","2","0","Ga",320 +180451,332175,1.7,22,65,176,8.694,0.211843,NA,"1","2","0","W",260 +180410,332031,1.3,21,62,258,9.28,0.320572,2,"1","2","0","Ah",360 +180355,332299,4.2,51,281,746,7.94,0.081222,5.1,"1","2","0","Ah",100 +180292,332157,4.3,50,294,746,6.36,0.190086,5.3,"1","2","0","Am",200 +180283,332014,3.1,38,211,464,7.78,0.287941,4.5,"1","2","0","Ah",320 +180282,331861,1.7,26,135,365,8.18,0.423826,4.9,"1","2","0","Ah",480 +180270,331707,1.7,24,112,282,9.42,0.554289,4.5,"1","2","0","Bw",660 +180199,331591,2.1,32,162,375,8.867,0.603225,5.5,"1","2","0","Bw",690 +180135,331552,1.7,24,94,222,8.292,0.614071,3.4,"1","2","0","Ab",710 +180237,332351,8.2,47,191,812,8.06,0.00135803,11.1,"1","1","1","Ah",10 +180103,332297,17,128,405,1548,7.98,0,12.3,"1","1","1","W",10 
+179973,332255,12,117,654,1839,7.9,0.0054321,16.5,"1","1","1","W",10 +179826,332217,9.4,104,482,1528,7.74,0.0054321,13.9,"1","1","1","W",10 +179687,332161,8.2,76,276,933,7.552,0.0054321,8.1,"1","1","1","W",20 +179792,332035,2.6,36,180,432,7.76,0.146578,3.1,"1","1","0","Fw",200 +179902,332113,3.5,34,207,550,6.74,0.135684,5.8,"1","1","0","Fw",140 +180100,332213,10.9,90,541,1571,6.68,0.0703333,10.2,"1","1","1","Fw",70 +179604,332059,7.3,80,310,1190,7.4,0.0484831,12,"1","1","1","W",20 +179526,331936,9.4,78,210,907,7.44,0.0054321,14.1,"1","1","1","W",10 +179495,331770,8.3,77,158,761,7.36,0.0054321,14.5,"1","1","1","Fw",10 +179489,331633,7,65,141,659,7.2,0.0316663,14.8,"1","1","1","W",20 +179414,331494,6.8,66,144,643,7.22,0.0122243,13.3,"1","1","1","Ah",10 +179334,331366,7.4,72,181,801,7.36,0.0122243,15.2,"1","1","1","W",20 +179255,331264,6.6,75,173,784,5.18,0.0373395,11.4,"1","1","1","W",20 +179470,331125,7.8,75,399,1060,5.8,0.211846,9,"1","1","0","Ah",270 +179692,330933,0.7,22,45,119,7.64,0.451037,3.6,"1","1","1","Fw",560 +179852,330801,3.4,55,325,778,6.32,0.575877,6.9,"1","1","0","Bw",750 +179140,330955,3.9,47,268,703,5.76,0.0756869,7,"1","1","1","Ab",80 +179128,330867,3.5,46,252,676,6.48,0.12481,6.2,"1","1","1","Ab",130 +179065,330864,4.7,55,315,793,6.48,0.103024,6.5,"1","1","0","W",110 +179007,330727,3.9,49,260,685,6.32,0.157469,5.7,"1","1","0","W",200 +179110,330758,3.1,39,237,593,6.32,0.200976,7,"1","1","1","Ah",260 +179032,330645,2.9,45,228,549,6.16,0.200976,7.3,"1","1","0","W",270 +179095,330636,3.9,48,241,680,6.56,0.26622,8.2,"1","1","0","W",320 +179058,330510,2.7,36,201,539,6.9,0.298835,4.3,"1","1","0","Ah",360 +178810,330666,2.5,36,204,560,7.54,0.0812247,4.4,"1","1","1","Am",80 +178912,330779,5.6,68,429,1136,6.42,0.070355,8.2,"1","1","1","W",100 +178981,330924,9.4,88,462,1383,6.28,0.0122243,8.5,"1","1","1","W",70 +179076,331005,10.8,85,333,1161,6.34,0,9.6,"1","1","1","W",20 +180151,330353,18.1,76,464,1672,7.307,0.0537723,17,"1","1","1","W",50 
+179211,331175,6.3,63,159,765,5.7,0.0593662,12.8,"1","1","1","W",80 +181118,333214,2.1,32,116,279,7.72,0.211843,5.9,"1","2","0","W",290 +179474,331304,1.8,25,81,241,7.932,0.12481,2.9,"2","2","1","Ah",160 +179559,331423,2.2,27,131,317,7.82,0.12481,4.5,"2","1","0","W",160 +179022,330873,2.8,36,216,545,8.575,0.0921516,10.7,"2","1","0","W",140 +178953,330742,2.4,41,145,505,8.536,0.113941,9.4,"2","1","0","W",150 +178875,330516,2.6,33,163,420,8.504,0.179216,9,"2","1","0","Ah",220 +178803,330349,1.8,27,129,332,8.659,0.233596,7,"2","1","0","Am",280 +179029,330394,2,38,148,400,7.633,0.336861,6.5,"2","1","1","Am",450 +178605,330406,2.7,37,214,553,8.538,0.070355,9.4,"2","1","1","Am",70 +178701,330557,2.7,34,226,577,7.68,0.0593662,10.2,"2","1","0","Am",70 +179547,330245,0.9,19,54,155,7.564,0.255341,6.4,"2","1","0","W",340 +179301,330179,0.9,22,70,224,7.76,0.364067,7.6,"2","1","0","W",470 +179405,330567,0.4,26,73,180,7.653,0.429295,7,"2","1","0","Am",630 +179462,330766,0.8,25,87,226,7.951,0.380328,5.6,"2","1","0","Am",460 +179293,330797,0.4,22,76,186,8.176,0.249874,6.5,"2","1","0","Am",320 +179180,330710,0.4,24,81,198,8.468,0.266212,6.6,"2","1","0","Ah",320 +179206,330398,0.4,18,68,187,8.41,0.451037,5.9,"2","1","0","W",540 +179618,330458,0.8,23,66,199,7.61,0.30971,6.5,"2","1","0","W",420 +179782,330540,0.4,22,49,157,7.792,0.293359,6.4,"2","1","0","SPO",380 +179980,330773,0.4,23,63,203,8.76,0.532351,7.2,"2","2","0","W",500 +180067,331185,0.4,23,48,143,9.879,0.619513,6.6,"2","3","0","Am",760 +180162,331387,0.2,23,51,136,9.097,0.684725,4.3,"2","2","0","Ah",750 +180451,331473,0.2,18,50,117,9.095,0.809742,5.3,"2","3","0","Fw",1000 +180328,331158,0.4,20,39,113,9.717,0.880389,4.1,"2","3","0","Ah",860 +180276,330963,0.2,22,48,130,9.924,0.749591,6.1,"2","3","0","Ah",680 +180114,330803,0.2,27,64,192,9.404,0.575752,7.5,"2","3","0","Fw",500 +179881,330912,0.4,25,84,240,10.52,0.581484,8.8,"2","3","0","STA",650 +179774,330921,0.2,30,67,221,8.84,0.49452,5.7,"2","3","0","DEN",630 
+179657,331150,0.2,23,49,140,8.472,0.32058,6.1,"2","3","0","Fw",410 +179731,331245,0.2,24,48,128,9.634,0.336851,7.1,"2","3","0","Ah",390 +179717,331441,0.2,21,56,166,9.206,0.249852,4.1,"2","2","0","Ah",310 +179446,331422,0.2,24,65,191,8.47,0.0756869,6,"2","1","0","Ah",70 +179524,331565,0.2,21,84,232,8.463,0.0756869,6.6,"2","1","0","W",70 +179644,331730,0.2,23,75,203,9.691,0.162853,6.8,"2","1","1","STA",150 +180321,330366,3.7,53,250,722,8.704,0.0974916,9.1,"2","2","0","Bw",80 +180162,331837,0.2,33,81,210,9.42,0.440142,5.9,"2","2","0","Ah",450 +180029,331720,0.2,22,72,198,9.573,0.4619,4.9,"2","2","0","Aa",530 +179797,331919,0.2,23,86,139,9.555,0.222701,7.1,"2","1","0","W",240 +179642,331955,0.2,25,94,253,8.779,0.103024,8.1,"2","1","1","Tv",70 +179849,332142,1.2,30,244,703,8.54,0.0921353,8.3,"2","1","0","Fw",70 +180265,332297,2.4,47,297,832,8.809,0.0484884,10,"2","1","0","Ah",60 +180107,332101,0.2,31,96,262,9.523,0.168331,5.9,"2","1","0","Ah",190 +180462,331947,0.2,20,56,142,9.811,0.38581,5,"2","2","0","Ah",450 +180478,331822,0.2,16,49,119,9.604,0.489064,4.5,"2","2","0","Am",550 +180347,331700,0.2,17,50,152,9.732,0.57602,5.4,"2","2","0","Am",650 +180862,333116,0.4,26,148,415,9.518,0.0812194,2.3,"2","1","0","Am",100 +180700,332882,1.6,34,162,474,9.72,0.0373369,7.5,"2","1","0","W",170 +180201,331160,0.8,18,37,126,9.036,0.771698,4.6,"2","3","1","Ah",860 +180173,331923,1.2,23,80,210,9.528,0.336829,5.8,"2","2","0","W",410 +180923,332874,0.2,20,80,220,9.155,0.228123,4.4,"3","1","0","Aa",290 +180467,331694,0.2,14,49,133,10.08,0.597761,4.4,"3","2","0","Am",680 +179917,331325,0.8,46,42,141,9.97,0.44558,4.5,"3","2","0","Am",540 +179822,331242,1,29,48,158,10.136,0.396675,5.2,"3","2","0","Am",480 +179991,331069,0.8,19,41,129,10.32,0.581478,4.6,"3","3","0","W",720 +179120,330578,1.2,31,73,206,9.041,0.287966,6.9,"3","1","0","W",380 +179034,330561,2,27,146,451,7.86,0.233596,7,"3","1","0","W",310 +179085,330433,1.5,29,95,296,8.741,0.364067,5.4,"3","1","0","Ah",430 
+179236,330046,1.1,22,72,189,7.822,0.331454,6.2,"3","1","0","Ah",370 +179456,330072,0.8,20,51,154,7.78,0.211846,5,"3","1","0","Fw",290 +179550,329940,0.8,20,54,169,8.121,0.103029,5.1,"3","1","0","W",150 +179445,329807,2.1,29,136,403,8.231,0.070355,8.1,"3","1","0","Bw",70 +179337,329870,2.5,38,170,471,8.351,0.146576,8,"3","1","0","Bw",220 +179245,329714,3.8,39,179,612,7.3,0.0537723,8.8,"3","1","0","W",80 +179024,329733,3.2,35,200,601,7.536,0.119286,9.3,"3","1","0","W",120 +178786,329822,3.1,42,258,783,7.706,0.0921435,8.4,"3","1","0","Ah",120 +179135,329890,1.5,24,93,258,8.07,0.249863,7.7,"3","1","0","Am",260 +179030,330082,1.2,20,68,214,8.226,0.37494,5.7,"3","1","0","Ah",440 +179184,330182,0.8,20,49,166,8.128,0.423837,4.7,"3","1","0","Am",540 +179085,330292,3.1,39,173,496,8.577,0.423837,9.1,"3","1","0","Ah",520 +178875,330311,2.1,31,119,342,8.429,0.27709,6.5,"3","1","0","Ah",350 +179466,330381,0.8,21,51,162,9.406,0.358606,5.7,"3","1","0","W",460 +180627,330190,2.7,27,124,375,8.261,0.0122243,5.5,"3","3","0","W",40 \ No newline at end of file diff --git a/skgstat/tests/test_data_loader.py b/skgstat/tests/test_data_loader.py index abe7e7d..fe26af1 100644 --- a/skgstat/tests/test_data_loader.py +++ b/skgstat/tests/test_data_loader.py @@ -1,3 +1,5 @@ +import pytest + from skgstat import data import numpy as np from numpy.testing import assert_array_almost_equal @@ -40,3 +42,20 @@ def test_aniso_data(): c, v = data.aniso(N=25).get('sample') assert len(c) == len(v) == 25 + + +def test_meuse_loads(): + df = data._loader.read_sample_file('meuse.txt') + + # get zinc data + _, zinc = data.meuse(variable='zinc').get('sample') + + assert_array_almost_equal( + zinc, df[['zinc']].values, decimal=6 + ) + + # check exception + with pytest.raises(AttributeError) as e: + data.meuse(variable='unknown') + + assert 'variable has to be in' in str(e.value)