Skip to content

Commit

Permalink
Merge pull request #107 from mmaelicke/meuse_data
Browse files Browse the repository at this point in the history
Add Meuse data to skgstat
  • Loading branch information
mmaelicke authored Jun 4, 2021
2 parents f9316f5 + 3b810e7 commit 45b20c7
Show file tree
Hide file tree
Showing 7 changed files with 273 additions and 1 deletion.
3 changes: 2 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ include TODO
include .coveragerc
include Dockerfile
include Dockerfile.legacy
graft skgstat/data/rf
graft skgstat/data/rf
graft skgstat/data/samples
1 change: 1 addition & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Version 0.6.3-rc
- [interfaces] If any of the gstools interfaces are used, the Variogram will call :func:`fit <skgstat.Variogram.fit>`
without forcing a full preprocessing cycle. This fixes edge cases, where a parameter was mutated, but the fitting
not performed before the instance was exported. This should only have happened on very rare occasions.
- [data] added the meuse dataset from the R-package ``'sp'``

Version 0.6.2
=============
Expand Down
67 changes: 67 additions & 0 deletions skgstat/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pandas as pd

from skgstat.data import _loader
from skgstat.data._loader import field_names

Expand All @@ -14,6 +16,19 @@
field was generated using gstools.
Copyright Mirko Mälicke, 2020. If you use this data,
cite SciKit-GStat: https://doi.org/10.5281/zenodo.1345584
""",
meuse=""""Sample dataset of real measurements of heavy metal pollutions
in the topsoil on a 15x15 meter plot along the river Meuse.
The data is distributed along with the R-package sp.
IMPORTANT: If you use this data, cite Pebesma and Bivand (2005)
and Bivand et al (2013):
Pebesma EJ, Bivand RS (2005). “Classes and methods for spatial
data in R.” R News, 5(2), 9–13. https://CRAN.R-project.org/doc/Rnews/.
Bivand RS, Pebesma E, Gomez-Rubio V (2013). Applied spatial data
analysis with R, Second edition. Springer, NY. https://asdar-book.org/.
"""
)

Expand Down Expand Up @@ -212,3 +227,55 @@ def aniso_field():
sample=sample,
origin=origins.get('aniso')
)


def meuse(variable='lead'):
    """
    Load one heavy metal sample of the well-known Meuse dataset.

    The sample file ``meuse.txt`` is read through the sample loader. The
    ``x`` and ``y`` columns are returned as coordinates and the column
    named by ``variable`` as the observations.

    Parameters
    ----------
    variable : str
        Name of the heavy metal column to load. Has to be one of
        ``'cadmium'``, ``'copper'``, ``'lead'`` or ``'zinc'``.
        Defaults to ``'lead'``.

    Returns
    -------
    result : dict
        Dictionary with two keys: ``'sample'`` holds the tuple
        ``(coordinates, values)`` and ``'origin'`` holds the citation
        information registered for the dataset. The values are selected
        with a one-element column list, so they are returned as a
        two-dimensional array with a single column.

    Raises
    ------
    AttributeError
        If ``variable`` is not one of the supported column names.

    Notes
    -----
    The example data was taken from the R package 'sp'
    as published on CRAN: https://cran.r-project.org/package=sp
    The package is licensed under GPL-3, which applies
    to the sample if used somewhere else.
    If you use this sample, please cite the original sources
    [502]_, [503]_ and not SciKit-GStat.

    References
    ----------
    .. [502] Pebesma EJ, Bivand RS (2005). “Classes and methods for spatial
       data in R.” R News, 5(2), 9–13. https://CRAN.R-project.org/doc/Rnews/.
    .. [503] Bivand RS, Pebesma E, Gomez-Rubio V (2013). Applied spatial data
       analysis with R, Second edition. Springer, NY. https://asdar-book.org/.

    """
    # reject unsupported columns before touching the file system
    supported = ('cadmium', 'copper', 'lead', 'zinc')
    if variable not in supported:
        raise AttributeError(
            "variable has to be in ['cadmium', 'copper', 'lead', 'zinc']"
        )

    # load the complete sample table
    frame = _loader.read_sample_file('meuse.txt')

    # split into locations and the requested observation column
    coordinates = frame[['x', 'y']].values
    observations = frame[[variable]].values

    return {
        'sample': (coordinates, observations),
        'origin': origins.get('meuse'),
    }
17 changes: 17 additions & 0 deletions skgstat/data/_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import imageio
from glob import glob
import numpy as np
import pandas as pd


PATH = os.path.abspath(os.path.dirname(__file__))
Expand Down Expand Up @@ -107,3 +108,19 @@ def get_sample(
values = np.asarray([img[c[0], c[1]] for c in coordinates])

return coordinates, values


def read_sample_file(fname) -> pd.DataFrame:
    """
    Read a file from the ``samples`` folder into a pandas DataFrame.

    Parameters
    ----------
    fname : str
        Name of the file inside the ``samples`` directory located
        below ``PATH``, e.g. ``'meuse.txt'``.

    Returns
    -------
    df : pandas.DataFrame
        The file content, parsed by :func:`pandas.read_csv` with its
        default settings.

    """
    # sample files are stored in the 'samples' folder below PATH
    sample_path = os.path.join(PATH, 'samples', fname)
    frame = pd.read_csv(sample_path)
    return frame
11 changes: 11 additions & 0 deletions skgstat/data/samples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Meuse dataset

Please note that `./meuse.txt` is distributed under the GPL-3.0 license.
It originates from the R package `sp` (https://cran.r-project.org/package=sp);
if you reuse the data, cite:

Pebesma EJ, Bivand RS (2005). “Classes and methods for spatial
data in R.” R News, 5(2), 9–13. https://CRAN.R-project.org/doc/Rnews/.

Bivand RS, Pebesma E, Gomez-Rubio V (2013). Applied spatial data
analysis with R, Second edition. Springer, NY. https://asdar-book.org/.
156 changes: 156 additions & 0 deletions skgstat/data/samples/meuse.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
"x","y","cadmium","copper","lead","zinc","elev","dist","om","ffreq","soil","lime","landuse","dist.m"
181072,333611,11.7,85,299,1022,7.909,0.00135803,13.6,"1","1","1","Ah",50
181025,333558,8.6,81,277,1141,6.983,0.0122243,14,"1","1","1","Ah",30
181165,333537,6.5,68,199,640,7.8,0.103029,13,"1","1","1","Ah",150
181298,333484,2.6,81,116,257,7.655,0.190094,8,"1","2","0","Ga",270
181307,333330,2.8,48,117,269,7.48,0.27709,8.7,"1","2","0","Ah",380
181390,333260,3,61,137,281,7.791,0.364067,7.8,"1","2","0","Ga",470
181165,333370,3.2,31,132,346,8.217,0.190094,9.2,"1","2","0","Ah",240
181027,333363,2.8,29,150,406,8.49,0.0921516,9.5,"1","1","0","Ab",120
181060,333231,2.4,37,133,347,8.668,0.184614,10.6,"1","1","0","Ab",240
181232,333168,1.6,24,80,183,9.049,0.309702,6.3,"1","2","0","W",420
181191,333115,1.4,25,86,189,9.015,0.315116,6.4,"1","2","0","Fh",400
181032,333031,1.8,25,97,251,9.073,0.228123,9,"1","1","0","Ag",300
180874,333339,11.2,93,285,1096,7.32,0,15.4,"1","1","1","W",20
180969,333252,2.5,31,183,504,8.815,0.113932,8.4,"1","1","0","Ah",130
181011,333161,2,27,130,326,8.937,0.168336,9.1,"1","1","0","Ah",220
180830,333246,9.5,86,240,1032,7.702,0,16.2,"1","1","1","W",10
180763,333104,7,74,133,606,7.16,0.0122243,16,"1","1","1","W",10
180694,332972,7.1,69,148,711,7.1,0.0122243,16,"1","1","1","W",10
180625,332847,8.7,69,207,735,7.02,0,13.7,"1","1","1","W",10
180555,332707,12.9,95,284,1052,6.86,0,14.8,"1","1","1",NA,10
180642,332708,5.5,53,194,673,8.908,0.0703468,10.2,"1","1","1","Am",80
180704,332717,2.8,35,123,402,8.99,0.0975136,7.2,"1","1","1","Am",140
180704,332664,2.9,35,110,343,8.83,0.113932,7.2,"1","1","1","Ag",160
181153,332925,1.7,24,85,218,9.02,0.342321,7,"1","2","0","Ah",440
181147,332823,1.4,26,75,200,8.976,0.385804,6.9,"1","2","0","W",490
181167,332778,1.5,22,76,194,8.973,0.429289,6.3,"1","2","0","W",530
181008,332777,1.3,27,73,207,8.507,0.315116,5.6,"1","2","0","Ab",400
180973,332687,1.3,24,67,180,8.743,0.320574,4.4,"1","2","0","Ag",400
180916,332753,1.8,22,87,240,8.973,0.249863,5.3,"1","2","0","Ah",330
181352,332946,1.5,21,65,180,9.043,0.489064,4.8,"1","2","0","Ag",630
181133,332570,1.3,29,78,208,8.688,0.472778,2.6,"1","2","0","B",570
180878,332489,1.3,21,64,198,8.727,0.287957,1,"1","2","0","Ag",390
180829,332450,2.1,27,77,250,8.328,0.271622,2.4,"1","2","0","Ah",360
180954,332399,1.2,26,80,192,7.971,0.385807,1.9,"1","2","0","B",500
180956,332318,1.6,27,82,213,7.809,0.418417,3.1,"1","2","0","B",550
180710,332330,3,32,97,321,6.986,0.244474,1.6,"1","2","0","Ab",340
180632,332445,5.8,50,166,569,7.756,0.135709,3.5,"1","2","0","Ab",210
180530,332538,7.9,67,217,833,7.784,0.0484965,8.1,"1","1","1","Am",60
180478,332578,8.1,77,219,906,7,0,7.9,"1","1","1","W",10
180383,332476,14.1,108,405,1454,6.92,0.00135803,9.5,"1","1","1","W",20
180494,332330,2.4,32,102,298,7.516,0.135709,1.4,"1","2","0","Am",170
180561,332193,1.2,21,48,167,8.18,0.26622,NA,"1","2","0","Ga",320
180451,332175,1.7,22,65,176,8.694,0.211843,NA,"1","2","0","W",260
180410,332031,1.3,21,62,258,9.28,0.320572,2,"1","2","0","Ah",360
180355,332299,4.2,51,281,746,7.94,0.081222,5.1,"1","2","0","Ah",100
180292,332157,4.3,50,294,746,6.36,0.190086,5.3,"1","2","0","Am",200
180283,332014,3.1,38,211,464,7.78,0.287941,4.5,"1","2","0","Ah",320
180282,331861,1.7,26,135,365,8.18,0.423826,4.9,"1","2","0","Ah",480
180270,331707,1.7,24,112,282,9.42,0.554289,4.5,"1","2","0","Bw",660
180199,331591,2.1,32,162,375,8.867,0.603225,5.5,"1","2","0","Bw",690
180135,331552,1.7,24,94,222,8.292,0.614071,3.4,"1","2","0","Ab",710
180237,332351,8.2,47,191,812,8.06,0.00135803,11.1,"1","1","1","Ah",10
180103,332297,17,128,405,1548,7.98,0,12.3,"1","1","1","W",10
179973,332255,12,117,654,1839,7.9,0.0054321,16.5,"1","1","1","W",10
179826,332217,9.4,104,482,1528,7.74,0.0054321,13.9,"1","1","1","W",10
179687,332161,8.2,76,276,933,7.552,0.0054321,8.1,"1","1","1","W",20
179792,332035,2.6,36,180,432,7.76,0.146578,3.1,"1","1","0","Fw",200
179902,332113,3.5,34,207,550,6.74,0.135684,5.8,"1","1","0","Fw",140
180100,332213,10.9,90,541,1571,6.68,0.0703333,10.2,"1","1","1","Fw",70
179604,332059,7.3,80,310,1190,7.4,0.0484831,12,"1","1","1","W",20
179526,331936,9.4,78,210,907,7.44,0.0054321,14.1,"1","1","1","W",10
179495,331770,8.3,77,158,761,7.36,0.0054321,14.5,"1","1","1","Fw",10
179489,331633,7,65,141,659,7.2,0.0316663,14.8,"1","1","1","W",20
179414,331494,6.8,66,144,643,7.22,0.0122243,13.3,"1","1","1","Ah",10
179334,331366,7.4,72,181,801,7.36,0.0122243,15.2,"1","1","1","W",20
179255,331264,6.6,75,173,784,5.18,0.0373395,11.4,"1","1","1","W",20
179470,331125,7.8,75,399,1060,5.8,0.211846,9,"1","1","0","Ah",270
179692,330933,0.7,22,45,119,7.64,0.451037,3.6,"1","1","1","Fw",560
179852,330801,3.4,55,325,778,6.32,0.575877,6.9,"1","1","0","Bw",750
179140,330955,3.9,47,268,703,5.76,0.0756869,7,"1","1","1","Ab",80
179128,330867,3.5,46,252,676,6.48,0.12481,6.2,"1","1","1","Ab",130
179065,330864,4.7,55,315,793,6.48,0.103024,6.5,"1","1","0","W",110
179007,330727,3.9,49,260,685,6.32,0.157469,5.7,"1","1","0","W",200
179110,330758,3.1,39,237,593,6.32,0.200976,7,"1","1","1","Ah",260
179032,330645,2.9,45,228,549,6.16,0.200976,7.3,"1","1","0","W",270
179095,330636,3.9,48,241,680,6.56,0.26622,8.2,"1","1","0","W",320
179058,330510,2.7,36,201,539,6.9,0.298835,4.3,"1","1","0","Ah",360
178810,330666,2.5,36,204,560,7.54,0.0812247,4.4,"1","1","1","Am",80
178912,330779,5.6,68,429,1136,6.42,0.070355,8.2,"1","1","1","W",100
178981,330924,9.4,88,462,1383,6.28,0.0122243,8.5,"1","1","1","W",70
179076,331005,10.8,85,333,1161,6.34,0,9.6,"1","1","1","W",20
180151,330353,18.1,76,464,1672,7.307,0.0537723,17,"1","1","1","W",50
179211,331175,6.3,63,159,765,5.7,0.0593662,12.8,"1","1","1","W",80
181118,333214,2.1,32,116,279,7.72,0.211843,5.9,"1","2","0","W",290
179474,331304,1.8,25,81,241,7.932,0.12481,2.9,"2","2","1","Ah",160
179559,331423,2.2,27,131,317,7.82,0.12481,4.5,"2","1","0","W",160
179022,330873,2.8,36,216,545,8.575,0.0921516,10.7,"2","1","0","W",140
178953,330742,2.4,41,145,505,8.536,0.113941,9.4,"2","1","0","W",150
178875,330516,2.6,33,163,420,8.504,0.179216,9,"2","1","0","Ah",220
178803,330349,1.8,27,129,332,8.659,0.233596,7,"2","1","0","Am",280
179029,330394,2,38,148,400,7.633,0.336861,6.5,"2","1","1","Am",450
178605,330406,2.7,37,214,553,8.538,0.070355,9.4,"2","1","1","Am",70
178701,330557,2.7,34,226,577,7.68,0.0593662,10.2,"2","1","0","Am",70
179547,330245,0.9,19,54,155,7.564,0.255341,6.4,"2","1","0","W",340
179301,330179,0.9,22,70,224,7.76,0.364067,7.6,"2","1","0","W",470
179405,330567,0.4,26,73,180,7.653,0.429295,7,"2","1","0","Am",630
179462,330766,0.8,25,87,226,7.951,0.380328,5.6,"2","1","0","Am",460
179293,330797,0.4,22,76,186,8.176,0.249874,6.5,"2","1","0","Am",320
179180,330710,0.4,24,81,198,8.468,0.266212,6.6,"2","1","0","Ah",320
179206,330398,0.4,18,68,187,8.41,0.451037,5.9,"2","1","0","W",540
179618,330458,0.8,23,66,199,7.61,0.30971,6.5,"2","1","0","W",420
179782,330540,0.4,22,49,157,7.792,0.293359,6.4,"2","1","0","SPO",380
179980,330773,0.4,23,63,203,8.76,0.532351,7.2,"2","2","0","W",500
180067,331185,0.4,23,48,143,9.879,0.619513,6.6,"2","3","0","Am",760
180162,331387,0.2,23,51,136,9.097,0.684725,4.3,"2","2","0","Ah",750
180451,331473,0.2,18,50,117,9.095,0.809742,5.3,"2","3","0","Fw",1000
180328,331158,0.4,20,39,113,9.717,0.880389,4.1,"2","3","0","Ah",860
180276,330963,0.2,22,48,130,9.924,0.749591,6.1,"2","3","0","Ah",680
180114,330803,0.2,27,64,192,9.404,0.575752,7.5,"2","3","0","Fw",500
179881,330912,0.4,25,84,240,10.52,0.581484,8.8,"2","3","0","STA",650
179774,330921,0.2,30,67,221,8.84,0.49452,5.7,"2","3","0","DEN",630
179657,331150,0.2,23,49,140,8.472,0.32058,6.1,"2","3","0","Fw",410
179731,331245,0.2,24,48,128,9.634,0.336851,7.1,"2","3","0","Ah",390
179717,331441,0.2,21,56,166,9.206,0.249852,4.1,"2","2","0","Ah",310
179446,331422,0.2,24,65,191,8.47,0.0756869,6,"2","1","0","Ah",70
179524,331565,0.2,21,84,232,8.463,0.0756869,6.6,"2","1","0","W",70
179644,331730,0.2,23,75,203,9.691,0.162853,6.8,"2","1","1","STA",150
180321,330366,3.7,53,250,722,8.704,0.0974916,9.1,"2","2","0","Bw",80
180162,331837,0.2,33,81,210,9.42,0.440142,5.9,"2","2","0","Ah",450
180029,331720,0.2,22,72,198,9.573,0.4619,4.9,"2","2","0","Aa",530
179797,331919,0.2,23,86,139,9.555,0.222701,7.1,"2","1","0","W",240
179642,331955,0.2,25,94,253,8.779,0.103024,8.1,"2","1","1","Tv",70
179849,332142,1.2,30,244,703,8.54,0.0921353,8.3,"2","1","0","Fw",70
180265,332297,2.4,47,297,832,8.809,0.0484884,10,"2","1","0","Ah",60
180107,332101,0.2,31,96,262,9.523,0.168331,5.9,"2","1","0","Ah",190
180462,331947,0.2,20,56,142,9.811,0.38581,5,"2","2","0","Ah",450
180478,331822,0.2,16,49,119,9.604,0.489064,4.5,"2","2","0","Am",550
180347,331700,0.2,17,50,152,9.732,0.57602,5.4,"2","2","0","Am",650
180862,333116,0.4,26,148,415,9.518,0.0812194,2.3,"2","1","0","Am",100
180700,332882,1.6,34,162,474,9.72,0.0373369,7.5,"2","1","0","W",170
180201,331160,0.8,18,37,126,9.036,0.771698,4.6,"2","3","1","Ah",860
180173,331923,1.2,23,80,210,9.528,0.336829,5.8,"2","2","0","W",410
180923,332874,0.2,20,80,220,9.155,0.228123,4.4,"3","1","0","Aa",290
180467,331694,0.2,14,49,133,10.08,0.597761,4.4,"3","2","0","Am",680
179917,331325,0.8,46,42,141,9.97,0.44558,4.5,"3","2","0","Am",540
179822,331242,1,29,48,158,10.136,0.396675,5.2,"3","2","0","Am",480
179991,331069,0.8,19,41,129,10.32,0.581478,4.6,"3","3","0","W",720
179120,330578,1.2,31,73,206,9.041,0.287966,6.9,"3","1","0","W",380
179034,330561,2,27,146,451,7.86,0.233596,7,"3","1","0","W",310
179085,330433,1.5,29,95,296,8.741,0.364067,5.4,"3","1","0","Ah",430
179236,330046,1.1,22,72,189,7.822,0.331454,6.2,"3","1","0","Ah",370
179456,330072,0.8,20,51,154,7.78,0.211846,5,"3","1","0","Fw",290
179550,329940,0.8,20,54,169,8.121,0.103029,5.1,"3","1","0","W",150
179445,329807,2.1,29,136,403,8.231,0.070355,8.1,"3","1","0","Bw",70
179337,329870,2.5,38,170,471,8.351,0.146576,8,"3","1","0","Bw",220
179245,329714,3.8,39,179,612,7.3,0.0537723,8.8,"3","1","0","W",80
179024,329733,3.2,35,200,601,7.536,0.119286,9.3,"3","1","0","W",120
178786,329822,3.1,42,258,783,7.706,0.0921435,8.4,"3","1","0","Ah",120
179135,329890,1.5,24,93,258,8.07,0.249863,7.7,"3","1","0","Am",260
179030,330082,1.2,20,68,214,8.226,0.37494,5.7,"3","1","0","Ah",440
179184,330182,0.8,20,49,166,8.128,0.423837,4.7,"3","1","0","Am",540
179085,330292,3.1,39,173,496,8.577,0.423837,9.1,"3","1","0","Ah",520
178875,330311,2.1,31,119,342,8.429,0.27709,6.5,"3","1","0","Ah",350
179466,330381,0.8,21,51,162,9.406,0.358606,5.7,"3","1","0","W",460
180627,330190,2.7,27,124,375,8.261,0.0122243,5.5,"3","3","0","W",40
19 changes: 19 additions & 0 deletions skgstat/tests/test_data_loader.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

from skgstat import data
import numpy as np
from numpy.testing import assert_array_almost_equal
Expand Down Expand Up @@ -40,3 +42,20 @@ def test_aniso_data():

c, v = data.aniso(N=25).get('sample')
assert len(c) == len(v) == 25


def test_meuse_loads():
    """
    The zinc sample returned by ``data.meuse`` matches the raw sample
    file, and an unsupported variable name is rejected.
    """
    # reference values: the zinc column read directly from the sample file
    frame = data._loader.read_sample_file('meuse.txt')
    expected = frame[['zinc']].values

    # the same column loaded through the public data function
    _coords, zinc_values = data.meuse(variable='zinc').get('sample')

    assert_array_almost_equal(zinc_values, expected, decimal=6)

    # check exception: unknown variables have to raise
    with pytest.raises(AttributeError) as excinfo:
        data.meuse(variable='unknown')

    assert 'variable has to be in' in str(excinfo.value)

0 comments on commit 45b20c7

Please sign in to comment.