Merge pull request #2 from osm-without-borders/test_with_pandas
check number of zones by country - POC
antoine-de authored Apr 6, 2018
2 parents 91061ed + 04c720f commit 33cfaac
Showing 9 changed files with 678 additions and 1 deletion.
5 changes: 5 additions & 0 deletions .gitignore
@@ -0,0 +1,5 @@
__pycache__
.pytest_cache
data_volumetric.csv
data_volumetric.json
cosmogony.geojson
18 changes: 18 additions & 0 deletions Pipfile
@@ -0,0 +1,18 @@
[[source]]

name = "pypi"
url = "https://pypi.python.org/simple"
verify_ssl = true


[packages]

matplotlib = "*"
pandas = "*"
geopandas = "*"
pytest = "*"
ijson = "*"
ipython = "*"
cffi = "*"

[dev-packages]
435 changes: 435 additions & 0 deletions Pipfile.lock


22 changes: 21 additions & 1 deletion README.md
@@ -1,11 +1,31 @@
# cosmogony-data-dashboard

To show stats about the world [Cosmogony](https://github.com/osm-without-borders/cosmogony)
The purpose of this repo is to provide tools to compute and show stats about the world [Cosmogony](https://github.com/osm-without-borders/cosmogony).

It can help to check the quality (well, mostly the quantity actually...) of OpenStreetMap boundary zones.

Contributions are very welcome in this repo. If you have new ideas for tests to add, please take a look at the [founding issue](https://github.com/osm-without-borders/cosmogony/issues/4) first ;)

:construction::warning: This is a work in progress, and deeply connected to the Cosmogony output format. Follow along in [this issue](https://github.com/osm-without-borders/cosmogony/issues/4) :warning::construction:

## Country stats and tests

### Purpose

We want to compute the number of zones of each kind for each country. Then we want to compare this output with reference values (the actual number of zones of each kind in the real world).
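
For a rough idea of what this looks like, here is a minimal sketch using the `ZonesIndex` helper added in this PR; the cosmogony file name, the Wikidata id (`Q142` is France) and the `state_district` zone type are only example values.

```python
# Minimal sketch, assuming a local cosmogony dump; file name, id and zone type are examples.
from utils import ZonesIndex, UnknownWikidataId

zones_index = ZonesIndex.init_from_cosmogony('my-cosmogony.json')

try:
    # Count every 'state_district' zone found below France (Wikidata id Q142).
    departments = list(zones_index.iter_children(
        'Q142', lambda z: z['zone_type'] == 'state_district'))
    print('found {} zones'.format(len(departments)))
except UnknownWikidataId:
    print('France is not in this cosmogony extract')
```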

### Compute and test against reference values

You will need `python3` and a few dependencies you can install with `pipenv install --three`.

To compute the number of zones of each kind (volumetric stats) and test them against the reference values, just type:

`pipenv run py.test --cosmogony my-cosmogony.json`

Detailed test results are written to `data_volumetric.json`.
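
Each record in that file should roughly look like the sample below; the field names come from the test code in this PR, while the values are purely illustrative:

```json
{
  "name": "France",
  "wikidata_id": "Q142",
  "zone_type": "state_district",
  "expected_min": 96,
  "expected_max": 102,
  "is_known_failure": "no",
  "total": 101,
  "test_status": "ok"
}
```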

You can also get a visual overview of the test results with the `index.html` file at the root of the repo (you may need to serve the directory over HTTP, e.g. with `python3 -m http.server`, since most browsers will refuse to `fetch` the local JSON file from a `file://` page).

### Reference values

For now, the reference values live in a big CSV file (`reference_stats_values.csv`).
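
Judging from the columns the tests read, a row should look roughly like the sample below (column order and values are only an illustration):

```csv
name,wikidata_id,zone_type,expected_min,expected_max,is_known_failure
France,Q142,state_district,96,102,no
```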
21 changes: 21 additions & 0 deletions conftest.py
@@ -0,0 +1,21 @@
# coding: utf-8
import pandas as pd

from utils import ZonesIndex


def pytest_addoption(parser):
    parser.addoption("--cosmogony", action="store", required=True,
                     help="a cosmogony json file")


def pytest_generate_tests(metafunc):
    # Build the zones index once, from the cosmogony file given on the command line.
    cosmogony_path = metafunc.config.getoption('cosmogony')
    zones_index = ZonesIndex.init_from_cosmogony(cosmogony_path)

    expected_values = pd.read_csv('reference_stats_values.csv')
    rows = (row for _, row in expected_values.iterrows())

    # Parametrize each test with one reference row and with the shared zones index.
    if 'line' in metafunc.fixturenames:
        metafunc.parametrize('line', rows)
    if 'zones_index' in metafunc.fixturenames:
        metafunc.parametrize('zones_index', [zones_index])
72 changes: 72 additions & 0 deletions index.html
@@ -0,0 +1,72 @@
<!DOCTYPE html>
<html>
<head>
    <title>Volumetric Data Dashboard</title>
    <meta charset='utf-8'/>
    <meta name='viewport' content='initial-scale=1,user-scalable=yes'/>
    <style>
        table,
        td,
        th {
            margin: 10px 0;
            padding: 2px 4px;
            text-align: center;
            border-collapse: collapse;
        }
        td,
        th {
            border: 1px solid black;
        }
    </style>
</head>
<body>
    <table id="volumetric-dashboard" class="sort"></table>
    <script>
        fetch(`data_volumetric.json`).then((r) => r.json()).then((data) => {
            var table = document.getElementById('volumetric-dashboard');

            var col = ['name', 'zone_type', 'result', 'status'];

            for (var i = 0; i < data.length; i++) {
                var result_text = `${ (data[i]['total'] != -1)
                    ? data[i]['total']
                    : "??"} `;
                result_text += `<br>(expected : ${data[i]['expected_min']} ~ ${data[i]['expected_max']})`;

                var status = '';
                if (data[i]['test_status'] == 'ok') {
                    // "😍" marks a test that passes although it is flagged as a known failure
                    status = (data[i]['is_known_failure'] == "yes") ? "😍" : "✅";
                }
                if (data[i]['test_status'] == 'ko') {
                    status += (data[i]['is_known_failure'] == "yes")
                        ? "📉"
                        : "❎❎";
                }
                if (data[i]['test_status'] == 'skip') {
                    status = '🤔';
                }

                var tr = table.insertRow(-1);
                tr.insertCell(-1).innerHTML = data[i]['name'];
                tr.insertCell(-1).innerHTML = data[i]['zone_type'];
                var tabCell = tr.insertCell(-1);
                tabCell.innerHTML = result_text;
                tr.insertCell(-1).innerHTML = status;
            }

            var header = table.createTHead();
            var trh = header.insertRow(0);
            for (var i = 0; i < col.length; i++) {
                var th = document.createElement("th");
                th.innerHTML = col[i];
                trh.appendChild(th);
            }
        })
    </script>
</body>
</html>
45 changes: 45 additions & 0 deletions test_volumetries.py
@@ -0,0 +1,45 @@
# coding: utf-8
import pandas as pd
import json
import csv
import pytest

from utils import UnknownWikidataId


def check_if_test_passes(expected_min, expected_max, total):
    if expected_min <= total <= expected_max:
        return "ok"
    else:
        return "ko"


class TestCosmogony:
    @classmethod
    def setup_class(cls):
        cls.results = pd.DataFrame()

    @classmethod
    def teardown_class(cls):
        # Dump every tested row (with its status) for the html dashboard.
        cls.results.to_json('data_volumetric.json', orient='records')

    def test_row(self, line, zones_index):
        try:
            matched_zones = list(zones_index.iter_children(
                line['wikidata_id'],
                lambda z: z['zone_type'] == line['zone_type']
            ))
        except UnknownWikidataId:
            total = -1
            test_status = 'skip'
        else:
            total = len(matched_zones)
            test_status = check_if_test_passes(line.expected_min, line.expected_max, total)

        line['total'] = total
        line['test_status'] = test_status
        TestCosmogony.results = TestCosmogony.results.append(line)

        if test_status == 'skip':
            pytest.skip("no data for this test")

        assert test_status == "ok", "Country {} - expected between {} and {} for {}, found {}".format(
            line['name'], line['expected_min'], line['expected_max'], line['zone_type'], total)
1 change: 1 addition & 0 deletions utils/__init__.py
@@ -0,0 +1 @@
from .index import ZonesIndex, UnknownWikidataId
60 changes: 60 additions & 0 deletions utils/index.py
@@ -0,0 +1,60 @@
from collections import defaultdict

import ijson.backends.yajl2_cffi as ijson


class UnknownWikidataId(Exception):
    pass


class ZonesIndex:
    """
    Index cosmogony zones both by internal `id` and wikidata id
    """
    @classmethod
    def init_from_cosmogony(cls, cosmogony_path):
        zones_index = cls()

        print('Reading zones...')
        with open(cosmogony_path, 'rb') as f:
            # Stream the zones to avoid loading the whole (potentially huge) json in memory.
            zones = ijson.items(f, 'zones.item')
            for z in zones:
                z.pop('geometry', None)  # geometries are not needed for counting
                zones_index.insert(z)
        print('{} zones have been read'.format(len(zones_index)))

        zones_index.build_children()
        return zones_index

    def __init__(self):
        self.id_to_zone = dict()
        self.wd_to_zone = dict()
        self.id_to_children = defaultdict(list)

    def insert(self, zone):
        self.id_to_zone[zone['id']] = zone
        wikidata_id = zone.get('wikidata')
        if wikidata_id:
            self.wd_to_zone[wikidata_id] = zone

    def build_children(self):
        for z in self.id_to_zone.values():
            parent_id = z.get('parent')
            if parent_id:
                self.id_to_children[parent_id].append(z)

    def _iter_all_children(self, zone):
        children = self.id_to_children[zone['id']]
        for c in children:
            yield c
            yield from self._iter_all_children(c)

    def iter_children(self, wikidata_id, filter_fun=lambda x: True):
        try:
            zone = self.wd_to_zone[wikidata_id]
        except KeyError as e:
            raise UnknownWikidataId(wikidata_id) from e

        return filter(filter_fun, self._iter_all_children(zone))

    def __len__(self):
        return len(self.id_to_zone)
