-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpostprocess.py
68 lines (48 loc) · 2.67 KB
/
postprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
sparql = SPARQLWrapper("https://landregistry.data.gov.uk/landregistry/query")
sparql.setReturnFormat(JSON)
sparql.setQuery("""
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix sr: <http://data.ordnancesurvey.co.uk/ontology/spatialrelations/>
prefix ukhpi: <http://landregistry.data.gov.uk/def/ukhpi/>
prefix lrppi: <http://landregistry.data.gov.uk/def/ppi/>
prefix skos: <http://www.w3.org/2004/02/skos/core#>
prefix lrcommon: <http://landregistry.data.gov.uk/def/common/>
# Returns house price index average prices for flats, semis, detacted and terraced for all LA since 2015, with GSS codes
SELECT ?regionName ?code ?date ?hpi ?hpiDetached ?hpiFlatMaisonette ?hpiSemiDetached ?hpiTerraced ?averagePriceDetached ?averagePriceFlatMaisonette ?averagePriceSemiDetached ?averagePriceTerraced
{
BIND( now() AS ?currentDateTime ) .
BIND( CONCAT( str(year(?currentDateTime)-6), "-", str(month(?currentDateTime)), "-", str(day(?currentDateTime)) ) AS ?currentDateString ) .
?region ukhpi:refPeriodStart ?date;
ukhpi:housePriceIndex ?hpi;
ukhpi:housePriceIndexFlatMaisonette ?hpiFlatMaisonette;
ukhpi:averagePriceFlatMaisonette ?averagePriceFlatMaisonette.
OPTIONAL{?region ukhpi:housePriceIndexDetached ?hpiDetached.}.
OPTIONAL{?region ukhpi:housePriceIndexSemiDetached ?hpiSemiDetached.}.
OPTIONAL{?region ukhpi:housePriceIndexTerraced ?hpiTerraced.}.
OPTIONAL{?region ukhpi:averagePriceDetached ?averagePriceDetached.}.
OPTIONAL{?region ukhpi:averagePriceSemiDetached ?averagePriceSemiDetached.}.
OPTIONAL{?region ukhpi:averagePriceTerraced ?averagePriceTerraced.}.
?region ukhpi:refRegion ?regionRef.
?regionRef rdfs:seeAlso ?code.
?regionRef rdfs:label ?regionName.
FILTER (langMatches( lang(?regionName), "EN")&&
?date > xsd:date(?currentDateString)).
FILTER contains(str(?code),"gov").
}
"""
)
try:
ret = sparql.queryAndConvert()
# for r in ret["results"]["bindings"]:
# print(r)
except Exception as e:
print(e)
df=pd.json_normalize(ret["results"]["bindings"])
df['code']=df['code.value'].str.split('/',expand=True)[5]
df2=df[['regionName.value', 'code','date.value','hpi.value','hpiDetached.value','hpiFlatMaisonette.value','hpiSemiDetached.value','hpiTerraced.value', 'averagePriceDetached.value','averagePriceFlatMaisonette.value','averagePriceSemiDetached.value','averagePriceTerraced.value' ]]
df2.to_csv('landreg.csv',index=False)