-
Notifications
You must be signed in to change notification settings - Fork 4
/
preset2style.py
executable file
·328 lines (287 loc) · 12.2 KB
/
preset2style.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__version__ = '0.1.1'
import os
import re
import sys
import bz2
import optparse
try:
import lxml.etree as etree
except ImportError:
try:
import xml.etree.ElementTree as etree
except ImportError:
import elementtree.ElementTree as etree
# TODO - autodetect postgres type, and guess at geometry specialization
# Are JOSM presets typed at all - or are values always text?
class Osm2PgsqlStyle:
    """Collect osm2pgsql style rules from several tag sources.

    ``self.rules`` maps a tag name to a dict holding ``osm_type``,
    ``data_type``, ``flags``, ``source`` and ``source_file``.  Each merge
    method additionally sets a marker on the rules it touches:
    ``in_style``, ``in_preset`` and ``in_user`` are ``True`` when present,
    and ``in_osm`` counts how many times the tag occurred in the .osm file.
    """

    def __init__(self):
        self.rules = {}

    def __str__(self):
        """Return the whole style as text (the lines yielded by iteration)."""
        return ''.join(self)

    def __iter__(self):
        """Yield one newline-terminated style line per tag, sorted by tag."""
        keys = sorted(self.rules)
        # widest tag name, used to line up the data type column
        width = max([len(k) for k in keys] or [0])
        for tag in keys:
            rule = self.rules[tag]
            osm_type = rule['osm_type']
            # pad the type column to 9 characters, but always keep at least
            # one space so a long type can never run into the tag
            line = osm_type + ' ' * max(1, 9 - len(osm_type))
            gap = ' ' * (width - len(tag) + 1)
            line += '%s %s %s' % (tag, gap, rule['data_type'])
            if rule.get('flags'):
                line += ' '
                line += rule.get('flags')
            # trailing comment records which source introduced the tag
            line += ' #%s' % rule.get('source')
            line += '\n'
            yield line

    @staticmethod
    def _check_key(key):
        """Raise ValueError when a tag name contains a space."""
        if ' ' in key:
            raise ValueError('Tag "%s" has a space, which is not good, fix it before continuing!\n' % key)

    def _add_rule(self, key, source, source_file,
                  osm_type='node,way', data_type='text', flags=None):
        """Store a new rule for `key` in self.rules and return its dict."""
        # TODO - safe to autodetect postgres type?
        # TODO - should we try to push just into line table or polygon table for certain keys?
        rule = {
            'osm_type': osm_type,
            'data_type': data_type,
            'flags': flags,
            'source': source,
            'source_file': source_file,
        }
        self.rules[key] = rule
        return rule

    def print_keys_by_source(self, csv=True):
        """Print, for every tag, which sources it was seen in.

        With `csv` true (the default) a header row and one CSV row per tag
        are printed; otherwise a padded "tag --> 'sources'" listing meant
        for reading in a terminal.
        """
        keys = sorted(self.rules)
        if csv:
            def yes_no(value):
                if value:
                    return 'yes'
                return 'no'

            print('tag,"in .osm file","in josm preset","in osm2pgsql style","in user supplied"')
            for tag in keys:
                rule = self.rules[tag]
                print('"%s",%s,%s,%s,%s' % (
                    tag,
                    rule.get('in_osm') or 0,
                    yes_no(rule.get('in_preset')),
                    yes_no(rule.get('in_style')),
                    yes_no(rule.get('in_user')),
                ))
        else:
            # formatting for terminal viewing
            spaces = 25
            for tag in keys:
                rule = self.rules[tag]
                space = ' ' * (spaces - len(tag) + 1)
                within = []
                if rule.get('in_style'):
                    within.append('style')
                if rule.get('in_preset'):
                    within.append('preset')
                if rule.get('in_osm'):
                    within.append('osm(%s)' % rule['in_osm'])
                if rule.get('in_user'):
                    within.append('user')
                print("%s%s --> '%s'" % (tag, space, '/'.join(within)))

    def write(self, filename):
        """Write the rules, sorted by tag, to `filename` as a style file."""
        lines = ["# osm2pgsql style file. generated by 'preset2style.py'\n"]
        for tag in sorted(self.rules):
            rule = self.rules[tag]
            line = '%s %s %s' % (rule['osm_type'], tag, rule['data_type'])
            if rule.get('flags'):
                # flags are a separate column, so they need their own space
                line += ' ' + rule['flags']
            lines.append(line + '\n')
        # text mode: the lines are str, and `with` closes the file on error
        with open(filename, 'w') as f_:
            f_.writelines(lines)

    def parse_style(self, filename):
        """Parse an osm2pgsql style file, replacing any existing rules.

        Lines have the columns "OsmType Tag DataType [Flags]"; anything
        after a '#' is ignored.  When a tag is listed more than once the
        first definition is kept.  Lines with fewer than three columns are
        reported on stderr and skipped.

        TODO - check this logic against the read_style_file() function in
        osm2pgsql to make sure logic is roughly equivalent
        """
        # clear the rules dict
        self.rules = {}
        # match any word including commas and colons
        token = re.compile('[a-zA-Z0-9_,:]+')
        with open(filename, 'r') as style:
            for raw in style:
                # drop full-line and trailing comments alike
                text = raw.split('#', 1)[0].strip()
                if not text:
                    continue
                parts = token.findall(text)
                if len(parts) < 3:
                    # diagnostics go to stderr so they cannot end up inside
                    # a style that is being printed on stdout
                    sys.stderr.write('problem! %s %s\n' % (parts, text))
                    continue
                key = parts[1]
                self._check_key(key)
                rule = self.rules.get(key)
                if rule is None:
                    flags = None
                    if len(parts) > 3:
                        flags = parts[3]
                    rule = self._add_rule(key, 'osm2pgsql style', filename,
                                          osm_type=parts[0],
                                          data_type=parts[2],
                                          flags=flags)
                rule['in_style'] = True

    def merge_josm_preset_keys(self, filename):
        """Merge the `key` attributes found in a JOSM preset XML file.

        Every child element of each group//item that carries a `key`
        attribute contributes a tag.  New tags get a default
        'node,way' / 'text' rule; existing ones are only marked as being
        in the preset.

        Raises RuntimeError when no item is found and ValueError when a
        key contains a space.
        """
        root = etree.parse(filename).getroot()
        ns = ''
        # the root tag looks like '{namespace}name' when the file is namespaced
        if '{' in root.tag and '}' in root.tag and len(root.tag) > 3:
            ns = '{%s}' % root.tag.split('}')[0][1:]
        results = root.findall('%sgroup//%sitem' % (ns, ns))
        if not results:
            raise RuntimeError("Could not parse anything from the josm preset: %s" % filename)
        for item in results:
            # iterating an element yields its children with both lxml and
            # ElementTree (Element.getchildren() is gone in Python 3.9)
            for child in item:
                key = child.get('key')
                if not key:
                    continue
                self._check_key(key)
                rule = self.rules.get(key)
                if rule is None:
                    rule = self._add_rule(key, 'JOSM preset', filename)
                rule['in_preset'] = True

    def merge_user_keys(self, keys):
        """Merge user supplied tags.

        `keys` is either a whitespace separated string or an iterable of
        tag names.  Raises ValueError when a new tag contains a space.
        """
        if hasattr(keys, 'split'):
            # split() without an argument ignores repeated/leading/trailing
            # whitespace, so no empty tag is ever created
            keys = keys.split()
        for key in keys:
            rule = self.rules.get(key)
            if rule is None:
                self._check_key(key)
                rule = self._add_rule(key, 'user', None)
            rule['in_user'] = True

    def merge_osm_keys(self, filename):
        """Merge the tag keys of an .osm XML file and count occurrences.

        Exits the program with status 1 when `filename` ends in 'bz2'.
        """
        if filename.endswith('bz2'):
            # too dangerous to accept bz2 right now
            sys.stderr.write('\nSorry, bz2 files are not accepted, please uncompress first with:\n\tbzip2 -d --keep %s\n' % filename)
            sys.exit(1)
        root = etree.parse(filename).getroot()
        for elem in root.findall('*/tag'):
            key = elem.get('k')
            if key is None:
                # a <tag> without a k attribute names no tag at all
                continue
            rule = self.rules.get(key)
            if rule is None:
                rule = self._add_rule(key, '.osm file', filename)
            rule['in_osm'] = rule.get('in_osm', 0) + 1
# Command line entry point: read tags from whichever sources were given,
# merge them into one Osm2PgsqlStyle, then print either the resulting style
# or (with --meta) a per-tag listing of the sources each tag appears in.
if __name__ == "__main__":
    parser = optparse.OptionParser(usage="""%prog [OPTIONS]

This tool is designed to author a style file suitable for
passing to osm2pgsql for importing custom tags into postgis.

It can ingest and merge tags from an existing style file,
a josm preset file, a .osm file itself (warning, not recommended
as this will likely get tags invalid for passing to osm2pgsql either
because they are too long or they have odd characters), or via custom
tags supplied on the command line.

It also has a --meta option which will dump which tags are in which sources
instead of printing out a style file for osm2pgsql. This is useful for seeing
how many of your tags show up in an actual .osm file, and is dumped as a csv.

Example usage
-------------

Full help:
  $ %prog -h (or --help for possible options)

Read osm2pgsql's default.style, add a few custom tags:
  $ %prog -s default.style --tags 'name:kr name:fr'

Merge a josm preset with the default.style
  $ %prog -s default.style --preset kiosks_haiti.xml

Print metadata about an .osm file and which tags occur in a josm preset
  $ %prog -o PaP.osm --preset kiosks_haiti.xml --meta""", version='%prog ' + __version__)

    parser.add_option('-p', '--preset', dest='preset',
                      default=None,
                      help='Read in josm preset tags')
    parser.add_option('-s', '--style', dest='style',
                      default=None,
                      help='Read in osm2pgsql style tags')
    parser.add_option('-o', '--osm', dest='osm',
                      default=None,
                      help='Read in .osm file tags')
    parser.add_option('-t', '--tags', dest='user',
                      default=None,
                      help='Read in custom tags supplied by user (quoted and space delimited)')
    parser.add_option('--meta', dest='metadata',
                      default=False,
                      help='print meta info about which tags are in which source files and if an osm file is passed then summarize the number of tag occurrences',
                      action='store_true')

    (options, arguments) = parser.parse_args()

    # true when at least one tag source was named on the command line
    has_source = options.osm or options.preset or options.user or options.style

    if len(arguments) > 0 and not has_source:
        sys.stderr.write('This program does not accept any arguments, just keyword options like "--key value". Pass -h to see all the options\n')
        sys.exit(1)

    if not has_source:
        sys.stderr.write('This program requires one or more keyword options like "--key value". Pass -h to see all the options\n')
        sys.exit(1)

    # TODO - only accept keys in style file that exist in osm file
    style = Osm2PgsqlStyle()

    # blend with existing style file (must come first: it resets the rules)
    if options.style:
        style.parse_style(options.style)

    # next blend with preset
    if options.preset:
        style.merge_josm_preset_keys(options.preset)

    # validate against existing osm file
    if options.osm:
        style.merge_osm_keys(options.osm)

    if options.user:
        style.merge_user_keys(options.user)

    if options.metadata:
        # print out metadata
        style.print_keys_by_source()
    else:
        # print out synthetic style; the parenthesised form is valid as both
        # a Python 2 print statement and a Python 3 function call
        print(style)