-
Notifications
You must be signed in to change notification settings - Fork 2
/
methods.py
7502 lines (6258 loc) · 371 KB
/
methods.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import collections, errno, gc, glob, math, os, sys, time
from heapq import merge
import pandas as pd
import numpy as np
import geopandas as gpd
import osmnx as ox
import networkx as nx
from shapely.geometry import Point, shape, LineString, Polygon
from shapely.ops import transform
import pyproj
from pyproj import CRS
from scipy.spatial import cKDTree
from functools import partial
from network_wrangler import WranglerLogger
import peartree as pt
from urllib.request import urlopen
from zipfile import ZipFile
from io import BytesIO
# Geographic CRS: World Geodetic System 1984 (WGS84), the latitude/longitude
# system used by GPS.  https://epsg.io/4326
LAT_LONG_EPSG = 4326

# Planar (projected) CRS in which distances can be measured in meters:
# NAD83 / UTM zone 15N.  https://epsg.io/26915
# NOTE(review): UTM zone 15N is centered on 93W, whereas the Bay Area lies in
# UTM zone 10N (EPSG:26910) -- confirm that this choice is intentional.
NEAREST_MATCH_EPSG = 26915

# Number of polygons used for the SharedStreets extraction; this is the same as
#   - the number of rows in the step0 INPUT_POLYGON
#   - the number of geojson files in the step0 OUTPUT_BOUNDARY_DIR
NUM_SHST_BOUNDARIES = 14

# Docker image name for the SharedStreets (ShSt) docker work
DOCKER_SHST_IMAGE_NAME = 'shst:latest'

# The nine Bay Area counties
BayArea_COUNTIES = [
    'San Francisco',
    'Santa Clara',
    'Sonoma',
    'Marin',
    'San Mateo',
    'Contra Costa',
    'Solano',
    'Napa',
    'Alameda',
]
# Start and end hours of each time-of-day period.  NT and EA also carry a
# separate "frequency_start"/"frequency_end" window, which is the one used when
# calculating trip frequency for those two periods.
TIME_OF_DAY_DICT = {
    "AM": dict(start=6, end=10),
    "MD": dict(start=10, end=15),
    "PM": dict(start=15, end=19),
    "NT": dict(start=19, end=3, frequency_start=19, frequency_end=22),
    "EA": dict(start=3, end=6, frequency_start=5, frequency_end=6),
}

# Number of hours in each time period, for the transit frequency calculation
TOD_NUMHOURS_FREQUENCY_DICT = {
    "AM": 4,
    "MD": 5,
    "PM": 4,
    "NT": 3,
    "EA": 1,
}
# OpenStreetMap (OSM) way (link) tags that we want; used in step2_osmnx_extraction.py.
# The osmnx defaults can be viewed here:
#   https://osmnx.readthedocs.io/en/stable/osmnx.html?highlight=util.config#osmnx.utils.config
# and are configurable as useful_tags_way.
# Each tag is marked as holding either a numeric or a string value.
TAG_NUMERIC = 1
TAG_STRING = 2
OSM_WAY_TAGS = {
    # general way attributes
    'highway': TAG_STRING,               # https://wiki.openstreetmap.org/wiki/Key:highway
    'tunnel': TAG_STRING,                # https://wiki.openstreetmap.org/wiki/Key:tunnel
    'bridge': TAG_STRING,                # https://wiki.openstreetmap.org/wiki/Key:bridge
    'junction': TAG_STRING,              # https://wiki.openstreetmap.org/wiki/Key:junction
    'oneway': TAG_STRING,                # https://wiki.openstreetmap.org/wiki/Key:oneway
    'name': TAG_STRING,                  # https://wiki.openstreetmap.org/wiki/Key:name
    'ref': TAG_STRING,                   # https://wiki.openstreetmap.org/wiki/Key:ref
    'width': TAG_STRING,                 # https://wiki.openstreetmap.org/wiki/Key:width
    'est_width': TAG_NUMERIC,            # https://wiki.openstreetmap.org/wiki/Key:est_width
    'access': TAG_STRING,                # https://wiki.openstreetmap.org/wiki/Key:access
    'area': TAG_STRING,                  # https://wiki.openstreetmap.org/wiki/Key:area
    'service': TAG_STRING,               # https://wiki.openstreetmap.org/wiki/Key:service
    'maxspeed': TAG_STRING,              # https://wiki.openstreetmap.org/wiki/Key:maxspeed
    # lanes accounting
    'lanes': TAG_NUMERIC,                # https://wiki.openstreetmap.org/wiki/Key:lanes
    'lanes:backward': TAG_NUMERIC,       # https://wiki.openstreetmap.org/wiki/Key:lanes#Lanes_in_different_directions
    'lanes:forward': TAG_NUMERIC,        # https://wiki.openstreetmap.org/wiki/Key:lanes#Lanes_in_different_directions
    'lanes:both_ways': TAG_NUMERIC,      # https://wiki.openstreetmap.org/wiki/Key:lanes#Lanes_in_different_directions
    'bus': TAG_STRING,                   # https://wiki.openstreetmap.org/wiki/Key:bus
    'lanes:bus': TAG_NUMERIC,            # https://wiki.openstreetmap.org/wiki/Key:lanes:psv
    'lanes:bus:forward': TAG_NUMERIC,    # https://wiki.openstreetmap.org/wiki/Key:lanes:psv
    'lanes:bus:backward': TAG_NUMERIC,   # https://wiki.openstreetmap.org/wiki/Key:lanes:psv
    'hov': TAG_STRING,                   # https://wiki.openstreetmap.org/wiki/Key:hov
    'hov:lanes': TAG_STRING,             # https://wiki.openstreetmap.org/wiki/Key:hov
    'taxi': TAG_STRING,                  # https://wiki.openstreetmap.org/wiki/Key:taxi
    'lanes:hov': TAG_NUMERIC,            # https://wiki.openstreetmap.org/wiki/Key:hov
    'shoulder': TAG_STRING,              # https://wiki.openstreetmap.org/wiki/Key:shoulder
    'turn': TAG_STRING,                  # https://wiki.openstreetmap.org/wiki/Key:turn
    'turn:lanes': TAG_STRING,            # https://wiki.openstreetmap.org/wiki/Key:turn#Turning_indications_per_lane
    'turn:lanes:forward': TAG_STRING,    # https://wiki.openstreetmap.org/wiki/Key:turn#Turning_indications_per_lane
    'turn:lanes:backward': TAG_STRING,   # https://wiki.openstreetmap.org/wiki/Key:turn#Turning_indications_per_lane
    # active modes
    'sidewalk': TAG_STRING,              # https://wiki.openstreetmap.org/wiki/Key:sidewalk
    'cycleway': TAG_STRING,              # https://wiki.openstreetmap.org/wiki/Key:cycleway
}
# Crosswalk from the osmnx 'highway' tag to a simplified 'roadway' value.
# The OSMnx 'highway' tag takes many values that are more detailed than we need,
# so it gets simplified into a new column, 'roadway'.
# Each record is (highway, roadway, hierarchy).
HIGHWAY_TO_ROADWAY = [
    # -> cycleway
    ('bridleway',         'cycleway',       13),
    ('closed:path',       'cycleway',       13),
    ('cycleway',          'cycleway',       13),
    ('other',             'cycleway',       13),  # ?
    ('path',              'cycleway',       13),
    ('socail_path',       'cycleway',       13),
    ('track',             'cycleway',       13),
    # -> footway
    ('corridor',          'footway',        14),
    ('footpath',          'footway',        14),
    ('footway',           'footway',        14),
    ('pedestrian',        'footway',        14),
    ('steps',             'footway',        14),
    # -> motorway / primary
    ('motorway',          'motorway',       1),
    ('motorway_link',     'motorway_link',  2),
    ('primary',           'primary',        5),
    ('primary_link',      'primary_link',   6),
    # -> residential
    ('access',            'residential',    11),
    ('junction',          'residential',    11),
    ('residential',       'residential',    11),
    ('road',              'residential',    11),
    ('unclassified',      'residential',    11),
    ('unclassified_link', 'residential',    11),
    # -> secondary
    ('secondary',         'secondary',      7),
    ('secondary_link',    'secondary_link', 8),
    # -> service / tertiary
    ('busway',            'service',        12),
    ('living_street',     'service',        12),
    ('service',           'service',        12),
    ('tertiary',          'tertiary',       9),
    ('tertiary_link',     'tertiary_link',  10),
    ('traffic_island',    'service',        12),
    # -> trunk
    ('trunk',             'trunk',          3),
    ('trunk_link',        'trunk_link',     4),
]
# Crosswalk from 'roadway' to the "drive_access", "walk_access" and "bike_access" flags.
# Each record is (roadway, drive_access, walk_access, bike_access).
ROADWAY_TO_ACCESS = [
    ('cycleway',       False, True,  True),
    ('footway',        False, True,  False),
    ('motorway',       True,  False, False),
    ('motorway_link',  True,  True,  True),
    ('primary',        True,  True,  True),
    ('primary_link',   True,  True,  True),
    ('residential',    True,  True,  True),
    ('secondary',      True,  True,  True),
    ('secondary_link', True,  True,  True),
    ('service',        True,  True,  True),
    ('tertiary',       True,  True,  True),
    ('tertiary_link',  True,  True,  True),
    ('trunk',          True,  True,  True),
    ('trunk_link',     True,  True,  True),
    # default to true to err on the side of granting more access
    ('unknown',        True,  True,  True),
]
# First node number of the numbering range assigned to each county
county_network_node_numbering_start_dict = {
    "San Francisco": 1000000,
    "San Mateo":     1500000,
    "Santa Clara":   2000000,
    "Alameda":       2500000,
    "Contra Costa":  3000000,
    "Solano":        3500000,
    "Napa":          4000000,
    "Sonoma":        4500000,
    "Marin":         5000000,
}
# Settings for the GTFS data sources.
# Keys and values are based on the fields 'agency_raw_name' and 'agency_name' in GTFS agency.txt.
# Some operators have more than one GTFS dataset; only the 2015 one is used.
# TODO: for Blue & Gold Fleet, the GTFS_feed_name and agency_raw_name is 'Blue&Gold_gtfs_10_4_2017'
#       and the agency_name is 'Blue&Gold Fleet'. However, sharedstreet conflation cannot deal with
#       "&" or a space in input/output file names, so these need to be modified to
#       'Blue_Gold_gtfs_10_4_2017' and 'Blue Gold Fleet' respectively. Similarly,
#       'Union_City_Transit_Aug-01-2015 to Jun-30-2017' needs to be modified to
#       'Union_City_Transit_Aug-01-2015_to_Jun-30-2017'. A more generic solution may be needed.
gtfs_name_dict = {
    'ACE_2017_3_20': 'ACE Altamont Corridor Express',
    'ACTransit_2015_8_14': 'AC Transit',
    'BART_2015_8_3': 'Bay Area Rapid Transit',
    'Blue_Gold_gtfs_10_4_2017': 'Blue Gold Fleet',
    'Caltrain_2015_5_13': 'Caltrain',
    'Capitol_2017_3_20': 'Capitol Corridor',
    'CCTA_2015_8_11': 'County Connection',
    'commuteDOTorg_GTFSImportExport_20160127_final_mj': 'Commute.org Shuttle',  # Caltrain_shuttle
    'Emeryville_2016_10_26': 'Emery Go-Round',
    # not used: 'Fairfield_2015_10_14': 'Fairfield and Suisun Transit',
    'Fairfield_2015_10_14_updates': 'Fairfield and Suisun Transit',
    'GGFerries_2017_3_18': 'Golden Gate Ferries',
    'GGTransit_2015_9_3': 'Golden Gate Transit',
    'Marguerite_2016_10_10': 'Stanford Marguerite Shuttle',
    'MarinTransit_2015_8_31': 'Marin Transit',
    'MVGo_2016_10_26': 'MVgo Mountain View',
    'petalumatransit-petaluma-ca-us__11_12_15': 'Petaluma Transit',
    # not used: 'Petaluma_2016_5_22': 'Petaluma Transit',
    'RioVista_2015_8_20': 'Rio Vista Delta Breeze',
    'SamTrans_2015_8_20': 'SamTrans',
    'SantaRosa_google_transit_08_28_15': 'Santa Rosa CityBus',
    'SFMTA_2015_8_11': 'San Francisco Municipal Transportation Agency',
    'SF_Bay_Ferry2016_07_01': 'San Francisco Bay Ferry',
    'Soltrans_2016_5_20': 'SolTrans',
    'SonomaCounty_2015_8_18': 'Sonoma County Transit',
    'TriDelta-GTFS-2018-05-24_21-43-17': 'Tri Delta Transit',
    'Union_City_Transit_Aug-01-2015_to_Jun-30-2017': 'Union City Transit',
    'vacavillecitycoach-2020-ca-us': 'Vacaville City Coach',
    'Vine_GTFS_PLUS_2015': 'Vine (Napa County)',
    'VTA_2015_8_27': 'VTA',
    'westcat-ca-us_9_17_2015': 'WestCat (Western Contra Costa)',
    # not used: 'WestCAT_2016_5_26': 'WestCat (Western Contra Costa)',
    'Wheels_2016_7_13': 'Wheels Bus',
}

# agency names (values of gtfs_name_dict) listed as rail and as ferry
rail_gtfs = [
    'Bay Area Rapid Transit',
    'Caltrain',
    'Capitol Corridor',
]
ferry_gtfs = [
    'Golden Gate Ferries',
    'San Francisco Bay Ferry',
]
# Parameters for the Ranch version of transit routing
RANCH_TRANSIT_ROUTING_PARAMETERS = dict(
    good_links_buffer_radius=200,
    non_good_links_penalty=5,
    bad_stops_buffer_radius=100,
    ft_penalty=dict(
        residential=2,
        service=3,
        default=1,
        motorway=0.9,
    ),
)
def docker_path(non_docker_path):
    """
    Transform a non docker (host) path into the corresponding path inside the docker container
    created by create_docker_container(), and return that path.

    Supported non_docker_paths are:
      - paths starting with 'E:'                  -> placed under /usr/e_volume
      - paths starting with 'C:/Users/[USERNAME]' -> placed under /usr/home
        (USERNAME is read from the environment)

    Parameters
    ------------
    non_docker_path: str, the host path; it is split into components on os.path.sep

    Return
    ------------
    str: the mount target followed by the remaining path components, joined with '/'

    Raises
    ------------
    NotImplementedError if non_docker_path is in neither supported location; this includes the
    case where USERNAME is not set in the environment, so the home location cannot be checked.
    """
    # we're going to need to cd into OUTPUT_DATA_DIR -- create that path (on UNIX)
    path_components = non_docker_path.split(os.path.sep)  # e.g. ['E:','tm2_network_version13']

    # use .get() so that a missing USERNAME leads to the documented NotImplementedError
    # rather than a KeyError
    username = os.environ.get('USERNAME')

    if non_docker_path.startswith('E:'):
        output_mount_target = '/usr/e_volume'
        # drop the E: part only
        path_components = path_components[1:]
    elif username and non_docker_path.startswith('C:/Users/{}'.format(username)):
        output_mount_target = '/usr/home'
        # drop the C:/Users/[USERNAME]
        # NOTE(review): the prefix test above uses '/' while the split uses os.path.sep, and
        # 'C:', 'Users', [USERNAME] is three components while four are dropped here -- confirm
        # against the paths callers actually pass before changing.
        path_components = path_components[4:]
    else:
        error_message = "docker_path() doesn't support non_docker_path {}".format(non_docker_path)
        WranglerLogger.error(error_message)
        raise NotImplementedError(error_message)

    # prepare the path to cd into (OUTPUT_DATA_DIR) -- [output_mount_target]/[rest of OUTPUT_DATA_DIR]
    path_components.insert(0, output_mount_target)
    WranglerLogger.debug('non_docker_path_list: {}'.format(path_components))

    LINUX_SEP = '/'
    return LINUX_SEP.join(path_components)
def get_docker_container(docker_container_name):
    """
    Look up an existing docker container by name and make sure it has been (re)started.

    The container's status is logged; when that status is anything other than 'running',
    the container is restarted.

    Returns the tuple (docker client, docker container instance).
    Any exception raised by the docker package (e.g. on lookup failure) is not caught here.
    """
    import docker

    docker_client = docker.from_env()
    existing_container = docker_client.containers.get(docker_container_name)

    status_message = 'Docker container named {} found; status: {}'.format(
        docker_container_name, existing_container.status)
    WranglerLogger.info(status_message)

    is_running = (existing_container.status == 'running')
    if not is_running:
        existing_container.restart()

    # note: I have had difficulty reusing a container when the mount fails because my IP address
    # (which is included in the volume) has changed
    return docker_client, existing_container
def create_docker_container(mount_e: bool, mount_home: bool):
    """
    Uses docker python package to:
    1) If it doesn't already exist, create the docker image named DOCKER_SHST_IMAGE_NAME from the
       Dockerfile in the directory that contains this file
    2) If mount_e is True, creates (if it doesn't already exist) the docker volume 'E_volume' for E:
       and mounts it so that it is accessible at /usr/e_volume. Creating the volume prompts for the
       current user's password.
    3) If mount_home is True, bind-mounts the USERPROFILE directory (C:/Users/[USERNAME]) so that it
       is accessible at /usr/home
    4) Creates and starts a docker container from the image with the given mounts

    Returns (docker.Client instance,
             running docker.models.containers.Container instance)

    See https://docker-py.readthedocs.io/en/stable/containers.html?highlight=prune#docker.models.containers.ContainerCollection.prune
    """
    import docker
    client = docker.from_env()

    # check if the docker image exists
    try:
        shst_image = client.images.get(DOCKER_SHST_IMAGE_NAME)
        WranglerLogger.info('shst image {} found; skipping docker image build'.format(shst_image))
    except docker.errors.ImageNotFound:
        # if not, create one using the local Dockerfile
        dockerfile_dir = os.path.abspath(os.path.dirname(__file__))
        WranglerLogger.info('Creating image using dockerfile dir {}'.format(dockerfile_dir))
        shst_image = client.images.build(path=dockerfile_dir, tag=DOCKER_SHST_IMAGE_NAME, rm=True)
        WranglerLogger.info('Created docker image {}'.format(shst_image))

    docker_mounts = []

    if mount_e:
        # check if the docker volume exists
        try:
            E_volume = client.volumes.get('E_volume')
            WranglerLogger.info('E_volume volume {} found; skipping docker volume create'.format(E_volume))
        except docker.errors.NotFound:
            # if not, create one
            # first we need our IP address
            import socket
            hostname = socket.gethostname()
            IPAddr = socket.gethostbyname(hostname)

            # and the Windows username, password
            # (the password is deliberately never logged or printed)
            import getpass
            username = getpass.getuser()
            password = getpass.getpass(prompt='To create a docker volume for your E drive, please enter your password: ')

            # create the docker volume as a CIFS share of the host's E drive
            E_volume = client.volumes.create(
                name        = 'E_volume',
                driver      = 'local',
                driver_opts = {'type'  :'cifs',
                               'device':'//{}/e'.format(IPAddr),
                               'o'     :'user={},password={},file_mode=0777,dir_mode=0777'.format(username,password)
                })
            WranglerLogger.info('Created docker volume {}'.format(E_volume))

        e_mount = docker.types.Mount(target='/usr/e_volume', source='E_volume', type='volume')
        docker_mounts.append(e_mount)

    if mount_home:
        # mount Users home dir
        WranglerLogger.info('Mouting C:/Users/{} as /usr/home'.format(os.environ['USERNAME']))
        output_mount_target = '/usr/home'
        home_mount = docker.types.Mount(target=output_mount_target, source=os.environ['USERPROFILE'], type='bind')
        docker_mounts.append(home_mount)

    # docker create
    container = client.containers.create(
        image       = DOCKER_SHST_IMAGE_NAME,
        command     = '/bin/bash',
        tty         = True,
        stdin_open  = True,
        auto_remove = False,
        mounts      = docker_mounts)
    WranglerLogger.info('docker container {} named {} created'.format(container, container.name))

    container.start()
    # refresh the cached attributes so that the status logged below reflects the started container
    container.reload()
    WranglerLogger.info('docker container {} started; status: {}'.format(container.name, container.status))

    return (client, container)
def extract_osm_links_from_shst_metadata(shst_gdf):
    """
    Expand each shst extract record into osm ways; the information from this is within the metadata for the row:
    https://github.com/sharedstreets/sharedstreets-ref-system#sharedstreets-osm-metadata

    The metadata dictionaries held in shst_gdf['metadata'] are read but not modified; each waySection
    is copied before fields are added to it.

    Parameters
    ------------
    shst_gdf: SharedStreets extract with columns 'metadata', 'id', 'fromIntersectionId', 'toIntersectionId',
              'forwardReferenceId', 'backReferenceId' and 'geometry'

    Return
    ------------
    The result of left-merging those shst_gdf columns (other than 'metadata') with one row per OSM waySection,
    containing the following fields:
      'nodeIds'           : from SharedStreets OSM Metadata waySections, OSM node IDs as a list of strings (which are ints)
      'wayId'             : from SharedStreets OSM Metadata waySections, OSM way ID as an int
      'roadClass'         : from SharedStreets OSM Metadata waySections; string, I'm guessing it's from the highway tag? https://wiki.openstreetmap.org/wiki/Key:highway#Roads
      'oneWay'            : from SharedStreets OSM Metadata waySections; boolean, I'm guessing it's from the oneway tag? https://wiki.openstreetmap.org/wiki/Key:oneway
      'roundabout'        : from SharedStreets OSM Metadata waySections; boolean, I'm guessing it's from the junction tag? https://wiki.openstreetmap.org/wiki/Tag:junction%3Droundabout
      'link'              : from SharedStreets OSM Metadata waySections; boolean, I'm guessing it's from the highway tag? https://wiki.openstreetmap.org/wiki/Highway_link
      'name'              : from SharedStreets OSM Metadata; the osmMetadata-level name, which replaces the name of each waySection
      'waySections_len'   : from SharedStreets OSM Metadata waySections; number of waySections
      'waySection_ord'    : from SharedStreets OSM Metadata waySections; starts at 1 and numbers the waySections in order
      'geometryId'        : from SharedStreets OSM Metadata
      'u','v'             : from SharedStreets OSM Metadata waySections, first and last elements in nodeIds
      'id'                : SharedStreets id of each geometry, equivalent to "geometryId" in SharedStreets OSM Metadata; 32-character hex
      'forwardReferenceId': SharedStreets referenceId of the forward link on the given SharedStreets geometry; 32-character hex
      'backReferenceId'   : SharedStreets referenceId of the backward link on the given SharedStreets geometry if the geometry represents a two-way street; 32-character hex
      'fromIntersectionId': SharedStreets id of the "from" node of the link represented by "forwardReferenceId"; 32-character hex
      'toIntersectionId'  : SharedStreets id of the "to" node of the link represented by "forwardReferenceId"; 32-character hex
                            (for the link represented by "backReferenceId", from/to intersections are reversed)
      'geometry'          : SharedStreets geometry
    """
    # It is fast to iterate through a list
    WranglerLogger.debug("Converting shst_gdf metadata to list")
    metadata_list = shst_gdf['metadata'].tolist()
    WranglerLogger.debug("metadata_list is length {}; first 10 items: {}".format(len(metadata_list), metadata_list[:10]))

    # sharedstreet metadata example:
    # {
    #   "gisMetadata": [],
    #   "geometryId": "7fd0e10cc0a694e96701e99c7c6f4525",
    #   "osmMetadata": {
    #     "waySections": [
    #       {
    #         "nodeIds": ["65324846", "4763953722", "4763953417"],
    #         "wayId": "255168049",
    #         "roadClass": "Tertiary",
    #         "oneWay": false,
    #         "roundabout": false,
    #         "link": false,
    #         "name": ""
    #       },
    #       {
    #         "nodeIds": ["4763953417", "65324849"],
    #         "wayId": "514442927",
    #         "roadClass": "Tertiary",
    #         "oneWay": false,
    #         "roundabout": false,
    #         "link": false,
    #         "name": ""
    #       }
    #     ],
    #     "name": "18th Street"
    #   }
    # }

    # will create a list of dicts to make a dataframe
    # each dict will be an OSM way
    osm_from_shst_link_list = []
    for metadata in metadata_list:
        osm_metadata = metadata.get('osmMetadata')
        name = osm_metadata.get('name')
        way_sections = osm_metadata.get('waySections')
        waySections_len = len(way_sections)
        geometryId = metadata.get('geometryId')
        for waySections_order, osm_way in enumerate(way_sections, start=1):
            # shallow copy, so that the fields added below don't leak into the caller's metadata
            osm_dict = dict(osm_way)
            osm_dict['name'] = name
            osm_dict['waySections_len'] = waySections_len
            osm_dict['waySection_ord'] = waySections_order
            osm_dict['geometryId'] = geometryId
            osm_from_shst_link_list.append(osm_dict)
    WranglerLogger.debug("osm_from_shst_link_list has length {}".format(len(osm_from_shst_link_list)))

    osm_from_shst_link_df = pd.DataFrame.from_records(osm_from_shst_link_list)
    # convert wayId to numeric and waySections_len, waySection_ord to int8
    # NOTE(review): int8 holds at most 127 -- presumably no geometry has that many waySections; confirm
    osm_from_shst_link_df["wayId"] = osm_from_shst_link_df["wayId"].astype(int)
    osm_from_shst_link_df["waySections_len"] = osm_from_shst_link_df["waySections_len"].astype(np.int8)
    osm_from_shst_link_df["waySection_ord"] = osm_from_shst_link_df["waySection_ord"].astype(np.int8)

    WranglerLogger.debug("osm_from_shst_link_df has length {} and dtypes:\n{}".format(len(osm_from_shst_link_df),
        osm_from_shst_link_df.dtypes))
    WranglerLogger.debug("osm_from_shst_link_df.head:\n{}".format(osm_from_shst_link_df.head()))
    WranglerLogger.debug("osm_ways_from_shst_df.waySections_len.value_counts():\n{}".format(
        osm_from_shst_link_df.waySections_len.value_counts()))

    # add fields to represent each link's starting_node ("u") and ending_node ("v") from the nodeIds field
    osm_from_shst_link_df['u'] = osm_from_shst_link_df.nodeIds.apply(lambda x: int(x[0]))
    osm_from_shst_link_df['v'] = osm_from_shst_link_df.nodeIds.apply(lambda x: int(x[-1]))

    # add remaining fields from shared streets geodataframe, including geometry, which makes it a
    # GeoDataFrame with the SharedStreets geometries (the geodataframe is the left side of the merge)
    osm_from_shst_link_gdf = pd.merge(
        left     = shst_gdf[['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId', 'backReferenceId', 'geometry']],
        right    = osm_from_shst_link_df,
        how      = "left",
        left_on  = "id",
        right_on = "geometryId"
    )
    WranglerLogger.debug("osm_from_shst_link_gdf has length {} and dtypes:\n{}".format(len(osm_from_shst_link_gdf),
        osm_from_shst_link_gdf.dtypes))
    WranglerLogger.debug("osm_from_shst_link_gdf.head:\n{}".format(osm_from_shst_link_gdf.head()))

    return osm_from_shst_link_gdf
def merge_osmnx_with_shst(osm_ways_from_shst_gdf, osmnx_link_gdf, OUTPUT_DIR):
    """
    Merges link attributes from the OSM (osmnx) extract into the ShSt-derived OSM Ways dataframe.

    The osmnx links are first reduced to one row per 'osmid' (reversed edges are dropped, and the
    remaining rows of each osmid are aggregated: 'length' is summed, the first value is taken for all
    other columns, and 'u', 'v', 'key' and 'geometry' are discarded). The result is outer-merged with
    osm_ways_from_shst_gdf on wayId == osmid, and rows with null geometry are then removed.

    Side effects:
      - osm_ways_from_shst_gdf is modified in place: 'name' is renamed to 'name_shst_metadata' and
        'oneWay' to 'oneway'
      - the column 'osmid_cnt' is added to the passed osmnx_link_gdf
      - three debug files are written to OUTPUT_DIR: 'osmnx_osmid_dup.feather',
        'osmnx_ways_without_shst.feather' and 'shst_osmnx_oneway_diff.feather'

    Parameters
    ------------
    osm_ways_from_shst_gdf: osm Ways from shst extracts
    osmnx_link_gdf: osm extract; the columns 'osmid', 'length', 'reversed', 'lanes:backward',
                    'lanes:forward', 'u', 'v', 'key' and 'geometry' are referenced here
    OUTPUT_DIR: temporary for writing debug file(s)

    Return
    ------------
    OSM ways from SharedStreets metadata merged with the OSMNX attributes (including all the tags).
    The column 'osmnx_shst_merge' indicates the source of each row ('both', 'shst_only', 'osmnx_only'),
    and the columns present in both inputs carry the suffixes '_shst' and '_osmnx'.
    """
    # NOTE(review): the function name in this log message differs from the actual function name
    WranglerLogger.debug(
        "merge_osmnx_attributes_with_shst called with osm_ways_from_shst_gdf (type {}) and osmnx_link_gdf (type {})".format(
            type(osm_ways_from_shst_gdf), type(osmnx_link_gdf)))

    # rename name to make it clear it's from shst metadata
    # and rename "oneWay" to "oneway" (same as in osmnx extracts), so later when merging with osmnx extracts, suffixes
    # will be added to differentiate the source
    # NOTE(review): inplace=True means the caller's dataframe is renamed as well
    osm_ways_from_shst_gdf.rename(columns={"name": "name_shst_metadata",
                                           "oneWay": "oneway"}, inplace=True)

    # the merge is based on "wayId" in osm_ways_from_shst_gdf and "osmid" in osmnx_link_gdf, so first examine duplicated 'osmid'
    # NOTE(review): this adds the column to the caller's dataframe; it is only dropped from the filtered copy below
    osmnx_link_gdf['osmid_cnt'] = osmnx_link_gdf.groupby(['osmid'])['length'].transform('size')
    WranglerLogger.debug('stats on osmid occurances:\n{}'.format(osmnx_link_gdf['osmid_cnt'].value_counts()))

    # export some examples with duplicated osmid to check on a map
    chk_osmid_dup_gdf = osmnx_link_gdf.loc[(osmnx_link_gdf['osmid_cnt'] > 1) & \
                                           osmnx_link_gdf['lanes:backward'].notnull() & \
                                           osmnx_link_gdf['lanes:forward'].notnull()].sort_values('osmid')
    chk_osmid_dup_gdf.reset_index(drop=True, inplace=True)
    OSMID_DUP_DEBUG_FILE = os.path.join(OUTPUT_DIR, 'osmnx_osmid_dup.feather')
    chk_osmid_dup_gdf.to_feather(OSMID_DUP_DEBUG_FILE)
    WranglerLogger.debug('Wrote chk_osmid_dup_gdf to {}'.format(OSMID_DUP_DEBUG_FILE))

    # Two reasons for duplicated osmid:
    # 1. when osmnx generates a graph, it adds edges in both directions for two-way links, tags the reversed link in the
    #    boolean field "reversed", and copies link attributes to both edges. Since our osmnx extraction method already includes
    #    direction-dependent attributes, e.g. "lanes:forward", "lanes:backward", "turn:lanes:forward", "turn:lanes:backward",
    #    osm way links with "reversed==False" contain link attributes of reversed links, and are consistent with the direction
    #    of osm ways in sharedstreets metadata, therefore, drop reversed osm ways links before merging with shst.
    # NOTE(review): the in-place drop on a .loc-filtered frame may trigger pandas' SettingWithCopyWarning
    osmnx_link_gdf = osmnx_link_gdf.loc[osmnx_link_gdf['reversed'] == False]
    osmnx_link_gdf.drop(columns=['osmid_cnt', 'reversed'], inplace=True)

    # 2. OSM way links can be chopped up into many nodes, presumably to give it shape
    #    for example, this link has a single osmid but 10 nodes:
    #    https://www.openstreetmap.org/way/5149900
    #    consolidate these -- we expect all the columns to be the same except for length, u, v, key and the geometry
    osm_way_match_cols = list(osmnx_link_gdf.columns.values)
    for remove_col in ['length', 'u', 'v', 'key', 'geometry']:
        osm_way_match_cols.remove(remove_col)

    # Log some debug info about this
    # commented this out since it's not very useful; shows that only length/geometry/u/v are changing
    # osmnx_link_gdf['dupes'] = osmnx_link_gdf.duplicated(subset=osm_way_match_cols, keep=False)
    # WranglerLogger.debug("duplicates in osmnx_link_gdf based on {}: {} rows; " \
    #     "head(50):\n{}".format(osm_way_match_cols, osmnx_link_gdf['dupes'].sum(),
    #     osmnx_link_gdf.loc[ osmnx_link_gdf['dupes'] == True].head(50)))

    # And consolidate to each OSM way; we will drop the geometry here so it's a df now. The "geometry" field in the
    # merged "osmnx_shst_gdf" is from sharedstreets, therefore multiple OSM ways derived from one sharedstreet record
    # would have the same geometry. Retain the length of the OSM way (in meters)
    # Note: I would have liked to use geopandas.dissolve() and keep/aggregate the geometry but I don't think it's possible
    agg_dict = {}
    for col in osm_way_match_cols:
        if col=='osmid': continue # this is our groupby key
        agg_dict[col] = 'first'   # these are all the same for each osmid so take the first
    agg_dict['length'] = 'sum'    # sum this one

    osmnx_link_df = osmnx_link_gdf.groupby(by=['osmid']).agg(agg_dict).reset_index(drop=False)
    WranglerLogger.debug("After aggregating to osm ways, osmnx_link_df len={:,}, head():\n{}".format(len(osmnx_link_df), osmnx_link_df.head()))

    # to keep this as a geodataframe, call merge with geodataframe as left
    # https://geopandas.org/en/stable/docs/user_guide/mergingdata.html#attribute-joins
    osmnx_shst_gdf = pd.merge(
        left      = osm_ways_from_shst_gdf,
        right     = osmnx_link_df,
        left_on   = 'wayId',
        right_on  = 'osmid',
        how       = 'outer',
        indicator = True,
        suffixes  = ['_shst', '_osmnx']
    )
    # rename and recode indicator to be more clear
    osmnx_shst_gdf.rename(columns={'_merge':'osmnx_shst_merge'}, inplace=True)
    osmnx_shst_gdf['osmnx_shst_merge'] = osmnx_shst_gdf['osmnx_shst_merge'].cat.rename_categories({
        'both'      : 'both',
        'left_only' : 'shst_only',
        'right_only': 'osmnx_only'
    })
    WranglerLogger.debug("osmnx_shst_gdf type {}, len {:,}, dtypes:\n{}".format(
        type(osmnx_shst_gdf), len(osmnx_shst_gdf), osmnx_shst_gdf.dtypes
    ))
    WranglerLogger.debug("osmnx_shst_gdf.head():\n{}".format(osmnx_shst_gdf.head()))

    # stats on merge results
    #   - "shst_only" rows: osm ways in the sharedstreets extracts only. I believe they are "private" ways since we
    #     pass network_type='all' rather than 'all_private' to osmnx.graph.graph_from_polygon() in step2.
    #   - "osmnx_only" rows: osm links in the osmnx extracts only, mostly likely roads added to the OSM network after
    #     the sharedstreets network was built. They also have geometry as None.
    WranglerLogger.debug("merge indicator statistics:\n{}".format(osmnx_shst_gdf['osmnx_shst_merge'].value_counts()))

    # Log rows with geometry as None (row count should be the same as 'osmnx_only') and remove
    null_shst_geom_df = osmnx_shst_gdf.loc[pd.isnull(osmnx_shst_gdf.geometry)].copy()
    WranglerLogger.debug("osmnx_shst_gdf has {:,} rows with null geometry; head:\n{}".format(
        len(null_shst_geom_df), null_shst_geom_df.head()
    ))
    WranglerLogger.debug('null_shst_geom_df.osmnx_shst_merge.value_counts():\n{}'.format(null_shst_geom_df.osmnx_shst_merge.value_counts()))

    # temporary(?): drop null shst columns, and add geometry from osmnx extracts, and save them to look at
    null_shst_geom_df.drop(
        columns=['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId', 'backReferenceId', 'geometry'],
        inplace=True)
    # osmnx_link_gdf still has one row per osmnx edge here, so an osmid can match more than one geometry
    null_shst_geom_gdf = pd.merge(
        left = osmnx_link_gdf[['osmid', 'geometry']],
        right= null_shst_geom_df,
        how  = 'right',
        on   = 'osmid',
    )
    null_shst_geom_gdf.reset_index(drop=True, inplace=True)
    OSMNX_ONLY_DEBUG_FILE = os.path.join(OUTPUT_DIR, 'osmnx_ways_without_shst.feather')
    null_shst_geom_gdf.to_feather(OSMNX_ONLY_DEBUG_FILE)
    WranglerLogger.debug('Wrote null_osmnx_geom_gdf to {}'.format(OSMNX_ONLY_DEBUG_FILE))

    # remove those rows which didn't correspond to osmnx ways
    # TODO: This is throwing away OSM data for about 180k links right now (out of 1.1M) because they don't correspond with
    #       the wayIds listed in the metadata from SharedStreets, leaving 96k SharedStreets links without OSM data.
    #       This is because some of the OSM way IDs have changed since the 2018 snapshot was made for SharedStreets.
    #       Rather than throwing away this data, we could try to bring it back by doing a sharedstreet match based on the link geometry
    #       between these two link sets.
    osmnx_shst_gdf = osmnx_shst_gdf.loc[pd.notnull(osmnx_shst_gdf.geometry)]
    # double check 'osmnx_shst_merge' indicator should only have 'both' and 'shst_only', not 'osmnx_only'
    WranglerLogger.debug(
        'Double check osmnx_shst_merge indicator - should only have "both" and "shst_only":\n{}'.format(
            osmnx_shst_gdf['osmnx_shst_merge'].value_counts()
        ))
    osmnx_shst_gdf.reset_index(drop=True, inplace=True)

    # these are floats because of join failure; they can go back to their original dtype
    # NOTE(review): wayId is cast to int32 -- presumably all OSM way IDs fit in 32 bits; confirm
    original_dtypes = {
        'wayId'             :np.int32,
        'waySections_len'   :np.int8,
        'waySection_ord'    :np.int8,
        'u'                 :np.int64,
        'v'                 :np.int64
    }
    for col in original_dtypes.keys():
        WranglerLogger.debug('column {} has {:,} null values; converting to {}'.format(
            col, osmnx_shst_gdf[col].isnull().sum(), original_dtypes[col]))
        osmnx_shst_gdf[col] = osmnx_shst_gdf[col].astype(original_dtypes[col])

    # (temporary) QAQC links where 'oneway_shst' and 'oneway_osmnx' have discrepancies, export to check on the map
    WranglerLogger.debug('QAQC discrepancy between oneway_shst and oneway_osm:\n{}\n{}\n{}\n{}'.format(
        'oneway_shst value counts', osmnx_shst_gdf.oneway_shst.value_counts(dropna=False),
        'oneway_osmnx value counts', osmnx_shst_gdf.oneway_osmnx.value_counts(dropna=False)
    ))
    # only rows where both sources have a oneway value and those values differ
    oneway_diff = osmnx_shst_gdf.loc[osmnx_shst_gdf.oneway_shst.notnull() & osmnx_shst_gdf.oneway_osmnx.notnull() & (
        osmnx_shst_gdf.oneway_shst != osmnx_shst_gdf.oneway_osmnx)]
    oneway_diff.reset_index(drop=True, inplace=True)
    WranglerLogger.debug('export {} links with different oneway_shst and oneway_osm for debugging'.format(
        oneway_diff.shape[0]))
    ONEWAY_DEBUG_FILE = os.path.join(OUTPUT_DIR, 'shst_osmnx_oneway_diff.feather')
    oneway_diff.to_feather(ONEWAY_DEBUG_FILE)
    WranglerLogger.debug('Wrote oneway_diff to {}'.format(ONEWAY_DEBUG_FILE))

    return osmnx_shst_gdf
def recode_osmnx_highway_tag(osmnx_shst_gdf, highway_roadway_crosswalk, roadway_access_crosswalk):
    """
    Simplify the OSMnx 'highway' tag into a new column, 'roadway', and add access columns.

    OSMnx 'highway' tags have a multitude of values that are too detailed for us, so each
    value is mapped to a 'roadway' category (and its 'hierarchy') through a left join with
    highway_roadway_crosswalk. Links whose 'highway' value is absent from the crosswalk
    get roadway = 'unknown'.

    A second left join, on 'roadway', with roadway_access_crosswalk adds the columns
    'drive_access', 'walk_access' and 'bike_access', representing whether links of that
    roadway type have this type of access.

    Args:
        osmnx_shst_gdf:            link table with a 'highway' column
        highway_roadway_crosswalk: records with fields (highway, roadway, hierarchy)
        roadway_access_crosswalk:  records with fields (roadway, drive_access, walk_access, bike_access)

    Returns the merged link table, with the additional columns 'roadway', 'hierarchy',
    'drive_access', 'walk_access' and 'bike_access'.
    """
    WranglerLogger.info('4a. Converting OSM highway variable into standard roadway variable')

    highway_to_roadway_df = pd.DataFrame.from_records(
        highway_roadway_crosswalk, columns=['highway', 'roadway', 'hierarchy'])
    osmnx_shst_gdf = pd.merge(
        left      = osmnx_shst_gdf,
        right     = highway_to_roadway_df,
        how       = 'left',
        on        = 'highway',
        indicator = True,   # temporary '_merge' column, used only for the debug tally below
    )
    # 'highway' values missing from the crosswalk fall into an explicit 'unknown' category
    osmnx_shst_gdf.fillna(value={'roadway':'unknown'}, inplace=True)
    WranglerLogger.debug('osmnx_shst_gdf.dtypes:\n{}'.format(osmnx_shst_gdf.dtypes))
    WranglerLogger.debug('osmnx_shst_gdf[["highway","roadway","hierarchy","_merge"]].value_counts():\n{}'.format(
        osmnx_shst_gdf[['highway','roadway','hierarchy','_merge']].value_counts(dropna=False)))
    osmnx_shst_gdf.drop(columns="_merge", inplace=True)

    # add network type variables "drive_access", "walk_access", "bike_access" based on roadway
    WranglerLogger.info('Adding network type variables "drive_access", "walk_access", "bike_access"')
    network_type_df = pd.DataFrame.from_records(
        roadway_access_crosswalk, columns=['roadway','drive_access','walk_access','bike_access'])
    osmnx_shst_gdf = pd.merge(
        left  = osmnx_shst_gdf,
        right = network_type_df,
        how   = 'left',
        on    = 'roadway')

    # tally each access column under its own label
    for access_col in ['drive_access', 'walk_access', 'bike_access']:
        WranglerLogger.debug('osmnx_shst_gdf.{}.value_counts():\n{}'.format(
            access_col, osmnx_shst_gdf[access_col].value_counts(dropna=False)))

    return osmnx_shst_gdf
def modify_osmway_lane_accounting_field_type(osmnx_shst_gdf):
    """
    For all fields related to lane accounting (the keys of OSM_WAY_TAGS), clean up the
    mixture of None and non-None values (both numeric and string attributes):

    - TAG_NUMERIC columns are converted with pd.to_numeric; values that cannot be parsed
      become NaN.
    - TAG_STRING columns have missing values replaced with '' and are cast to str.

    The 'oneway' tag is skipped.

    Does not return anything; modifies the passed DataFrame.
    """
    WranglerLogger.info('Clean up fields type for attributes related to lane accounting')

    for col in sorted(OSM_WAY_TAGS):
        # this one is special and has been renamed to oneway_osmnx and it's a bool already
        if col == 'oneway':
            continue

        tag_type = OSM_WAY_TAGS[col]
        if tag_type == TAG_NUMERIC:
            osmnx_shst_gdf[col] = pd.to_numeric(osmnx_shst_gdf[col], errors='coerce')
            WranglerLogger.debug('converted {} to numeric, with value_counts:\n{}'.format(
                col, osmnx_shst_gdf[col].value_counts(dropna=False)))

        elif tag_type == TAG_STRING:
            # Assign the filled result back rather than calling fillna(inplace=True) on the
            # column selection: the in-place form acts on a temporary object and is not
            # guaranteed to reach the DataFrame, which would leave NaN to be cast to 'nan'.
            osmnx_shst_gdf[col] = osmnx_shst_gdf[col].fillna('').astype(str)
            WranglerLogger.debug('fillna for {}, with unique values_counts:\n{}'.format(
                col, osmnx_shst_gdf[col].value_counts(dropna=False)))
def tag_osm_ways_oneway_twoway(osmnx_shst_gdf):
    """
    Label each link as one-way (1) or two-way (2) in a new int8 column, 'osm_dir_tag'.

    Does not return anything; modifies the passed DataFrame.
    """
    WranglerLogger.info('Add "osm_dir_tag" to label 2 (for two-way) and 1 (for one-way) OSM ways')

    # every link starts out as one-way
    osmnx_shst_gdf['osm_dir_tag'] = np.int8(1)

    # Primary signal: SharedStreets. The link geometry comes from SharedStreets, and
    # oneway-ness is typically driven by geometry (e.g. on a partially divided street the
    # divided part is two one-way shapes and the undivided part is one two-way geometry),
    # so its version of oneway is generally preferred. A link counts as two-way when
    # SharedStreets says it is not one-way, its forward and back references differ, and
    # its two end nodes differ.
    shst_says_twoway = (
        (osmnx_shst_gdf['oneway_shst'] == False)
        & (osmnx_shst_gdf['forwardReferenceId'] != osmnx_shst_gdf['backReferenceId'])
        & (osmnx_shst_gdf['u'] != osmnx_shst_gdf['v'])
    )

    # Override: SharedStreets is sometimes wrong, or a street was converted to two-way.
    # OSM not flagging the way as one-way together with a positive 'lanes:backward' count
    # is treated as a strong two-way signal.
    # Note: such links are two-way but may lack a 'backReferenceId', so the reverse links
    # added for them later will be missing 'shstReferenceId'.
    osm_says_twoway = (
        (osmnx_shst_gdf['oneway_osmnx'] == False)
        & (osmnx_shst_gdf['lanes:backward'] > 0)
    )

    osmnx_shst_gdf.loc[shst_says_twoway | osm_says_twoway, 'osm_dir_tag'] = np.int8(2)

    direction_counts = osmnx_shst_gdf['osm_dir_tag'].value_counts()
    WranglerLogger.debug('osmnx_shst_gdf has {:,} links: \n{}'.format(
        len(osmnx_shst_gdf), direction_counts))
def cleanup_osm_turn_values(osmnx_shst_gdf):
    """
    Standardize how "no turn indication" is written in 'turn:lanes', 'turn:lanes:forward'
    and 'turn:lanes:backward'.

    In OSM, lanes are separated by '|' and a lane without a turn indication is written
    either as 'none' or as nothing at all, e.g. '||right' or 'none|none|right' for a
    3-lane road. Both spellings are rewritten as 'non_turn', giving
    'non_turn|non_turn|right'. A completely empty value (no turn info) is left empty.

    Does not return anything; modifies the three columns of the passed dataframe.
    """
    non_turn = 'non_turn'

    def _standardize(turn_value):
        """
        Rewrite one turn:lanes string so every lane carries an explicit label.
        """
        relabelled = turn_value.replace('none', non_turn)
        # no turn lanes value at all: nothing to fill in
        if relabelled == '':
            return relabelled
        # an empty token between, before or after lane dividers is an unlabelled lane
        return '|'.join(lane or non_turn for lane in relabelled.split('|'))

    WranglerLogger.info('Standardize non-turn values')

    for turn_col in ('turn:lanes', 'turn:lanes:forward', 'turn:lanes:backward'):
        osmnx_shst_gdf[turn_col] = osmnx_shst_gdf[turn_col].apply(_standardize)
def get_lane_count_from_osm_turns(osmnx_shst_gdf, OUTPUT_DIR):
    """
    Derive lane count from osm turn lane tags: 'turn:lanes', 'turn:lanes:forward', 'turn:lanes:backward'.

    The turn lane info in osm is pretty clean. In almost all cases, turn lanes of one-way links are
    represented in 'turn:lanes', turn lanes of two-way links are represented in 'turn:lanes:forward'
    and 'turn:lanes:backward'. The lane count is the number of '|'-separated entries in the tag.

    Expects the turn columns to be strings ('' when there is no turn info) and 'osm_dir_tag' to be
    set (1 = one-way, 2 = two-way) for every row.

    Does not return anything; adds the following columns to the passed dataframe:
    - 'lanes_from_turns_forward': lane count derived from 'turn:lanes:forward' for the forward link
       of two-way links, or from 'turn:lanes' for one-way links (-1 if unset).
    - 'lanes_from_turns_backward': lane count derived from 'turn:lanes:backward' for the backward link
       of two-way links (-1 if unset).
    - 'turns_list_forward', 'turns_list_backward': the turn tags split into lists, used to do the counting.

    Also writes 'lane_turn_counts_diff.feather' to OUTPUT_DIR, containing the links where the
    turn-derived lane counts disagree with, or are the only alternative to, the OSM lanes tags.
    """
    WranglerLogger.info('Start deriving lane counts from osm turn lane tags.')

    # set up the new columns; -1 means no lane count could be derived from the turn tags
    osmnx_shst_gdf['lanes_from_turns_forward']  = np.int8(-1)  # forward direction (or the one direction of one-way links)
    osmnx_shst_gdf['lanes_from_turns_backward'] = np.int8(-1)  # backward direction (two-way links only)

    is_twoway = osmnx_shst_gdf['osm_dir_tag'] == 2
    is_oneway = osmnx_shst_gdf['osm_dir_tag'] == 1

    # convert turn values into a list
    # (e.g. 'through|right' -> ['through', 'right'], 'non_turn|non_turn|right' -> ['non_turn', 'non_turn', 'right'], '' -> [''])
    osmnx_shst_gdf.loc[is_twoway, 'turns_list_forward'] = \
        osmnx_shst_gdf['turn:lanes:forward'].apply(lambda x: x.split('|'))
    osmnx_shst_gdf.loc[is_oneway, 'turns_list_forward'] = \
        osmnx_shst_gdf['turn:lanes'].apply(lambda x: x.split('|'))
    osmnx_shst_gdf['turns_list_backward'] = \
        osmnx_shst_gdf['turn:lanes:backward'].apply(lambda x: x.split('|'))

    # lane count = number of list entries; only applied where the source tag is non-empty,
    # because '' splits into [''] which would otherwise count as one lane
    forward_list_len  = osmnx_shst_gdf['turns_list_forward'].apply(len)
    backward_list_len = osmnx_shst_gdf['turns_list_backward'].apply(len)
    osmnx_shst_gdf.loc[is_twoway & (osmnx_shst_gdf['turn:lanes:forward'] != ''),
                       'lanes_from_turns_forward'] = forward_list_len
    osmnx_shst_gdf.loc[is_twoway & (osmnx_shst_gdf['turn:lanes:backward'] != ''),
                       'lanes_from_turns_backward'] = backward_list_len
    osmnx_shst_gdf.loc[is_oneway & (osmnx_shst_gdf['turn:lanes'] != ''),
                       'lanes_from_turns_forward'] = forward_list_len

    WranglerLogger.info('Finished deriving lane counts from osm turn lane tags.')

    debug_cols = [
        'turn:lanes', 'turn:lanes:forward', 'turn:lanes:backward',
        'turns_list_forward', 'turns_list_backward', 'lanes_from_turns_forward', 'lanes_from_turns_backward']
    WranglerLogger.debug('One-way links with turn values (head 10): \n{}'.format(
        osmnx_shst_gdf.loc[osmnx_shst_gdf['turn:lanes'] != ''][debug_cols].head(10)))
    WranglerLogger.debug('Two-way links with turn values (head 10): \n{}'.format(
        osmnx_shst_gdf.loc[(osmnx_shst_gdf['turn:lanes:forward'] != '') |
                           (osmnx_shst_gdf['turn:lanes:backward'] != '')][debug_cols].head(10)))

    # debug: check discrepancy between turn-tags-derived lane count and lane-tags-derived lane count
    has_turn_count_fwd = osmnx_shst_gdf['lanes_from_turns_forward'] != -1
    has_turn_count_bwd = osmnx_shst_gdf['lanes_from_turns_backward'] != -1

    # case 1: 'lanes_from_turns_forward' != 'lanes:forward' or 'lanes_from_turns_backward' != 'lanes:backward'
    lane_counts_diff_twoway_idx = is_twoway & (
        (osmnx_shst_gdf['lanes:forward'].notnull() &
         has_turn_count_fwd &
         (osmnx_shst_gdf['lanes:forward'] != osmnx_shst_gdf['lanes_from_turns_forward'])) |
        (osmnx_shst_gdf['lanes:backward'].notnull() &
         has_turn_count_bwd &
         (osmnx_shst_gdf['lanes:backward'] != osmnx_shst_gdf['lanes_from_turns_backward'])))
    lane_counts_diff_oneway_idx = is_oneway & has_turn_count_fwd & (
        (osmnx_shst_gdf['lanes'].notnull() &
         (osmnx_shst_gdf['lanes'] != osmnx_shst_gdf['lanes_from_turns_forward'])) |
        (osmnx_shst_gdf['lanes:forward'].notnull() &
         (osmnx_shst_gdf['lanes:forward'] != osmnx_shst_gdf['lanes_from_turns_forward'])))

    # case 2: 'lanes:forward' or 'lanes:backward' missing but have 'lanes_from_turns_forward' or 'lanes_from_turns_backward'
    turn_lane_counts_only_twoway_idx = is_twoway & (
        (osmnx_shst_gdf['lanes:forward'].isnull() & has_turn_count_fwd) |
        (osmnx_shst_gdf['lanes:backward'].isnull() & has_turn_count_bwd))
    turn_lane_counts_only_oneway_idx = is_oneway & \
        osmnx_shst_gdf['lanes'].isnull() & osmnx_shst_gdf['lanes:forward'].isnull() & \
        has_turn_count_fwd

    # export to inspect on a map
    # Build the export as a new object instead of mutating a .loc slice in place; the list-valued
    # helper columns are left out of the export.
    lane_count_debug = osmnx_shst_gdf.loc[
        lane_counts_diff_twoway_idx | lane_counts_diff_oneway_idx |
        turn_lane_counts_only_twoway_idx | turn_lane_counts_only_oneway_idx
    ].drop(columns=['turns_list_forward', 'turns_list_backward']).reset_index(drop=True)
    WranglerLogger.debug(
        'export {} links with different total lane counts from "lanes" and "turns" for debugging'.format(
            lane_count_debug.shape[0]))
    LANE_COUNT_DEBUG_FILE = os.path.join(OUTPUT_DIR, 'lane_turn_counts_diff.feather')
    lane_count_debug.to_feather(LANE_COUNT_DEBUG_FILE)
    WranglerLogger.debug('Wrote lane_counts_diff to {}'.format(LANE_COUNT_DEBUG_FILE))
def impute_num_lanes_each_direction_from_osm(osmnx_shst_gdf, OUTPUT_DIR):
"""
Impute lane count of each direction of OSM ways. Creating 3 new columns 'forward_lanes', 'backward_lanes', 'bothways_lanes',
base on OSM lanes tags ('lanes', 'lanes:forward', 'lanes:backward', 'lanes:both_ways'), turns tags
('turn:lanes', 'turn:lanes:forward', 'turn:lanes:backward'), and roadway type tag ('roadway', derived from OSM 'highway' tag).
Lanes tags: In OSM data, 'lanes' represents the total number of lanes of a given road, so for links representing two-way roads,
lanes = lanes:forward + lanes:backward + lanes:both_ways, with 'lanes:forward' and 'lane:backward' representing lane
counts of each direction, and 'lanes:both_ways' (1 or None) representing middle turn lane shared by both directions.
Turns tags also provide info on lane count. The previous step 'get_lane_count_from_osm_turns()' extracts this info into 2 columns:
'lanes_from_turns_forward', 'lanes_from_turns_backward'. OSM turn info doesn't include both-way lanes, therefore when available,
lanes_from_turns_forward == lanes:forward, lanes_from_turns_backward == 'turn:lanes:backward'.
18 cases were identified based on data availabilities and imputation method. Use 'lane_count_type' (1 to 18) to track.
Basic imputation logic:
- when lanes tags provide sufficient info, use it to impute
- when lanes tags lack sufficient info, use turn tags to fill in
- when lanes tags and turn tags produce inconsistent lane counts, use turn tags (typically more accurate)
- when combining lanes and turns tags still cannot impute, use roadway to impute
Returns the passed dataframe, with 4 additional columns; these may be -1 if unset
- lane_count_type = a code indicating what imputation rules were used
- forward_lanes = number of lanes in the forward direction
- backward_lanes = number of lanes in the backward direction (two-way links only)
- bothways_lanes = number of lanes in bothways direction (max 1) (two-way links only)
"""
# let's tally the permutation of numeric lane columns (for drive_access links only)
osmnx_lane_tag_permutations_df = pd.DataFrame(osmnx_shst_gdf.loc[ osmnx_shst_gdf.drive_access == True]. \
value_counts(subset=['osm_dir_tag','roadway','lanes','lanes:forward','lanes:backward','lanes:both_ways',
'lanes_from_turns_forward', 'lanes_from_turns_backward',
'forward_bus_lane','backward_bus_lane','forward_hov_lane'], dropna=False)).reset_index(drop=False)
osmnx_lane_tag_permutations_df.rename(columns={0:'lane_count_type_numrows'},inplace=True) # the count column is named 0 by default
# give it a new index and write it
# osmnx_lane_tag_permutations_df['lane_count_type'] = osmnx_lane_tag_permutations_df.index
WranglerLogger.debug('osmnx_lane_permutations_df:\n{}'.format(osmnx_lane_tag_permutations_df))
OUTPUT_FILE = os.path.join(OUTPUT_DIR, 'osmnx_lane_tag_permutations.csv')
osmnx_lane_tag_permutations_df.to_csv(OUTPUT_FILE, header=True, index=False)
WranglerLogger.debug('Wrote {}'.format(OUTPUT_FILE))
# # join to the geodataframe and write that
# osmnx_lane_tag_permutations_df['drive_access'] = True
# osmnx_shst_temp_gdf = pd.merge(
# left = osmnx_shst_gdf,
# right = osmnx_lane_tag_permutations_df,
# on = ['drive_access','osm_dir_tag','lanes','lanes:forward','lanes:backward','lanes:both_ways','forward_bus_lane','backward_bus_lane','forward_hov_lane'],
# how = 'left'
# )
# OUTPUT_FILE = os.path.join(OUTPUT_DIR, 'osmnx_shst_lane_tag_permutations.feather')
# osmnx_shst_temp_gdf.to_feather(OUTPUT_FILE)
# WranglerLogger.debug('Wrote {}'.format(OUTPUT_FILE))
# these are the new columns we'll be setting; initialize them now to be the right type. -1 mean unset
osmnx_shst_gdf['lane_count_type'] = np.int8(-1) # unset
osmnx_shst_gdf['forward_lanes'] = np.int8(-1) # total lanes, forward-only direction
osmnx_shst_gdf['backward_lanes'] = np.int8(-1) # total lanes, backward-only direction
osmnx_shst_gdf['bothways_lanes'] = np.int8(-1) # total lanes, both directions
WranglerLogger.info('Impute lanes of each direction for two-way links')
# a list to track lane counting based on osm lanes tags, turns tags, a mix of those tags, and others
lane_tags_counting = []
turn_tags_counting = []
lane_turn_tags_counting = []
roadway_tags_counting = []
# CASE 1: two-way, missing 'lanes', have either 'lanes:backward' or 'lanes:forward' or both; no 'lanes:both_way'
type1_idx = (osmnx_shst_gdf['osm_dir_tag'] == 2) & \
osmnx_shst_gdf.lanes.isnull() & \
((osmnx_shst_gdf['lanes:forward'].notnull()) | (osmnx_shst_gdf['lanes:backward'].notnull())) & \
(osmnx_shst_gdf['lanes:both_ways'].isnull())
WranglerLogger.debug(
'{:,} links of type1, by roadway categories:\n{}'.format(
type1_idx.sum(), osmnx_shst_gdf.loc[type1_idx==True].roadway.value_counts()))
# add tag
osmnx_shst_gdf.loc[type1_idx, 'lane_count_type'] = np.int8(1)
if type1_idx.sum() > 0:
# Impute: tot_lanes_forward = lanes:forward, tot_lanes_backward = lanes:backward
osmnx_shst_gdf.loc[type1_idx & (osmnx_shst_gdf['lanes:forward'].notnull()), 'forward_lanes' ] = osmnx_shst_gdf['lanes:forward']
osmnx_shst_gdf.loc[type1_idx & (osmnx_shst_gdf['lanes:backward'].notnull()), 'backward_lanes'] = osmnx_shst_gdf['lanes:backward']
# update bothways_lanes to 0
osmnx_shst_gdf.loc[type1_idx, 'bothways_lanes'] = np.int8(0)
# append to tracking list
lane_tags_counting.append(np.int8(1))
# CASE 2: two-way, missing 'lanes' but have either 'lanes:backward' or 'lanes:forward' or both; have 'lanes:both_way'
type2_idx = (osmnx_shst_gdf['osm_dir_tag'] == 2) & \
osmnx_shst_gdf['lanes'].isnull() & \
((osmnx_shst_gdf['lanes:forward'].notnull()) | (osmnx_shst_gdf['lanes:backward'].notnull())) & \
(osmnx_shst_gdf['lanes:both_ways'].notnull())
WranglerLogger.debug(
'{:,} links of type2, by roadway categories:\n{}'.format(
type2_idx.sum(), osmnx_shst_gdf.loc[type2_idx==True].roadway.value_counts()))
# add tag
osmnx_shst_gdf.loc[type2_idx, 'lane_count_type'] = np.int8(2)
if type2_idx.sum() > 0:
# Impute: tot_lanes_forward = lanes:forward, tot_lanes_backward = lanes:backward
osmnx_shst_gdf.loc[type2_idx & (osmnx_shst_gdf['lanes:forward'].notnull()), 'forward_lanes' ] = osmnx_shst_gdf['lanes:forward']
osmnx_shst_gdf.loc[type2_idx & (osmnx_shst_gdf['lanes:backward'].isnull()), 'backward_lanes'] = osmnx_shst_gdf['lanes:backward']
# update bothways_lanes to 1
osmnx_shst_gdf.loc[type2_idx, 'bothways_lanes'] = np.int8(1)
# append to tracking list
lane_tags_counting.append(np.int8(2))
# CASE 3: two-way, missing 'lanes', 'lanes:backward' and 'lanes:forward'; no 'lanes:both_way'
# have either 'lanes_from_turns_forward' or 'lanes_from_turns_backward' or both != -1
type3_idx = (osmnx_shst_gdf['osm_dir_tag'] == 2) & \
osmnx_shst_gdf['lanes'].isnull() & \
(osmnx_shst_gdf['lanes:forward'].isnull()) & \
(osmnx_shst_gdf['lanes:backward'].isnull()) & \
(osmnx_shst_gdf['lanes:both_ways'].isnull()) & \
((osmnx_shst_gdf['lanes_from_turns_forward'] != -1) | (osmnx_shst_gdf['lanes_from_turns_backward'] != -1))
WranglerLogger.debug(
'{:,} links of type3, by roadway categories:\n{}'.format(
type3_idx.sum(), osmnx_shst_gdf.loc[type3_idx==True].roadway.value_counts()))
# add tag
osmnx_shst_gdf.loc[type3_idx, 'lane_count_type'] = np.int8(3)
if type3_idx.sum() > 0:
# forward
osmnx_shst_gdf.loc[type3_idx & (osmnx_shst_gdf['lanes_from_turns_forward'] != -1), 'forward_lanes'] = osmnx_shst_gdf['lanes_from_turns_forward']
# backward
osmnx_shst_gdf.loc[type3_idx & (osmnx_shst_gdf['lanes_from_turns_backward'] != -1), 'backward_lanes'] = osmnx_shst_gdf['lanes_from_turns_backward']
# update bothways_lanes to 0
osmnx_shst_gdf.loc[type3_idx, 'bothways_lanes'] = np.int8(0)
# append to tracking list