Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Historic intersections #185

Merged
merged 13 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/e2etest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ jobs:
- name: Generate intersections
run: |
export PYTHONPATH=.
poetry run python oldnyc/geocode/historic_grid.py
poetry run python oldnyc/geocode/osm/generate_intersections.py
- name: Generate truth data
run: |
Expand Down
1,637 changes: 1,637 additions & 0 deletions data/historic-intersections.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion data/lat-lon-to-ids.json

Large diffs are not rendered by default.

839 changes: 839 additions & 0 deletions data/originals/nyc-streets.geojson

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions data/self-hosted-sizes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
2040741,614,760
2040746,760,595
2040755,760,561
2040756,760,556
2040759,760,567
2040760,760,553
2040762,760,570
Expand Down Expand Up @@ -142,10 +143,17 @@
2040871,760,617
2040872,760,617
2040873,760,618
2040883,760,670
2040884,760,679
2040885,760,675
2040886,760,669
2040893,760,675
2040894,760,671
3984206,760,545
3984207,760,544
3984209,760,546
3984214,544,760
3984215,760,546
3984216,546,760
3984217,760,550
3984220,760,546
Expand All @@ -171,6 +179,7 @@
3984250,546,760
3984251,760,543
3984252,760,544
3984253,546,760
3984254,546,760
3984255,760,546
3984256,760,549
Expand All @@ -190,14 +199,20 @@
3984301,760,546
3984302,760,546
3984305,760,544
3984311,760,546
3984314,760,546
3984318,760,546
3984322,760,546
3984323,760,546
3984328,760,541
3984329,760,549
3984330,760,544
3984331,760,547
3984332,546,760
3984333,760,546
3984334,760,547
3984336,546,760
3984337,760,548
3984351,760,546
3984352,760,546
3984354,760,546
Expand Down Expand Up @@ -229,6 +244,7 @@
3984426,546,760
3984436,760,546
3984438,546,760
3984439,546,760
3984440,546,760
3984443,546,760
3984445,760,546
Expand Down Expand Up @@ -266,6 +282,7 @@
3984511,549,760
3984514,543,760
3984515,760,549
3984517,549,760
3984519,760,549
3984520,760,545
3984521,760,549
Expand Down Expand Up @@ -364,6 +381,7 @@
3984791,545,760
3984793,545,760
3984794,549,760
3984800,549,760
3984807,541,760
3984812,546,760
3984816,548,760
Expand Down Expand Up @@ -786,15 +804,20 @@
485729,479,760
485730,476,760
485731,482,760
485732,505,760
485735,760,509
485737,510,760
485738,760,506
485739,760,509
485740,504,760
485741,504,760
485742,481,760
485744,511,760
485745,509,760
485746,509,760
485747,760,512
485750,760,508
485751,760,508
485752,509,760
485753,760,484
485754,510,760
Expand Down Expand Up @@ -1114,6 +1137,7 @@
703511f,760,489
703517f,760,489
703518f,485,760
703519f,760,490
703520f,489,760
703524f,760,491
703525f,760,489
Expand Down
8 changes: 6 additions & 2 deletions oldnyc/geocode/grid.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python

import csv
import itertools
import re
import statistics
import sys
Expand Down Expand Up @@ -99,10 +100,13 @@ def strip_ave(street: str) -> str:
).strip()


def load_all_intersections():
def load_all_intersections(exclude_historic=False):
# Completely unambiguous intersections
ints = dict[Intersection, Point]()
for row in csv.DictReader(open("data/nyc-intersections.csv")):
inputs = csv.DictReader(open("data/nyc-intersections.csv"))
if not exclude_historic:
inputs = itertools.chain(inputs, csv.DictReader(open("data/historic-intersections.csv")))
for row in inputs:
# Street1,Street2,Borough,Lat,Lon,Nodes
str1 = row["Street1"]
str2 = row["Street2"]
Expand Down
16 changes: 12 additions & 4 deletions oldnyc/geocode/grid_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
def assert_close(ll1: tuple[float, float] | None, ll2: tuple[float, float]):
"""Assert that two latitude & longitude tuples are "close"."""
assert ll1 is not None
assert ll1[0] == pytest.approx(ll2[0], 1e-6)
assert ll1[1] == pytest.approx(ll2[1], 1e-6)
assert ll1 == pytest.approx(ll2, 1e-6)


def test_exact():
Expand Down Expand Up @@ -73,9 +72,9 @@ def test_parse_street_ave():
def test_geocode_broadway():
g = Grid()
# this is an exact intersection
assert_close(g.geocode_intersection("Broadway", "59th Street", M), (40.767696, -73.981679))
assert_close(g.geocode_intersection("Broadway", "59th Street", M), (40.76801, -73.98194))
# these are interpolations
assert_close(g.geocode_intersection("Broadway", "24th Street", M), (40.7422035, -73.989151))
assert_close(g.geocode_intersection("Broadway", "24th Street", M), (40.74229, -73.98919))
assert_close(g.geocode_intersection("Broadway", "124th St", M), (40.8140625, -73.959421))
# this isn't great, but probably not too far off the intended location.
assert_close(g.geocode_intersection("Broadway", "127th St.", M), (40.816311, -73.957802))
Expand Down Expand Up @@ -175,3 +174,12 @@ def test_preserved_streets():
def test_cursed_intersection():
g = Grid()
assert g.geocode_intersection("Broadway", "Amsterdam Ave", "Manhattan") is None


def test_geocode_historic():
# Historic intersections from nyc-streets.geojson
g = Grid()
assert_close(
g.geocode_intersection("Broome Street", "Tompkins Street", "Manhattan"),
(40.71418, -73.97697),
)
115 changes: 115 additions & 0 deletions oldnyc/geocode/historic_grid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""Construct historic intersections from nyc-streets.geojson"""

import csv
import itertools
import sys
from collections import defaultdict

import pygeojson
import shapely
from haversine import haversine
from tqdm import tqdm

from oldnyc.geocode.geocode_types import Point
from oldnyc.geocode.grid import (
Intersection,
load_all_intersections,
normalize_street_for_osm,
strip_dir,
)


def geom_to_shape(geom: pygeojson.GeometryObject | None):
if geom is None:
raise ValueError("Expected geometry, got None")
if isinstance(geom, pygeojson.LineString):
return shapely.geometry.LineString(geom.coordinates)
elif isinstance(geom, pygeojson.MultiLineString):
return shapely.geometry.MultiLineString(geom.coordinates)
else:
raise ValueError(f"Unexpected geometry type: {geom.type}")


def bounds_diam_m(g: shapely.geometry.base.BaseGeometry) -> float:
lng1, lat1, lng2, lat2 = g.bounds
return 1000 * haversine((lat1, lng1), (lat2, lng2))


def centroid_if_small(g: shapely.geometry.base.BaseGeometry) -> shapely.geometry.Point | None:
d_m = bounds_diam_m(g)
if d_m < 60:
return g.centroid
return None


def main():
n_ambig, n_match_current, n_match_stripped = 0, 0, 0
streets = pygeojson.load_feature_collection(open("data/originals/nyc-streets.geojson")).features
geoms = [geom_to_shape(f.geometry) for f in streets]

current_ixs, current_stripped_ixs = load_all_intersections(exclude_historic=True)

ix_to_pt = defaultdict[Intersection, list[Point]](list)
for i, j in tqdm(itertools.combinations(range(len(geoms)), 2)):
name1 = normalize_street_for_osm(streets[i].properties["name"])
name2 = normalize_street_for_osm(streets[j].properties["name"])
if name1 == name2 or strip_dir(name1) == strip_dir(name2):
continue
g1, g2 = geoms[i], geoms[j]
if not g1.intersects(g2):
continue
pt = g1.intersection(g2)
full_ix = pt
if not isinstance(pt, shapely.geometry.Point):
pt = centroid_if_small(pt)
if not pt:
sys.stderr.write(
f'Multiple intersections between "{name1}" ({i}) and "{name2}" ({j}): {full_ix}: {bounds_diam_m(full_ix):.2f}m\n'
)
n_ambig += 1
continue

boro1 = streets[i].properties["data"]["borough"]
boro2 = streets[i].properties["data"]["borough"]
assert boro1 == boro2, f"{boro1} != {boro2}"

ix = Intersection(name1, name2, boro1)
strip_ix = Intersection(strip_dir(name1), strip_dir(name2), boro1)

if ix in current_ixs:
n_match_current += 1
continue

if strip_ix in current_stripped_ixs:
n_match_stripped += 1
continue

ix_to_pt[ix].append((pt.y, pt.x))

out = csv.DictWriter(
open("data/historic-intersections.csv", "w"),
fieldnames=["Street1", "Street2", "Borough", "Lat", "Lon"],
)
out.writeheader()
n_ambig_dedupe = 0
for ix, pts in ix_to_pt.items():
mp = shapely.geometry.MultiPoint([(x, y) for (y, x) in pts])
pt = centroid_if_small(mp)
if not pt:
sys.stderr.write(f"Skipping {ix} with {len(pts)} points: {mp} / {bounds_diam_m(mp)}m\n")
n_ambig_dedupe += 1
continue
out.writerow(
{
"Street1": ix.str1,
"Street2": ix.str2,
"Borough": ix.boro,
"Lat": "%.6f" % pt.y,
"Lon": "%.6f" % pt.x,
}
)
sys.stderr.write(f"{n_ambig=}, {n_match_current=}, {n_match_stripped=}, {n_ambig_dedupe=}\n")


if __name__ == "__main__":
main()
60 changes: 59 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ imagehash = "^4.3.1"
pygeojson = "^0.2.0"
natsort = "^8.4.0"
word2number = "^1.1"
shapely = "^2.0.6"

[tool.ruff]
line-length = 100
Expand Down
4 changes: 2 additions & 2 deletions test/geocode-performance.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

Results:
Geocodes
229 / 269 = 85.13% of locatable images correctly located.
12 / 241 = 4.98% incorrectly located.
232 / 269 = 86.25% of locatable images correctly located.
10 / 242 = 4.13% incorrectly located.

Loading