From ee471a9ee68353ddeb8ae0c76aec7017292e666b Mon Sep 17 00:00:00 2001
From: Matt Hicks
Date: Tue, 12 Nov 2024 09:46:39 -0600
Subject: [PATCH] Lots of improvements to Geo support particularly around parsing GeoString and GeoJson

---
 build.sbt                                     |   2 +-
 core/src/main/scala/lightdb/spatial/Geo.scala |  72 ++++++++-
 .../main/scala/lightdb/spatial/Spatial.scala  |  49 +++++++
 .../lightdb/spatial/SpatialRelation.scala     |  10 ++
 core/src/test/scala/spec/GeoParsingSpec.scala | 138 ++++++++++++++++++
 .../scala/lightdb/lucene/LuceneStore.scala    |  18 ++-
 6 files changed, 273 insertions(+), 16 deletions(-)
 create mode 100644 core/src/main/scala/lightdb/spatial/SpatialRelation.scala
 create mode 100644 core/src/test/scala/spec/GeoParsingSpec.scala

diff --git a/build.sbt b/build.sbt
index 92708b1a..2c35c59b 100644
--- a/build.sbt
+++ b/build.sbt
@@ -15,7 +15,7 @@ val developerURL: String = "https://matthicks.com"
 
 name := projectName
 ThisBuild / organization := org
-ThisBuild / version := "0.15.0"
+ThisBuild / version := "0.16.0-SNAPSHOT"
 ThisBuild / scalaVersion := scala213
 ThisBuild / crossScalaVersions := allScalaVersions
 ThisBuild / scalacOptions ++= Seq("-unchecked", "-deprecation")
diff --git a/core/src/main/scala/lightdb/spatial/Geo.scala b/core/src/main/scala/lightdb/spatial/Geo.scala
index 27ebd582..66e59fe5 100644
--- a/core/src/main/scala/lightdb/spatial/Geo.scala
+++ b/core/src/main/scala/lightdb/spatial/Geo.scala
@@ -1,6 +1,7 @@
 package lightdb.spatial
 
 import fabric._
+import fabric.io.JsonFormatter
 import fabric.rw._
 
 sealed trait Geo {
@@ -8,19 +9,68 @@ sealed trait Geo {
 }
 
 object Geo {
-  implicit val pRW: RW[Point] = RW.gen[Point]
+  implicit lazy val pRW: RW[Point] = RW.gen[Point]
     .withPreWrite(_.merge(obj("type" -> "Point")))
     .withPostRead((_, json) => json.merge(obj("type" -> "Point")))
-  private implicit val mpRW: RW[MultiPoint] = RW.gen
-  private implicit val lsRW: RW[Line] = RW.gen
-  private implicit val mlsRW: RW[MultiLine] = RW.gen
-  private implicit val plyRW: RW[Polygon] = RW.gen
-  private implicit val mplyRW: RW[MultiPolygon] = RW.gen
+  private implicit lazy val mpRW: RW[MultiPoint] = RW.gen
+  private implicit lazy val lsRW: RW[Line] = RW.gen
+  private implicit lazy val mlsRW: RW[MultiLine] = RW.gen
+  private implicit lazy val plyRW: RW[Polygon] = RW.gen
+  private implicit lazy val mplyRW: RW[MultiPolygon] = RW.gen
 
   implicit val rw: RW[Geo] = RW.poly[Geo](className = Some("lightdb.spatial.Geo"))(
-    pRW, mpRW, lsRW, mlsRW, plyRW, mplyRW
+    pRW, mpRW, lsRW, mlsRW, plyRW, mplyRW //, RW.gen[GeometryCollection]
   )
 
+  private lazy val PointStringRegex = """POINT\((.+) (.+)\)""".r
+  private lazy val PolygonRegex = """POLYGON\(\((.+)\)\)""".r
+  private lazy val MultiPolygonRegex = """MULTIPOLYGON\(\(\((.+)\)\)\)""".r
+
+  def parseString(s: String): Geo = s match {
+    case PointStringRegex(lon, lat) => Geo.Point(
+      latitude = lat.toDouble, longitude = lon.toDouble
+    )
+    case PolygonRegex(p) => parsePolyString(p)
+    case MultiPolygonRegex(p) => MultiPolygon(p.split("""\)\),\(\(""").toList.map(parsePolyString))
+    case _ => throw new RuntimeException(s"Unsupported GeoString: $s")
+  }
+
+  private def parsePolyString(s: String): Geo.Polygon = Geo.Polygon(s.split(',').toList.map { p =>
+    val v = p.split(' ').map(_.trim)
+    Geo.Point(
+      latitude = v(1).toDouble, longitude = v(0).toDouble
+    )
+  })
+
+  def parse(json: Json): Geo = json("type").asString match {
+    case "Point" =>
+      val v = json("coordinates").asVector
+      Geo.Point(latitude = v(1).asDouble, longitude = v(0).asDouble)
+    case "LineString" => Line(
+      json("coordinates").asVector.toList.map { p =>
+        val v = p.asVector
+        Geo.Point(latitude = v(1).asDouble, longitude = v(0).asDouble)
+      }
+    )
+    case "Polygon" => Polygon(
+      json("coordinates").asVector.head.asVector.toList.map { p =>
+        val v = p.asVector
+        Geo.Point(latitude = v(1).asDouble, longitude = v(0).asDouble)
+      }
+    )
+    case "MultiPolygon" => MultiPolygon(
+      json("coordinates").asVector.toList.map { p =>
+        p.asVector.head.asVector.toList.map(_.asVector.toList).map { v =>
+          Geo.Point(latitude = v(1).asDouble, longitude = v(0).asDouble)
+        }
+      }.map(list => Polygon(list))
+    )
+    case "GeometryCollection" => GeometryCollection(
+      json("geometries").asVector.toList.map(parse)
+    ).normalized
+    case t => throw new RuntimeException(s"Unsupported GeoJson type $t:\n${JsonFormatter.Default(json)}")
+  }
+
   def min(points: List[Point]): Point = {
     val latitude = points.map(_.latitude).min
     val longitude = points.map(_.longitude).min
@@ -64,4 +114,12 @@ object Geo {
   case class MultiPolygon(polygons: List[Polygon]) extends Geo {
     lazy val center: Point = Geo.center(polygons.flatMap(_.points))
   }
+  case class GeometryCollection(geometries: List[Geo]) extends Geo {
+    lazy val center: Point = Geo.center(geometries.map(_.center))
+
+    lazy val normalized: Geo = geometries match {
+      case geo :: Nil => geo
+      case _ => this
+    }
+  }
 }
\ No newline at end of file
diff --git a/core/src/main/scala/lightdb/spatial/Spatial.scala b/core/src/main/scala/lightdb/spatial/Spatial.scala
index 3d76866b..79f1aaac 100644
--- a/core/src/main/scala/lightdb/spatial/Spatial.scala
+++ b/core/src/main/scala/lightdb/spatial/Spatial.scala
@@ -3,6 +3,9 @@ package lightdb.spatial
 import lightdb.distance._
 import org.locationtech.spatial4j.context.SpatialContext
 import org.locationtech.spatial4j.distance.DistanceUtils
+import org.locationtech.spatial4j.shape
+import org.locationtech.spatial4j.shape.Shape
+import org.locationtech.spatial4j.shape.ShapeFactory.{LineStringBuilder, PolygonBuilder}
 
 object Spatial {
   private lazy val context = SpatialContext.GEO
@@ -14,4 +17,50 @@ object Spatial {
     val distance = DistanceUtils.degrees2Dist(degrees, DistanceUtils.EARTH_MEAN_RADIUS_KM)
     distance.kilometers
   }
+
+  private def line2Builder(line: Geo.Line): LineStringBuilder =
+    line.points.foldLeft(context.getShapeFactory.lineString())((b, p) =>
+      b.pointLatLon(p.latitude, p.longitude)
+    )
+
+  private def polygon2Builder(polygon: Geo.Polygon): PolygonBuilder =
+    polygon.points.foldLeft(context.getShapeFactory.polygon())((b, p) =>
+      b.pointLatLon(p.latitude, p.longitude)
+    )
+
+  /**
+   * Converts a [[Geo]] into its spatial4j [[Shape]] equivalent. Every subtype of the sealed
+   * `Geo` trait is handled, so `relation` and `overlap` cannot fail with a `MatchError`.
+   */
+  private def toShape(g: Geo): Shape = g match {
+    case Geo.Point(lat, lon) => context.getShapeFactory.pointLatLon(lat, lon)
+    case Geo.MultiPoint(points) => points.foldLeft(context.getShapeFactory.multiPoint())((b, p) =>
+      b.pointLatLon(p.latitude, p.longitude)
+    ).build()
+    case line: Geo.Line => line2Builder(line).build()
+    case Geo.MultiLine(lines) => lines.foldLeft(context.getShapeFactory.multiLineString())((b, l) =>
+      b.add(line2Builder(l))
+    ).build()
+    case polygon: Geo.Polygon => polygon2Builder(polygon).build()
+    case Geo.MultiPolygon(polygons) => polygons.foldLeft(context.getShapeFactory.multiPolygon())((b, p) =>
+      b.add(polygon2Builder(p))
+    ).build()
+    // A collection becomes a multi-shape assembled from the shape of each member (recursively).
+    case Geo.GeometryCollection(geometries) => geometries.foldLeft(context.getShapeFactory.multiShape(classOf[Shape]))((b, geo) =>
+      b.add(toShape(geo))
+    ).build()
+  }
+
+  def relation(g1: Geo, g2: Geo): SpatialRelation = {
+    val s1 = toShape(g1)
+    val s2 = toShape(g2)
+    s1.relate(s2) match {
+      case shape.SpatialRelation.WITHIN => SpatialRelation.Within
+      case shape.SpatialRelation.CONTAINS => SpatialRelation.Contains
+      case shape.SpatialRelation.INTERSECTS => SpatialRelation.Intersects
+      case shape.SpatialRelation.DISJOINT => SpatialRelation.Disjoint
+    }
+  }
+
+  def overlap(g1: Geo, g2: Geo): Boolean = relation(g1, g2) != SpatialRelation.Disjoint
 }
diff --git a/core/src/main/scala/lightdb/spatial/SpatialRelation.scala b/core/src/main/scala/lightdb/spatial/SpatialRelation.scala
new file mode 100644
index 00000000..bdfdceac
--- /dev/null
+++ b/core/src/main/scala/lightdb/spatial/SpatialRelation.scala
@@ -0,0 +1,10 @@
+package lightdb.spatial
+
+sealed trait SpatialRelation
+
+object SpatialRelation {
+  case object Within extends SpatialRelation
+  case object Contains extends SpatialRelation
+  case object Intersects extends SpatialRelation
+  case object Disjoint extends SpatialRelation
+}
\ No newline at end of file
diff --git a/core/src/test/scala/spec/GeoParsingSpec.scala b/core/src/test/scala/spec/GeoParsingSpec.scala
new file mode 100644 index 00000000..4594eaba --- /dev/null +++ b/core/src/test/scala/spec/GeoParsingSpec.scala @@ -0,0 +1,138 @@ +package spec + +import fabric.io.JsonParser +import lightdb.spatial.Geo +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +@EmbeddedTest +class GeoParsingSpec extends AnyWordSpec with Matchers { + "Geo Parsing" should { + "parse a String point" in { + val geo = Geo.parseString("POINT(-103.793467263 32.331700182)") + geo should be(Geo.Point( + latitude = 32.331700182, + longitude = -103.793467263 + )) + } + "parse a JSON point" in { + val json = JsonParser("""{"crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::4269"}}, "type": "Point", "coordinates": [-103.793467263, 32.331700182]}""") + val geo = Geo.parse(json) + geo should be(Geo.Point( + latitude = 32.331700182, + longitude = -103.793467263 + )) + } + "parse a String polygon" in { + val geo = Geo.parseString("""POLYGON((-104.260036453 32.598867934,-104.242881455 32.598815009,-104.225751274 32.598860916,-104.225734354 32.602483742,-104.242847836 32.602447649,-104.260003704 32.602492467,-104.260003848 32.602476407,-104.260036453 32.598867934))""") + geo should be(Geo.Polygon(List( + Geo.Point(longitude = -104.260036453, latitude = 32.598867934), + Geo.Point(longitude = -104.242881455, latitude = 32.598815009), + Geo.Point(longitude = -104.225751274, latitude = 32.598860916), + Geo.Point(longitude = -104.225734354, latitude = 32.602483742), + Geo.Point(longitude = -104.242847836, latitude = 32.602447649), + Geo.Point(longitude = -104.260003704, latitude = 32.602492467), + Geo.Point(longitude = -104.260003848, latitude = 32.602476407), + Geo.Point(longitude = -104.260036453, latitude = 32.598867934) + ))) + } + "parse a JSON polygon" in { + val json = JsonParser("""{"crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::4269"}}, "type": "GeometryCollection", "geometries": [{"type": "Polygon", "coordinates": 
[[[-104.260036453, 32.598867934], [-104.242881455, 32.598815009], [-104.225751274, 32.598860916], [-104.225734354, 32.602483742], [-104.242847836, 32.602447649], [-104.260003704, 32.602492467], [-104.260003848, 32.602476407], [-104.260036453, 32.598867934]]]}]}""") + val geo = Geo.parse(json) + geo should be(Geo.Polygon(List( + Geo.Point(longitude = -104.260036453, latitude = 32.598867934), + Geo.Point(longitude = -104.242881455, latitude = 32.598815009), + Geo.Point(longitude = -104.225751274, latitude = 32.598860916), + Geo.Point(longitude = -104.225734354, latitude = 32.602483742), + Geo.Point(longitude = -104.242847836, latitude = 32.602447649), + Geo.Point(longitude = -104.260003704, latitude = 32.602492467), + Geo.Point(longitude = -104.260003848, latitude = 32.602476407), + Geo.Point(longitude = -104.260036453, latitude = 32.598867934) + ))) + } + "parse a String multipolygon" in { + val geo = Geo.parseString("""MULTIPOLYGON(((-103.894776713 32.000149899,-103.890489841 32.000150665,-103.890490162 32.002410672,-103.894776629 32.002407742,-103.894776713 32.000149899)),((-103.894818052 32.020661933,-103.894807646 32.017011035,-103.894797235 32.013359954,-103.894786827 32.009709053,-103.894776598 32.006058151,-103.894776611 32.004581088,-103.894776574 32.003104024,-103.890490727 32.003106436,-103.890490669 32.006061933,-103.890527994 32.020667698,-103.894818052 32.020661933)))""") + geo should be(Geo.MultiPolygon(List( + Geo.Polygon(List( + Geo.Point(longitude = -103.894776713, latitude = 32.000149899), + Geo.Point(longitude = -103.890489841, latitude = 32.000150665), + Geo.Point(longitude = -103.890490162, latitude = 32.002410672), + Geo.Point(longitude = -103.894776629, latitude = 32.002407742), + Geo.Point(longitude = -103.894776713, latitude = 32.000149899) + )), + Geo.Polygon(List( + Geo.Point(longitude = -103.894818052, latitude = 32.020661933), + Geo.Point(longitude = -103.894807646, latitude = 32.017011035), + Geo.Point(longitude = -103.894797235, 
latitude = 32.013359954), + Geo.Point(longitude = -103.894786827, latitude = 32.009709053), + Geo.Point(longitude = -103.894776598, latitude = 32.006058151), + Geo.Point(longitude = -103.894776611, latitude = 32.004581088), + Geo.Point(longitude = -103.894776574, latitude = 32.003104024), + Geo.Point(longitude = -103.890490727, latitude = 32.003106436), + Geo.Point(longitude = -103.890490669, latitude = 32.006061933), + Geo.Point(longitude = -103.890527994, latitude = 32.020667698), + Geo.Point(longitude = -103.894818052, latitude = 32.020661933) + )) + ))) + } + "parse a JSON multipolygon" in { + val json = JsonParser("""{"crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::4269"}}, "type": "GeometryCollection", "geometries": [{"type": "MultiPolygon", "coordinates": [[[[-103.894776713, 32.000149899], [-103.890489841, 32.000150665], [-103.890490162, 32.002410672], [-103.894776629, 32.002407742], [-103.894776713, 32.000149899]]], [[[-103.894818052, 32.020661933], [-103.894807646, 32.017011035], [-103.894797235, 32.013359954], [-103.894786827, 32.009709053], [-103.894776598, 32.006058151], [-103.894776611, 32.004581088], [-103.894776574, 32.003104024], [-103.890490727, 32.003106436], [-103.890490669, 32.006061933], [-103.890527994, 32.020667698], [-103.894818052, 32.020661933]]]]}]}""") + val geo = Geo.parse(json) + geo should be(Geo.MultiPolygon(List( + Geo.Polygon(List( + Geo.Point(longitude = -103.894776713, latitude = 32.000149899), + Geo.Point(longitude = -103.890489841, latitude = 32.000150665), + Geo.Point(longitude = -103.890490162, latitude = 32.002410672), + Geo.Point(longitude = -103.894776629, latitude = 32.002407742), + Geo.Point(longitude = -103.894776713, latitude = 32.000149899) + )), + Geo.Polygon(List( + Geo.Point(longitude = -103.894818052, latitude = 32.020661933), + Geo.Point(longitude = -103.894807646, latitude = 32.017011035), + Geo.Point(longitude = -103.894797235, latitude = 32.013359954), + Geo.Point(longitude = 
-103.894786827, latitude = 32.009709053), + Geo.Point(longitude = -103.894776598, latitude = 32.006058151), + Geo.Point(longitude = -103.894776611, latitude = 32.004581088), + Geo.Point(longitude = -103.894776574, latitude = 32.003104024), + Geo.Point(longitude = -103.890490727, latitude = 32.003106436), + Geo.Point(longitude = -103.890490669, latitude = 32.006061933), + Geo.Point(longitude = -103.890527994, latitude = 32.020667698), + Geo.Point(longitude = -103.894818052, latitude = 32.020661933) + )) + ))) + } + "parse a multi-level geometry collection" in { + val json = JsonParser("""{"crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::4269"}}, "type": "GeometryCollection", "geometries": [{"type": "GeometryCollection", "geometries": [{"type": "Polygon", "coordinates": [[[-103.520208844, 33.519270954], [-103.520208848, 33.519270999], [-103.52011766, 33.526514651], [-103.520117659, 33.526514651], [-103.519644278, 33.540956508], [-103.519644272, 33.540956552], [-103.519644229, 33.540956557], [-103.510915724, 33.541056059], [-103.510915678, 33.541056055], [-103.510915675, 33.54105601], [-103.511152616, 33.533861856], [-103.51115433, 33.533809829], [-103.511271087, 33.530264804], [-103.511389559, 33.526667664], [-103.511389559, 33.526667642], [-103.511472584, 33.519463527], [-103.511472999, 33.519427506], [-103.511473004, 33.519427455], [-103.511473049, 33.51942745], [-103.520208798, 33.51927095], [-103.520208844, 33.519270954]]]}, {"type": "LineString", "coordinates": [[-103.52011766, 33.526514651], [-103.520117662, 33.526514651]]}]}]}""") + val geo = Geo.parse(json) + geo should be(Geo.GeometryCollection(List( + Geo.Polygon(List( + Geo.Point(33.519270954, -103.520208844), + Geo.Point(33.519270999, -103.520208848), + Geo.Point(33.526514651, -103.52011766), + Geo.Point(33.526514651, -103.520117659), + Geo.Point(33.540956508, -103.519644278), + Geo.Point(33.540956552, -103.519644272), + Geo.Point(33.540956557, -103.519644229), + 
Geo.Point(33.541056059, -103.510915724), + Geo.Point(33.541056055, -103.510915678), + Geo.Point(33.54105601, -103.510915675), + Geo.Point(33.533861856, -103.511152616), + Geo.Point(33.533809829, -103.51115433), + Geo.Point(33.530264804, -103.511271087), + Geo.Point(33.526667664, -103.511389559), + Geo.Point(33.526667642, -103.511389559), + Geo.Point(33.519463527, -103.511472584), + Geo.Point(33.519427506, -103.511472999), + Geo.Point(33.519427455, -103.511473004), + Geo.Point(33.51942745, -103.511473049), + Geo.Point(33.51927095, -103.520208798), + Geo.Point(33.519270954, -103.520208844) + )), + Geo.Line(List( + Geo.Point(33.526514651, -103.52011766), + Geo.Point(33.526514651, -103.520117662) + )) + ))) + } + } +} diff --git a/lucene/src/main/scala/lightdb/lucene/LuceneStore.scala b/lucene/src/main/scala/lightdb/lucene/LuceneStore.scala index bfc3a81c..4c118e4b 100644 --- a/lucene/src/main/scala/lightdb/lucene/LuceneStore.scala +++ b/lucene/src/main/scala/lightdb/lucene/LuceneStore.scala @@ -106,19 +106,21 @@ class LuceneStore[Doc <: Document[Doc], Model <: DocumentModel[Doc]](directory: val polygon = convert(p) LatLonShape.createIndexableFields(field.name, polygon) } + def indexGeo(geo: Geo): Unit = geo match { + case p: Geo.Point => indexPoint(p) + case Geo.MultiPoint(points) => points.foreach(indexPoint) + case l: Geo.Line => indexLine(l) + case Geo.MultiLine(lines) => lines.foreach(indexLine) + case p: Geo.Polygon => indexPolygon(p) + case Geo.MultiPolygon(polygons) => polygons.foreach(indexPolygon) + case Geo.GeometryCollection(geometries) => geometries.foreach(indexGeo) + } val list = json match { case Arr(value, _) => value.toList.map(_.as[Geo]) case _ => List(json.as[Geo]) } list.foreach { geo => - geo match { - case p: Geo.Point => indexPoint(p) - case Geo.MultiPoint(points) => points.foreach(indexPoint) - case l: Geo.Line => indexLine(l) - case Geo.MultiLine(lines) => lines.foreach(indexLine) - case p: Geo.Polygon => indexPolygon(p) - case 
Geo.MultiPolygon(polygons) => polygons.foreach(indexPolygon) - } + indexGeo(geo) add(new LatLonPoint(field.name, geo.center.latitude, geo.center.longitude)) } if (list.isEmpty) {