Much better distance support for sort and filtering.
darkfrog26 committed May 25, 2024
1 parent 1ca51e2 commit ef462cc
Showing 10 changed files with 213 additions and 107 deletions.
104 changes: 67 additions & 37 deletions all/src/test/scala/spec/SimpleHaloAndLuceneSpec.scala
@@ -14,12 +14,14 @@ import lightdb.util.DistanceCalculator
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AsyncWordSpec
import scribe.{Level, Logger}
import squants.space.LengthConversions.LengthConversions

import java.nio.file.{Path, Paths}

class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matchers {
private val id1 = Id[Person]("john")
private val id2 = Id[Person]("jane")
private val id3 = Id[Person]("bob")

private val newYorkCity = GeoPoint(40.7142, -74.0119)
private val chicago = GeoPoint(41.8119, -87.6873)
@@ -42,6 +44,13 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
point = noble,
_id = id2
)
private val p3 = Person(
name = "Bob Dole",
age = 123,
tags = Set("dog", "monkey"),
point = yonkers,
_id = id3
)

"Simple database" should {
"initialize the database" in {
@@ -58,7 +67,7 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
o should be(Some(p1))
}
}
"storage Jane Doe" in {
"store Jane Doe" in {
Person.set(p2).map { p =>
p._id should be(id2)
}
@@ -73,17 +82,32 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
size should be(2)
}
}
"store Bob Dole" in {
Person.set(p3).map { p =>
p._id should be(id3)
}
}
"verify Bob Dole exists" in {
Person.get(id3).map { o =>
o should be(Some(p3))
}
}
"verify exactly three objects in data" in {
Person.size.map { size =>
size should be(3)
}
}
"flush data" in {
Person.commit()
}
"verify exactly two objects in index" in {
"verify exactly three objects in index" in {
Person.index.count().map { size =>
size should be(2)
size should be(3)
}
}
"verify exactly two objects in the store" in {
"verify exactly three objects in the store" in {
Person.idStream.compile.toList.map { ids =>
ids.toSet should be(Set(id1, id2))
ids.toSet should be(Set(id1, id2, id3))
}
}
"search by name for positive result" in {
@@ -145,84 +169,92 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
}
"search by tag" in {
Person.query.filter(Person.tag === "dog").toList.map { people =>
people.map(_.name) should be(List("John Doe"))
people.map(_.name) should be(List("John Doe", "Bob Dole"))
}
}
"do paginated search" in {
Person.withSearchContext { implicit context =>
Person.query.pageSize(1).countTotal(true).search().flatMap { page1 =>
page1.page should be(0)
page1.pages should be(2)
page1.pages should be(3)
page1.hasNext should be(true)
page1.docs.flatMap { people1 =>
people1.length should be(1)
page1.next().flatMap {
case Some(page2) =>
page2.page should be(1)
page2.pages should be(2)
page2.hasNext should be(false)
page2.docs.map { people2 =>
page2.pages should be(3)
page2.hasNext should be(true)
page2.docs.flatMap { people2 =>
people2.length should be(1)
page2.next().flatMap {
case Some(page3) =>
page3.page should be(2)
page3.pages should be(3)
page3.hasNext should be(false)
page3.docs.map { people3 =>
people3.length should be(1)
}
case None => fail("Should have a third page")
}
}
case None => fail("Should have a second page")
}
}
}
}
}
"do paginated search as a stream" in {
"do paginated search as a stream converting to name" in {
Person.withSearchContext { implicit context =>
Person.query.pageSize(1).countTotal(true).stream.compile.toList.map { people =>
people.length should be(2)
people.map(_.name).toSet should be(Set("John Doe", "Jane Doe"))
Person.query.convert(p => IO.pure(p.name)).pageSize(1).countTotal(true).stream.compile.toList.map { names =>
names.length should be(3)
names.toSet should be(Set("John Doe", "Jane Doe", "Bob Dole"))
}
}
}
"sort by age" in {
Person.query.sort(Sort.ByField(Person.age)).toList.map { people =>
people.map(_.name) should be(List("Jane Doe", "John Doe"))
people.map(_.name) should be(List("Jane Doe", "John Doe", "Bob Dole"))
}
}
"sort by distance from Oklahoma City" in {
Person.query
.scoreDocs(true)
.sort(Sort.ByDistance(Person.point, oklahomaCity))
.scored
.distance(
field = Person.point,
from = oklahomaCity,
radius = Some(1320.miles)
)
.toList
.map { peopleAndScores =>
val people = peopleAndScores.map(_._1)
val scores = peopleAndScores.map(_._2)
.map { peopleAndDistances =>
val people = peopleAndDistances.map(_.doc)
val distances = peopleAndDistances.map(_.distance)
people.map(_.name) should be(List("Jane Doe", "John Doe"))
scores should be(List(1.0, 1.0))
val calculated = people.map(p => DistanceCalculator(oklahomaCity, p.point).toUsMiles)
calculated should be(List(28.555228128634383, 1316.1223938032729))
distances should be(List(28.555228128634383.miles, 1316.1223938032729.miles))
}
}
"delete John" in {
Person.delete(id1).map { deleted =>
deleted should be(id1)
}
}
"verify exactly one object in data" in {
"verify exactly two objects in data again" in {
Person.size.map { size =>
size should be(1)
size should be(2)
}
}
"commit data" in {
Person.commit()
}
"verify exactly one object in index" in {
"verify exactly two objects in index again" in {
Person.index.count().map { size =>
size should be(1)
size should be(2)
}
}
"list all documents" in {
Person.stream.compile.toList.map { people =>
people.length should be(1)
val p = people.head
p._id should be(id2)
p.name should be("Jane Doe")
p.age should be(19)
people.length should be(2)
people.map(_._id).toSet should be(Set(id2, id3))
}
}
"replace Jane Doe" in {
@@ -242,11 +274,9 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
}
"list new documents" in {
Person.stream.compile.toList.map { results =>
results.length should be(1)
val doc = results.head
doc._id should be(id2)
doc.name should be("Jan Doe")
doc.age should be(20)
results.length should be(2)
results.map(_.name).toSet should be(Set("Jan Doe", "Bob Dole"))
results.map(_.age).toSet should be(Set(20, 123))
}
}
"verify start time has been set" in {
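Taken together, the spec changes above exercise the new distance API end to end: the old scoreDocs/Sort.ByDistance/scored combination gives way to a single distance(field, from, radius) call whose results pair each document with its distance, nearest first. A minimal usage sketch, assuming a Person collection indexed as in this test; the Oklahoma City coordinates below are approximate and purely illustrative:

    import cats.effect.IO
    import lightdb.spatial.GeoPoint
    import squants.space.LengthConversions.LengthConversions

    // Approximate coordinates, for illustration only.
    val oklahomaCity = GeoPoint(35.4676, -97.5164)

    val report: IO[Unit] = Person.query
      .distance(
        field = Person.point,       // the indexed GeoPoint field
        from = oklahomaCity,        // origin the distance is measured from
        radius = Some(1320.miles)   // optional: drop anything farther away
      )
      .toList
      .map { results =>
        // Each result is a DistanceAndDoc pairing a document with its distance.
        results.foreach { dd =>
          println(s"${dd.doc.name}: ${dd.distance.toUsMiles} miles")
        }
      }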
2 changes: 1 addition & 1 deletion build.sbt
@@ -15,7 +15,7 @@ val developerURL: String = "https://matthicks.com"

name := projectName
ThisBuild / organization := org
ThisBuild / version := "0.6.1-SNAPSHOT"
ThisBuild / version := "0.7.0-SNAPSHOT"
ThisBuild / scalaVersion := scala213
ThisBuild / crossScalaVersions := allScalaVersions
ThisBuild / scalacOptions ++= Seq("-unchecked", "-deprecation")
18 changes: 12 additions & 6 deletions core/src/main/scala/lightdb/index/IndexSupport.scala
@@ -2,17 +2,20 @@ package lightdb.index

import cats.effect.IO
import lightdb.model.{AbstractCollection, Collection, DocumentAction, DocumentListener, DocumentModel}
import lightdb.query.{PagedResults, Query, SearchContext}
import lightdb.query.{Filter, PagedResults, Query, SearchContext}
import lightdb.Document
import lightdb.spatial.GeoPoint
import squants.space.Length

trait IndexSupport[D <: Document[D]] extends DocumentModel[D] {
private var _collection: Option[AbstractCollection[D]] = None

protected def collection: AbstractCollection[D] = this match {
case c: AbstractCollection[_] => c.asInstanceOf[AbstractCollection[D]]
case _ => _collection.getOrElse(throw new RuntimeException("DocumentModel not initialized with Collection (yet)"))
}

def query: Query[D] = Query(this, collection)
def query: Query[D, D] = Query(this, collection, doc => IO.pure(doc))

override protected[lightdb] def initModel(collection: AbstractCollection[D]): Unit = {
super.initModel(collection)
@@ -26,14 +29,17 @@ trait IndexSupport[D <: Document[D]] extends DocumentModel[D] {
})
}

def distanceFilter(field: IndexedField[GeoPoint, D], from: GeoPoint, radius: Length): Filter[D] =
throw new UnsupportedOperationException("Distance filtering is not supported on this indexer")

def index: Indexer[D]

def withSearchContext[Return](f: SearchContext[D] => IO[Return]): IO[Return] = index.withSearchContext(f)

def doSearch(query: Query[D],
context: SearchContext[D],
offset: Int,
after: Option[PagedResults[D]]): IO[PagedResults[D]]
def doSearch[V](query: Query[D, V],
context: SearchContext[D],
offset: Int,
after: Option[PagedResults[D, V]]): IO[PagedResults[D, V]]

protected def indexDoc(doc: D, fields: List[IndexedField[_, D]]): IO[Unit]
}
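The base trait deliberately fails fast: an indexer that cannot evaluate spatial predicates inherits the throwing default, while one that can opts in by overriding distanceFilter. A hedged sketch of what an opting-in indexer might look like; the trait, field, and filter types come from the diff above, but the subclass itself and the assumption that IndexedField lives in lightdb.index are ours:

    import lightdb.Document
    import lightdb.index.{IndexSupport, IndexedField}
    import lightdb.query.Filter
    import lightdb.spatial.GeoPoint
    import squants.space.Length

    // Hypothetical: a backend that supports geo-distance predicates.
    trait SpatialIndexSupport[D <: Document[D]] extends IndexSupport[D] {
      override def distanceFilter(field: IndexedField[GeoPoint, D],
                                  from: GeoPoint,
                                  radius: Length): Filter[D] =
        // Delegate to the underlying engine's geo-distance query here;
        // left unimplemented in this sketch.
        ???
    }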
6 changes: 6 additions & 0 deletions core/src/main/scala/lightdb/query/DistanceAndDoc.scala
@@ -0,0 +1,6 @@
package lightdb.query

import lightdb.Document
import squants.space.Length

case class DistanceAndDoc[D <: Document[D]](doc: D, distance: Length)
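DistanceAndDoc is just a unit-aware pairing: the distance is a squants Length rather than a raw Double, so callers convert and compare without tracking conversion factors themselves. A small illustration, assuming person is a Person document as in the spec:

    import squants.space.LengthConversions.LengthConversions

    val dd = DistanceAndDoc(person, 28.555228128634383.miles)
    dd.distance.toKilometers   // ≈ 45.95, no manual conversion factor
    dd.distance < 1320.miles   // true: comparisons are unit-safe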
2 changes: 1 addition & 1 deletion core/src/main/scala/lightdb/query/PageContext.scala
@@ -6,7 +6,7 @@ import lightdb.Document
trait PageContext[D <: Document[D]] {
def context: SearchContext[D]

def nextPage(currentPage: PagedResults[D]): IO[Option[PagedResults[D]]] = if (currentPage.hasNext) {
def nextPage[V](currentPage: PagedResults[D, V]): IO[Option[PagedResults[D, V]]] = if (currentPage.hasNext) {
currentPage.query.indexSupport.doSearch(
query = currentPage.query,
context = context,
28 changes: 17 additions & 11 deletions core/src/main/scala/lightdb/query/PagedResults.scala
@@ -4,41 +4,47 @@ import cats.effect.IO
import cats.implicits.toTraverseOps
import lightdb.{Document, Id}

case class PagedResults[D <: Document[D]](query: Query[D],
context: PageContext[D],
offset: Int,
total: Int,
idsAndScores: List[(Id[D], Double)],
getter: Option[Id[D] => IO[D]] = None) {
case class PagedResults[D <: Document[D], V](query: Query[D, V],
context: PageContext[D],
offset: Int,
total: Int,
idsAndScores: List[(Id[D], Double)],
getter: Option[Id[D] => IO[D]] = None) {
lazy val page: Int = offset / query.pageSize
lazy val pages: Int = math.ceil(total.toDouble / query.pageSize.toDouble).toInt

lazy val ids: List[Id[D]] = idsAndScores.map(_._1)
lazy val scores: List[Double] = idsAndScores.map(_._2)

def docs: IO[List[D]] = ids.map(query.collection(_)).sequence

def values: IO[List[V]] = docs.flatMap(_.map(query.convert).sequence)

def idStream: fs2.Stream[IO, Id[D]] = fs2.Stream(ids: _*)

def idAndScoreStream: fs2.Stream[IO, (Id[D], Double)] = fs2.Stream(idsAndScores: _*)

def stream: fs2.Stream[IO, D] = idStream
def stream: fs2.Stream[IO, V] = idStream
.evalMap { id =>
getter match {
case Some(g) => g(id)
case None => query.collection(id)
}
}
.evalMap(query.convert)

def scoredStream: fs2.Stream[IO, (D, Double)] = idAndScoreStream
def scoredStream: fs2.Stream[IO, (V, Double)] = idAndScoreStream
.evalMap {
case (id, score) => getter match {
case Some(g) => g(id).map(doc => doc -> score)
case None => query.collection(id).map(doc => doc -> score)
}
}

def docs: IO[List[D]] = ids.map(query.collection(_)).sequence
.evalMap {
case (doc, score) => query.convert(doc).map(v => v -> score)
}

def hasNext: Boolean = pages > (page + 1)

def next(): IO[Option[PagedResults[D]]] = context.nextPage(this)
def next(): IO[Option[PagedResults[D, V]]] = context.nextPage(this)
}
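PagedResults now carries a second type parameter V, the target of the query's convert function, so values and stream hand back converted results while docs still yields raw documents. A paging sketch based on the spec above; withSearchContext, convert, pageSize, search, values, and next all appear in this commit, while the recursive helper is our own:

    import cats.effect.IO

    def allNames: IO[List[String]] = Person.withSearchContext { implicit context =>
      // Accumulate the converted values from this page and every page after it.
      def collect(page: PagedResults[Person, String]): IO[List[String]] =
        page.values.flatMap { names =>
          page.next().flatMap {
            case Some(nextPage) => collect(nextPage).map(names ::: _)
            case None           => IO.pure(names)
          }
        }
      Person.query
        .convert(p => IO.pure(p.name))
        .pageSize(1)
        .search()
        .flatMap(collect)
    }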