Much better distance support for sort and filtering.
darkfrog26 committed May 25, 2024
1 parent 1ca51e2 commit ef462cc
Showing 10 changed files with 213 additions and 107 deletions.
104 changes: 67 additions & 37 deletions all/src/test/scala/spec/SimpleHaloAndLuceneSpec.scala
@@ -14,12 +14,14 @@ import lightdb.util.DistanceCalculator
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AsyncWordSpec
import scribe.{Level, Logger}
import squants.space.LengthConversions.LengthConversions

import java.nio.file.{Path, Paths}

class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matchers {
private val id1 = Id[Person]("john")
private val id2 = Id[Person]("jane")
private val id3 = Id[Person]("bob")

private val newYorkCity = GeoPoint(40.7142, -74.0119)
private val chicago = GeoPoint(41.8119, -87.6873)
@@ -42,6 +44,13 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
point = noble,
_id = id2
)
private val p3 = Person(
name = "Bob Dole",
age = 123,
tags = Set("dog", "monkey"),
point = yonkers,
_id = id3
)

"Simple database" should {
"initialize the database" in {
@@ -58,7 +67,7 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
o should be(Some(p1))
}
}
"storage Jane Doe" in {
"store Jane Doe" in {
Person.set(p2).map { p =>
p._id should be(id2)
}
@@ -73,17 +82,32 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
size should be(2)
}
}
"store Bob Dole" in {
Person.set(p3).map { p =>
p._id should be(id3)
}
}
"verify Bob Dole exists" in {
Person.get(id3).map { o =>
o should be(Some(p3))
}
}
"verify exactly three objects in data" in {
Person.size.map { size =>
size should be(3)
}
}
"flush data" in {
Person.commit()
}
"verify exactly two objects in index" in {
"verify exactly three objects in index" in {
Person.index.count().map { size =>
size should be(2)
size should be(3)
}
}
"verify exactly two objects in the store" in {
"verify exactly three objects in the store" in {
Person.idStream.compile.toList.map { ids =>
ids.toSet should be(Set(id1, id2))
ids.toSet should be(Set(id1, id2, id3))
}
}
"search by name for positive result" in {
@@ -145,84 +169,92 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
}
"search by tag" in {
Person.query.filter(Person.tag === "dog").toList.map { people =>
people.map(_.name) should be(List("John Doe"))
people.map(_.name) should be(List("John Doe", "Bob Dole"))
}
}
"do paginated search" in {
Person.withSearchContext { implicit context =>
Person.query.pageSize(1).countTotal(true).search().flatMap { page1 =>
page1.page should be(0)
page1.pages should be(2)
page1.pages should be(3)
page1.hasNext should be(true)
page1.docs.flatMap { people1 =>
people1.length should be(1)
page1.next().flatMap {
case Some(page2) =>
page2.page should be(1)
page2.pages should be(2)
page2.hasNext should be(false)
page2.docs.map { people2 =>
page2.pages should be(3)
page2.hasNext should be(true)
page2.docs.flatMap { people2 =>
people2.length should be(1)
page2.next().flatMap {
case Some(page3) =>
page3.page should be(2)
page3.pages should be(3)
page3.hasNext should be(false)
page3.docs.map { people3 =>
people3.length should be(1)
}
case None => fail("Should have a third page")
}
}
case None => fail("Should have a second page")
}
}
}
}
}
"do paginated search as a stream" in {
"do paginated search as a stream converting to name" in {
Person.withSearchContext { implicit context =>
Person.query.pageSize(1).countTotal(true).stream.compile.toList.map { people =>
people.length should be(2)
people.map(_.name).toSet should be(Set("John Doe", "Jane Doe"))
Person.query.convert(p => IO.pure(p.name)).pageSize(1).countTotal(true).stream.compile.toList.map { names =>
names.length should be(3)
names.toSet should be(Set("John Doe", "Jane Doe", "Bob Dole"))
}
}
}
"sort by age" in {
Person.query.sort(Sort.ByField(Person.age)).toList.map { people =>
people.map(_.name) should be(List("Jane Doe", "John Doe"))
people.map(_.name) should be(List("Jane Doe", "John Doe", "Bob Dole"))
}
}
"sort by distance from Oklahoma City" in {
Person.query
.scoreDocs(true)
.sort(Sort.ByDistance(Person.point, oklahomaCity))
.scored
.distance(
field = Person.point,
from = oklahomaCity,
radius = Some(1320.miles)
)
.toList
.map { peopleAndScores =>
val people = peopleAndScores.map(_._1)
val scores = peopleAndScores.map(_._2)
.map { peopleAndDistances =>
val people = peopleAndDistances.map(_.doc)
val distances = peopleAndDistances.map(_.distance)
people.map(_.name) should be(List("Jane Doe", "John Doe"))
scores should be(List(1.0, 1.0))
val calculated = people.map(p => DistanceCalculator(oklahomaCity, p.point).toUsMiles)
calculated should be(List(28.555228128634383, 1316.1223938032729))
distances should be(List(28.555228128634383.miles, 1316.1223938032729.miles))
}
}
"delete John" in {
Person.delete(id1).map { deleted =>
deleted should be(id1)
}
}
"verify exactly one object in data" in {
"verify exactly two objects in data again" in {
Person.size.map { size =>
size should be(1)
size should be(2)
}
}
"commit data" in {
Person.commit()
}
"verify exactly one object in index" in {
"verify exactly two objects in index again" in {
Person.index.count().map { size =>
size should be(1)
size should be(2)
}
}
"list all documents" in {
Person.stream.compile.toList.map { people =>
people.length should be(1)
val p = people.head
p._id should be(id2)
p.name should be("Jane Doe")
p.age should be(19)
people.length should be(2)
people.map(_._id).toSet should be(Set(id2, id3))
}
}
"replace Jane Doe" in {
@@ -242,11 +274,9 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
}
"list new documents" in {
Person.stream.compile.toList.map { results =>
results.length should be(1)
val doc = results.head
doc._id should be(id2)
doc.name should be("Jan Doe")
doc.age should be(20)
results.length should be(2)
results.map(_.name).toSet should be(Set("Jan Doe", "Bob Dole"))
results.map(_.age).toSet should be(Set(20, 123))
}
}
"verify start time has been set" in {
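Taken together, the spec changes above exercise the new distance API end to end: the old scoreDocs/Sort.ByDistance/scored combination gives way to a single distance(field, from, radius) call whose results pair each document with its distance, nearest first. A minimal usage sketch, assuming a Person collection indexed as in this test; the Oklahoma City coordinates below are approximate and purely illustrative:

    import cats.effect.IO
    import lightdb.spatial.GeoPoint
    import squants.space.LengthConversions.LengthConversions

    // Approximate coordinates, for illustration only.
    val oklahomaCity = GeoPoint(35.4676, -97.5164)

    val report: IO[Unit] = Person.query
      .distance(
        field = Person.point,       // the indexed GeoPoint field
        from = oklahomaCity,        // origin the distance is measured from
        radius = Some(1320.miles)   // optional: drop anything farther away
      )
      .toList
      .map { results =>
        // Each result is a DistanceAndDoc pairing a document with its distance.
        results.foreach { dd =>
          println(s"${dd.doc.name}: ${dd.distance.toUsMiles} miles")
        }
      }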
2 changes: 1 addition & 1 deletion build.sbt
@@ -15,7 +15,7 @@ val developerURL: String = "https://matthicks.com"

name := projectName
ThisBuild / organization := org
ThisBuild / version := "0.6.1-SNAPSHOT"
ThisBuild / version := "0.7.0-SNAPSHOT"
ThisBuild / scalaVersion := scala213
ThisBuild / crossScalaVersions := allScalaVersions
ThisBuild / scalacOptions ++= Seq("-unchecked", "-deprecation")
18 changes: 12 additions & 6 deletions core/src/main/scala/lightdb/index/IndexSupport.scala
@@ -2,17 +2,20 @@ package lightdb.index

import cats.effect.IO
import lightdb.model.{AbstractCollection, Collection, DocumentAction, DocumentListener, DocumentModel}
import lightdb.query.{PagedResults, Query, SearchContext}
import lightdb.query.{Filter, PagedResults, Query, SearchContext}
import lightdb.Document
import lightdb.spatial.GeoPoint
import squants.space.Length

trait IndexSupport[D <: Document[D]] extends DocumentModel[D] {
private var _collection: Option[AbstractCollection[D]] = None

protected def collection: AbstractCollection[D] = this match {
case c: AbstractCollection[_] => c.asInstanceOf[AbstractCollection[D]]
case _ => _collection.getOrElse(throw new RuntimeException("DocumentModel not initialized with Collection (yet)"))
}

def query: Query[D] = Query(this, collection)
def query: Query[D, D] = Query(this, collection, doc => IO.pure(doc))

override protected[lightdb] def initModel(collection: AbstractCollection[D]): Unit = {
super.initModel(collection)
@@ -26,14 +29,17 @@ trait IndexSupport[D <: Document[D]] extends DocumentModel[D] {
})
}

def distanceFilter(field: IndexedField[GeoPoint, D], from: GeoPoint, radius: Length): Filter[D] =
throw new UnsupportedOperationException("Distance filtering is not supported on this indexer")

def index: Indexer[D]

def withSearchContext[Return](f: SearchContext[D] => IO[Return]): IO[Return] = index.withSearchContext(f)

def doSearch(query: Query[D],
context: SearchContext[D],
offset: Int,
after: Option[PagedResults[D]]): IO[PagedResults[D]]
def doSearch[V](query: Query[D, V],
context: SearchContext[D],
offset: Int,
after: Option[PagedResults[D, V]]): IO[PagedResults[D, V]]

protected def indexDoc(doc: D, fields: List[IndexedField[_, D]]): IO[Unit]
}
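The base trait deliberately fails fast: an indexer that cannot evaluate spatial predicates inherits the throwing default, while one that can opts in by overriding distanceFilter. A hedged sketch of what an opting-in indexer might look like; the trait, field, and filter types come from the diff above, but the subclass itself and the assumption that IndexedField lives in lightdb.index are ours:

    import lightdb.Document
    import lightdb.index.{IndexSupport, IndexedField}
    import lightdb.query.Filter
    import lightdb.spatial.GeoPoint
    import squants.space.Length

    // Hypothetical: a backend that supports geo-distance predicates.
    trait SpatialIndexSupport[D <: Document[D]] extends IndexSupport[D] {
      override def distanceFilter(field: IndexedField[GeoPoint, D],
                                  from: GeoPoint,
                                  radius: Length): Filter[D] =
        // Delegate to the underlying engine's geo-distance query here;
        // left unimplemented in this sketch.
        ???
    }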
6 changes: 6 additions & 0 deletions core/src/main/scala/lightdb/query/DistanceAndDoc.scala
@@ -0,0 +1,6 @@
package lightdb.query

import lightdb.Document
import squants.space.Length

case class DistanceAndDoc[D <: Document[D]](doc: D, distance: Length)
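DistanceAndDoc is just a unit-aware pairing: the distance is a squants Length rather than a raw Double, so callers convert and compare without tracking conversion factors themselves. A small illustration, assuming person is a Person document as in the spec:

    import squants.space.LengthConversions.LengthConversions

    val dd = DistanceAndDoc(person, 28.555228128634383.miles)
    dd.distance.toKilometers   // ≈ 45.95, no manual conversion factor
    dd.distance < 1320.miles   // true: comparisons are unit-safe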
2 changes: 1 addition & 1 deletion core/src/main/scala/lightdb/query/PageContext.scala
@@ -6,7 +6,7 @@ import lightdb.Document
trait PageContext[D <: Document[D]] {
def context: SearchContext[D]

def nextPage(currentPage: PagedResults[D]): IO[Option[PagedResults[D]]] = if (currentPage.hasNext) {
def nextPage[V](currentPage: PagedResults[D, V]): IO[Option[PagedResults[D, V]]] = if (currentPage.hasNext) {
currentPage.query.indexSupport.doSearch(
query = currentPage.query,
context = context,
28 changes: 17 additions & 11 deletions core/src/main/scala/lightdb/query/PagedResults.scala
@@ -4,41 +4,47 @@ import cats.effect.IO
import cats.implicits.toTraverseOps
import lightdb.{Document, Id}

case class PagedResults[D <: Document[D]](query: Query[D],
context: PageContext[D],
offset: Int,
total: Int,
idsAndScores: List[(Id[D], Double)],
getter: Option[Id[D] => IO[D]] = None) {
case class PagedResults[D <: Document[D], V](query: Query[D, V],
context: PageContext[D],
offset: Int,
total: Int,
idsAndScores: List[(Id[D], Double)],
getter: Option[Id[D] => IO[D]] = None) {
lazy val page: Int = offset / query.pageSize
lazy val pages: Int = math.ceil(total.toDouble / query.pageSize.toDouble).toInt

lazy val ids: List[Id[D]] = idsAndScores.map(_._1)
lazy val scores: List[Double] = idsAndScores.map(_._2)

def docs: IO[List[D]] = ids.map(query.collection(_)).sequence

def values: IO[List[V]] = docs.flatMap(_.map(query.convert).sequence)

def idStream: fs2.Stream[IO, Id[D]] = fs2.Stream(ids: _*)

def idAndScoreStream: fs2.Stream[IO, (Id[D], Double)] = fs2.Stream(idsAndScores: _*)

def stream: fs2.Stream[IO, D] = idStream
def stream: fs2.Stream[IO, V] = idStream
.evalMap { id =>
getter match {
case Some(g) => g(id)
case None => query.collection(id)
}
}
.evalMap(query.convert)

def scoredStream: fs2.Stream[IO, (D, Double)] = idAndScoreStream
def scoredStream: fs2.Stream[IO, (V, Double)] = idAndScoreStream
.evalMap {
case (id, score) => getter match {
case Some(g) => g(id).map(doc => doc -> score)
case None => query.collection(id).map(doc => doc -> score)
}
}

def docs: IO[List[D]] = ids.map(query.collection(_)).sequence
.evalMap {
case (doc, score) => query.convert(doc).map(v => v -> score)
}

def hasNext: Boolean = pages > (page + 1)

def next(): IO[Option[PagedResults[D]]] = context.nextPage(this)
def next(): IO[Option[PagedResults[D, V]]] = context.nextPage(this)
}
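PagedResults now carries a second type parameter V, the target of the query's convert function, so values and stream hand back converted results while docs still yields raw documents. A paging sketch based on the spec above; withSearchContext, convert, pageSize, search, values, and next all appear in this commit, while the recursive helper is our own:

    import cats.effect.IO

    def allNames: IO[List[String]] = Person.withSearchContext { implicit context =>
      // Accumulate the converted values from this page and every page after it.
      def collect(page: PagedResults[Person, String]): IO[List[String]] =
        page.values.flatMap { names =>
          page.next().flatMap {
            case Some(nextPage) => collect(nextPage).map(names ::: _)
            case None           => IO.pure(names)
          }
        }
      Person.query
        .convert(p => IO.pure(p.name))
        .pageSize(1)
        .search()
        .flatMap(collect)
    }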