Skip to content

Commit

Permalink
Major refactoring of LuceneSupport for greater flexibility and simpli…
Browse files Browse the repository at this point in the history
…city in indexes
  • Loading branch information
darkfrog26 committed May 15, 2024
1 parent dc0e418 commit a29d33b
Show file tree
Hide file tree
Showing 16 changed files with 113 additions and 237 deletions.
25 changes: 16 additions & 9 deletions all/src/test/scala/spec/SimpleHaloAndLuceneSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@ import cats.effect.testing.scalatest.AsyncIOSpec
import fabric.rw._
import lightdb._
import lightdb.halo.HaloDBSupport
import lightdb.lucene.LuceneSupport
import lightdb.lucene.index.{IntField, StringField}
import lightdb.lucene.{LuceneIndex, LuceneSupport}
import lightdb.model.Collection
import lightdb.sqlite.{SQLIndexedField, SQLiteSupport}
import lightdb.upgrade.DatabaseUpgrade
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AsyncWordSpec
Expand All @@ -20,8 +18,8 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
private val id1 = Id[Person]("john")
private val id2 = Id[Person]("jane")

private val p1 = Person("John Doe", 21, id1)
private val p2 = Person("Jane Doe", 19, id2)
private val p1 = Person("John Doe", 21, Set("dog", "cat"), id1)
private val p2 = Person("Jane Doe", 19, Set("cat"), id2)

"Simple database" should {
"initialize the database" in {
Expand Down Expand Up @@ -123,6 +121,11 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
}
}
}
"search by tag" in {
Person.query.filter(Person.tag === "dog").toList.map { people =>
people.map(_.name) should be(List("John Doe"))
}
}
"do paginated search" in {
Person.withSearchContext { implicit context =>
Person.query.pageSize(1).countTotal(true).search().flatMap { page1 =>
Expand Down Expand Up @@ -181,7 +184,7 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
}
}
"replace Jane Doe" in {
Person.set(Person("Jan Doe", 20, id2)).map { p =>
Person.set(Person("Jan Doe", 20, Set("cat", "bear"), id2)).map { p =>
p._id should be(id2)
}
}
Expand Down Expand Up @@ -229,14 +232,18 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
override def upgrades: List[DatabaseUpgrade] = List(InitialSetupUpgrade)
}

case class Person(name: String, age: Int, _id: Id[Person] = Id()) extends Document[Person]
case class Person(name: String,
age: Int,
tags: Set[String],
_id: Id[Person] = Id()) extends Document[Person]

object Person extends Collection[Person]("people", DB) with LuceneSupport[Person] {
override implicit val rw: RW[Person] = RW.gen

val name: StringField[Person] = index("name").string(_.name)
val age: IntField[Person] = index("age").int(_.age)
val name: LuceneIndex[String, Person] = index.one("name", _.name)
val age: LuceneIndex[Int, Person] = index.one("age", _.age)
val ageLinks: IndexedLinks[Int, Person] = indexedLinks[Int]("age", _.toString, _.age)
val tag: LuceneIndex[String, Person] = index("tag", _.tags.toList)
}

object InitialSetupUpgrade extends DatabaseUpgrade {
Expand Down
7 changes: 2 additions & 5 deletions core/src/main/scala/lightdb/index/IndexedField.scala
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,8 @@ trait IndexedField[F, D <: Document[D]] {

def fieldName: String
def indexSupport: IndexSupport[D]
def get: D => Option[F]
def getJson: D => Json = (doc: D) => get(doc) match {
case Some(value) => value.json
case None => Null
}
def get: D => List[F]
def getJson: D => List[Json] = (doc: D) => get(doc).map(_.json)

indexSupport.index.register(this)
}
66 changes: 66 additions & 0 deletions lucene/src/main/scala/lightdb/lucene/LuceneIndex.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package lightdb.lucene

import fabric._
import fabric.define.DefType
import fabric.rw._
import lightdb.Document
import lightdb.index.{IndexSupport, IndexedField}
import org.apache.lucene.document.Field
import org.apache.lucene.index.Term
import org.apache.lucene.search._
import org.apache.lucene.{document => ld}

/**
 * A Lucene-backed index over one field of documents of type `D`.
 *
 * Each document may contribute zero or more values of type `F` (see `get`); every value is converted to JSON through
 * the implicit `RW[F]` and written as one Lucene field named `fieldName`.
 *
 * @param fieldName    name of the Lucene field written for, and queried by, this index
 * @param indexSupport the owning index support this field belongs to
 * @param get          extracts the values to index from a document
 * @param store        whether the value is stored in the Lucene document in addition to being indexed
 * @param tokenized    when true, values are written as analyzed text fields (only string values are supported)
 * @param rw           JSON conversion for `F`, used to decide how each value is indexed and queried
 */
case class LuceneIndex[F, D <: Document[D]](fieldName: String,
                                            indexSupport: IndexSupport[D],
                                            get: D => List[F],
                                            store: Boolean,
                                            tokenized: Boolean)
                                           (implicit val rw: RW[F]) extends IndexedField[F, D] {
  /** Alias for [[is]]. */
  def ===(value: F): LuceneFilter[D] = is(value)

  /**
   * Exact-match filter for `value`. The query is built lazily, when the filter is converted to a Lucene query.
   *
   * @throws RuntimeException (when the query is built) if the JSON form of `value` has no exact-match mapping
   */
  def is(value: F): LuceneFilter[D] = LuceneFilter(() => exactQuery(value))

  /**
   * Filter matching documents whose field equals at least one of `values`.
   *
   * Like [[is]], the underlying query (and any "unsupported value" failure) is produced lazily.
   */
  def IN(values: Seq[F]): LuceneFilter[D] = LuceneFilter(() => {
    val builder = new BooleanQuery.Builder
    builder.setMinimumNumberShouldMatch(1)
    values.foreach { value =>
      builder.add(exactQuery(value), BooleanClause.Occur.SHOULD)
    }
    builder.build()
  })

  /**
   * Range filter between `lower` and `upper`, passed straight to `LongField.newRangeQuery`.
   * Only integer-valued JSON is supported.
   *
   * @throws RuntimeException (when the query is built) if either bound is not an integer JSON value
   */
  def between(lower: F, upper: F): LuceneFilter[D] = LuceneFilter(() => (lower.json, upper.json) match {
    case (NumInt(l, _), NumInt(u, _)) => ld.LongField.newRangeQuery(fieldName, l, u)
    case _ => throw new RuntimeException(s"Unsupported between for $lower - $upper (${rw.definition})")
  })

  /**
   * Builds the exact-match query for a single value, mirroring how `createFields` indexes it in the non-tokenized
   * case: strings, booleans and decimals are indexed as string terms, integers as `LongField` points.
   */
  private def exactQuery(value: F): org.apache.lucene.search.Query = value.json match {
    case Str(s, _) => new TermQuery(new Term(fieldName, s))
    case Bool(b, _) => new TermQuery(new Term(fieldName, b.toString))
    // Integers are written as LongField, so a TermQuery would never match; use the point exact query.
    case NumInt(l, _) => ld.LongField.newExactQuery(fieldName, l)
    case NumDec(bd, _) => new TermQuery(new Term(fieldName, bd.toString()))
    case json => throw new RuntimeException(s"Unsupported equality check: $json (${rw.definition})")
  }

  /**
   * Creates the Lucene fields for `doc`, one per non-null value returned by `get`.
   *
   * @throws RuntimeException if a value's JSON form cannot be indexed by this field
   */
  protected[lightdb] def createFields(doc: D): List[ld.Field] = if (tokenized) {
    // Tokenized fields only make sense for text; anything other than a string (or null) is rejected.
    val fieldType = if (store) ld.TextField.TYPE_STORED else ld.TextField.TYPE_NOT_STORED
    getJson(doc).flatMap {
      case Null => Nil
      case Str(s, _) => List(s)
      case f => throw new RuntimeException(s"Unsupported tokenized value: $f (${rw.definition})")
    }.map { value =>
      new ld.Field(fieldName, value, fieldType)
    }
  } else {
    val fs: Field.Store = if (store) ld.Field.Store.YES else ld.Field.Store.NO

    getJson(doc).flatMap {
      case Null => None
      case Str(s, _) => Some(new ld.StringField(fieldName, s, fs))
      case Bool(b, _) => Some(new ld.StringField(fieldName, b.toString, fs))
      case NumInt(l, _) => Some(new ld.LongField(fieldName, l, fs))
      case NumDec(bd, _) => Some(new ld.StringField(fieldName, bd.toString(), fs))
      case json => throw new RuntimeException(s"Unsupported JSON: $json (${rw.definition})")
    }
  }

  /**
   * Lucene sort type derived from the JSON definition of `F`.
   *
   * NOTE(review): decimals are indexed as `StringField` in `createFields` but sorted as DOUBLE here - confirm that
   * sorting on decimal fields behaves as intended.
   *
   * @throws RuntimeException if the definition is not a string, decimal or integer
   */
  protected[lightdb] def sortType: SortField.Type = rw.definition match {
    case DefType.Str => SortField.Type.STRING
    case DefType.Dec => SortField.Type.DOUBLE
    case DefType.Int => SortField.Type.LONG
    case _ => throw new RuntimeException(s"Unsupported sort type for ${rw.definition}")
  }
}
12 changes: 0 additions & 12 deletions lucene/src/main/scala/lightdb/lucene/LuceneIndexedField.scala

This file was deleted.

41 changes: 17 additions & 24 deletions lucene/src/main/scala/lightdb/lucene/LuceneIndexer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package lightdb.lucene

import cats.effect.IO
import lightdb.index.{IndexSupport, Indexer}
import lightdb.lucene.index.{BigDecimalField, DoubleField, FloatField, IntField, LongField, StringField, TokenizedField}
import lightdb.query.SearchContext
import lightdb.{Document, Id}
import org.apache.lucene.analysis.Analyzer
Expand Down Expand Up @@ -68,33 +67,27 @@ case class LuceneIndexer[D <: Document[D]](indexSupport: IndexSupport[D],
searcherManager.maybeRefreshBlocking()
}

def apply(name: String): IndexedFieldBuilder = IndexedFieldBuilder(name)
/**
 * Creates a [[LuceneIndex]] named `name` whose values are extracted from each document by `get`.
 *
 * @param name      the index field name
 * @param get       extracts the (possibly empty) list of values to index from a document
 * @param store     passed through to the index; defaults to false
 * @param tokenized passed through to the index; defaults to false
 */
def apply[F](name: String,
             get: D => List[F],
             store: Boolean = false,
             tokenized: Boolean = false)
            (implicit rw: RW[F]): LuceneIndex[F, D] =
  LuceneIndex[F, D](name, indexSupport, get, store, tokenized)(rw)

/**
 * Convenience for single-valued fields: wraps the value returned by `get` in a one-element list and delegates to
 * `apply`.
 */
def one[F](name: String,
           get: D => F,
           store: Boolean = false,
           tokenized: Boolean = false)
          (implicit rw: RW[F]): LuceneIndex[F, D] = {
  val asList: D => List[F] = doc => get(doc) :: Nil
  apply[F](name = name, get = asList, store = store, tokenized = tokenized)
}

// Wraps the blocking commit (commitBlocking()) in an IO.
override def commit(): IO[Unit] = IO(commitBlocking())

/** Counts the documents matched by a match-all query, using the searcher bound to a search context. */
override def count(): IO[Int] = withSearchContext { context =>
  IO {
    val support = context.indexSupport.asInstanceOf[LuceneSupport[D]]
    val searcher = support.indexSearcher(context)
    searcher.count(new MatchAllDocsQuery)
  }
}

case class IndexedFieldBuilder(fieldName: String) {
private def o[F](f: D => F): D => Option[F] = doc => Some(f(doc))
def tokenized(f: D => String): TokenizedField[D] = TokenizedField(fieldName, indexSupport, o(f))
def string(f: D => String, store: Boolean = false): StringField[D] = StringField(fieldName, indexSupport, o(f), store)
def id[T](f: D => Id[T], store: Boolean = false): StringField[D] = StringField(fieldName, indexSupport, o(doc => f(doc).value), store)
def int(f: D => Int): IntField[D] = IntField(fieldName, indexSupport, o(f))
def long(f: D => Long): LongField[D] = LongField(fieldName, indexSupport, o(f))
def float(f: D => Float): FloatField[D] = FloatField(fieldName, indexSupport, o(f))
def double(f: D => Double): DoubleField[D] = DoubleField(fieldName, indexSupport, o(f))
def bigDecimal(f: D => BigDecimal): BigDecimalField[D] = BigDecimalField(fieldName, indexSupport, o(f))
object option {
def tokenized(f: D => Option[String]): TokenizedField[D] = TokenizedField(fieldName, indexSupport, f)
def string(f: D => Option[String], store: Boolean = false): StringField[D] = StringField(fieldName, indexSupport, f, store)
def id[T](f: D => Option[Id[T]], store: Boolean = false): StringField[D] = StringField(fieldName, indexSupport, doc => f(doc).map(_.value), store)
def int(f: D => Option[Int]): IntField[D] = IntField(fieldName, indexSupport, f)
def long(f: D => Option[Long]): LongField[D] = LongField(fieldName, indexSupport, f)
def float(f: D => Option[Float]): FloatField[D] = FloatField(fieldName, indexSupport, f)
def double(f: D => Option[Double]): DoubleField[D] = DoubleField(fieldName, indexSupport, f)
def bigDecimal(f: D => Option[BigDecimal]): BigDecimalField[D] = BigDecimalField(fieldName, indexSupport, f)
}
}
}
20 changes: 5 additions & 15 deletions lucene/src/main/scala/lightdb/lucene/LuceneSupport.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,22 @@ package lightdb.lucene

import cats.effect.IO
import lightdb._
import lightdb.index.{IndexSupport, IndexedField, Indexer}
import lightdb.lucene.index._
import lightdb.index.{IndexSupport, IndexedField}
import lightdb.query.{Filter, PageContext, PagedResults, Query, SearchContext, Sort}
import org.apache.lucene.search.{IndexSearcher, MatchAllDocsQuery, ScoreDoc, SearcherFactory, SearcherManager, SortField, TopFieldDocs, Query => LuceneQuery, Sort => LuceneSort}
import org.apache.lucene.{document => ld}
import org.apache.lucene.analysis.Analyzer
import org.apache.lucene.analysis.standard.StandardAnalyzer
import org.apache.lucene.index.{IndexWriter, IndexWriterConfig, StoredFields, Term}
import org.apache.lucene.queryparser.classic.QueryParser
import org.apache.lucene.store.{ByteBuffersDirectory, FSDirectory}
import org.apache.lucene.document.{Document => LuceneDocument, Field => LuceneField}

import java.nio.file.{Files, Path}
import java.util.concurrent.ConcurrentHashMap
import org.apache.lucene.index.StoredFields

trait LuceneSupport[D <: Document[D]] extends IndexSupport[D] {
override lazy val index: LuceneIndexer[D] = LuceneIndexer(this, collection)

val _id: StringField[D] = index("_id").string(_._id.value, store = true)
val _id: LuceneIndex[Id[D], D] = index("_id", doc => List(doc._id), store = true)

protected[lucene] def indexSearcher(context: SearchContext[D]): IndexSearcher = index.contextMapping.get(context)

private def sort2SortField(sort: Sort): SortField = sort match {
case Sort.BestMatch => SortField.FIELD_SCORE
case Sort.IndexOrder => SortField.FIELD_DOC
case Sort.ByField(field, reverse) => new SortField(field.fieldName, field.asInstanceOf[LuceneIndexedField[_, D]].sortType, reverse)
case Sort.ByField(field, reverse) => new SortField(field.fieldName, field.asInstanceOf[LuceneIndex[_, D]].sortType, reverse)
}

override def doSearch(query: Query[D],
Expand Down Expand Up @@ -66,7 +56,7 @@ trait LuceneSupport[D <: Document[D]] extends IndexSupport[D] {

override protected def indexDoc(doc: D, fields: List[IndexedField[_, D]]): IO[Unit] = for {
fields <- IO(fields.flatMap { field =>
field.asInstanceOf[LuceneIndexedField[_, D]].createFields(doc)
field.asInstanceOf[LuceneIndex[_, D]].createFields(doc)
})
_ = index.addDoc(doc._id, fields)
} yield ()
Expand Down
21 changes: 0 additions & 21 deletions lucene/src/main/scala/lightdb/lucene/index/BigDecimalField.scala

This file was deleted.

21 changes: 0 additions & 21 deletions lucene/src/main/scala/lightdb/lucene/index/DoubleField.scala

This file was deleted.

21 changes: 0 additions & 21 deletions lucene/src/main/scala/lightdb/lucene/index/FloatField.scala

This file was deleted.

28 changes: 0 additions & 28 deletions lucene/src/main/scala/lightdb/lucene/index/IntField.scala

This file was deleted.

21 changes: 0 additions & 21 deletions lucene/src/main/scala/lightdb/lucene/index/LongField.scala

This file was deleted.

Loading

0 comments on commit a29d33b

Please sign in to comment.