Major refactoring of LuceneSupport for greater flexibility and simplicity in indexes
outr · May 15, 2024 · a29d33b · a29d33b
1 parent dc0e418
commit a29d33b
Show file tree

Hide file tree

Showing 16 changed files with 113 additions and 237 deletions.
diff --git a/all/src/test/scala/spec/SimpleHaloAndLuceneSpec.scala b/all/src/test/scala/spec/SimpleHaloAndLuceneSpec.scala
@@ -5,10 +5,8 @@ import cats.effect.testing.scalatest.AsyncIOSpec
 import fabric.rw._
 import lightdb._
 import lightdb.halo.HaloDBSupport
-import lightdb.lucene.LuceneSupport
-import lightdb.lucene.index.{IntField, StringField}
+import lightdb.lucene.{LuceneIndex, LuceneSupport}
 import lightdb.model.Collection
-import lightdb.sqlite.{SQLIndexedField, SQLiteSupport}
 import lightdb.upgrade.DatabaseUpgrade
 import org.scalatest.matchers.should.Matchers
 import org.scalatest.wordspec.AsyncWordSpec
@@ -20,8 +18,8 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
   private val id1 = Id[Person]("john")
   private val id2 = Id[Person]("jane")
 
-  private val p1 = Person("John Doe", 21, id1)
-  private val p2 = Person("Jane Doe", 19, id2)
+  private val p1 = Person("John Doe", 21, Set("dog", "cat"), id1)
+  private val p2 = Person("Jane Doe", 19, Set("cat"), id2)
 
   "Simple database" should {
     "initialize the database" in {
@@ -123,6 +121,11 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
           }
       }
     }
+    "search by tag" in {
+      Person.query.filter(Person.tag === "dog").toList.map { people =>
+        people.map(_.name) should be(List("John Doe"))
+      }
+    }
     "do paginated search" in {
       Person.withSearchContext { implicit context =>
         Person.query.pageSize(1).countTotal(true).search().flatMap { page1 =>
@@ -181,7 +184,7 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
       }
     }
     "replace Jane Doe" in {
-      Person.set(Person("Jan Doe", 20, id2)).map { p =>
+      Person.set(Person("Jan Doe", 20, Set("cat", "bear"), id2)).map { p =>
         p._id should be(id2)
       }
     }
@@ -229,14 +232,18 @@ class SimpleHaloAndLuceneSpec extends AsyncWordSpec with AsyncIOSpec with Matche
     override def upgrades: List[DatabaseUpgrade] = List(InitialSetupUpgrade)
   }
 
-  case class Person(name: String, age: Int, _id: Id[Person] = Id()) extends Document[Person]
+  case class Person(name: String,
+                    age: Int,
+                    tags: Set[String],
+                    _id: Id[Person] = Id()) extends Document[Person]
 
   object Person extends Collection[Person]("people", DB) with LuceneSupport[Person] {
     override implicit val rw: RW[Person] = RW.gen
 
-    val name: StringField[Person] = index("name").string(_.name)
-    val age: IntField[Person] = index("age").int(_.age)
+    val name: LuceneIndex[String, Person] = index.one("name", _.name)
+    val age: LuceneIndex[Int, Person] = index.one("age", _.age)
     val ageLinks: IndexedLinks[Int, Person] = indexedLinks[Int]("age", _.toString, _.age)
+    val tag: LuceneIndex[String, Person] = index("tag", _.tags.toList)
   }
 
   object InitialSetupUpgrade extends DatabaseUpgrade {

diff --git a/core/src/main/scala/lightdb/index/IndexedField.scala b/core/src/main/scala/lightdb/index/IndexedField.scala
@@ -10,11 +10,8 @@ trait IndexedField[F, D <: Document[D]] {
 
   def fieldName: String
   def indexSupport: IndexSupport[D]
-  def get: D => Option[F]
-  def getJson: D => Json = (doc: D) => get(doc) match {
-    case Some(value) => value.json
-    case None => Null
-  }
+  def get: D => List[F]
+  def getJson: D => List[Json] = (doc: D) => get(doc).map(_.json)
 
   indexSupport.index.register(this)
 }
diff --git a/lucene/src/main/scala/lightdb/lucene/LuceneIndex.scala b/lucene/src/main/scala/lightdb/lucene/LuceneIndex.scala
@@ -0,0 +1,66 @@
+package lightdb.lucene
+
+import fabric._
+import fabric.define.DefType
+import fabric.rw._
+import lightdb.Document
+import lightdb.index.{IndexSupport, IndexedField}
+import org.apache.lucene.document.Field
+import org.apache.lucene.index.Term
+import org.apache.lucene.search._
+import org.apache.lucene.{document => ld}
+
+/**
+ * A Lucene-backed index over a single named field of a document.
+ *
+ * Values are converted to JSON through `rw` and then mapped onto Lucene fields and queries.
+ *
+ * @param fieldName    name of the Lucene field that is written and queried
+ * @param indexSupport the index support this field belongs to
+ * @param get          extracts the (zero or more) values to index from a document
+ * @param store        when true, the value is stored in the index in addition to being indexed
+ * @param tokenized    when true, string values are indexed as `TextField` content instead of exact terms
+ * @param rw           converter between `F` and JSON
+ * @tparam F the value type of the indexed field
+ * @tparam D the document type
+ */
+case class LuceneIndex[F, D <: Document[D]](fieldName: String,
+                                            indexSupport: IndexSupport[D],
+                                            get: D => List[F],
+                                            store: Boolean,
+                                            tokenized: Boolean)
+                                           (implicit val rw: RW[F]) extends IndexedField[F, D] {
+  /** Alias for [[is]]. */
+  def ===(value: F): LuceneFilter[D] = is(value)
+
+  /**
+   * Filter matching documents whose field equals `value`.
+   *
+   * The query mirrors the encoding used by `createFields` for non-tokenized fields: strings,
+   * booleans and decimals are matched as exact terms on their string form, integers are matched
+   * with an exact `LongField` query.
+   *
+   * The returned filter throws a RuntimeException when its query is built if the JSON
+   * representation of `value` is none of the above.
+   */
+  def is(value: F): LuceneFilter[D] = LuceneFilter(() => value.json match {
+    case Str(s, _) => exactTerm(s)
+    case Bool(b, _) => exactTerm(b.toString)
+    // Integers are indexed as LongField, so they must be queried as points, not as terms
+    case NumInt(l, _) => ld.LongField.newExactQuery(fieldName, l)
+    case NumDec(bd, _) => exactTerm(bd.toString())
+    case json => throw new RuntimeException(s"Unsupported equality check: $json (${rw.definition})")
+  })
+
+  /** Exact-term query against this field for an already-encoded string value. */
+  private def exactTerm(text: String): Query = new TermQuery(new Term(fieldName, text))
+
+  /**
+   * Filter matching documents whose field equals at least one of `values`.
+   *
+   * The per-value queries are created immediately (so an unsupported value fails here), while the
+   * enclosing boolean query is built when the filter is evaluated.
+   */
+  def IN(values: Seq[F]): LuceneFilter[D] = {
+    val b = new BooleanQuery.Builder
+    b.setMinimumNumberShouldMatch(1)
+    values.foreach { value =>
+      b.add(is(value).asQuery(), BooleanClause.Occur.SHOULD)
+    }
+    LuceneFilter(() => b.build())
+  }
+
+  /**
+   * Range filter from `lower` to `upper`, passed straight to `LongField.newRangeQuery`.
+   *
+   * Only integer JSON values are supported; anything else raises a RuntimeException when the
+   * query is built.
+   */
+  def between(lower: F, upper: F): LuceneFilter[D] = LuceneFilter(() => (lower.json, upper.json) match {
+    case (NumInt(l, _), NumInt(u, _)) => ld.LongField.newRangeQuery(fieldName, l, u)
+    case _ => throw new RuntimeException(s"Unsupported between for $lower - $upper (${rw.definition})")
+  })
+
+  /**
+   * Builds the Lucene fields for `doc`, one per non-null value returned by `get`.
+   *
+   * Tokenized indexes accept only strings and emit `TextField`-typed fields. Non-tokenized
+   * indexes emit `StringField` for strings, booleans and decimals and `LongField` for integers.
+   * Any other JSON value raises a RuntimeException.
+   */
+  protected[lightdb] def createFields(doc: D): List[ld.Field] = if (tokenized) {
+    getJson(doc).flatMap {
+      case Null => Nil
+      case Str(s, _) => List(s)
+      case f => throw new RuntimeException(s"Unsupported tokenized value: $f (${rw.definition})")
+    }.map { value =>
+      new ld.Field(fieldName, value, if (store) ld.TextField.TYPE_STORED else ld.TextField.TYPE_NOT_STORED)
+    }
+  } else {
+    def fs: Field.Store = if (store) ld.Field.Store.YES else ld.Field.Store.NO
+
+    getJson(doc).flatMap {
+      case Null => None
+      case Str(s, _) => Some(new ld.StringField(fieldName, s, fs))
+      case Bool(b, _) => Some(new ld.StringField(fieldName, b.toString, fs))
+      case NumInt(l, _) => Some(new ld.LongField(fieldName, l, fs))
+      case NumDec(bd, _) => Some(new ld.StringField(fieldName, bd.toString(), fs))
+      case json => throw new RuntimeException(s"Unsupported JSON: $json (${rw.definition})")
+    }
+  }
+
+  /**
+   * Lucene sort type derived from the JSON definition of `F`.
+   *
+   * Throws a RuntimeException for definitions other than string, decimal or integer.
+   *
+   * NOTE(review): verify that the fields produced by `createFields` carry doc values compatible
+   * with these sort types.
+   */
+  protected[lightdb] def sortType: SortField.Type = rw.definition match {
+    case DefType.Str => SortField.Type.STRING
+    case DefType.Dec => SortField.Type.DOUBLE
+    case DefType.Int => SortField.Type.LONG
+    case _ => throw new RuntimeException(s"Unsupported sort type for ${rw.definition}")
+  }
+}
diff --git a/lucene/src/main/scala/lightdb/lucene/LuceneIndexedField.scala b/lucene/src/main/scala/lightdb/lucene/LuceneIndexedField.scala
diff --git a/lucene/src/main/scala/lightdb/lucene/LuceneIndexer.scala b/lucene/src/main/scala/lightdb/lucene/LuceneIndexer.scala
@@ -2,7 +2,6 @@ package lightdb.lucene
 
 import cats.effect.IO
 import lightdb.index.{IndexSupport, Indexer}
-import lightdb.lucene.index.{BigDecimalField, DoubleField, FloatField, IntField, LongField, StringField, TokenizedField}
 import lightdb.query.SearchContext
 import lightdb.{Document, Id}
 import org.apache.lucene.analysis.Analyzer
@@ -68,33 +67,27 @@ case class LuceneIndexer[D <: Document[D]](indexSupport: IndexSupport[D],
     searcherManager.maybeRefreshBlocking()
   }
 
-  def apply(name: String): IndexedFieldBuilder = IndexedFieldBuilder(name)
+  /**
+   * Creates a [[LuceneIndex]] on this indexer's `indexSupport` for a field that may hold
+   * several values per document.
+   *
+   * @param name      Lucene field name
+   * @param get       extracts the values to index from a document
+   * @param store     passed through to the index; defaults to false
+   * @param tokenized passed through to the index; defaults to false
+   * @param rw        converter between `F` and JSON
+   */
+  def apply[F](name: String,
+               get: D => List[F],
+               store: Boolean = false,
+               tokenized: Boolean = false)
+              (implicit rw: RW[F]): LuceneIndex[F, D] =
+    LuceneIndex[F, D](name, indexSupport, get, store, tokenized)(rw)
+
+  /**
+   * Creates a [[LuceneIndex]] for a field that holds exactly one value per document.
+   *
+   * The single value returned by `get` is wrapped in a one-element list and handed to `apply`.
+   *
+   * @param name      Lucene field name
+   * @param get       extracts the single value to index from a document
+   * @param store     passed through to `apply`; defaults to false
+   * @param tokenized passed through to `apply`; defaults to false
+   * @param rw        converter between `F` and JSON
+   */
+  def one[F](name: String,
+             get: D => F,
+             store: Boolean = false,
+             tokenized: Boolean = false)
+            (implicit rw: RW[F]): LuceneIndex[F, D] = {
+    val asList: D => List[F] = doc => get(doc) :: Nil
+    apply[F](name, asList, store, tokenized)
+  }
 
   override def commit(): IO[Unit] = IO(commitBlocking())
 
   override def count(): IO[Int] = withSearchContext { context =>
     IO(context.indexSupport.asInstanceOf[LuceneSupport[D]].indexSearcher(context).count(new MatchAllDocsQuery))
   }
-
-  case class IndexedFieldBuilder(fieldName: String) {
-    private def o[F](f: D => F): D => Option[F] = doc => Some(f(doc))
-    def tokenized(f: D => String): TokenizedField[D] = TokenizedField(fieldName, indexSupport, o(f))
-    def string(f: D => String, store: Boolean = false): StringField[D] = StringField(fieldName, indexSupport, o(f), store)
-    def id[T](f: D => Id[T], store: Boolean = false): StringField[D] = StringField(fieldName, indexSupport, o(doc => f(doc).value), store)
-    def int(f: D => Int): IntField[D] = IntField(fieldName, indexSupport, o(f))
-    def long(f: D => Long): LongField[D] = LongField(fieldName, indexSupport, o(f))
-    def float(f: D => Float): FloatField[D] = FloatField(fieldName, indexSupport, o(f))
-    def double(f: D => Double): DoubleField[D] = DoubleField(fieldName, indexSupport, o(f))
-    def bigDecimal(f: D => BigDecimal): BigDecimalField[D] = BigDecimalField(fieldName, indexSupport, o(f))
-    object option {
-      def tokenized(f: D => Option[String]): TokenizedField[D] = TokenizedField(fieldName, indexSupport, f)
-      def string(f: D => Option[String], store: Boolean = false): StringField[D] = StringField(fieldName, indexSupport, f, store)
-      def id[T](f: D => Option[Id[T]], store: Boolean = false): StringField[D] = StringField(fieldName, indexSupport, doc => f(doc).map(_.value), store)
-      def int(f: D => Option[Int]): IntField[D] = IntField(fieldName, indexSupport, f)
-      def long(f: D => Option[Long]): LongField[D] = LongField(fieldName, indexSupport, f)
-      def float(f: D => Option[Float]): FloatField[D] = FloatField(fieldName, indexSupport, f)
-      def double(f: D => Option[Double]): DoubleField[D] = DoubleField(fieldName, indexSupport, f)
-      def bigDecimal(f: D => Option[BigDecimal]): BigDecimalField[D] = BigDecimalField(fieldName, indexSupport, f)
-    }
-  }
 }
diff --git a/lucene/src/main/scala/lightdb/lucene/LuceneSupport.scala b/lucene/src/main/scala/lightdb/lucene/LuceneSupport.scala
@@ -2,32 +2,22 @@ package lightdb.lucene
 
 import cats.effect.IO
 import lightdb._
-import lightdb.index.{IndexSupport, IndexedField, Indexer}
-import lightdb.lucene.index._
+import lightdb.index.{IndexSupport, IndexedField}
 import lightdb.query.{Filter, PageContext, PagedResults, Query, SearchContext, Sort}
 import org.apache.lucene.search.{IndexSearcher, MatchAllDocsQuery, ScoreDoc, SearcherFactory, SearcherManager, SortField, TopFieldDocs, Query => LuceneQuery, Sort => LuceneSort}
-import org.apache.lucene.{document => ld}
-import org.apache.lucene.analysis.Analyzer
-import org.apache.lucene.analysis.standard.StandardAnalyzer
-import org.apache.lucene.index.{IndexWriter, IndexWriterConfig, StoredFields, Term}
-import org.apache.lucene.queryparser.classic.QueryParser
-import org.apache.lucene.store.{ByteBuffersDirectory, FSDirectory}
-import org.apache.lucene.document.{Document => LuceneDocument, Field => LuceneField}
-
-import java.nio.file.{Files, Path}
-import java.util.concurrent.ConcurrentHashMap
+import org.apache.lucene.index.StoredFields
 
 trait LuceneSupport[D <: Document[D]] extends IndexSupport[D] {
   override lazy val index: LuceneIndexer[D] = LuceneIndexer(this, collection)
 
-  val _id: StringField[D] = index("_id").string(_._id.value, store = true)
+  val _id: LuceneIndex[Id[D], D] = index("_id", doc => List(doc._id), store = true)
 
   protected[lucene] def indexSearcher(context: SearchContext[D]): IndexSearcher = index.contextMapping.get(context)
 
   private def sort2SortField(sort: Sort): SortField = sort match {
     case Sort.BestMatch => SortField.FIELD_SCORE
     case Sort.IndexOrder => SortField.FIELD_DOC
-    case Sort.ByField(field, reverse) => new SortField(field.fieldName, field.asInstanceOf[LuceneIndexedField[_, D]].sortType, reverse)
+    case Sort.ByField(field, reverse) => new SortField(field.fieldName, field.asInstanceOf[LuceneIndex[_, D]].sortType, reverse)
   }
 
   override def doSearch(query: Query[D],
@@ -66,7 +56,7 @@ trait LuceneSupport[D <: Document[D]] extends IndexSupport[D] {
 
   override protected def indexDoc(doc: D, fields: List[IndexedField[_, D]]): IO[Unit] = for {
     fields <- IO(fields.flatMap { field =>
-      field.asInstanceOf[LuceneIndexedField[_, D]].createFields(doc)
+      field.asInstanceOf[LuceneIndex[_, D]].createFields(doc)
     })
     _ = index.addDoc(doc._id, fields)
   } yield ()

diff --git a/lucene/src/main/scala/lightdb/lucene/index/BigDecimalField.scala b/lucene/src/main/scala/lightdb/lucene/index/BigDecimalField.scala
diff --git a/lucene/src/main/scala/lightdb/lucene/index/DoubleField.scala b/lucene/src/main/scala/lightdb/lucene/index/DoubleField.scala
diff --git a/lucene/src/main/scala/lightdb/lucene/index/FloatField.scala b/lucene/src/main/scala/lightdb/lucene/index/FloatField.scala
diff --git a/lucene/src/main/scala/lightdb/lucene/index/IntField.scala b/lucene/src/main/scala/lightdb/lucene/index/IntField.scala
diff --git a/lucene/src/main/scala/lightdb/lucene/index/LongField.scala b/lucene/src/main/scala/lightdb/lucene/index/LongField.scala