Skip to content

Commit

Permalink
WIP moving Lucene back out to allow SQLite to be an alternative indexer
Browse files Browse the repository at this point in the history
  • Loading branch information
darkfrog26 committed Apr 9, 2024
1 parent 3dcb2b1 commit 31d11c7
Show file tree
Hide file tree
Showing 21 changed files with 212 additions and 355 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package benchmark

import cats.effect.IO
import fabric.rw.RW
import lightdb.index.StringField
import lightdb.upgrade.DatabaseUpgrade
import lightdb.{Collection, Document, Id, IndexedLinks, LightDB, MaxLinks}

Expand Down
17 changes: 14 additions & 3 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ val scalaTestVersion: String = "3.2.18"
val catsEffectTestingVersion: String = "1.5.0"

lazy val root = project.in(file("."))
.aggregate(core)
.aggregate(core, lucene)
.settings(
name := projectName,
publish := {},
Expand All @@ -72,8 +72,6 @@ lazy val core = project.in(file("core"))
"co.fs2" %% "fs2-core" % fs2Version,
"com.outr" %% "scribe-slf4j" % scribeVersion,
"com.github.yahoo" % "HaloDB" % haloDBVersion,
"org.apache.lucene" % "lucene-core" % luceneVersion,
"org.apache.lucene" % "lucene-queryparser" % luceneVersion,
"org.scalatest" %% "scalatest" % scalaTestVersion % Test,
"org.typelevel" %% "cats-effect-testing-scalatest" % catsEffectTestingVersion % Test
),
Expand All @@ -95,6 +93,19 @@ lazy val core = project.in(file("core"))
}
)

lazy val lucene = project.in(file("lucene"))
.dependsOn(core)
.settings(
name := s"$projectName-lucene",
fork := true,
libraryDependencies ++= Seq(
"org.apache.lucene" % "lucene-core" % luceneVersion,
"org.apache.lucene" % "lucene-queryparser" % luceneVersion,
"org.scalatest" %% "scalatest" % scalaTestVersion % Test,
"org.typelevel" %% "cats-effect-testing-scalatest" % catsEffectTestingVersion % Test
)
)

lazy val benchmark = project.in(file("benchmark"))
.dependsOn(core)
.settings(
Expand Down
36 changes: 1 addition & 35 deletions core/src/main/scala/lightdb/Collection.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ abstract class Collection[D <: Document[D]](val collectionName: String,

implicit val rw: RW[D]

val _id: StringField[D] = index("_id").string(_._id.value, store = true)

protected lazy val store: Store = db.createStore(collectionName)

private var _indexedLinks = List.empty[IndexedLinks[_, D]]
Expand All @@ -34,10 +32,6 @@ abstract class Collection[D <: Document[D]](val collectionName: String,
protected def postSet(doc: D): IO[Unit] = for {
// Update IndexedLinks
_ <- _indexedLinks.map(_.add(doc)).sequence
fields = index.fields.flatMap { field =>
field.createFields(doc)
}
_ = index.indexer.addDoc(doc._id, fields)
_ <- commit().whenA(autoCommit)
} yield ()

Expand All @@ -46,7 +40,6 @@ abstract class Collection[D <: Document[D]](val collectionName: String,
protected def postDelete(doc: D): IO[Unit] = for {
// Update IndexedLinks
_ <- _indexedLinks.map(_.remove(doc)).sequence
_ <- index.indexer.delete(doc._id)
_ <- commit().whenA(autoCommit)
} yield ()

Expand Down Expand Up @@ -103,38 +96,11 @@ abstract class Collection[D <: Document[D]](val collectionName: String,
il
}

lazy val query: Query[D] = Query(this)

object index {
private var _fields = List.empty[IndexedField[_, D]]

lazy val indexer: Indexer[D] = Indexer(Collection.this)
def fields: List[IndexedField[_, D]] = _fields

def apply(name: String): IndexedFieldBuilder = IndexedFieldBuilder(name)

protected[lightdb] def register[F](field: IndexedField[F, D]): Unit = synchronized {
_fields = field :: _fields
}
}

def withSearchContext[Return](f: SearchContext[D] => IO[Return]): IO[Return] = index.indexer.withSearchContext(f)

def size: IO[Int] = store.size

def commit(): IO[Unit] = index.indexer.commit()
def commit(): IO[Unit] = IO.unit

def dispose(): IO[Unit] = IO.unit

case class IndexedFieldBuilder(fieldName: String) {
def tokenized(f: D => String): TokenizedField[D] = TokenizedField(fieldName, Collection.this, f)
def string(f: D => String, store: Boolean = false): StringField[D] = StringField(fieldName, Collection.this, f, store)
def int(f: D => Int): IntField[D] = IntField(fieldName, Collection.this, f)
def long(f: D => Long): LongField[D] = LongField(fieldName, Collection.this, f)
def float(f: D => Float): FloatField[D] = FloatField(fieldName, Collection.this, f)
def double(f: D => Double): DoubleField[D] = DoubleField(fieldName, Collection.this, f)
def bigDecimal(f: D => BigDecimal): BigDecimalField[D] = BigDecimalField(fieldName, Collection.this, f)
}
}

object Collection {
Expand Down
25 changes: 25 additions & 0 deletions core/src/main/scala/lightdb/index/IndexSupport.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package lightdb.index

import cats.effect.IO
import lightdb.query.Query
import lightdb.{Collection, Document}

trait IndexSupport[D <: Document[D], IF[F] <: IndexedField[F, D]] extends Collection[D] {
lazy val query: Query[D] = Query(this)

protected def indexer: Indexer[D]

override def commit(): IO[Unit] = super.commit().flatMap(_ => indexer.commit())

def index: IndexManager[D, IF]
}

trait IndexManager[D <: Document[D], IF <: IndexedField[_, D]] {
protected var _fields = List.empty[IF]

def fields: List[IF] = _fields

protected[lightdb] def register[F, Field <: IF with IndexedField[F, D]](field: Field): Unit = synchronized {
_fields = field :: _fields
}
}
7 changes: 0 additions & 7 deletions core/src/main/scala/lightdb/index/IndexedField.scala
Original file line number Diff line number Diff line change
@@ -1,16 +1,9 @@
package lightdb.index

import lightdb.{Collection, Document}
import org.apache.lucene.search.SortField
import org.apache.lucene.{document => ld}

trait IndexedField[F, D <: Document[D]] {
def fieldName: String
def collection: Collection[D]
def get: D => F

collection.index.register(this)

protected[lightdb] def createFields(doc: D): List[ld.Field]
protected[lightdb] def sortType: SortField.Type
}
59 changes: 5 additions & 54 deletions core/src/main/scala/lightdb/index/Indexer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,62 +2,13 @@ package lightdb.index

import cats.effect.IO
import lightdb.{Collection, Document, Id}
import org.apache.lucene.analysis.Analyzer
import org.apache.lucene.analysis.standard.StandardAnalyzer
import org.apache.lucene.index.{IndexWriter, IndexWriterConfig, Term}
import org.apache.lucene.queryparser.classic.QueryParser
import org.apache.lucene.search.{IndexSearcher, MatchAllDocsQuery, SearcherFactory, SearcherManager}
import org.apache.lucene.store.{ByteBuffersDirectory, FSDirectory}
import org.apache.lucene.document.{Document => LuceneDocument, Field => LuceneField}

import java.nio.file.{Files, Path}
trait Indexer[D <: Document[D]] {
def collection: Collection[D]

case class Indexer[D <: Document[D]](collection: Collection[D],
persistent: Boolean = true,
analyzer: Analyzer = new StandardAnalyzer) {
private lazy val path: Option[Path] = if (persistent) {
val p = collection.db.directory.resolve(collection.collectionName).resolve("index")
Files.createDirectories(p)
Some(p)
} else {
None
}
private lazy val directory = path
.map(p => FSDirectory.open(p))
.getOrElse(new ByteBuffersDirectory())
private lazy val config = new IndexWriterConfig(analyzer)
private lazy val indexWriter = new IndexWriter(directory, config)
private[lightdb] def delete(id: Id[D]): IO[Unit]

private lazy val searcherManager = new SearcherManager(indexWriter, new SearcherFactory)
def commit(): IO[Unit]

private lazy val parser = new QueryParser("_id", analyzer)

def withSearchContext[Return](f: SearchContext[D] => IO[Return]): IO[Return] = {
val indexSearcher = searcherManager.acquire()
val context = SearchContext(collection, indexSearcher)
f(context)
.guarantee(IO(searcherManager.release(indexSearcher)))
}

private[lightdb] def addDoc(id: Id[D], fields: List[LuceneField]): Unit = if (fields.length > 1) {
val document = new LuceneDocument
fields.foreach(document.add)
indexWriter.updateDocument(new Term("_id", id.value), document)
}

private[lightdb] def delete(id: Id[D]): IO[Unit] = IO {
indexWriter.deleteDocuments(parser.parse(s"_id:${id.value}"))
}

private def commitBlocking(): Unit = {
indexWriter.flush()
indexWriter.commit()
searcherManager.maybeRefreshBlocking()
}

def commit(): IO[Unit] = IO(commitBlocking())

def count(): IO[Int] = withSearchContext { context =>
IO(context.indexSearcher.count(new MatchAllDocsQuery))
}
def count(): IO[Int]
}
11 changes: 1 addition & 10 deletions core/src/main/scala/lightdb/query/Filter.scala
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
package lightdb.query

import lightdb.Document
import org.apache.lucene.search.{Query => LuceneQuery}

trait Filter[D <: Document[D]] {
protected[lightdb] def asQuery: LuceneQuery
}

object Filter {
def apply[D <: Document[D]](f: => LuceneQuery): Filter[D] = new Filter[D] {
override protected[lightdb] def asQuery: LuceneQuery = f
}
}
trait Filter[D <: Document[D]]
21 changes: 7 additions & 14 deletions core/src/main/scala/lightdb/query/PagedResults.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@ package lightdb.query

import cats.effect.IO
import cats.implicits.toTraverseOps
import lightdb.index.SearchContext
import lightdb.{Document, Id}
import org.apache.lucene.search.ScoreDoc

trait IndexContext[D <: Document[D]] {
def nextPage(): IO[Option[PagedResults[D]]]
}

case class PagedResults[D <: Document[D]](query: Query[D],
context: SearchContext[D],
context: IndexContext[D],
offset: Int,
total: Int,
ids: List[Id[D]],
private val lastScoreDoc: Option[ScoreDoc]) {
ids: List[Id[D]]) {
lazy val page: Int = offset / query.pageSize
lazy val pages: Int = math.ceil(total.toDouble / query.pageSize.toDouble).toInt

Expand All @@ -22,13 +23,5 @@ case class PagedResults[D <: Document[D]](query: Query[D],

def hasNext: Boolean = pages > (page + 1)

def next(): IO[Option[PagedResults[D]]] = if (hasNext) {
query.doSearch(
context = context,
offset = offset + query.pageSize,
after = lastScoreDoc
).map(Some.apply)
} else {
IO.pure(None)
}
def next(): IO[Option[PagedResults[D]]] = context.nextPage()
}
5 changes: 2 additions & 3 deletions core/src/main/scala/lightdb/query/Query.scala
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
package lightdb.query

import cats.effect.IO
import lightdb.index.SearchContext
import lightdb.index.IndexSupport
import lightdb.{Collection, Document, Id}
import org.apache.lucene.index.StoredFields
import org.apache.lucene.search.{MatchAllDocsQuery, ScoreDoc, SortField, TopFieldDocs, Query => LuceneQuery, Sort => LuceneSort}

case class Query[D <: Document[D]](collection: Collection[D],
filter: Option[Filter[D]] = None,
case class Query[D <: Document[D]](filter: Option[Filter[D]] = None,
sort: List[Sort] = Nil,
scoreDocs: Boolean = false,
pageSize: Int = 1_000) {
Expand Down
1 change: 0 additions & 1 deletion core/src/test/scala/spec/SimpleSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import cats.effect.IO
import cats.effect.testing.scalatest.AsyncIOSpec
import fabric.rw._
import lightdb._
import lightdb.index.{IntField, StringField}
import lightdb.query._
import lightdb.upgrade.DatabaseUpgrade
import org.scalatest.matchers.should.Matchers
Expand Down
Loading

0 comments on commit 31d11c7

Please sign in to comment.