Skip to content

Commit

Permalink
Preliminary support added for DuckDB
Browse files Browse the repository at this point in the history
  • Loading branch information
darkfrog26 committed Jun 7, 2024
1 parent 46b8819 commit c72a49c
Show file tree
Hide file tree
Showing 6 changed files with 351 additions and 8 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Computationally focused database using pluggable store + indexer
## Provided Indexers
- Apache Lucene (https://lucene.apache.org) - Most featureful
- SQLite (https://www.sqlite.org) - Fastest
- DuckDB (https://duckdb.org) - Experimental

## 1.0 TODO
- [ ] More performance improvements to SQLite integration
Expand Down
269 changes: 269 additions & 0 deletions all/src/test/scala/spec/SimpleHaloAndDuckDBSpec.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
package spec

import cats.effect.IO
import cats.effect.testing.scalatest.AsyncIOSpec
import fabric.rw._
import lightdb._
import lightdb.duckdb.DuckDBSupport
import lightdb.halo.HaloDBSupport
import lightdb.model.Collection
import lightdb.sqlite.SQLiteSupport
import lightdb.upgrade.DatabaseUpgrade
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AsyncWordSpec

import java.nio.file.{Path, Paths}

/**
 * End-to-end spec for a LightDB database that uses HaloDB for document storage and
 * DuckDB as the SQL indexer (see the DB / Person definitions nested at the bottom).
 * The cases below share one database instance and are order-dependent: documents
 * written by earlier cases are read, modified, and deleted by later ones.
 */
class SimpleHaloAndDuckDBSpec extends AsyncWordSpec with AsyncIOSpec with Matchers {
  // Fixed ids so documents can be re-fetched across test cases.
  private val id1 = Id[Person]("john")
  private val id2 = Id[Person]("jane")

  private val p1 = Person("John Doe", 21, id1)
  private val p2 = Person("Jane Doe", 19, id2)

  "Simple database" should {
    "initialize the database" in {
      // truncate = true wipes any state left over from a previous run
      DB.init(truncate = true)
    }
    "store John Doe" in {
      Person.set(p1).map { p =>
        p._id should be(id1)
      }
    }
    "verify John Doe exists" in {
      Person.get(id1).map { o =>
        o should be(Some(p1))
      }
    }
    "storage Jane Doe" in {
      Person.set(p2).map { p =>
        p._id should be(id2)
      }
    }
    "verify Jane Doe exists" in {
      Person.get(id2).map { o =>
        o should be(Some(p2))
      }
    }
    "verify exactly two objects in data" in {
      Person.size.map { size =>
        size should be(2)
      }
    }
    "flush data" in {
      // commit() flushes pending writes so the index reflects the store
      Person.commit()
    }
    "verify exactly two objects in index" in {
      Person.index.size.map { size =>
        size should be(2)
      }
    }
    "verify exactly two objects in the store" in {
      Person.idStream.compile.toList.map { ids =>
        ids.toSet should be(Set(id1, id2))
      }
    }
    "search by name for positive result" in {
      Person.withSearchContext { implicit context =>
        Person
          .query
          .countTotal(true)
          .filter(Person.name.is("Jane Doe"))
          .search()
          .flatMap { page =>
            page.page should be(0)
            page.pages should be(1)
            page.offset should be(0)
            page.total should be(1)
            page.ids should be(List(id2))
            page.hasNext should be(false)
            page.docs.map { people =>
              people.length should be(1)
              val p = people.head
              p._id should be(id2)
              p.name should be("Jane Doe")
              p.age should be(19)
            }
          }
      }
    }
    "search by age for positive result" in {
      // IndexedLinks lookup keyed on the stringified age (see Person.ageLinks)
      Person.ageLinks.query(19).compile.toList.map { people =>
        people.length should be(1)
        val p = people.head
        p._id should be(id2)
        p.name should be("Jane Doe")
        p.age should be(19)
      }
    }
    "search by id for John" in {
      Person(id1).map { person =>
        person._id should be(id1)
        person.name should be("John Doe")
        person.age should be(21)
      }
    }
    "search for age range" in {
      Person.withSearchContext { implicit context =>
        Person
          .query
          .filter(Person.age BETWEEN 19 -> 21)
          .search()
          .flatMap { results =>
            results.docs.map { people =>
              people.length should be(2)
              val names = people.map(_.name).toSet
              names should be(Set("John Doe", "Jane Doe"))
              val ages = people.map(_.age).toSet
              ages should be(Set(21, 19))
            }
          }
      }
    }
    "do paginated search" in {
      // pageSize(1) with two documents should yield exactly two pages
      Person.withSearchContext { implicit context =>
        Person.query.pageSize(1).countTotal(true).search().flatMap { page1 =>
          page1.page should be(0)
          page1.pages should be(2)
          page1.hasNext should be(true)
          page1.docs.flatMap { people1 =>
            people1.length should be(1)
            page1.next().flatMap {
              case Some(page2) =>
                page2.page should be(1)
                page2.pages should be(2)
                page2.hasNext should be(false)
                page2.docs.map { people2 =>
                  people2.length should be(1)
                }
              case None => fail("Should have a second page")
            }
          }
        }
      }
    }
    "do paginated search as a stream" in {
      Person.withSearchContext { implicit context =>
        Person.query.pageSize(1).countTotal(true).stream.compile.toList.map { people =>
          people.length should be(2)
          people.map(_.name).toSet should be(Set("John Doe", "Jane Doe"))
        }
      }
    }
    "verify the number of records" in {
      Person.index.size.map { size =>
        size should be(2)
      }
    }
    "modify John" in {
      Person.modify(id1) {
        case Some(john) => IO(Some(john.copy(name = "Johnny Doe")))
        case None => throw new RuntimeException("John not found!")
      }.map { person =>
        person.get.name should be("Johnny Doe")
      }
    }
    "commit modified data" in {
      Person.commit()
    }
    "verify the number of records has not changed after modify" in {
      // modify must update in place, not create a new document
      Person.index.size.map { size =>
        size should be(2)
      }
    }
    "verify John was modified" in {
      Person(id1).map { person =>
        person.name should be("Johnny Doe")
      }
    }
    "delete John" in {
      Person.delete(id1).map { deleted =>
        deleted should be(id1)
      }
    }
    "verify exactly one object in data" in {
      Person.size.map { size =>
        size should be(1)
      }
    }
    "commit data" in {
      Person.commit()
    }
    "verify exactly one object in index" in {
      Person.index.size.map { size =>
        size should be(1)
      }
    }
    "list all documents" in {
      Person.stream.compile.toList.map { people =>
        people.length should be(1)
        val p = people.head
        p._id should be(id2)
        p.name should be("Jane Doe")
        p.age should be(19)
      }
    }
    "replace Jane Doe" in {
      // set() with an existing id replaces the stored document
      Person.set(Person("Jan Doe", 20, id2)).map { p =>
        p._id should be(id2)
      }
    }
    "verify Jan Doe" in {
      Person(id2).map { p =>
        p._id should be(id2)
        p.name should be("Jan Doe")
        p.age should be(20)
      }
    }
    "commit new data" in {
      Person.commit()
    }
    "list new documents" in {
      Person.stream.compile.toList.map { results =>
        results.length should be(1)
        val doc = results.head
        doc._id should be(id2)
        doc.name should be("Jan Doe")
        doc.age should be(20)
      }
    }
    "verify start time has been set" in {
      // startTime is written by InitialSetupUpgrade during DB.init
      DB.startTime.get().map { startTime =>
        startTime should be > 0L
      }
    }
    "dispose" in {
      DB.dispose()
    }
  }

  // Test database definition: HaloDB-backed store rooted at ./testdb.
  object DB extends LightDB with HaloDBSupport {
    override lazy val directory: Path = Paths.get("testdb")

    // Set by InitialSetupUpgrade at startup; remains -1L until the upgrade runs.
    val startTime: StoredValue[Long] = stored[Long]("startTime", -1L)

    override lazy val userCollections: List[Collection[_]] = List(
      Person
    )

    override def upgrades: List[DatabaseUpgrade] = List(InitialSetupUpgrade)
  }

  // Minimal document model for the spec; _id defaults to a freshly generated Id.
  case class Person(name: String, age: Int, _id: Id[Person] = Id()) extends Document[Person]

  // Collection definition indexing Person via DuckDB (exercises DuckDBSupport).
  object Person extends Collection[Person]("people", DB) with DuckDBSupport[Person] {
    override implicit val rw: RW[Person] = RW.gen

    // Single-field SQL indexes backing the query filters above.
    val name: I[String] = index.one("name", _.name)
    val age: I[Int] = index.one("age", _.age)
    // Store-level indexed links keyed by stringified age.
    // NOTE(review): shares the name "age" with the index above — presumably the
    // two live in separate namespaces; confirm no collision.
    val ageLinks: IndexedLinks[Int, Person] = indexedLinks[Int]("age", _.toString, _.age)
  }

  // One-time startup upgrade that records the database start time.
  object InitialSetupUpgrade extends DatabaseUpgrade {
    override def applyToNew: Boolean = true
    override def blockStartup: Boolean = true
    override def alwaysRun: Boolean = false

    // Persist the current wall-clock time, discarding the stored result.
    override def upgrade(ldb: LightDB): IO[Unit] = DB.startTime.set(System.currentTimeMillis()).map(_ => ())
  }
}
17 changes: 15 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,15 @@ val fs2Version: String = "3.10.2"
// Pinned dependency versions shared across all modules.
val scribeVersion: String = "3.14.0"
val luceneVersion: String = "9.10.0"
val sqliteVersion: String = "3.46.0.0"
val duckdbVersion: String = "1.0.0"
val keysemaphoreVersion: String = "0.3.0-M1"
val squantsVersion: String = "1.8.3"

// Test-only dependency versions.
val scalaTestVersion: String = "3.2.18"
val catsEffectTestingVersion: String = "1.5.0"

lazy val root = project.in(file("."))
.aggregate(core, halodb, rocksdb, mapdb, lucene, sql, sqlite, all)
.aggregate(core, halodb, rocksdb, mapdb, lucene, sql, sqlite, duckdb, all)
.settings(
name := projectName,
publish := {},
Expand Down Expand Up @@ -174,8 +175,20 @@ lazy val sqlite = project.in(file("sqlite"))
)
)

// DuckDB indexer module: layers the DuckDB JDBC driver on top of the shared sql module.
lazy val duckdb = project.in(file("duckdb"))
  .dependsOn(sql)
  .settings(
    name := s"$projectName-duckdb",
    // run/test in a forked JVM, matching the other indexer modules
    fork := true,
    libraryDependencies ++= Seq(
      "org.duckdb" % "duckdb_jdbc" % duckdbVersion,
      "org.scalatest" %% "scalatest" % scalaTestVersion % Test,
      "org.typelevel" %% "cats-effect-testing-scalatest" % catsEffectTestingVersion % Test
    )
  )

lazy val all = project.in(file("all"))
.dependsOn(core, halodb, rocksdb, mapdb, lucene, sqlite)
.dependsOn(core, halodb, rocksdb, mapdb, lucene, sqlite, duckdb)
.settings(
name := s"$projectName-all",
fork := true,
Expand Down
45 changes: 45 additions & 0 deletions duckdb/src/main/scala/lightdb/duckdb/DuckDBSupport.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package lightdb.duckdb

import fabric.define.DefType
import lightdb.Document
import lightdb.sql.SQLSupport

import java.nio.file.{Files, Path}
import java.sql.{Connection, DriverManager}

/**
 * SQLSupport implementation backed by DuckDB. Each collection gets its own
 * database file at `<db-directory>/<collection-name>/duckdb.db`, created lazily
 * on first access.
 */
trait DuckDBSupport[D <: Document[D]] extends SQLSupport[D] {
  // Per-collection database file; parent directories are created on first use.
  private lazy val path: Path = {
    val dbFile = collection.db.directory.resolve(collection.collectionName).resolve("duckdb.db")
    Files.createDirectories(dbFile.getParent)
    dbFile
  }
  // TODO: Should each collection have a connection?

  override protected def enableAutoCommit: Boolean = true

  /**
   * Builds the CREATE TABLE DDL from the collection's index fields. `_id` is
   * always the VARCHAR primary key; only String and Int index types map to
   * columns — anything else fails fast with an UnsupportedOperationException.
   */
  override protected def createTable(): String = {
    val columns = index.fields.map { field =>
      field.fieldName match {
        case "_id" => "_id VARCHAR PRIMARY KEY"
        case fieldName =>
          val sqlType = field.rw.definition match {
            case DefType.Str => "VARCHAR"
            case DefType.Int => "INTEGER"
            case d => throw new UnsupportedOperationException(s"$fieldName has an unsupported type: $d")
          }
          s"$fieldName $sqlType"
      }
    }
    val sql = s"CREATE TABLE IF NOT EXISTS ${collection.collectionName}(${columns.mkString(", ")})"
    scribe.info(sql)
    sql
  }

  /**
   * Opens a new JDBC connection to this collection's DuckDB file.
   * NOTE(review): Class.forName is likely redundant under JDBC 4 driver
   * auto-loading — confirm before removing.
   */
  override protected def createConnection(): Connection = {
    Class.forName("org.duckdb.DuckDBDriver")
    DriverManager.getConnection(s"jdbc:duckdb:${path.toFile.getCanonicalPath}")
  }

  // DuckDB supports TRUNCATE natively, so no DELETE FROM fallback is needed.
  override protected def truncateSQL: String = s"TRUNCATE ${collection.collectionName}"
}
Loading

0 comments on commit c72a49c

Please sign in to comment.