From f7cf3996f06edf82f8fda105c858b930eb16bb6e Mon Sep 17 00:00:00 2001 From: Matt Hicks Date: Sun, 7 Apr 2024 20:10:25 -0500 Subject: [PATCH] Added SQLite --- benchmark/benchmark.csv | 10 +- benchmark/graphs/imports.svg | 2 +- benchmark/graphs/search-titles.svg | 2 +- benchmark/graphs/validate-ids.svg | 2 +- .../main/scala/benchmark/IMDBBenchmark.scala | 2 +- .../benchmark/SQLiteImplementation.scala | 211 ++++++++++++++++++ build.sbt | 1 + 7 files changed, 221 insertions(+), 9 deletions(-) create mode 100644 benchmark/src/main/scala/benchmark/SQLiteImplementation.scala diff --git a/benchmark/benchmark.csv b/benchmark/benchmark.csv index 53052920..664cd8dc 100644 --- a/benchmark/benchmark.csv +++ b/benchmark/benchmark.csv @@ -1,5 +1,5 @@ -Type,LightDB,PostgreSQL,MongoDB,ArangoDB,MariaDB -AKA Import: 52858.99 34138.795 78600.213 22415.47 8439.405 -Basics Import: 96395.498 69027.068 80287.444 24408.508 9846.711 -Validate Ids: 3312929.638 61656.255 38804.383 193541.718 21305.547 -Search Titles: 426549.103 59856.777 37117.721 32062.46 7096.348 \ No newline at end of file +Type,LightDB,PostgreSQL,MongoDB,ArangoDB,MariaDB,SQLite +AKA Import: 52858.99 34138.795 78600.213 22415.47 8439.405 51622.243 +Basics Import: 96395.498 69027.068 80287.444 24408.508 9846.711 56590.546 +Validate Ids: 3312929.638 61656.255 38804.383 193541.718 21305.547 798917.720 +Search Titles: 426549.103 59856.777 37117.721 32062.46 7096.348 556886.747 \ No newline at end of file diff --git a/benchmark/graphs/imports.svg b/benchmark/graphs/imports.svg index 29ae0d18..91a81b69 100644 --- a/benchmark/graphs/imports.svg +++ b/benchmark/graphs/imports.svg @@ -1 +1 @@ -Import Data BenchmarkAKAImportBasicsImport020,00040,00060,00080,000100,000LightDBPostgreSQLMongoDBArangoDBMariaDBDatabaseRecords Per Second \ No newline at end of file +Data Import020,00040,00060,00080,000100,000LightDBPostgreSQLMongoDBArangoDBMariaDBSQLiteDatabaseRecords Per Second \ No newline at end of file diff --git 
a/benchmark/graphs/search-titles.svg b/benchmark/graphs/search-titles.svg index dc95ba80..af624f55 100644 --- a/benchmark/graphs/search-titles.svg +++ b/benchmark/graphs/search-titles.svg @@ -1 +1 @@ -Search Titles0100,000200,000300,000400,000500,000LightDBPostgreSQLMongoDBArangoDBMariaDBDatabaseRecords Per Second \ No newline at end of file +Search Titles0200,000400,000600,000LightDBPostgreSQLMongoDBArangoDBMariaDBSQLiteDatabaseRecords Per Second \ No newline at end of file diff --git a/benchmark/graphs/validate-ids.svg b/benchmark/graphs/validate-ids.svg index 66e7d11a..e5c470a0 100644 --- a/benchmark/graphs/validate-ids.svg +++ b/benchmark/graphs/validate-ids.svg @@ -1 +1 @@ -Validate IdsValidate01,000,0002,000,0003,000,0004,000,000LightDBPostgreSQLMongoDBArangoDBMariaDBDatabaseRecords Per Second \ No newline at end of file +Validate Ids01,000,0002,000,0003,000,0004,000,000LightDBPostgreSQLMongoDBArangoDBMariaDBSQLiteDatabaseRecords Per Second \ No newline at end of file diff --git a/benchmark/src/main/scala/benchmark/IMDBBenchmark.scala b/benchmark/src/main/scala/benchmark/IMDBBenchmark.scala index aec4238b..c4733130 100644 --- a/benchmark/src/main/scala/benchmark/IMDBBenchmark.scala +++ b/benchmark/src/main/scala/benchmark/IMDBBenchmark.scala @@ -50,7 +50,7 @@ import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger} */ object IMDBBenchmark { // extends IOApp { implicit val runtime: IORuntime = IORuntime.global - val implementation: BenchmarkImplementation = MariaDBImplementation + val implementation: BenchmarkImplementation = SQLiteImplementation private var ids: List[Ids] = Nil diff --git a/benchmark/src/main/scala/benchmark/SQLiteImplementation.scala b/benchmark/src/main/scala/benchmark/SQLiteImplementation.scala new file mode 100644 index 00000000..f28beb88 --- /dev/null +++ b/benchmark/src/main/scala/benchmark/SQLiteImplementation.scala @@ -0,0 +1,211 @@ +package benchmark + +import cats.effect.IO +import cats.effect.unsafe.IORuntime +import 
import lightdb.util.FlushingBacklog

import java.sql.{Connection, DriverManager, ResultSet}

/**
 * SQLite-backed [[BenchmarkImplementation]] for the IMDB benchmark.
 *
 * Every operation goes through one lazily opened JDBC connection to the local `imdb.db` file.
 * Auto-commit is disabled on that connection, so inserted rows are only committed when
 * [[flush]] runs. Inserts are queued in two [[FlushingBacklog]]s (one per table) and written
 * as JDBC batches.
 */
object SQLiteImplementation extends BenchmarkImplementation {
  implicit val runtime: IORuntime = IORuntime.global

  override type TitleAka = TitleAkaPG
  override type TitleBasics = TitleBasicsPG

  // Opened on first use; transactions are controlled manually via commit().
  private lazy val connection: Connection = {
    val c = DriverManager.getConnection("jdbc:sqlite:imdb.db")
    c.setAutoCommit(false)
    c
  }

  // NOTE(review): the (1000, 10000) arguments are presumably the backlog's batch size and
  // maximum size - confirm against FlushingBacklog.
  private lazy val backlogAka = new FlushingBacklog[TitleAka](1000, 10000) {
    /** Writes one batch of queued `title_aka` rows with a single batched INSERT. */
    override protected def write(list: List[TitleAkaPG]): IO[Unit] = IO {
      insertBatch("INSERT INTO title_aka(id, titleId, ordering, title, region, language, types, attributes, isOriginalTitle) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", list) { (ps, t) =>
        ps.setString(1, t.id)
        ps.setString(2, t.titleId)
        ps.setInt(3, t.ordering)
        ps.setString(4, t.title)
        ps.setString(5, t.region)
        ps.setString(6, t.language)
        ps.setString(7, t.types)
        ps.setString(8, t.attributes)
        ps.setInt(9, t.isOriginalTitle)
      }
    }
  }

  private lazy val backlogBasics = new FlushingBacklog[TitleBasics](1000, 10000) {
    /** Writes one batch of queued `title_basics` rows with a single batched INSERT. */
    override protected def write(list: List[TitleBasicsPG]): IO[Unit] = IO {
      insertBatch("INSERT INTO title_basics(id, tconst, titleType, primaryTitle, originalTitle, isAdult, startYear, endYear, runtimeMinutes, genres) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", list) { (ps, t) =>
        ps.setString(1, t.id)
        ps.setString(2, t.tconst)
        ps.setString(3, t.titleType)
        ps.setString(4, t.primaryTitle)
        ps.setString(5, t.originalTitle)
        ps.setInt(6, t.isAdult)
        ps.setInt(7, t.startYear)
        ps.setInt(8, t.endYear)
        ps.setInt(9, t.runtimeMinutes)
        ps.setString(10, t.genres)
      }
    }
  }

  override def name: String = "SQLite"

  /** Drops and recreates both tables plus the `titleId` index used by [[findByTitleId]]. */
  override def init(): IO[Unit] = IO {
    executeUpdate("DROP TABLE IF EXISTS title_aka")
    executeUpdate("DROP TABLE IF EXISTS title_basics")
    executeUpdate("CREATE TABLE title_aka(id VARCHAR NOT NULL, titleId TEXT, ordering INTEGER, title TEXT, region TEXT, language TEXT, types TEXT, attributes TEXT, isOriginalTitle SMALLINT, PRIMARY KEY (id))")
    executeUpdate("CREATE TABLE title_basics(id VARCHAR NOT NULL, tconst TEXT, titleType TEXT, primaryTitle TEXT, originalTitle TEXT, isAdult INTEGER, startYear INTEGER, endYear INTEGER, runtimeMinutes INTEGER, genres TEXT, PRIMARY KEY (id))")
    executeUpdate("CREATE INDEX title_aka_title_id_idx ON title_aka(titleId)")
  }

  /**
   * Builds a `title_aka` row from a parsed input record.
   *
   * A missing `id` is replaced by a generated one, missing optional text columns become `""`,
   * and `isOriginalTitle` is encoded as 1 / 0, or -1 when absent.
   */
  override def map2TitleAka(map: Map[String, String]): TitleAka = TitleAkaPG(
    id = map.option("id").getOrElse(lightdb.Unique()),
    titleId = map.value("titleId"),
    ordering = map.int("ordering"),
    title = map.value("title"),
    region = map.option("region").getOrElse(""),
    language = map.option("language").getOrElse(""),
    types = map.option("types").getOrElse(""),
    attributes = map.option("attributes").getOrElse(""),
    isOriginalTitle = map.boolOption("isOriginalTitle").map(b => if (b) 1 else 0).getOrElse(-1)
  )

  /** Builds a `title_basics` row from a parsed input record; a missing `id` is generated. */
  override def map2TitleBasics(map: Map[String, String]): TitleBasicsPG = TitleBasicsPG(
    id = map.option("id").getOrElse(lightdb.Unique()),
    tconst = map.value("tconst"),
    titleType = map.value("titleType"),
    primaryTitle = map.value("primaryTitle"),
    originalTitle = map.value("originalTitle"),
    isAdult = map.int("isAdult"),
    startYear = map.int("startYear"),
    endYear = map.int("endYear"),
    runtimeMinutes = map.int("runtimeMinutes"),
    genres = map.value("genres")
  )

  /** Queues the row for a later batched insert; nothing is committed until [[flush]]. */
  override def persistTitleAka(t: TitleAka): IO[Unit] = backlogAka.enqueue(t).map(_ => ())

  /** Queues the row for a later batched insert; nothing is committed until [[flush]]. */
  override def persistTitleBasics(t: TitleBasicsPG): IO[Unit] = backlogBasics.enqueue(t).map(_ => ())

  /** Reads the row the cursor is currently positioned on; does not advance the cursor. */
  private def fromRS(rs: ResultSet): TitleAkaPG = TitleAkaPG(
    id = rs.getString("id"),
    titleId = rs.getString("titleId"),
    ordering = rs.getInt("ordering"),
    title = rs.getString("title"),
    region = rs.getString("region"),
    language = rs.getString("language"),
    types = rs.getString("types"),
    attributes = rs.getString("attributes"),
    isOriginalTitle = rs.getInt("isOriginalTitle")
  )

  /**
   * Streams every `title_aka` row.
   *
   * The query is executed each time the returned stream is run (not when this method is
   * called), and the statement and result set are closed when the stream completes, fails or
   * is cancelled.
   */
  override def streamTitleAka(): fs2.Stream[IO, TitleAkaPG] = {
    val open = IO {
      val statement = connection.createStatement()
      try {
        statement -> statement.executeQuery("SELECT * FROM title_aka")
      } catch {
        // Do not leak the statement if the query itself fails.
        case scala.util.control.NonFatal(e) =>
          statement.close()
          throw e
      }
    }
    fs2.Stream
      .bracket(open) { case (statement, rs) =>
        IO {
          try rs.close()
          finally statement.close()
        }
      }
      .flatMap { case (_, rs) => fs2.Stream.fromBlockingIterator[IO](readAll(rs), 512) }
  }

  override def idFor(t: TitleAkaPG): String = t.id

  override def titleIdFor(t: TitleAkaPG): String = t.titleId

  /**
   * Loads the `title_aka` row with the given primary key.
   *
   * The returned IO fails with a `java.sql.SQLException` when no such row exists.
   */
  override def get(id: String): IO[TitleAkaPG] = IO {
    query("SELECT * FROM title_aka WHERE id = ?", id) { rs =>
      if (rs.next()) fromRS(rs)
      else throw new java.sql.SQLException(s"No title_aka record found for id: $id")
    }
  }

  /** Loads all `title_aka` rows for a title id; the list is empty when none match. */
  override def findByTitleId(titleId: String): IO[List[TitleAkaPG]] = IO {
    query("SELECT * FROM title_aka WHERE titleId = ?", titleId)(rs => readAll(rs).toList)
  }

  /** Writes out whatever is still queued in both backlogs, then commits the transaction. */
  override def flush(): IO[Unit] = for {
    _ <- backlogAka.flush()
    _ <- backlogBasics.flush()
    _ <- IO(commit())
  } yield ()

  /** Logs the number of rows currently in `title_aka`. */
  override def verifyTitleAka(): IO[Unit] = IO(logCount("title_aka"))

  /** Logs the number of rows currently in `title_basics`. */
  override def verifyTitleBasics(): IO[Unit] = IO(logCount("title_basics"))

  /** Counts the rows of `table` (an internal constant, never user input) and logs the result. */
  private def logCount(table: String): Unit = {
    val count = query(s"SELECT COUNT(1) FROM $table") { rs =>
      rs.next()
      rs.getInt(1)
    }
    scribe.info(s"Counted $count records in $table table")
  }

  /** Lazily reads the remaining rows of `rs`; the caller owns (and must close) `rs`. */
  private def readAll(rs: ResultSet): Iterator[TitleAkaPG] =
    Iterator.unfold(rs)(r => if (r.next()) Some(fromRS(r) -> r) else None)

  /**
   * Runs a parameterised query and hands the open result set to `read`.
   *
   * Both the result set and the statement are closed before this method returns, so `read`
   * must fully consume whatever it needs.
   */
  private def query[A](sql: String, params: String*)(read: ResultSet => A): A = {
    val ps = connection.prepareStatement(sql)
    try {
      params.zipWithIndex.foreach { case (value, index) => ps.setString(index + 1, value) }
      val rs = ps.executeQuery()
      try read(rs)
      finally rs.close()
    } finally {
      ps.close()
    }
  }

  /** Inserts `rows` as one JDBC batch; `bind` fills the statement parameters for a row. */
  private def insertBatch[T](sql: String, rows: List[T])(bind: (java.sql.PreparedStatement, T) => Unit): Unit =
    if (rows.nonEmpty) {
      val ps = connection.prepareStatement(sql)
      try {
        rows.foreach { row =>
          bind(ps, row)
          ps.addBatch()
        }
        ps.executeBatch()
        ()
      } finally {
        ps.close()
      }
    }

  /** Executes a single DDL/DML statement and closes it afterwards. */
  private def executeUpdate(sql: String): Unit = {
    val s = connection.createStatement()
    try {
      s.executeUpdate(sql)
      ()
    } finally {
      s.close()
    }
  }

  private def commit(): Unit = connection.commit()

  /** Row of the `title_aka` table; `isOriginalTitle` is 1, 0, or -1 when unknown. */
  case class TitleAkaPG(id: String, titleId: String, ordering: Int, title: String, region: String, language: String, types: String, attributes: String, isOriginalTitle: Int)

  /** Row of the `title_basics` table. */
  case class TitleBasicsPG(id: String, tconst: String, titleType: String, primaryTitle: String, originalTitle: String, isAdult: Int, startYear: Int, endYear: Int, runtimeMinutes: Int, genres: String)
}

/*
 * Remainder of the original patch, preserved from the source text. It adds the sqlite-jdbc
 * driver that the connection above is obtained from:
 *
 * diff --git a/build.sbt b/build.sbt
 * index d6d10a62..dc2e7c18 100644
 * --- a/build.sbt
 * +++ b/build.sbt
 * @@ -102,6 +102,7 @@ lazy val benchmark = project.in(file("benchmark"))
 *    "org.mongodb" % "mongodb-driver-sync" % "5.0.1",
 *    "org.postgresql" % "postgresql" % "42.7.3",
 *    "org.mariadb.jdbc" % "mariadb-java-client" % "3.3.3",
 * +  "org.xerial" % "sqlite-jdbc" % "3.45.2.0",
 *    "com.outr" %% "scarango-driver" % "3.20.0"
 *   )
 *  )
 * \ No newline at end of file
 */