Graph Read Benchmarks #257

Merged
merged 9 commits on Jul 15, 2024
Neo4j working & started benchmarking SBT task
DavidBakerEffendi committed Jul 15, 2024

commit 2be5c74f870491ea4240b7b6cae0eaad70d116f5
41 changes: 39 additions & 2 deletions build.sbt
@@ -45,8 +45,8 @@ libraryDependencies ++= Seq(
"org.openjdk.jmh" % "jmh-generator-reflection" % Versions.jmh,
"org.openjdk.jmh" % "jmh-generator-asm" % Versions.jmh,
"org.slf4j" % "slf4j-api" % Versions.slf4j,
"org.apache.logging.log4j" % "log4j-core" % Versions.log4j % Test,
"org.apache.logging.log4j" % "log4j-slf4j-impl" % Versions.log4j % Test,
"org.apache.logging.log4j" % "log4j-core" % Versions.log4j % Test,
"org.apache.logging.log4j" % "log4j-slf4j-impl" % Versions.log4j % Test,
"org.scalatest" %% "scalatest" % Versions.scalatest % Test
)

@@ -64,3 +64,40 @@ developers := List(
Global / onChangedBuildSource := ReloadOnSourceChanges

publishMavenStyle := true

// Benchmark Tasks

lazy val datasetDir = taskKey[File]("Dataset directory")
datasetDir := baseDirectory.value / "workspace" / "defects4"
lazy val driversToBenchmark = taskKey[Seq[String]]("Drivers to benchmark")
driversToBenchmark := Seq("overflowdb")

lazy val defect4jDataset = taskKey[Seq[(String, String)]]("JARs for projects used in `defects4j`")
defect4jDataset :=
  Seq("Chart" -> "https://repo1.maven.org/maven2/org/jfree/jfreechart/1.5.5/jfreechart-1.5.5.jar")

lazy val benchmarkDownloadTask = taskKey[Unit]("Download `defects4j` candidates for benchmarking")
benchmarkDownloadTask := {
  defect4jDataset.value.foreach { case (name, url) =>
    DownloadHelper.ensureIsAvailable(url, datasetDir.value / s"$name.jar")
  }
}

lazy val benchmarkTask = taskKey[Unit]("Run JMH benchmarks against drivers")
benchmarkTask := {

  def benchmarkArgs(driver: String, project: String): String = {
    val projectDir  = (datasetDir.value / project).getAbsolutePath
    val resultsPath = baseDirectory.value / "results" / s"results-$driver-$project"
    val outputPath  = baseDirectory.value / "results" / s"output-$driver-$project"
    // `toTask` parses its argument as space-delimited program input, so the string must start with a space.
    s" com.github.plume.oss.Benchmark $driver $projectDir -o ${outputPath.getAbsolutePath} -r ${resultsPath.getAbsolutePath}"
  }

  driversToBenchmark.value.foreach { driver =>
    // Dataset entries are (name, url) pairs; the name is the project identifier under datasetDir.
    defect4jDataset.value.foreach { case (project, _) =>
      println(s"[INFO] Benchmarking $driver on $project")
      // This only constructs the `Jmh / runMain` task value; it is not evaluated here
      // (see the dynamic-task sketch below for one way to actually run it).
      (Jmh / runMain).toTask(benchmarkArgs(driver, project))
    }
  }

}
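
As a minimal sketch of how one of the `Jmh / runMain` values built above could actually be evaluated, the following dynamic task runs a single hard-coded driver/project pair. It is an assumption, not part of this commit; the task key name is hypothetical and the paths simply mirror benchmarkArgs.

lazy val benchmarkChartOverflowDb = taskKey[Unit]("Run the OverflowDB read benchmark on the Chart project")
benchmarkChartOverflowDb := Def.taskDyn {
  // Build the same argument string as benchmarkArgs above and return the
  // `Jmh / runMain` task so that sbt evaluates it as part of this task.
  val projectDir  = (datasetDir.value / "Chart").getAbsolutePath
  val outputPath  = (baseDirectory.value / "results" / "output-overflowdb-Chart").getAbsolutePath
  val resultsPath = (baseDirectory.value / "results" / "results-overflowdb-Chart").getAbsolutePath
  (Jmh / runMain).toTask(s" com.github.plume.oss.Benchmark overflowdb $projectDir -o $outputPath -r $resultsPath")
}.value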
48 changes: 48 additions & 0 deletions project/DownloadHelper.scala
@@ -0,0 +1,48 @@
import java.io.File
import java.net.URI
import java.nio.file.{Files, Path, Paths}

object DownloadHelper {
  val LocalStorageDir = Paths.get(".local/source-urls")

  /** Downloads the remote file from the given url if either
    *   - the localFile is not available,
    *   - or the url is different from the previously downloaded file,
    *   - or we don't have the original url from the previously downloaded file.
    * We store the information about the previously downloaded urls and the localFile in `.local`.
    */
  def ensureIsAvailable(url: String, localFile: File): Unit = {
    if (!localFile.exists() || Option(url) != previousUrlForLocalFile(localFile)) {
      val localPath = localFile.toPath
      Files.deleteIfExists(localPath)

      println(s"[INFO] downloading $url to $localFile")
      sbt.io.Using.urlInputStream(new URI(url).toURL) { inputStream =>
        sbt.IO.transfer(inputStream, localFile)
      }

      // persist url in local storage
      val storageFile = storageInfoFileFor(localFile)
      Files.createDirectories(storageFile.getParent)
      Files.writeString(storageFile, url)
    }
  }

  private def relativePathToProjectRoot(path: Path): String =
    Paths
      .get("")
      .toAbsolutePath
      .normalize()
      .relativize(path.toAbsolutePath)
      .toString

  private def previousUrlForLocalFile(localFile: File): Option[String] = {
    Option(storageInfoFileFor(localFile))
      .filter(Files.exists(_))
      .map(Files.readString)
      .filter(_.nonEmpty)
  }

  private def storageInfoFileFor(localFile: File): Path =
    LocalStorageDir.resolve(relativePathToProjectRoot(localFile.toPath))
}
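
A short usage sketch for the helper above. It is illustrative only: DownloadHelper lives under project/, so it runs on the build classpath where sbt.IO is available and is normally invoked from the build, as benchmarkDownloadTask does; the wrapper object and paths here are hypothetical.

object DownloadHelperUsage {
  def main(args: Array[String]): Unit = {
    val url = "https://repo1.maven.org/maven2/org/jfree/jfreechart/1.5.5/jfreechart-1.5.5.jar"
    val jar = new java.io.File("workspace/defects4/Chart.jar")
    // First call: no local file and no recorded URL, so the JAR is downloaded and the
    // URL is persisted under .local/source-urls/<relative path of the JAR>.
    DownloadHelper.ensureIsAvailable(url, jar)
    // Second call: the file exists and the recorded URL matches, so nothing is downloaded.
    DownloadHelper.ensureIsAvailable(url, jar)
  }
}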
9 changes: 7 additions & 2 deletions src/main/scala/com/github/plume/oss/Benchmark.scala
@@ -2,7 +2,12 @@ package com.github.plume.oss

import better.files.File
import com.github.plume.oss.Benchmark.BenchmarkType.*
import com.github.plume.oss.benchmarking.{OverflowDbReadBenchmark, TinkerGraphReadBenchmark}
import com.github.plume.oss.benchmarking.{
GraphWriteBenchmark,
Neo4jEmbedReadBenchmark,
OverflowDbReadBenchmark,
TinkerGraphReadBenchmark
}
import com.github.plume.oss.drivers.{IDriver, TinkerGraphDriver}
import org.cache2k.benchmark.jmh.ForcedGcMemoryProfiler
import org.openjdk.jmh.annotations.Mode
@@ -44,7 +49,7 @@ object Benchmark {
case _: Neo4jEmbeddedConfig =>
Option(
createOptionsBoilerPlate(config, READ)
.include(classOf[OverflowDbReadBenchmark].getSimpleName)
.include(classOf[Neo4jEmbedReadBenchmark].getSimpleName)
.build()
)
case x =>
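
For reference, a minimal sketch (assumed, not taken from this PR) of how such an Options value drives the standard JMH runner. `.include` takes a regular expression matched against fully-qualified benchmark names, so the simple class name selects every benchmark method declared in Neo4jEmbedReadBenchmark; the entry-point name is illustrative.

import org.openjdk.jmh.runner.Runner
import org.openjdk.jmh.runner.options.OptionsBuilder

@main def runNeo4jReadBenchmarks(): Unit = {
  val opts = new OptionsBuilder()
    .include(classOf[com.github.plume.oss.benchmarking.Neo4jEmbedReadBenchmark].getSimpleName)
    .build()
  new Runner(opts).run()
}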
src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala
@@ -80,14 +80,8 @@ trait GraphReadBenchmark {
@Benchmark
def unindexedMethodFullName(bh: Blackhole): Unit

@Setup(Level.Iteration)
def clearDriver(params: BenchmarkParams): Unit = {
driver.clear()
}

@TearDown
def cleanupBenchmark(): Unit = {
driver.clear()
driver.close()
}

src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala
@@ -1,17 +1,15 @@
package com.github.plume.oss.benchmarking

import com.github.plume.oss.benchmarking.GraphReadBenchmark
import com.github.plume.oss.drivers.{Neo4jEmbeddedDriver, TinkerGraphDriver}
import com.github.plume.oss.drivers.Neo4jEmbeddedDriver
import io.shiftleft.codepropertygraph.generated.EdgeTypes.AST
import io.shiftleft.codepropertygraph.generated.NodeTypes.{CALL, METHOD}
import io.shiftleft.codepropertygraph.generated.PropertyNames.{FULL_NAME, ORDER}
import org.apache.tinkerpop.gremlin.process.traversal.P
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.{GraphTraversalSource, __}
import org.neo4j.graphdb.{GraphDatabaseService, Label}
import org.neo4j.graphdb.GraphDatabaseService
import org.openjdk.jmh.annotations.{Benchmark, Scope, Setup, State}
import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}
import overflowdb.traversal.*

import java.util
import scala.compiletime.uninitialized
import scala.jdk.CollectionConverters.*
import scala.util.{Random, Using}
@@ -33,9 +31,9 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
tx.execute(s"""
|MATCH (n)-[$AST]->()
|WHERE NOT (n)<-[$AST]-()
|RETURN n.id
|RETURN n.id AS ID
|""".stripMargin)
.map { result => result.get("n.id").asInstanceOf[Long] }
.map { result => result.get("ID").asInstanceOf[Long] }
.toArray
}
}
@@ -44,9 +42,9 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
Using.resource(g.beginTx) { tx =>
tx.execute(s"""
|MATCH (n)-[$AST]->()
|RETURN n.id
|RETURN n.id AS ID
|""".stripMargin)
.map { result => result.get("n.id").asInstanceOf[Long] }
.map { result => result.get("ID").asInstanceOf[Long] }
.toArray
}
}
@@ -56,9 +54,9 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
tx.execute(s"""
|MATCH (n)
|WHERE n.$ORDER IS NOT NULL
|RETURN n.id
|RETURN n.id AS ID
|""".stripMargin)
.map { result => result.get("n.id").asInstanceOf[Long] }
.map { result => result.get("ID").asInstanceOf[Long] }
.toArray
}
}
@@ -69,11 +67,10 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
.execute(s"""
|MATCH (n: $CALL)
|WHERE n.$ORDER IS NOT NULL
|RETURN n.id
|RETURN n.id AS ID
|""".stripMargin)
.map { result => result.get("n.id").asInstanceOf[Long] }
.map { result => result.get("ID").asInstanceOf[Long] }
.toList
println(res)
res.toArray
}
}
@@ -83,9 +80,9 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
tx.execute(s"""
|MATCH (n: $METHOD)
|WHERE n.$FULL_NAME IS NOT NULL
|RETURN n.$FULL_NAME
|RETURN n.$FULL_NAME as $FULL_NAME
|""".stripMargin)
.map { result => result.get(s"n.$FULL_NAME").asInstanceOf[String] }
.map { result => result.get(FULL_NAME).asInstanceOf[String] }
.toArray
}
fullNames = new Random(1234).shuffle(fullNames_).toArray
@@ -99,12 +96,16 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
var nnodes = nodeStart.length
while (stack.nonEmpty) {
val childrenIds = Using.resource(g.beginTx) { tx =>
tx.execute(s"""
tx.execute(
s"""
|MATCH (n)-[AST]->(m)
|WHERE n.id = ${stack.removeLast()}
|RETURN m.id
|""".stripMargin)
.map { result => result.get("m.id").asInstanceOf[Long] }
|WHERE n.id = $$nodeId
|RETURN m.id AS ID
|""".stripMargin,
new util.HashMap[String, Object](1) {
put("nodeId", stack.removeLast().asInstanceOf[Object])
}
).map { result => result.get("ID").asInstanceOf[Long] }
.toArray
}
stack.appendAll(childrenIds)
@@ -120,12 +121,16 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
for (node <- nodeStart) {
var nodeId = node
def getResult = Using.resource(g.beginTx) { tx =>
tx.execute(s"""
tx.execute(
s"""
|MATCH (n)<-[AST]-(m)
|WHERE n.id = $nodeId
|RETURN m.id
|""".stripMargin)
.map { result => result.get("m.id").asInstanceOf[Long] }
|WHERE n.id = $$nodeId
|RETURN m.id AS ID
|""".stripMargin,
new util.HashMap[String, Object](1) {
put("nodeId", nodeId.asInstanceOf[Object])
}
).map { result => result.get("ID").asInstanceOf[Long] }
.toArray
}
var result = getResult
@@ -145,12 +150,16 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
var sumOrder = 0
for (nodeId <- nodeStart) {
val orderArr = Using.resource(g.beginTx) { tx =>
tx.execute(s"""
tx.execute(
s"""
|MATCH (n)
|WHERE n.id = $nodeId
|RETURN n.$ORDER
|""".stripMargin)
.map { result => result.get(s"n.$ORDER").asInstanceOf[Int] }
|WHERE n.id = $$nodeId
|RETURN n.$ORDER AS $ORDER
|""".stripMargin,
new util.HashMap[String, Object](1) {
put("nodeId", nodeId.asInstanceOf[Object])
}
).map { result => result.get(ORDER).asInstanceOf[Int] }
.toArray
}
sumOrder += orderArr.head
@@ -162,12 +171,16 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
@Benchmark
override def callOrderTrav(blackhole: Blackhole): Int = {
val res = Using.resource(g.beginTx) { tx =>
tx.execute(s"""
tx.execute(
s"""
|MATCH (n: $CALL)
|WHERE n.$ORDER > 2 AND n.id IN [${nodeStart.mkString(",")}]
|RETURN COUNT(n)
|""".stripMargin)
.map(_.get("COUNT(n)").asInstanceOf[Int])
|WHERE n.$ORDER > 2 AND n.id IN $$nodeIds
|RETURN COUNT(n) AS SIZE
|""".stripMargin,
new util.HashMap[String, Object](1) {
put("nodeIds", nodeStart.toList.asJava.asInstanceOf[Object])
}
).map(_.get("SIZE").asInstanceOf[Int])
.next()
}
Option(blackhole).foreach(_.consume(res))
@@ -178,12 +191,16 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
override def callOrderExplicit(blackhole: Blackhole): Int = {
var res = 0
val nodes = Using.resource(g.beginTx) { tx =>
tx.execute(s"""
tx.execute(
s"""
|MATCH (n: $CALL)
|WHERE n.id IN [${nodeStart.mkString(",")}]
|RETURN n.$ORDER
|""".stripMargin)
.map(_.get(s"n.$ORDER").asInstanceOf[Int])
|WHERE n.id IN $$nodeIds
|RETURN n.$ORDER as $ORDER
|""".stripMargin,
new util.HashMap[String, Object](1) {
put("nodeIds", nodeStart.toList.asJava.asInstanceOf[Object])
}
).map(_.get(ORDER).asInstanceOf[Int])
.toArray
}
for (order <- nodes) {
@@ -198,12 +215,16 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
fullNames.foreach { fullName =>
Using
.resource(g.beginTx) { tx =>
tx.execute(s"""
tx.execute(
s"""
|MATCH (n: $METHOD)
|WHERE n.$FULL_NAME = $fullName
|RETURN n
|""".stripMargin)
.map(_.get(s"n"))
|WHERE n.$FULL_NAME = $$fullName
|RETURN n AS NODE
|""".stripMargin,
new util.HashMap[String, Object](1) {
put("fullName", fullName.asInstanceOf[Object])
}
).map(_.get("NODE"))
.toArray
}
.foreach(bh.consume)
@@ -215,12 +236,16 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
fullNames.foreach { fullName =>
Using
.resource(g.beginTx) { tx =>
tx.execute(s"""
tx.execute(
s"""
|MATCH (n)
|WHERE n.$FULL_NAME = $fullName and $METHOD IN labels(n)
|RETURN n
|""".stripMargin)
.map(_.get(s"n"))
|WHERE n.$FULL_NAME = $$fullName and $METHOD IN labels(n)
|RETURN n AS NODE
|""".stripMargin,
new util.HashMap[String, Object](1) {
put("fullName", fullName.asInstanceOf[Object])
}
).map(_.get("NODE"))
.toArray
}
.foreach(bh.consume)
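
The same query shape recurs throughout this benchmark: parameters are supplied via a java.util.Map rather than interpolated into the Cypher text, and each returned column is aliased so it can be read back by name. A condensed, self-contained sketch of that pattern follows; the method name is illustrative, while the label and property names match the constants used above.

import org.neo4j.graphdb.GraphDatabaseService
import scala.jdk.CollectionConverters.*
import scala.util.Using

def methodNodesByFullName(g: GraphDatabaseService, fullName: String): Array[AnyRef] =
  Using.resource(g.beginTx) { tx =>
    // Pass the lookup value as a query parameter instead of splicing it into the string.
    val params = new java.util.HashMap[String, Object](1)
    params.put("fullName", fullName)
    tx.execute(
      """
        |MATCH (n: METHOD)
        |WHERE n.FULL_NAME = $fullName
        |RETURN n AS NODE
        |""".stripMargin,
      params
    ).asScala
      .map(_.get("NODE")) // read the column back by its alias
      .toArray
  }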