diff --git a/drivers/neo4j-embedded/src/main/scala/com/github/plume/oss/drivers/Neo4jEmbeddedDriver.scala b/drivers/neo4j-embedded/src/main/scala/com/github/plume/oss/drivers/Neo4jEmbeddedDriver.scala index a58949d8..8e840853 100644 --- a/drivers/neo4j-embedded/src/main/scala/com/github/plume/oss/drivers/Neo4jEmbeddedDriver.scala +++ b/drivers/neo4j-embedded/src/main/scala/com/github/plume/oss/drivers/Neo4jEmbeddedDriver.scala @@ -13,6 +13,7 @@ import overflowdb.{BatchedUpdate, DetachedNodeData} import java.util import java.util.concurrent.atomic.AtomicBoolean +import scala.compiletime.uninitialized import scala.jdk.CollectionConverters import scala.jdk.CollectionConverters.* import scala.util.{Failure, Success, Try, Using} @@ -27,24 +28,29 @@ final class Neo4jEmbeddedDriver( ) extends IDriver with ISchemaSafeDriver { - private val logger = LoggerFactory.getLogger(getClass) - private val connected = new AtomicBoolean(true) - private var managementService = new DatabaseManagementServiceBuilder(databaseDir.path).build() - registerShutdownHook(managementService) - private var graphDb = managementService.database(databaseName) - - private def registerShutdownHook(managementService: DatabaseManagementService): Unit = { - Runtime.getRuntime.addShutdownHook(new Thread() { - override def run(): Unit = { - managementService.shutdown() - } - }) - } + private val logger = LoggerFactory.getLogger(getClass) + private val connected = new AtomicBoolean(false) + private var managementService: DatabaseManagementService = uninitialized + private var graphDb: GraphDatabaseService = uninitialized + connect() + /** @return + * a direct reference to the underlying graph database service. + */ def graph: GraphDatabaseService = graphDb private def connect(): Unit = { + + def registerShutdownHook(managementService: DatabaseManagementService): Unit = { + Runtime.getRuntime.addShutdownHook(new Thread() { + override def run(): Unit = { + managementService.shutdown() + } + }) + } + managementService = new DatabaseManagementServiceBuilder(databaseDir.path).build() + registerShutdownHook(managementService) graphDb = managementService.database(databaseName) connected.set(true) } diff --git a/runBenchmarks.sc b/runBenchmarks.sc index c0d7fa43..d39a4d5a 100644 --- a/runBenchmarks.sc +++ b/runBenchmarks.sc @@ -1,7 +1,19 @@ +import java.io.{BufferedReader, File, FileReader} import scala.sys.process.* import java.nio.file.Files import java.nio.file.Path +import scala.concurrent.duration.Duration +import scala.concurrent.{Await, Future, blocking} import scala.jdk.CollectionConverters.* +import scala.concurrent.ExecutionContext.Implicits.global + +// Combinations of driver, project, Gb mem, known to OOM +val oomCombinations: Set[(String, String, Int)] = Set(("tinkergraph", "compress", 2)) +val drivers = Seq( + "overflowdb", + "tinkergraph", + "neo4j-embedded" +) @main def main(): Unit = { println("[info] Ensuring compilation status and benchmark dataset availability...") @@ -10,13 +22,17 @@ import scala.jdk.CollectionConverters.* val datasetDir = Path.of("workspace", "defects4j") val resultsDir = Path.of("results").createIfNotExists - def benchmarkArgs(driver: String, project: String, memGb: Int): String = { - val projectDir = Path.of(datasetDir.toString, project) - val projectName = project.toLowerCase.stripSuffix(".jar") + def benchmarkArgs(driver: String, project: String, memGb: Int): JmhProcessInfo = { + val projectDir = Path.of(datasetDir.toString, project) + val projectName = project.toLowerCase.stripSuffix(".jar") val driverResultsDir = Path.of(resultsDir.toString, driver, projectName).createIfNotExists - val resultsPath = Path.of(driverResultsDir.toString, s"results-Xmx${memGb}G") - val outputPath = Path.of(driverResultsDir.toString, s"output-Xmx${memGb}G") - s"Jmh/runMain com.github.plume.oss.Benchmark $driver $projectDir -o ${outputPath.toAbsolutePath} -r ${resultsPath.toAbsolutePath} -m $memGb" + val resultsPath = Path.of(driverResultsDir.toString, s"results-Xmx${memGb}G") + val outputPath = Path.of(driverResultsDir.toString, s"output-Xmx${memGb}G") + val resultsExist = + Path.of(s"$resultsPath-read.csv").toFile.exists() && Path.of(s"$outputPath-read.txt").toFile.exists() + val cmd = + s"Jmh/runMain com.github.plume.oss.Benchmark $driver $projectDir -o ${outputPath.toAbsolutePath} -r ${resultsPath.toAbsolutePath} -m $memGb" + JmhProcessInfo(cmd, resultsExist, outputPath.toFile) } println("[info] Available projects:") @@ -24,25 +40,76 @@ import scala.jdk.CollectionConverters.* projects.foreach(p => println(s" - ${p.getFileName.toString}")) println("[info] Available drivers:") - val drivers = Seq("overflowdb", "tinkergraph", "neo4j-embedded") drivers.foreach(d => println(s" - $d")) val memoryConfigs = Seq(2, 4, 6, 8) - memoryConfigs.foreach { memConfig => + memoryConfigs.reverse.foreach { memConfig => drivers.foreach { driver => projects.foreach { project => - val cmd = benchmarkArgs(driver, project.getFileName.toString, memConfig) - println(s"[info] Benchmarking '$driver' on project '$project' with `-Xmx${memConfig}G`") - s"sbt \"$cmd\"".! + val projectName = project.getFileName.toString.toLowerCase.stripSuffix(".jar") + if (oomCombinations.contains(driver, projectName, memConfig)) { + println( + s"[info] '$driver' on project '$project' with `-Xmx${memConfig}G` will cause an OutOfMemoryException. Skipping..." + ) + } else { + val JmhProcessInfo(cmd, resultsExist, outputFile) = + benchmarkArgs(driver, project.getFileName.toString, memConfig) + if (resultsExist) { + println( + s"[info] Results for '$driver' on project '$project' with `-Xmx${memConfig}G` already exist. Skipping..." + ) + } else { + println(s"[info] Benchmarking '$driver' on project '$project' with `-Xmx${memConfig}G`") + runAndMonitorBenchmarkProcess(cmd, outputFile) + } + } } } } } +def runAndMonitorBenchmarkProcess(cmd: String, outputFile: File): Unit = { + val processBuilder = Process(s"sbt \"$cmd\"") + + // Start the process + val process = processBuilder.#>(outputFile).run() + + // Monitor the output file for timeout messages + val timeoutFuture = Future { + blocking { + val reader = new BufferedReader(new FileReader(outputFile)) + try { + var line: String = null + while ({ + line = reader.readLine(); line != null + }) { + println(line) // Log the output + if (line.contains("benchmark timed out")) { + println("Timeout detected. Terminating process...") + process.destroy() + return + } else if (line.contains("java.lang.OutOfMemoryError")) { + println("OutOfMemory error detected. Terminating process...") + process.destroy() + return + } + } + } finally { + reader.close() + } + } + } + + // Wait for the process to finish or timeout monitoring to detect a timeout + Await.result(timeoutFuture, Duration.Inf) +} + implicit class PathExt(x: Path) { def createIfNotExists: Path = { if (!Files.exists(x)) Files.createDirectories(x) x } -} \ No newline at end of file +} + +case class JmhProcessInfo(cmd: String, resultsExist: Boolean, outputFile: File) diff --git a/src/main/scala/com/github/plume/oss/Benchmark.scala b/src/main/scala/com/github/plume/oss/Benchmark.scala index 3b7213d7..46bb1088 100644 --- a/src/main/scala/com/github/plume/oss/Benchmark.scala +++ b/src/main/scala/com/github/plume/oss/Benchmark.scala @@ -9,15 +9,12 @@ import com.github.plume.oss.benchmarking.{ TinkerGraphReadBenchmark } import com.github.plume.oss.drivers.{IDriver, TinkerGraphDriver} -import org.cache2k.benchmark.jmh.ForcedGcMemoryProfiler +import org.cache2k.benchmark.jmh.HeapProfiler import org.openjdk.jmh.annotations.Mode import org.openjdk.jmh.runner.Runner import org.openjdk.jmh.runner.options.{ChainedOptionsBuilder, OptionsBuilder, TimeValue} import upickle.default.* -import java.util -import java.util.concurrent.TimeUnit - object Benchmark { def main(args: Array[String]): Unit = { @@ -27,6 +24,7 @@ object Benchmark { .foreach { config => val writeOptsBenchmark = createOptionsBoilerPlate(config, WRITE) .include(classOf[GraphWriteBenchmark].getSimpleName) + .warmupIterations(5) .build() new Runner(writeOptsBenchmark).run() println( @@ -38,18 +36,21 @@ object Benchmark { Option( createOptionsBoilerPlate(config, READ) .include(classOf[TinkerGraphReadBenchmark].getSimpleName) + .warmupIterations(1) .build() ) case _: OverflowDbConfig => Option( createOptionsBoilerPlate(config, READ) .include(classOf[OverflowDbReadBenchmark].getSimpleName) + .warmupIterations(1) .build() ) case _: Neo4jEmbeddedConfig => Option( createOptionsBoilerPlate(config, READ) .include(classOf[Neo4jEmbedReadBenchmark].getSimpleName) + .warmupIterations(1) .build() ) case x => @@ -68,16 +69,13 @@ object Benchmark { private def createOptionsBoilerPlate(config: PlumeConfig, benchmarkType: BenchmarkType): ChainedOptionsBuilder = { new OptionsBuilder() - .addProfiler(classOf[ForcedGcMemoryProfiler]) - .warmupIterations(1) - .warmupTime(TimeValue.seconds(1)) - .measurementTime(TimeValue.seconds(2)) - .measurementIterations(5) + .addProfiler(classOf[HeapProfiler]) + .warmupTime(TimeValue.seconds(30)) + .measurementIterations(3) .mode(Mode.AverageTime) - .timeUnit(TimeUnit.NANOSECONDS) - .forks(2) - .output(s"${config.jmhOutputFile}-$benchmarkType.txt") - .result(s"${config.jmhResultFile}-$benchmarkType.csv") + .forks(1) + .output(s"${config.jmhOutputFile}-${benchmarkType.toString.toLowerCase}.txt") + .result(s"${config.jmhResultFile}-${benchmarkType.toString.toLowerCase}.csv") .param("configStr", write(config)) .jvmArgsAppend(s"-Xmx${config.jmhMemoryGb}G", "-XX:+UseZGC") } diff --git a/src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala index 559c74cc..1690b28e 100644 --- a/src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala +++ b/src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala @@ -4,14 +4,27 @@ import com.github.plume.oss import com.github.plume.oss.{Benchmark, JimpleAst2Database, PlumeConfig, TinkerGraphConfig} import com.github.plume.oss.drivers.{IDriver, TinkerGraphDriver} import io.joern.jimple2cpg.Config -import org.openjdk.jmh.annotations.{Benchmark, Level, Param, Scope, Setup, State, TearDown, Timeout} +import io.shiftleft.codepropertygraph.generated.{NodeTypes, PropertyNames} +import org.openjdk.jmh.annotations.{ + Benchmark, + Level, + Measurement, + OutputTimeUnit, + Param, + Scope, + Setup, + State, + TearDown, + Timeout +} import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole} import java.util.concurrent.TimeUnit import scala.compiletime.uninitialized @State(Scope.Benchmark) -@Timeout(2, TimeUnit.MINUTES) +@Timeout(5, TimeUnit.MINUTES) +@OutputTimeUnit(TimeUnit.MILLISECONDS) trait GraphReadBenchmark { @Param(Array("")) @@ -40,7 +53,7 @@ trait GraphReadBenchmark { val (driver_, config_) = oss.Benchmark.initializeDriverAndInputDir(configStr, useCachedGraph = true) driver = driver_ config = config_ - if (!driver.exists(1L)) { + if (driver.propertyFromNodes(NodeTypes.FILE, PropertyNames.NAME).isEmpty) { JimpleAst2Database(driver).createAst(Config().withInputPath(config_.inputDir)) config.dbConfig match { case TinkerGraphConfig(_, Some(exportPath)) => driver.asInstanceOf[TinkerGraphDriver].exportGraph(exportPath) @@ -60,24 +73,31 @@ trait GraphReadBenchmark { protected def setUpMethodFullName(): Array[String] @Benchmark + @Measurement(time = 5, timeUnit = TimeUnit.SECONDS) def astDFS(blackhole: Blackhole): Int @Benchmark + @Measurement(time = 5, timeUnit = TimeUnit.SECONDS) def astUp(blackhole: Blackhole): Int @Benchmark + @Measurement(time = 5, timeUnit = TimeUnit.SECONDS) def orderSum(blackhole: Blackhole): Int @Benchmark + @Measurement(time = 5, timeUnit = TimeUnit.SECONDS) def callOrderTrav(blackhole: Blackhole): Int @Benchmark + @Measurement(time = 5, timeUnit = TimeUnit.SECONDS) def callOrderExplicit(blackhole: Blackhole): Int @Benchmark + @Measurement(time = 5, timeUnit = TimeUnit.SECONDS) def indexedMethodFullName(bh: Blackhole): Unit @Benchmark + @Measurement(time = 5, timeUnit = TimeUnit.SECONDS) def unindexedMethodFullName(bh: Blackhole): Unit @TearDown @@ -85,4 +105,9 @@ trait GraphReadBenchmark { driver.close() } + @TearDown(Level.Iteration) + def teardown(): Unit = { + System.gc() + } + } diff --git a/src/main/scala/com/github/plume/oss/benchmarking/GraphWriteBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/GraphWriteBenchmark.scala index a887faea..45021026 100644 --- a/src/main/scala/com/github/plume/oss/benchmarking/GraphWriteBenchmark.scala +++ b/src/main/scala/com/github/plume/oss/benchmarking/GraphWriteBenchmark.scala @@ -1,17 +1,18 @@ package com.github.plume.oss.benchmarking import com.github.plume.oss -import com.github.plume.oss.{Benchmark, JimpleAst2Database} import com.github.plume.oss.drivers.IDriver +import com.github.plume.oss.{Benchmark, JimpleAst2Database} import io.joern.jimple2cpg.Config -import org.openjdk.jmh.annotations.{Benchmark, Level, Param, Scope, Setup, State, TearDown, Timeout} +import org.openjdk.jmh.annotations.* import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole} import java.util.concurrent.TimeUnit import scala.compiletime.uninitialized @State(Scope.Benchmark) -@Timeout(5, TimeUnit.MINUTES) +@Timeout(6, TimeUnit.MINUTES) +@OutputTimeUnit(TimeUnit.SECONDS) class GraphWriteBenchmark { @Param(Array("")) @@ -32,9 +33,12 @@ class GraphWriteBenchmark { } @Benchmark - def createAst(blackhole: Blackhole): Unit = { + @Measurement(time = 10, timeUnit = TimeUnit.SECONDS) + def createAst(blackhole: Blackhole): Unit = try { JimpleAst2Database(driver).createAst(Config().withInputPath(inputDir)) Option(blackhole).foreach(_.consume(driver)) + } catch { + case e: Throwable => Option(blackhole).foreach(_.consume(e)) } @TearDown @@ -43,4 +47,9 @@ class GraphWriteBenchmark { driver.close() } + @TearDown(Level.Iteration) + def teardown(): Unit = { + System.gc() + } + } diff --git a/src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala index 985f9cc9..3d3e0316 100644 --- a/src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala +++ b/src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala @@ -180,7 +180,7 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark { new util.HashMap[String, Object](1) { put("nodeIds", nodeStart.toList.asJava.asInstanceOf[Object]) } - ).map(_.get("SIZE").asInstanceOf[Int]) + ).map(_.get("SIZE").asInstanceOf[Long].toInt) .next() } Option(blackhole).foreach(_.consume(res)) @@ -239,7 +239,7 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark { tx.execute( s""" |MATCH (n) - |WHERE n.$FULL_NAME = $$fullName and $METHOD IN labels(n) + |WHERE n.$FULL_NAME = $$fullName and \"$METHOD\" IN labels(n) |RETURN n AS NODE |""".stripMargin, new util.HashMap[String, Object](1) { diff --git a/src/main/scala/com/github/plume/oss/benchmarking/TinkerGraphReadBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/TinkerGraphReadBenchmark.scala index 9b084973..9ada4c7a 100644 --- a/src/main/scala/com/github/plume/oss/benchmarking/TinkerGraphReadBenchmark.scala +++ b/src/main/scala/com/github/plume/oss/benchmarking/TinkerGraphReadBenchmark.scala @@ -54,7 +54,7 @@ class TinkerGraphReadBenchmark extends GraphReadBenchmark { var nnodes = nodeStart.length while (stack.nonEmpty) { val nx = g().V(stack.removeLast()) - stack.appendAll(nx.out(AST).id().map(_.asInstanceOf[Long]).asScala.toArray) + stack.appendAll(nx.out(AST).id().toList.asScala.map(_.asInstanceOf[Long]).toArray) nnodes += 1 } Option(blackhole).foreach(_.consume(nnodes)) @@ -97,7 +97,7 @@ class TinkerGraphReadBenchmark extends GraphReadBenchmark { override def callOrderExplicit(blackhole: Blackhole): Int = { var res = 0 for (node <- g().V(nodeStart*).hasLabel(CALL)) { - if (node.property(ORDER).asInstanceOf[Int] > 2) res += 1 + if (node.property(ORDER).value().asInstanceOf[Int] > 2) res += 1 } Option(blackhole).foreach(_.consume(res)) res diff --git a/src/main/scala/com/github/plume/oss/package.scala b/src/main/scala/com/github/plume/oss/package.scala index 7dceda43..b0e0bf64 100644 --- a/src/main/scala/com/github/plume/oss/package.scala +++ b/src/main/scala/com/github/plume/oss/package.scala @@ -16,11 +16,15 @@ package object oss { sealed trait DatabaseConfig derives ReadWriter { def toDriver: IDriver + + def shortName: String } case class TinkerGraphConfig(importPath: Option[String] = None, exportPath: Option[String] = None) extends DatabaseConfig { override def toDriver: IDriver = new TinkerGraphDriver() + + override def shortName: String = "tinkergraph" } case class OverflowDbConfig( @@ -30,6 +34,8 @@ package object oss { ) extends DatabaseConfig { override def toDriver: IDriver = new OverflowDbDriver(Option(storageLocation), heapPercentageThreshold, serializationStatsEnabled) + + override def shortName: String = "overflowdb" } case class Neo4jConfig( @@ -40,11 +46,14 @@ package object oss { txMax: Int = 25 ) extends DatabaseConfig { override def toDriver: IDriver = new Neo4jDriver(hostname, port, username, password, txMax) + override def shortName: String = "neo4j" } case class Neo4jEmbeddedConfig(databaseName: String = "neo4j", databaseDir: String = "neo4j-db", txMax: Int = 25) extends DatabaseConfig { override def toDriver: IDriver = new Neo4jEmbeddedDriver(databaseName, File(databaseDir), txMax) + + override def shortName: String = "neo4j-embedded" } case class TigerGraphConfig( @@ -59,6 +68,7 @@ package object oss { ) extends DatabaseConfig { override def toDriver: IDriver = new TigerGraphDriver(hostname, restPpPort, gsqlPort, username, password, timeout, scheme, txMax) + override def shortName: String = "tigergraph" } case class NeptuneConfig( @@ -68,6 +78,7 @@ package object oss { txMax: Int = 50 ) extends DatabaseConfig { override def toDriver: IDriver = new NeptuneDriver(hostname, port, keyCertChainFile, txMax) + override def shortName: String = "neptune" } }