From bbf5e9209fb9bb4e504f4b4befda5c7aad64b7bc Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Fri, 9 Oct 2020 19:52:37 +0300 Subject: [PATCH 001/308] add default args with standart lib usage to benchmarks --- src/jmh/kotlin/cli/Code2VecExtractorBenchmarks.kt | 11 +++++------ src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt | 8 +++++--- src/jmh/kotlin/cli/ProjectParserCsvBenchmarks.kt | 8 +++++--- src/jmh/kotlin/cli/ProjectParserDotBenchmarks.kt | 11 +++++------ 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/jmh/kotlin/cli/Code2VecExtractorBenchmarks.kt b/src/jmh/kotlin/cli/Code2VecExtractorBenchmarks.kt index c5da396c..1c32406d 100644 --- a/src/jmh/kotlin/cli/Code2VecExtractorBenchmarks.kt +++ b/src/jmh/kotlin/cli/Code2VecExtractorBenchmarks.kt @@ -6,6 +6,8 @@ import astminer.cli.* @State(Scope.Benchmark) open class Code2VecExtractorBenchmarks { + private val defaultArgs = listOf("--split-tokens", "--granularity", "method", "--lang", "java") + @Setup fun pathsSetup() { BenchmarksSetup().setup() @@ -14,24 +16,21 @@ open class Code2VecExtractorBenchmarks { @Benchmark fun simpleProject() { val args = listOf("--project", BenchmarksSetup().simpleProjectPath, - "--output", BenchmarksSetup().simpleProjectResultsPath, - "--lang", "java") + "--output", BenchmarksSetup().simpleProjectResultsPath) + defaultArgs Code2VecExtractor().main(args) } @Benchmark fun longFileProject() { val args = listOf("--project", BenchmarksSetup().longFilePath, - "--output", BenchmarksSetup().longFileResultsPath, - "--lang", "java") + "--output", BenchmarksSetup().longFileResultsPath) + defaultArgs Code2VecExtractor().main(args) } @Benchmark fun bigProject() { val args = listOf("--project", BenchmarksSetup().bigProjectPath, - "--output", BenchmarksSetup().bigProjectResultsPath, - "--lang", "java") + "--output", BenchmarksSetup().bigProjectResultsPath) + defaultArgs Code2VecExtractor().main(args) } } \ No newline at end of file diff --git 
a/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt b/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt index 90b1e67f..77749c67 100644 --- a/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt +++ b/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt @@ -6,6 +6,8 @@ import astminer.cli.* @State(Scope.Benchmark) open class PathContextsExtractorBenchmarks { + private val defaultArgs = listOf("--split-tokens", "--granularity", "method", "--lang", "java") + @Setup fun pathsSetup() { BenchmarksSetup().setup() @@ -14,21 +16,21 @@ open class PathContextsExtractorBenchmarks { @Benchmark fun simpleProject() { val args = listOf("--project", BenchmarksSetup().simpleProjectPath, - "--output", BenchmarksSetup().simpleProjectResultsPath) + "--output", BenchmarksSetup().simpleProjectResultsPath) + defaultArgs PathContextsExtractor().main(args) } @Benchmark fun longFileProject() { val args = listOf("--project", BenchmarksSetup().longFilePath, - "--output", BenchmarksSetup().longFileResultsPath) + "--output", BenchmarksSetup().longFileResultsPath) + defaultArgs PathContextsExtractor().main(args) } @Benchmark fun bigProject() { val args = listOf("--project", BenchmarksSetup().bigProjectPath, - "--output", BenchmarksSetup().bigProjectResultsPath) + "--output", BenchmarksSetup().bigProjectResultsPath) + defaultArgs PathContextsExtractor().main(args) } } \ No newline at end of file diff --git a/src/jmh/kotlin/cli/ProjectParserCsvBenchmarks.kt b/src/jmh/kotlin/cli/ProjectParserCsvBenchmarks.kt index 0e7ad5c3..f5c24f14 100644 --- a/src/jmh/kotlin/cli/ProjectParserCsvBenchmarks.kt +++ b/src/jmh/kotlin/cli/ProjectParserCsvBenchmarks.kt @@ -6,6 +6,8 @@ import astminer.cli.* @State(Scope.Benchmark) open class ProjectParserCsvBenchmarks { + private val defaultArgs = listOf("--split-tokens", "--granularity", "method", "--lang", "java", "--storage", "csv") + @Setup fun pathsSetup() { BenchmarksSetup().setup() @@ -14,21 +16,21 @@ open class ProjectParserCsvBenchmarks { @Benchmark 
fun simpleProject() { val args = listOf("--project", BenchmarksSetup().simpleProjectPath, - "--output", BenchmarksSetup().simpleProjectResultsPath) + "--output", BenchmarksSetup().simpleProjectResultsPath) + defaultArgs ProjectParser().main(args) } @Benchmark fun longFileProject() { val args = listOf("--project", BenchmarksSetup().longFilePath, - "--output", BenchmarksSetup().longFileResultsPath) + "--output", BenchmarksSetup().longFileResultsPath) + defaultArgs ProjectParser().main(args) } @Benchmark fun bigProject() { val args = listOf("--project", BenchmarksSetup().bigProjectPath, - "--output", BenchmarksSetup().bigProjectResultsPath) + "--output", BenchmarksSetup().bigProjectResultsPath) + defaultArgs ProjectParser().main(args) } } \ No newline at end of file diff --git a/src/jmh/kotlin/cli/ProjectParserDotBenchmarks.kt b/src/jmh/kotlin/cli/ProjectParserDotBenchmarks.kt index 2b204f96..0d832a50 100644 --- a/src/jmh/kotlin/cli/ProjectParserDotBenchmarks.kt +++ b/src/jmh/kotlin/cli/ProjectParserDotBenchmarks.kt @@ -6,6 +6,8 @@ import astminer.cli.* @State(Scope.Benchmark) open class ProjectParserDotBenchmarks { + private val defaultArgs = listOf("--split-tokens", "--granularity", "method", "--lang", "java", "--storage", "dot") + @Setup fun pathsSetup() { BenchmarksSetup().setup() @@ -14,24 +16,21 @@ open class ProjectParserDotBenchmarks { @Benchmark fun simpleProject() { val args = listOf("--project", BenchmarksSetup().simpleProjectPath, - "--output", BenchmarksSetup().simpleProjectResultsPath, - "--storage", "dot") + "--output", BenchmarksSetup().simpleProjectResultsPath) + defaultArgs ProjectParser().main(args) } @Benchmark fun longFileProject() { val args = listOf("--project", BenchmarksSetup().longFilePath, - "--output", BenchmarksSetup().longFileResultsPath, - "--storage", "dot") + "--output", BenchmarksSetup().longFileResultsPath) + defaultArgs ProjectParser().main(args) } @Benchmark fun bigProject() { val args = listOf("--project", 
BenchmarksSetup().bigProjectPath, - "--output", BenchmarksSetup().bigProjectResultsPath, - "--storage", "dot") + "--output", BenchmarksSetup().bigProjectResultsPath) + defaultArgs ProjectParser().main(args) } } \ No newline at end of file From d83d06e66d2cedb1eaf15907f9ef85656be3f971 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Fri, 9 Oct 2020 19:54:59 +0300 Subject: [PATCH 002/308] add script for parsing result and building table --- src/jmh/kotlin/cli/BenchmarkResultWorker.kt | 108 ++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 src/jmh/kotlin/cli/BenchmarkResultWorker.kt diff --git a/src/jmh/kotlin/cli/BenchmarkResultWorker.kt b/src/jmh/kotlin/cli/BenchmarkResultWorker.kt new file mode 100644 index 00000000..b584ed65 --- /dev/null +++ b/src/jmh/kotlin/cli/BenchmarkResultWorker.kt @@ -0,0 +1,108 @@ +package cli + +import java.io.File +import kotlin.math.round + +data class BenchmarkResult(val taskName: String, val projectName: String) { + var totalTime: Float = 0f + var timeStd: Float = 0f + var totalAllocatedMemory: Float = 0f + var allocatedMemoryStd: Float = 0f +} + +enum class MemoryMeasure { + GB, + MB +} + +class BenchmarkResultWorker { + object TableFields { + val taskToCsvField = hashMapOf( + "Code2Vec" to "cli.Code2VecExtractorBenchmarks", + "PathContext" to "cli.PathContextsExtractorBenchmarks", + "ProjectParseCSV" to "cli.ProjectParserCsvBenchmarks", + "ProjectParseDOT" to "cli.ProjectParserDotBenchmarks" + ) + val projectToCsvField = hashMapOf( + "Long file" to "longFileProject", + "Small project (Gradle)" to "simpleProject", + "Big project (IntelliJ IDEA)" to "bigProject" + ) + } + + private val tasks = listOf("Code2Vec", "PathContext", "ProjectParseCSV", "ProjectParseDOT") + private val projects = listOf("Long file", "Small project (Gradle)", "Big project (IntelliJ IDEA)") + + private fun convertBytes(bytes: Float, memoryMeasure: MemoryMeasure): Float { + val kilobytes = bytes / 1024 + return when (memoryMeasure) { + 
MemoryMeasure.MB -> kilobytes / 1024 + MemoryMeasure.GB -> kilobytes / 1024 / 1024 + } + } + + fun parseCsvFile(pathToCsvFile: String): Map, BenchmarkResult> { + val taskToResult = hashMapOf, BenchmarkResult>() + tasks.forEach {task -> + projects.forEach { project -> + taskToResult[task to project] = BenchmarkResult(task, project) + } + } + + File(pathToCsvFile).forEachLine { line -> + val csvFields = line.split(',') + val taskName = csvFields[0].drop(1).dropLast(1) + val resultValue = csvFields[4].toFloatOrNull() ?: 0f + val resultStd = csvFields[5].toFloatOrNull() ?: 0f + TableFields.taskToCsvField.entries.forEach { task -> + TableFields.projectToCsvField.entries.forEach { project -> + val correctCsvField = "${task.value}.${project.value}" + if (taskName == correctCsvField) { + taskToResult[task.key to project.key]?.let { + it.totalTime = resultValue + it.timeStd = resultStd + } + } else if (taskName == "$correctCsvField:·gc.alloc.rate.norm") { + taskToResult[task.key to project.key]?. 
let { + it.totalAllocatedMemory = resultValue + it.allocatedMemoryStd = resultStd + } + } + } + } + } + return taskToResult + } + + fun saveToMarkdown(results: Map, BenchmarkResult>, pathToMarkdownFile: String, memoryMeasure: MemoryMeasure) { + val outputFileWriter = File(pathToMarkdownFile).printWriter() + outputFileWriter.println("| | ${projects.joinToString(" | ")} |") + outputFileWriter.println("| --- |${"--- | ".repeat(projects.size)}") + tasks.forEach { task -> + outputFileWriter.print("| $task (time) |") + projects.forEach { project -> + val totalTime = "%.2f".format(results[task to project]?.totalTime) + val timeStd = "%.2f".format(results[task to project]?.timeStd) + outputFileWriter.print(" $totalTime ± $timeStd sec |") + } + outputFileWriter.print("\n") + outputFileWriter.print("| $task (total allocated memory) |") + projects.forEach { project -> + val totalMemory = "%.2f".format(convertBytes(results[task to project]?.totalAllocatedMemory ?: 0f, memoryMeasure)) + val memoryStd = "%.2f".format(convertBytes(results[task to project]?.allocatedMemoryStd ?: 0f, memoryMeasure)) + outputFileWriter.print(" $totalMemory ± $memoryStd ${memoryMeasure.name} |") + } + outputFileWriter.print("\n") + if (task != tasks.last()) + outputFileWriter.println("| | ${" | ".repeat(projects.size)}") + } + outputFileWriter.close() + } +} + + +fun main(args: Array) { + val benchmarkResultWorker = BenchmarkResultWorker() + val results = benchmarkResultWorker.parseCsvFile("src/jmh/benchmarks.csv") + benchmarkResultWorker.saveToMarkdown(results, "src/jmh/new_results.md", MemoryMeasure.GB) +} \ No newline at end of file From 484889186bbf85d85925c6142b4eb18b96d98ce4 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Fri, 9 Oct 2020 21:31:35 +0300 Subject: [PATCH 003/308] remove wrong args for path contexts --- src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt 
b/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt index 77749c67..9a5c05a0 100644 --- a/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt +++ b/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt @@ -6,7 +6,7 @@ import astminer.cli.* @State(Scope.Benchmark) open class PathContextsExtractorBenchmarks { - private val defaultArgs = listOf("--split-tokens", "--granularity", "method", "--lang", "java") + private val defaultArgs = listOf("--lang", "java") @Setup fun pathsSetup() { From 84434c0864e0fff1e0914e4503823fcbac11c873 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Fri, 9 Oct 2020 21:32:11 +0300 Subject: [PATCH 004/308] write memory measure in lower case --- src/jmh/kotlin/cli/BenchmarkResultWorker.kt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/jmh/kotlin/cli/BenchmarkResultWorker.kt b/src/jmh/kotlin/cli/BenchmarkResultWorker.kt index b584ed65..e3194e42 100644 --- a/src/jmh/kotlin/cli/BenchmarkResultWorker.kt +++ b/src/jmh/kotlin/cli/BenchmarkResultWorker.kt @@ -1,7 +1,6 @@ package cli import java.io.File -import kotlin.math.round data class BenchmarkResult(val taskName: String, val projectName: String) { var totalTime: Float = 0f @@ -90,7 +89,7 @@ class BenchmarkResultWorker { projects.forEach { project -> val totalMemory = "%.2f".format(convertBytes(results[task to project]?.totalAllocatedMemory ?: 0f, memoryMeasure)) val memoryStd = "%.2f".format(convertBytes(results[task to project]?.allocatedMemoryStd ?: 0f, memoryMeasure)) - outputFileWriter.print(" $totalMemory ± $memoryStd ${memoryMeasure.name} |") + outputFileWriter.print(" $totalMemory ± $memoryStd ${memoryMeasure.name.toLowerCase()} |") } outputFileWriter.print("\n") if (task != tasks.last()) From 8f97139a174f22a59b312d2870a132065170b265 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 15 Oct 2020 11:29:11 +0300 Subject: [PATCH 005/308] use allocation rate in result instead of normalized allocation rate --- src/jmh/kotlin/cli/BenchmarkResultWorker.kt | 35 
+++++++++++---------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/jmh/kotlin/cli/BenchmarkResultWorker.kt b/src/jmh/kotlin/cli/BenchmarkResultWorker.kt index e3194e42..d6e57bad 100644 --- a/src/jmh/kotlin/cli/BenchmarkResultWorker.kt +++ b/src/jmh/kotlin/cli/BenchmarkResultWorker.kt @@ -5,8 +5,8 @@ import java.io.File data class BenchmarkResult(val taskName: String, val projectName: String) { var totalTime: Float = 0f var timeStd: Float = 0f - var totalAllocatedMemory: Float = 0f - var allocatedMemoryStd: Float = 0f + var allocatedMemoryRate: Float = 0f + var allocatedMemoryRateStd: Float = 0f } enum class MemoryMeasure { @@ -32,12 +32,10 @@ class BenchmarkResultWorker { private val tasks = listOf("Code2Vec", "PathContext", "ProjectParseCSV", "ProjectParseDOT") private val projects = listOf("Long file", "Small project (Gradle)", "Big project (IntelliJ IDEA)") - private fun convertBytes(bytes: Float, memoryMeasure: MemoryMeasure): Float { - val kilobytes = bytes / 1024 - return when (memoryMeasure) { - MemoryMeasure.MB -> kilobytes / 1024 - MemoryMeasure.GB -> kilobytes / 1024 / 1024 - } + private fun convertMegabytes(megabytes: Float): Pair { + if (megabytes < 1024) + return megabytes to MemoryMeasure.MB + return megabytes / 1024 to MemoryMeasure.GB } fun parseCsvFile(pathToCsvFile: String): Map, BenchmarkResult> { @@ -61,10 +59,10 @@ class BenchmarkResultWorker { it.totalTime = resultValue it.timeStd = resultStd } - } else if (taskName == "$correctCsvField:·gc.alloc.rate.norm") { + } else if (taskName == "$correctCsvField:·gc.alloc.rate") { taskToResult[task.key to project.key]?. 
let { - it.totalAllocatedMemory = resultValue - it.allocatedMemoryStd = resultStd + it.allocatedMemoryRate = resultValue + it.allocatedMemoryRateStd = resultStd } } } @@ -73,7 +71,7 @@ class BenchmarkResultWorker { return taskToResult } - fun saveToMarkdown(results: Map, BenchmarkResult>, pathToMarkdownFile: String, memoryMeasure: MemoryMeasure) { + fun saveToMarkdown(results: Map, BenchmarkResult>, pathToMarkdownFile: String) { val outputFileWriter = File(pathToMarkdownFile).printWriter() outputFileWriter.println("| | ${projects.joinToString(" | ")} |") outputFileWriter.println("| --- |${"--- | ".repeat(projects.size)}") @@ -85,11 +83,14 @@ class BenchmarkResultWorker { outputFileWriter.print(" $totalTime ± $timeStd sec |") } outputFileWriter.print("\n") - outputFileWriter.print("| $task (total allocated memory) |") + outputFileWriter.print("| $task (allocated memory per sec) |") projects.forEach { project -> - val totalMemory = "%.2f".format(convertBytes(results[task to project]?.totalAllocatedMemory ?: 0f, memoryMeasure)) - val memoryStd = "%.2f".format(convertBytes(results[task to project]?.allocatedMemoryStd ?: 0f, memoryMeasure)) - outputFileWriter.print(" $totalMemory ± $memoryStd ${memoryMeasure.name.toLowerCase()} |") + val totalMemory = convertMegabytes(results[task to project]?.allocatedMemoryRate ?: 0f) + val memoryStd = convertMegabytes(results[task to project]?.allocatedMemoryRateStd ?: 0f) + outputFileWriter.print( + " ${"%.2f".format(totalMemory.first)} ${totalMemory.second.name.toLowerCase()} ± " + + "${"%.2f".format(memoryStd.first)} ${memoryStd.second.name.toLowerCase()} |" + ) } outputFileWriter.print("\n") if (task != tasks.last()) @@ -103,5 +104,5 @@ class BenchmarkResultWorker { fun main(args: Array) { val benchmarkResultWorker = BenchmarkResultWorker() val results = benchmarkResultWorker.parseCsvFile("src/jmh/benchmarks.csv") - benchmarkResultWorker.saveToMarkdown(results, "src/jmh/new_results.md", MemoryMeasure.GB) + 
benchmarkResultWorker.saveToMarkdown(results, "src/jmh/new_results.md") } \ No newline at end of file From e46916941e46b6e6afb9064ae8b6eda5100d9be6 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 15 Oct 2020 11:30:48 +0300 Subject: [PATCH 006/308] Upload new benchmark result --- src/jmh/benchmarks.csv | 103 ++++++++++++++++++++ src/jmh/kotlin/cli/BenchmarkResultWorker.kt | 2 +- src/jmh/results.md | 26 ++--- 3 files changed, 117 insertions(+), 14 deletions(-) create mode 100644 src/jmh/benchmarks.csv diff --git a/src/jmh/benchmarks.csv b/src/jmh/benchmarks.csv new file mode 100644 index 00000000..02a28510 --- /dev/null +++ b/src/jmh/benchmarks.csv @@ -0,0 +1,103 @@ +"Benchmark","Mode","Threads","Samples","Score","Score Error (99.9%)","Unit" +"cli.Code2VecExtractorBenchmarks.bigProject","avgt",1,4,257.449218,18.887113,"s/op" +"cli.Code2VecExtractorBenchmarks.bigProject:·gc.alloc.rate","avgt",1,4,1079.698991,78.832117,"MB/sec" +"cli.Code2VecExtractorBenchmarks.bigProject:·gc.alloc.rate.norm","avgt",1,4,292008937596.000000,759030387.259436,"B/op" +"cli.Code2VecExtractorBenchmarks.bigProject:·gc.churn.PS_Eden_Space","avgt",1,4,1075.100078,81.539687,"MB/sec" +"cli.Code2VecExtractorBenchmarks.bigProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,290766061568.000000,8037163724.485502,"B/op" +"cli.Code2VecExtractorBenchmarks.bigProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.520222,0.920481,"MB/sec" +"cli.Code2VecExtractorBenchmarks.bigProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,140908712.000000,256704718.451890,"B/op" +"cli.Code2VecExtractorBenchmarks.bigProject:·gc.count","avgt",1,4,389.000000,NaN,"counts" +"cli.Code2VecExtractorBenchmarks.bigProject:·gc.time","avgt",1,4,4898.000000,NaN,"ms" +"cli.Code2VecExtractorBenchmarks.longFileProject","avgt",1,4,0.435963,0.069128,"s/op" +"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.alloc.rate","avgt",1,4,1250.807002,188.215176,"MB/sec" 
+"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.alloc.rate.norm","avgt",1,4,599492595.652174,16717117.281462,"B/op" +"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.churn.PS_Eden_Space","avgt",1,4,1176.625268,540.269616,"MB/sec" +"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,564309363.014493,287025514.462391,"B/op" +"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.966032,6.388467,"MB/sec" +"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,461027.619565,3075766.819041,"B/op" +"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.count","avgt",1,4,21.000000,NaN,"counts" +"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.time","avgt",1,4,76.000000,NaN,"ms" +"cli.Code2VecExtractorBenchmarks.simpleProject","avgt",1,4,27.153595,0.904767,"s/op" +"cli.Code2VecExtractorBenchmarks.simpleProject:·gc.alloc.rate","avgt",1,4,1106.676024,31.717964,"MB/sec" +"cli.Code2VecExtractorBenchmarks.simpleProject:·gc.alloc.rate.norm","avgt",1,4,32090342828.000000,372411452.120528,"B/op" +"cli.Code2VecExtractorBenchmarks.simpleProject:·gc.churn.PS_Eden_Space","avgt",1,4,1025.486225,135.965407,"MB/sec" +"cli.Code2VecExtractorBenchmarks.simpleProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,29738008576.000000,4616229609.315597,"B/op" +"cli.Code2VecExtractorBenchmarks.simpleProject:·gc.count","avgt",1,4,32.000000,NaN,"counts" +"cli.Code2VecExtractorBenchmarks.simpleProject:·gc.time","avgt",1,4,205.000000,NaN,"ms" +"cli.PathContextsExtractorBenchmarks.bigProject","avgt",1,4,223.764264,8.243985,"s/op" +"cli.PathContextsExtractorBenchmarks.bigProject:·gc.alloc.rate","avgt",1,4,1142.237142,47.491930,"MB/sec" +"cli.PathContextsExtractorBenchmarks.bigProject:·gc.alloc.rate.norm","avgt",1,4,268599702660.000000,1371364176.737936,"B/op" +"cli.PathContextsExtractorBenchmarks.bigProject:·gc.churn.PS_Eden_Space","avgt",1,4,1138.936796,42.426358,"MB/sec" 
+"cli.PathContextsExtractorBenchmarks.bigProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,267824529408.000000,2120467185.192489,"B/op" +"cli.PathContextsExtractorBenchmarks.bigProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.539942,0.562919,"MB/sec" +"cli.PathContextsExtractorBenchmarks.bigProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,126940612.000000,131024484.723262,"B/op" +"cli.PathContextsExtractorBenchmarks.bigProject:·gc.count","avgt",1,4,418.000000,NaN,"counts" +"cli.PathContextsExtractorBenchmarks.bigProject:·gc.time","avgt",1,4,4353.000000,NaN,"ms" +"cli.PathContextsExtractorBenchmarks.longFileProject","avgt",1,4,0.626323,0.033845,"s/op" +"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.alloc.rate","avgt",1,4,1185.950058,72.860863,"MB/sec" +"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.alloc.rate.norm","avgt",1,4,816574896.720588,69853.062348,"B/op" +"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.churn.PS_Eden_Space","avgt",1,4,1116.234803,385.671179,"MB/sec" +"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,768569584.941176,260446575.197948,"B/op" +"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.847134,7.053726,"MB/sec" +"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,585812.345588,4903625.828174,"B/op" +"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.count","avgt",1,4,20.000000,NaN,"counts" +"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.time","avgt",1,4,100.000000,NaN,"ms" +"cli.PathContextsExtractorBenchmarks.simpleProject","avgt",1,4,26.544189,1.249203,"s/op" +"cli.PathContextsExtractorBenchmarks.simpleProject:·gc.alloc.rate","avgt",1,4,1120.104864,51.889739,"MB/sec" +"cli.PathContextsExtractorBenchmarks.simpleProject:·gc.alloc.rate.norm","avgt",1,4,31763229496.000000,1078062.624071,"B/op" 
+"cli.PathContextsExtractorBenchmarks.simpleProject:·gc.churn.PS_Eden_Space","avgt",1,4,1043.613110,302.543916,"MB/sec" +"cli.PathContextsExtractorBenchmarks.simpleProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,29602217984.000000,9913538750.714693,"B/op" +"cli.PathContextsExtractorBenchmarks.simpleProject:·gc.count","avgt",1,4,32.000000,NaN,"counts" +"cli.PathContextsExtractorBenchmarks.simpleProject:·gc.time","avgt",1,4,192.000000,NaN,"ms" +"cli.ProjectParserCsvBenchmarks.bigProject","avgt",1,4,180.371234,2.981116,"s/op" +"cli.ProjectParserCsvBenchmarks.bigProject:·gc.alloc.rate","avgt",1,4,1230.495669,20.164663,"MB/sec" +"cli.ProjectParserCsvBenchmarks.bigProject:·gc.alloc.rate.norm","avgt",1,4,233372029542.000000,132825856.530797,"B/op" +"cli.ProjectParserCsvBenchmarks.bigProject:·gc.churn.PS_Eden_Space","avgt",1,4,1199.017557,44.611256,"MB/sec" +"cli.ProjectParserCsvBenchmarks.bigProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,227400744960.000000,5135066231.488147,"B/op" +"cli.ProjectParserCsvBenchmarks.bigProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.345020,0.630478,"MB/sec" +"cli.ProjectParserCsvBenchmarks.bigProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,65404892.000000,118694148.932435,"B/op" +"cli.ProjectParserCsvBenchmarks.bigProject:·gc.count","avgt",1,4,90.000000,NaN,"counts" +"cli.ProjectParserCsvBenchmarks.bigProject:·gc.time","avgt",1,4,3546.000000,NaN,"ms" +"cli.ProjectParserCsvBenchmarks.longFileProject","avgt",1,4,0.334106,0.042951,"s/op" +"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.alloc.rate","avgt",1,4,1406.933177,180.288170,"MB/sec" +"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.alloc.rate.norm","avgt",1,4,516966578.664516,181451.741185,"B/op" +"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.churn.PS_Eden_Space","avgt",1,4,1297.327305,292.912975,"MB/sec" +"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,476606120.464516,57099756.867953,"B/op" 
+"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.503796,2.005817,"MB/sec" +"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,186834.683871,759795.352873,"B/op" +"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.count","avgt",1,4,22.000000,NaN,"counts" +"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.time","avgt",1,4,76.000000,NaN,"ms" +"cli.ProjectParserCsvBenchmarks.simpleProject","avgt",1,4,20.403745,1.115891,"s/op" +"cli.ProjectParserCsvBenchmarks.simpleProject:·gc.alloc.rate","avgt",1,4,1214.270360,68.761970,"MB/sec" +"cli.ProjectParserCsvBenchmarks.simpleProject:·gc.alloc.rate.norm","avgt",1,4,26615112204.000000,153463185.255269,"B/op" +"cli.ProjectParserCsvBenchmarks.simpleProject:·gc.churn.PS_Eden_Space","avgt",1,4,1069.541462,303.093519,"MB/sec" +"cli.ProjectParserCsvBenchmarks.simpleProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,23446159360.000000,7149530979.860157,"B/op" +"cli.ProjectParserCsvBenchmarks.simpleProject:·gc.count","avgt",1,4,26.000000,NaN,"counts" +"cli.ProjectParserCsvBenchmarks.simpleProject:·gc.time","avgt",1,4,189.000000,NaN,"ms" +"cli.ProjectParserDotBenchmarks.bigProject","avgt",1,4,285.639391,3.040175,"s/op" +"cli.ProjectParserDotBenchmarks.bigProject:·gc.alloc.rate","avgt",1,4,1082.387084,16.588036,"MB/sec" +"cli.ProjectParserDotBenchmarks.bigProject:·gc.alloc.rate.norm","avgt",1,4,324758822476.000000,5787126931.438186,"B/op" +"cli.ProjectParserDotBenchmarks.bigProject:·gc.churn.PS_Eden_Space","avgt",1,4,1081.368891,19.864221,"MB/sec" +"cli.ProjectParserDotBenchmarks.bigProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,324453090758.000000,5981937765.644653,"B/op" +"cli.ProjectParserDotBenchmarks.bigProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.938662,0.426174,"MB/sec" +"cli.ProjectParserDotBenchmarks.bigProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,281651016.000000,129925532.920107,"B/op" 
+"cli.ProjectParserDotBenchmarks.bigProject:·gc.count","avgt",1,4,1920.000000,NaN,"counts" +"cli.ProjectParserDotBenchmarks.bigProject:·gc.time","avgt",1,4,6093.000000,NaN,"ms" +"cli.ProjectParserDotBenchmarks.longFileProject","avgt",1,4,0.426770,0.048574,"s/op" +"cli.ProjectParserDotBenchmarks.longFileProject:·gc.alloc.rate","avgt",1,4,1256.262364,136.309511,"MB/sec" +"cli.ProjectParserDotBenchmarks.longFileProject:·gc.alloc.rate.norm","avgt",1,4,589531214.750000,47400.704610,"B/op" +"cli.ProjectParserDotBenchmarks.longFileProject:·gc.churn.PS_Eden_Space","avgt",1,4,1083.105600,545.253452,"MB/sec" +"cli.ProjectParserDotBenchmarks.longFileProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,508482901.333333,275324175.683678,"B/op" +"cli.ProjectParserDotBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.798736,3.176831,"MB/sec" +"cli.ProjectParserDotBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,376249.500000,1507169.919826,"B/op" +"cli.ProjectParserDotBenchmarks.longFileProject:·gc.count","avgt",1,4,19.000000,NaN,"counts" +"cli.ProjectParserDotBenchmarks.longFileProject:·gc.time","avgt",1,4,82.000000,NaN,"ms" +"cli.ProjectParserDotBenchmarks.simpleProject","avgt",1,4,32.983427,2.512751,"s/op" +"cli.ProjectParserDotBenchmarks.simpleProject:·gc.alloc.rate","avgt",1,4,1049.552205,78.141154,"MB/sec" +"cli.ProjectParserDotBenchmarks.simpleProject:·gc.alloc.rate.norm","avgt",1,4,36846641168.000000,214127.207194,"B/op" +"cli.ProjectParserDotBenchmarks.simpleProject:·gc.churn.PS_Eden_Space","avgt",1,4,1029.500338,74.389215,"MB/sec" +"cli.ProjectParserDotBenchmarks.simpleProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,36147691520.000000,4435530434.048316,"B/op" +"cli.ProjectParserDotBenchmarks.simpleProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.048848,0.223101,"MB/sec" +"cli.ProjectParserDotBenchmarks.simpleProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,1717580.000000,7824580.788872,"B/op" 
+"cli.ProjectParserDotBenchmarks.simpleProject:·gc.count","avgt",1,4,38.000000,NaN,"counts" +"cli.ProjectParserDotBenchmarks.simpleProject:·gc.time","avgt",1,4,306.000000,NaN,"ms" diff --git a/src/jmh/kotlin/cli/BenchmarkResultWorker.kt b/src/jmh/kotlin/cli/BenchmarkResultWorker.kt index d6e57bad..44294238 100644 --- a/src/jmh/kotlin/cli/BenchmarkResultWorker.kt +++ b/src/jmh/kotlin/cli/BenchmarkResultWorker.kt @@ -104,5 +104,5 @@ class BenchmarkResultWorker { fun main(args: Array) { val benchmarkResultWorker = BenchmarkResultWorker() val results = benchmarkResultWorker.parseCsvFile("src/jmh/benchmarks.csv") - benchmarkResultWorker.saveToMarkdown(results, "src/jmh/new_results.md") + benchmarkResultWorker.saveToMarkdown(results, "src/jmh/results.md") } \ No newline at end of file diff --git a/src/jmh/results.md b/src/jmh/results.md index 7e40cb4c..6b6b62d7 100644 --- a/src/jmh/results.md +++ b/src/jmh/results.md @@ -1,13 +1,13 @@ -| | Long File | Simple Project (Gradle) | Big Project (InteliJ IDEA) | -|----------------------------------------------------|--------------------------|----------------------------|------------------------------| -| Code2Vec (time) | 0.31 sec ± 0.01 sec | 16.4 sec ± 0.52 sec | 168 sec ± 0.7 sec | -| Code2Vec (total allocated memory) | 417.4 mb ± 0.755 mb | 16.14 gb ± 0.04 gb | 147 gb ± 0.01 gb | -| | | | | -| PathContexts (time) | 4.97 sec ± 0.12 sec | 31.4 sec ± 2.1 sec | 1438 sec ± 69.5 sec | -| PathContexts (total allocated memory) | 404.9 mb ± 0.48 mb | 17.5 gb ± 0.13 gb | 278.2 gb ± 0.17 gb | -| | | | | -| ProjectParserCSV (time) | 0.41 sec ± 0.02 sec | 14.01 sec ± 1.7 sec | 391.7 sec ± 9.5 sec | -| ProjectParserCSV (total allocated memory) | 643,2 mb ± 0.008 mb | 15.4 gb ± 0.05 gb | 397.06 gb ± 250.4 gb | -| | | | | -| ProjectParserDOT (time) | 0.48 sec ± 0.01 sec | 23.61 sec ± 0.94 sec | 497 sec ± 1.02 sec | -| ProjectParserDOT (total allocated memory) | 713.2 mb ± 0.009 mb | 25,31 gb ± 0.001 gb | 463.7 gb ± 237.5 gb | +| | Long 
file | Small project (Gradle) | Big project (IntelliJ IDEA) | +| --- |--- | --- | --- | +| Code2Vec (time) | 0.44 ± 0.07 sec | 27.15 ± 0.90 sec | 257.45 ± 18.89 sec | +| Code2Vec (allocated memory per sec) | 1.22 gb ± 188.22 mb | 1.08 gb ± 31.72 mb | 1.05 gb ± 78.83 mb | +| | | | | +| PathContext (time) | 0.63 ± 0.03 sec | 26.54 ± 1.25 sec | 223.76 ± 8.24 sec | +| PathContext (allocated memory per sec) | 1.16 gb ± 72.86 mb | 1.09 gb ± 51.89 mb | 1.12 gb ± 47.49 mb | +| | | | | +| ProjectParseCSV (time) | 0.33 ± 0.04 sec | 20.40 ± 1.12 sec | 180.37 ± 2.98 sec | +| ProjectParseCSV (allocated memory per sec) | 1.37 gb ± 180.29 mb | 1.19 gb ± 68.76 mb | 1.20 gb ± 20.16 mb | +| | | | | +| ProjectParseDOT (time) | 0.43 ± 0.05 sec | 32.98 ± 2.51 sec | 285.64 ± 3.04 sec | +| ProjectParseDOT (allocated memory per sec) | 1.23 gb ± 136.31 mb | 1.02 gb ± 78.14 mb | 1.06 gb ± 16.59 mb | From 6ca1c4ca8a327398030a5403841250e371a0e7e5 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 15 Oct 2020 13:07:31 +0300 Subject: [PATCH 007/308] Add the readme to jmh package --- build.gradle.kts | 1 + src/jmh/README.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 src/jmh/README.md diff --git a/build.gradle.kts b/build.gradle.kts index f5df35a6..ca624c82 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -175,6 +175,7 @@ jmh { warmupIterations = 1 iterations = 4 fork = 2 + jvmArgs = listOf("-Xmx32g") benchmarkMode = listOf("AverageTime") resultsFile = file("build/reports/benchmarks.csv") } diff --git a/src/jmh/README.md b/src/jmh/README.md new file mode 100644 index 00000000..5dee616f --- /dev/null +++ b/src/jmh/README.md @@ -0,0 +1,43 @@ +# JMH benchmarks + +This benchmark runs ASTMiner in several CLI modes. 
These arguments are used for each run: +- Code2Vec +```(bash) +code2vec --project --output --split-tokens --granularity method --lang java +``` +- PathContext +```(bash) +pathContexts --project --output --lang java +``` +- Parse (CSV storage) +```(bash) +parse --project --output --split-tokens --granularity method --lang java --storage csv +``` +- Parse (DOT storage) +```(bash) +parse --project --output --split-tokens --granularity method --lang java --storage dot +``` + +We use 3 types of data: +1. Long file - long java file with ~5000 lines of code. It can be found in [resources](resources/LongJavaFile.java). +2. Small project - concrete version of [Gradle](https://github.com/gradle/gradle) project. We clone branch with tag `v6.3.0`. +3. Big project - concrete version of [Intellij Community](https://github.com/JetBrains/intellij-community) project. We clone branch with tag `idea/193.7288.8`. + +## Results + +Current results can be found in [results.md](results.md). + +These results were achieved on an EC2 instance `i3.8xlarge`. Parameters can be found in the [build.gradle.kts](../../build.gradle.kts) file. + +## How to run benchmarks + +Steps to run benchmarks: +1. gradle daemons should be stopped before, so execute `./gradlew --stop` +2. jmh plugin is unable to compile code incrementally, so execute `./gradlew clean` +3. to run benchmarks execute `./gradlew jmh` + +After that you will find the results in `build/reports/benchmarks.csv`. +You can convert these results into markdown table (like [results.md](results.md)) using [benchmark result worker](kotlin/cli/BenchmarkResultWorker.kt). +You should write needed paths in `main`, compile the file and run it. 
+ + From fedbd99b23dd5d16d7ee54bfb9e1bfe5d360657e Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Tue, 20 Oct 2020 14:19:30 +0300 Subject: [PATCH 008/308] Update used version of ShiftLeft C/C++ parser --- build.gradle.kts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index ed3e12f9..152dadb8 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -52,9 +52,9 @@ dependencies { api("com.github.gumtreediff", "gen.jdt", "2.1.0") // https://mvnrepository.com/artifact/io.shiftleft/fuzzyc2cpg - api("io.shiftleft", "fuzzyc2cpg_2.12", "0.1.74") { - exclude("org.slf4j", "slf4j-simple") - } + api("io.shiftleft", "fuzzyc2cpg_2.13", "1.2.9") + // https://mvnrepository.com/artifact/org.slf4j/slf4j-simple + testImplementation("org.slf4j", "slf4j-simple", "1.7.30") testImplementation("junit:junit:4.11") testImplementation(kotlin("test-junit")) From 679ca5395cb6d67c88a1c418d65947124a0ba153 Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Tue, 20 Oct 2020 14:42:19 +0300 Subject: [PATCH 009/308] Use the updated version of ShiftLeft parser in FuzzyCppParser --- .../astminer/parse/cpp/FuzzyCppParser.kt | 121 ++++++++++-------- 1 file changed, 70 insertions(+), 51 deletions(-) diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt index 8dcc14d0..e2539c83 100644 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt @@ -2,17 +2,16 @@ package astminer.parse.cpp import astminer.common.model.ParseResult import astminer.common.model.Parser -import gremlin.scala.Key import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.codepropertygraph.generated.EdgeTypes import io.shiftleft.codepropertygraph.generated.NodeKeys import io.shiftleft.codepropertygraph.generated.NodeTypes import io.shiftleft.fuzzyc2cpg.FuzzyC2Cpg -import io.shiftleft.fuzzyc2cpg.output.inmemory.OutputModuleFactory -import 
org.apache.commons.io.FileUtils -import org.apache.tinkerpop.gremlin.structure.Edge -import org.apache.tinkerpop.gremlin.structure.Element -import org.apache.tinkerpop.gremlin.structure.Vertex +import overflowdb.Edge +import overflowdb.Node +import overflowdb.Element +import scala.Option +import scala.collection.immutable.Set import java.io.File import java.io.InputStream @@ -50,22 +49,22 @@ class FuzzyCppParser : Parser { ), 0) ) - data class ReplaceableNodeKey(val key: String, val condition: (Vertex) -> Boolean) + data class ReplaceableNodeKey(val key: String, val condition: (Node) -> Boolean) private val replaceableNodeKeys = listOf( - ReplaceableNodeKey("NAME") { v -> - v.keys().contains("NAME") && - v.value("NAME").startsWith("") - }, - ReplaceableNodeKey("PARSER_TYPE_NAME") { v -> - v.keys().contains("PARSER_TYPE_NAME") - } + ReplaceableNodeKey("NAME") { v -> + v.propertyKeys().contains("NAME") && + v.property("NAME").toString().startsWith("") + }, + ReplaceableNodeKey("PARSER_TYPE_NAME") { v -> + v.propertyKeys().contains("PARSER_TYPE_NAME") + } ) } /** * Parse input stream and create an AST. - * If you already have a file with code you need to parse, better use [parseFiles] or [parseInputStream], + * If you already have a file with code you need to parse, better use [parseFile], * otherwise temporary file for input stream will be created because of fuzzyc2cpg API. * @param content to parse * @return root of AST if content was parsed, null otherwise @@ -73,20 +72,32 @@ class FuzzyCppParser : Parser { override fun parseInputStream(content: InputStream): FuzzyNode? { val file = File.createTempFile("fuzzy", ".cpp") file.deleteOnExit() - FileUtils.copyInputStreamToFile(content, file) - val nodes = parseFiles(listOf(file)) - return nodes[0].root + file.outputStream().use { + content.copyTo(it) + } + val nodes = parseFile(file) + return nodes.root } /** - * @see [Parser.parseInputStream] + * Parse a single file and create an AST. 
+ * @param file to parse + * @return [ParseResult] with root of an AST (null if parsing failed) and file path */ - override fun parseFiles(files: List): List> { - val outputModuleFactory = OutputModuleFactory() - val paths = files.map { it.path } - FuzzyC2Cpg(outputModuleFactory).runAndOutput(paths.toTypedArray()) - val cpg = outputModuleFactory.internalGraph - return cpg2Nodes(cpg) + override fun parseFile(file: File): ParseResult { + // We need some tweaks to create Scala sets from Kotlin code + val pathSetScalaBuilder = Set.newBuilder() + pathSetScalaBuilder.addOne(file.path) + val pathSet = pathSetScalaBuilder.result() + val extensionSetScalaBuilder = Set.newBuilder() + extensionSetScalaBuilder.addOne(".${file.extension}") + val extensionSet = extensionSetScalaBuilder.result() + + // Kotlin cannot use default value Scala:None for the argument, so we create it manually + val optionalOutputPath: Option = Option.empty() + + val cpg = FuzzyC2Cpg().runAndOutput(pathSet, extensionSet, optionalOutputPath) + return cpg2Nodes(cpg, file.path) } /** @@ -94,13 +105,27 @@ class FuzzyCppParser : Parser { * to list of [FuzzyNode][astminer.parse.cpp.FuzzyNode]. * Cpg may contain graphs for several files, in that case several ASTs will be created. 
* @param cpg to be converted + * @param filePath to the parsed file that will be used if parsing failed * @return list of AST roots */ - private fun cpg2Nodes(cpg: Cpg): List> { - val g = cpg.graph().traversal() - val vertexToNode = HashMap() - g.E().hasLabel(EdgeTypes.AST).forEach { addNodesFromEdge(it, vertexToNode) } - return g.V().hasLabel(NodeTypes.FILE).toList().map { ParseResult(vertexToNode[it], it.value("NAME")) } + private fun cpg2Nodes(cpg: Cpg, filePath: String): ParseResult { + val g = cpg.graph() + val vertexToNode = HashMap() + g.E().forEach { + if (it.label() == EdgeTypes.AST) { + addNodesFromEdge(it, vertexToNode) + } + } + g.V().forEach { + if (it.label() == NodeTypes.FILE) { + val actualFilePath = it.property("NAME").toString() + if (actualFilePath != filePath) { + println("While parsing $filePath, actually parsed $actualFilePath") + } + return ParseResult(vertexToNode[it], actualFilePath) + } + } + return ParseResult(null, filePath) } /** @@ -139,50 +164,44 @@ class FuzzyCppParser : Parser { */ fun elementToString(e: Element) = with(StringBuilder()) { append("${e.label()} | ") - e.keys().forEach { k -> append("$k:${e.value(k)} ") } + e.propertyKeys().forEach { k -> append("$k:${e.property(k)} ") } appendln() toString() } - private fun addNodesFromEdge(e: Edge, map: HashMap) { - val parentNode = map.getOrPut(e.outVertex()) { createNodeFromVertex(e.outVertex()) } - val childNode = map.getOrPut(e.inVertex()) { createNodeFromVertex(e.inVertex()) } + private fun addNodesFromEdge(e: Edge, map: HashMap) { + val parentNode = map.getOrPut(e.outNode()) { createNodeFromVertex(e.outNode()) } + val childNode = map.getOrPut(e.inNode()) { createNodeFromVertex(e.inNode()) } parentNode.addChild(childNode) } - private fun createNodeFromVertex(v: Vertex): FuzzyNode { - val token: String? = v.getValueOrNull(NodeKeys.CODE) - val order: Int? = v.getValueOrNull(NodeKeys.ORDER) + private fun createNodeFromVertex(v: Node): FuzzyNode { + val token: String? 
= v.property(NodeKeys.CODE) + val order: Int? = v.property(NodeKeys.ORDER) for (replaceableNodeKey in replaceableNodeKeys) { if (replaceableNodeKey.condition(v)) { - val node = FuzzyNode(v.value(replaceableNodeKey.key), token, order) - v.keys().forEach { k -> - node.setMetadata(k, v.value(k)) + val node = FuzzyNode(v.property(replaceableNodeKey.key).toString(), token, order) + v.propertyKeys().forEach { k -> + val property = v.property(k) ?: return@forEach + node.setMetadata(k, property.toString()) } return node } } val node = FuzzyNode(v.label(), token, order) - v.keys().forEach { k -> + v.propertyKeys().forEach { k -> + val property = v.property(k)?.toString() ?: return@forEach for (expandableNodeKey in expandableNodeKeys) { if (expandableNodeKey.key == k && expandableNodeKey.supportedNodeLabels.contains(v.label())) { - val keyNode = FuzzyNode(k, v.value(k).toString(), expandableNodeKey.order) + val keyNode = FuzzyNode(k, property, expandableNodeKey.order) node.addChild(keyNode) return@forEach } } - node.setMetadata(k, v.value(k)) + node.setMetadata(k, property) } return node } - - private fun Vertex.getValueOrNull(key: Key): T? 
{ - return try { - this.value(key.name()) - } catch (e: IllegalStateException) { - null - } - } } From 268b507f42f4e6edc9030531ca500d32d8c9d6dc Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Tue, 20 Oct 2020 14:42:32 +0300 Subject: [PATCH 010/308] Update C++ tests --- src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 619459cb..6cd30fc4 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -14,6 +14,7 @@ class FuzzyCppParserTest { val file = File("src/test/resources/fuzzy/test.cpp") val nodes = parser.parseFiles(listOf(file)) + nodes[0].root?.prettyPrint() Assert.assertTrue("Parse tree for a valid file should not be null", nodes.size == 1 && nodes[0].root != null) } @@ -35,7 +36,10 @@ class FuzzyCppParserTest { fun testProjectParsing() { val folder = File("src/test/resources/fuzzy/") val parser = FuzzyCppParser() - val nodes = parser.parseFiles(getProjectFilesWithExtension(folder,"cpp")).map { it.root } + val nodes = mutableListOf() + parser.parseFiles(getProjectFilesWithExtension(folder,"cpp")) { + nodes.add(it.root) + } Assert.assertEquals( "There is only 3 file with .cpp extension in 'testData/examples' folder", 3, From 615c87254c9fa78cc928a006fffbb1683aa318d6 Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Tue, 20 Oct 2020 14:44:14 +0300 Subject: [PATCH 011/308] Get rid of deprecated calls --- .../kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 6cd30fc4..40a02187 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ 
b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -13,10 +13,8 @@ class FuzzyCppParserTest { val parser = FuzzyCppParser() val file = File("src/test/resources/fuzzy/test.cpp") - val nodes = parser.parseFiles(listOf(file)) - nodes[0].root?.prettyPrint() - Assert.assertTrue("Parse tree for a valid file should not be null", - nodes.size == 1 && nodes[0].root != null) + val nodes = parser.parseFile(file) + Assert.assertTrue("Parse tree for a valid file should not be null",nodes.root != null) } @Test @@ -91,7 +89,10 @@ class FuzzyCppParserTest { val parser = FuzzyCppParser() parser.preprocessProject(projectRoot, preprocessedRoot) - val nodes = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "cpp")).map { it.root } + val nodes = mutableListOf() + parser.parseFiles(getProjectFilesWithExtension(projectRoot, "cpp")) { + nodes.add(it.root) + } Assert.assertEquals( "Parse tree for a valid file should not be null. There are 5 files in example project.", From 5b7bedce285f83f05578c618db4fec168a5e69c7 Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Tue, 20 Oct 2020 14:44:37 +0300 Subject: [PATCH 012/308] Add missing space --- src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 40a02187..fb508fe3 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -14,7 +14,7 @@ class FuzzyCppParserTest { val file = File("src/test/resources/fuzzy/test.cpp") val nodes = parser.parseFile(file) - Assert.assertTrue("Parse tree for a valid file should not be null",nodes.root != null) + Assert.assertTrue("Parse tree for a valid file should not be null", nodes.root != null) } @Test @@ -35,7 +35,7 @@ class FuzzyCppParserTest { val folder = File("src/test/resources/fuzzy/") val parser = 
FuzzyCppParser() val nodes = mutableListOf() - parser.parseFiles(getProjectFilesWithExtension(folder,"cpp")) { + parser.parseFiles(getProjectFilesWithExtension(folder, "cpp")) { nodes.add(it.root) } Assert.assertEquals( From 6702efb4ead2b4152316bc998899514a91c5ed20 Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Tue, 20 Oct 2020 14:45:29 +0300 Subject: [PATCH 013/308] Fix indentation --- .../kotlin/astminer/parse/cpp/FuzzyCppParser.kt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt index e2539c83..0166b042 100644 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt @@ -52,13 +52,13 @@ class FuzzyCppParser : Parser { data class ReplaceableNodeKey(val key: String, val condition: (Node) -> Boolean) private val replaceableNodeKeys = listOf( - ReplaceableNodeKey("NAME") { v -> - v.propertyKeys().contains("NAME") && - v.property("NAME").toString().startsWith("") - }, - ReplaceableNodeKey("PARSER_TYPE_NAME") { v -> - v.propertyKeys().contains("PARSER_TYPE_NAME") - } + ReplaceableNodeKey("NAME") { v -> + v.propertyKeys().contains("NAME") && + v.property("NAME").toString().startsWith("") + }, + ReplaceableNodeKey("PARSER_TYPE_NAME") { v -> + v.propertyKeys().contains("PARSER_TYPE_NAME") + } ) } From f09aa013598920c66328ef4dc6c19e57b93e9f74 Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Tue, 20 Oct 2020 14:50:55 +0300 Subject: [PATCH 014/308] Keep the deprecated API --- src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt index 0166b042..f1e1c341 100644 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt @@ -100,6 +100,11 @@ class 
FuzzyCppParser : Parser { return cpg2Nodes(cpg, file.path) } + /** + * @see [Parser.parseInputStream] + */ + override fun parseFiles(files: List): List> = files.map { parseFile(it) } + /** * Convert [cpg][io.shiftleft.codepropertygraph.Cpg] created by fuzzyc2cpg * to list of [FuzzyNode][astminer.parse.cpp.FuzzyNode]. From 4247e597b6737ad217dd3493783143f543f06775 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Wed, 21 Oct 2020 02:37:45 +0200 Subject: [PATCH 015/308] remove Parser.parseFiles --- src/main/kotlin/astminer/common/model/ParsingModel.kt | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 84fa1f2b..c5aba214 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -46,14 +46,6 @@ interface Parser { */ fun parseFile(file: File) = ParseResult(parseInputStream(file.inputStream()), file.path) - /** - * Parse list of files. - * @param files files to parse - * @return list of ParseResult instances, one for each parsed file - */ - @Deprecated("Please use parseFiles (List, (ParseResult) -> Any) to avoid clogging memory") - fun parseFiles(files: List): List> = files.map { ParseResult(parseInputStream(it.inputStream()), it.path) } - /** * Parse list of files. 
* @param files files to parse From e967cd5e7d25c62a956758eb0890848e374b0fb6 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Wed, 21 Oct 2020 02:39:14 +0200 Subject: [PATCH 016/308] remove bulk parsing method from Parser interface --- src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt index f1e1c341..0166b042 100644 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt @@ -100,11 +100,6 @@ class FuzzyCppParser : Parser { return cpg2Nodes(cpg, file.path) } - /** - * @see [Parser.parseInputStream] - */ - override fun parseFiles(files: List): List> = files.map { parseFile(it) } - /** * Convert [cpg][io.shiftleft.codepropertygraph.Cpg] created by fuzzyc2cpg * to list of [FuzzyNode][astminer.parse.cpp.FuzzyNode]. From 34ff8a4c094bb0827daf0fc542daada39bec3cfc Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Wed, 21 Oct 2020 02:48:52 +0200 Subject: [PATCH 017/308] fix usages of deleted API in tests --- .../kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt | 5 +++-- .../astminer/parse/antlr/python/ANTLRPythonParserTest.kt | 4 +++- src/test/kotlin/astminer/parse/java/GumTreeJavaParserTest.kt | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt b/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt index 0cc761bc..49efc0db 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt @@ -1,6 +1,7 @@ package astminer.parse.antlr.java import astminer.common.getProjectFilesWithExtension +import astminer.common.model.Node import org.junit.Assert import org.junit.Test import java.io.File @@ -57,8 +58,8 @@ class ANTLRJavaParserTest { fun 
testProjectParsing() { val parser = JavaParser() val projectRoot = File("src/test/resources/arrayCalls") - val trees = parser.parseFiles( - getProjectFilesWithExtension(projectRoot, "java")).map { it.root } + val trees = mutableListOf() + parser.parseFiles(getProjectFilesWithExtension(projectRoot, "java")) { trees.add(it.root) } Assert.assertEquals("There is only 5 file with .java extension in 'testData/arrayCalls' folder",5, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } diff --git a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt index 543b96fb..dd45298c 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt @@ -1,6 +1,7 @@ package astminer.parse.antlr.python import astminer.common.getProjectFilesWithExtension +import astminer.common.model.Node import org.junit.Assert import org.junit.Test import java.io.File @@ -20,7 +21,8 @@ class ANTLRPythonParserTest { fun testProjectParsing() { val parser = PythonParser() val projectRoot = File("src/test/resources/examples") - val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "py")).map { it.root } + val trees = mutableListOf() + parser.parseFiles(getProjectFilesWithExtension(projectRoot, "py")) { trees.add(it.root) } Assert.assertEquals("There is only 1 file with .py extension in 'testData/examples' folder",1, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } diff --git a/src/test/kotlin/astminer/parse/java/GumTreeJavaParserTest.kt b/src/test/kotlin/astminer/parse/java/GumTreeJavaParserTest.kt index e9098a97..04e359e1 100644 --- a/src/test/kotlin/astminer/parse/java/GumTreeJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/java/GumTreeJavaParserTest.kt @@ -1,6 +1,7 @@ package 
astminer.parse.java import astminer.common.getProjectFilesWithExtension +import astminer.common.model.Node import org.junit.Assert import org.junit.Test import java.io.* @@ -19,7 +20,9 @@ class GumTreeJavaParserTest { fun testProjectParsing() { val parser = GumTreeJavaParser() val projectRoot = File("src/test/resources/examples") - val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "java")).map { it.root } + + val trees = mutableListOf() + parser.parseFiles(getProjectFilesWithExtension(projectRoot, "java")) { trees.add(it.root) } Assert.assertEquals("There is only 2 file with .java extension in 'testData/examples' folder",2, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } From ecf5e8320e4ba9bb1e246e416e7225f71d9be3ae Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Wed, 21 Oct 2020 15:36:49 +0300 Subject: [PATCH 018/308] Compare absolute paths to avoid mixing relative/absolute --- src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt index f1e1c341..03e2d628 100644 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt @@ -124,7 +124,7 @@ class FuzzyCppParser : Parser { g.V().forEach { if (it.label() == NodeTypes.FILE) { val actualFilePath = it.property("NAME").toString() - if (actualFilePath != filePath) { + if (File(actualFilePath).absolutePath != File(filePath).absolutePath) { println("While parsing $filePath, actually parsed $actualFilePath") } return ParseResult(vertexToNode[it], actualFilePath) From cdee47c36f17e2a5ddb8a556bc13ee030216f511 Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Wed, 21 Oct 2020 15:39:01 +0300 Subject: [PATCH 019/308] Use MutableMap instead of HashMap --- src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt index ec42526d..62497eb5 100644 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt @@ -110,7 +110,7 @@ class FuzzyCppParser : Parser { */ private fun cpg2Nodes(cpg: Cpg, filePath: String): ParseResult { val g = cpg.graph() - val vertexToNode = HashMap() + val vertexToNode = mutableMapOf() g.E().forEach { if (it.label() == EdgeTypes.AST) { addNodesFromEdge(it, vertexToNode) @@ -169,7 +169,7 @@ class FuzzyCppParser : Parser { toString() } - private fun addNodesFromEdge(e: Edge, map: HashMap) { + private fun addNodesFromEdge(e: Edge, map: MutableMap) { val parentNode = map.getOrPut(e.outNode()) { createNodeFromVertex(e.outNode()) } val childNode = map.getOrPut(e.inNode()) { createNodeFromVertex(e.inNode()) } parentNode.addChild(childNode) From a456b902e3f0fc396de47e9df50bd8cebe8b429f Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Wed, 21 Oct 2020 15:39:16 +0300 Subject: [PATCH 020/308] Remove redundant method --- src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt index 62497eb5..38fd3f36 100644 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt @@ -157,18 +157,6 @@ class FuzzyCppParser : Parser { } } - /** - * Create string from element with its label and all its properties. 
- * @param e - element for converting to string - * @return created string - */ - fun elementToString(e: Element) = with(StringBuilder()) { - append("${e.label()} | ") - e.propertyKeys().forEach { k -> append("$k:${e.property(k)} ") } - appendln() - toString() - } - private fun addNodesFromEdge(e: Edge, map: MutableMap) { val parentNode = map.getOrPut(e.outNode()) { createNodeFromVertex(e.outNode()) } val childNode = map.getOrPut(e.inNode()) { createNodeFromVertex(e.inNode()) } From a860ede6c06eec4eedb403d3f92ef88a2e1fe3e9 Mon Sep 17 00:00:00 2001 From: Egor Spirin <> Date: Thu, 22 Oct 2020 11:53:24 +0000 Subject: [PATCH 021/308] Update README (test space integration) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 01b07bee..091effd4 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,7 @@ Support for other languages and documentation are the key areas of improvement. ## Citing astminer A [paper](https://zenodo.org/record/2595271) dedicated to astminer (more precisely, to its older version [PathMiner](https://github.com/vovak/astminer/tree/pathminer)) was presented at [MSR'19](https://2019.msrconf.org/). -If you use astminer in your academic work, please consider citing it. +If you use astminer in your academic work, please cite it. ``` @inproceedings{kovalenko2019pathminer, title={PathMiner: a library for mining of path-based representations of code}, From 3820356cd22352ae1350be6083e9d120d1f5a835 Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 22 Oct 2020 20:26:34 +0200 Subject: [PATCH 022/308] cli.sh to work with any release Otherwise, it fails as in the current master there is no `lib-0.5.jar` after `buildShadow`. 
--- cli.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli.sh b/cli.sh index 41dc7ffd..985a253a 100755 --- a/cli.sh +++ b/cli.sh @@ -1,3 +1,3 @@ #!/bin/bash -java -jar build/shadow/lib-0.5.jar "$@" \ No newline at end of file +java -jar build/shadow/lib-*.jar "$@" From 95387880e2b4bf0fde8cb687886617fcb70b3d36 Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Wed, 2 Dec 2020 17:10:18 +0300 Subject: [PATCH 023/308] Do not fail preprocessing for the file on top of the directory --- src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt index 38fd3f36..e8cacb76 100644 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt @@ -151,7 +151,11 @@ class FuzzyCppParser : Parser { .filter { file -> supportedExtensions.contains(file.extension) } files.forEach { file -> val relativeFilePath = file.relativeTo(projectRoot) - val outputPath = outputDir.resolve(relativeFilePath.parent) + val outputPath = if (relativeFilePath.parent != null){ + outputDir.resolve(relativeFilePath.parent) + } else { + outputDir + } outputPath.mkdirs() preprocessFile(file, outputPath) } From e1cfc7800fb90e001de2292cc0da1859478af61f Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Tue, 9 Feb 2021 16:42:50 +0100 Subject: [PATCH 024/308] fix typo in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 091effd4..129ccbc5 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Supported languages of the input: See [changelog](changelog.md) ## About -Astminer was first implemented as a part of pipeline in the [the code style extraction project](https://arxiv.org/abs/2002.03997) and later converted into a reusable tool. 
+Astminer was first implemented as a part of pipeline in the [code style extraction project](https://arxiv.org/abs/2002.03997) and later converted into a reusable tool. Currently it supports extraction of: * Path-based representations of files From a615489e720839c970b2e1df92025f6ded55efd7 Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Tue, 16 Feb 2021 16:17:24 +0300 Subject: [PATCH 025/308] Add JavaScript to all CLI commands --- src/main/kotlin/astminer/cli/Code2VecExtractor.kt | 4 ++-- src/main/kotlin/astminer/cli/LabelExtractors.kt | 5 +++++ src/main/kotlin/astminer/cli/PathContextsExtractor.kt | 4 +++- src/main/kotlin/astminer/cli/ProjectParser.kt | 4 ++-- src/main/kotlin/astminer/cli/utils.kt | 2 ++ 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt index 402c9092..26b3e263 100644 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt @@ -17,12 +17,12 @@ import java.io.File class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null) : CliktCommand() { - private val supportedLanguages = listOf("java", "c", "cpp", "py") + private val supportedLanguages = listOf("java", "c", "cpp", "py", "js") val extensions: List by option( "--lang", help = "Comma-separated list of file extensions that will be parsed.\n" + - "Supports 'c', 'cpp', 'java', 'py', defaults to all these extensions." + "Supports 'c', 'cpp', 'java', 'py', 'js', defaults to all these extensions." 
).split(",").default(supportedLanguages) val projectRoot: String by option( diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 1d195449..cb035e9c 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -7,6 +7,7 @@ import astminer.common.preOrder import astminer.common.setNormalizedToken import astminer.parse.antlr.SimpleNode import astminer.parse.antlr.java.JavaMethodSplitter +import astminer.parse.antlr.javascript.JavaScriptMethodSplitter import astminer.parse.antlr.python.PythonMethodSplitter import astminer.parse.cpp.FuzzyMethodSplitter import astminer.parse.cpp.FuzzyNode @@ -76,6 +77,10 @@ abstract class MethodLabelExtractor( val methodSplitter = PythonMethodSplitter() methodSplitter.splitIntoMethods(root as SimpleNode) } + "js" -> { + val methodSplitter = JavaScriptMethodSplitter() + methodSplitter.splitIntoMethods(root as SimpleNode) + } else -> throw UnsupportedOperationException("Unsupported extension $fileExtension") }.filter { methodInfo -> filterPredicates.all { predicate -> diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index 7cd53b9d..ff8d4d61 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -4,6 +4,7 @@ import astminer.common.getNormalizedToken import astminer.common.getProjectFilesWithExtension import astminer.common.model.* import astminer.parse.antlr.java.JavaParser +import astminer.parse.antlr.javascript.JavaScriptParser import astminer.parse.antlr.python.PythonParser import astminer.parse.cpp.FuzzyCppParser import astminer.parse.java.GumTreeJavaParser @@ -33,7 +34,8 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? 
= SupportedLanguage(GumTreeJavaParser(), "java"), SupportedLanguage(FuzzyCppParser(), "c"), SupportedLanguage(FuzzyCppParser(), "cpp"), - SupportedLanguage(PythonParser(), "py") + SupportedLanguage(PythonParser(), "py"), + SupportedLanguage(JavaScriptParser(), "js") ) val extensions: List by option( diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt index 4e161ecc..7941f1b7 100644 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ b/src/main/kotlin/astminer/cli/ProjectParser.kt @@ -12,12 +12,12 @@ import java.io.File class ProjectParser(private val customLabelExtractor: LabelExtractor? = null) : CliktCommand() { - private val supportedLanguages = listOf("java", "c", "cpp", "py") + private val supportedLanguages = listOf("java", "c", "cpp", "py", "js") val extensions: List by option( "--lang", help = "Comma-separated list of file extensions that will be parsed.\n" + - "Supports 'c', 'cpp', 'java', 'py', defaults to all these extensions." + "Supports 'c', 'cpp', 'java', 'py', 'js', defaults to all these extensions." 
).split(",").default(supportedLanguages) val projectRoot: String by option( diff --git a/src/main/kotlin/astminer/cli/utils.kt b/src/main/kotlin/astminer/cli/utils.kt index edf36664..815e4b5e 100644 --- a/src/main/kotlin/astminer/cli/utils.kt +++ b/src/main/kotlin/astminer/cli/utils.kt @@ -10,6 +10,7 @@ import astminer.common.model.Parser import astminer.common.preOrder import astminer.common.setNormalizedToken import astminer.common.splitToSubtokens +import astminer.parse.antlr.javascript.JavaScriptParser fun getParser( extension: String, @@ -28,6 +29,7 @@ fun getParser( "c" -> FuzzyCppParser() "cpp" -> FuzzyCppParser() "py" -> PythonParser() + "js" -> JavaScriptParser() else -> { throw UnsupportedOperationException("Unsupported extension $extension") } From a76c9fc5b933091c9c3736fa293ed0071875c674 Mon Sep 17 00:00:00 2001 From: "Egor.Bogomolov" Date: Tue, 16 Feb 2021 16:18:35 +0300 Subject: [PATCH 026/308] Add JavaScript to README --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 129ccbc5..0851c456 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Supported languages of the input: - [x] Java - [x] Python - [x] C/C++ -- [x] Javascript (beta) (see [issue](https://github.com/vovak/astminer/issues/22)) +- [x] Javascript ### Version history @@ -46,14 +46,14 @@ In other tasks, if you feed C/C++ file with macroses, they will be dropped as we Extract ASTs from all the files in supported languages. ```shell script -./cli.sh parse --lang py,java,c,cpp --project path/to/project --output path/to/result --storage dot +./cli.sh parse --lang py,java,c,cpp,js --project path/to/project --output path/to/result --storage dot ``` #### PathContexts Extract path contexts from all the files in supported languages and store in form `fileName triplesOfPathContexts`. 
```shell script -./cli.sh pathContexts --lang py,java,c,cpp --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P +./cli.sh pathContexts --lang py,java,c,cpp,js --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P ``` #### Code2vec @@ -61,7 +61,7 @@ Extract path contexts from all the files in supported languages and store in for Extract data suitable as input for [code2vec](https://github.com/tech-srl/code2vec) model. Parse all files written in specified language into ASTs, split into methods, and store in form `method|name triplesOfPathContexts`. ```shell script -./cli.sh code2vec --lang py,java,c,cpp --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P --split-tokens --granularity method +./cli.sh code2vec --lang py,java,c,cpp,js --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P --split-tokens --granularity method ``` ### Integrate in your mining pipeline From dd43f0ffdf8fb56483cb64def7d090b352724fcf Mon Sep 17 00:00:00 2001 From: Ivan Rybin Date: Wed, 3 Mar 2021 13:47:24 +0300 Subject: [PATCH 027/308] gumtree python support --- build.gradle.kts | 3 +- .../astminer/examples/AllPythonMethods.kt | 45 +++++++++ .../python/GumTreePythonMethodSplitter.kt | 98 +++++++++++++++++++ .../parse/python/GumTreePythonNode.kt | 45 +++++++++ .../parse/python/GumTreePythonParser.kt | 24 +++++ src/test/resources/gumTreeMethodSplitter/1.py | 44 +++++++++ src/test/resources/gumTreeMethodSplitter/2.py | 43 ++++++++ src/test/resources/gumTreeMethodSplitter/3.py | 16 +++ src/test/resources/gumTreeMethodSplitter/4.py | 22 +++++ 9 files changed, 339 insertions(+), 1 deletion(-) create mode 100644 src/main/kotlin/astminer/examples/AllPythonMethods.kt create mode 100644 src/main/kotlin/astminer/parse/python/GumTreePythonMethodSplitter.kt create mode 100644 
src/main/kotlin/astminer/parse/python/GumTreePythonNode.kt create mode 100644 src/main/kotlin/astminer/parse/python/GumTreePythonParser.kt create mode 100644 src/test/resources/gumTreeMethodSplitter/1.py create mode 100644 src/test/resources/gumTreeMethodSplitter/2.py create mode 100644 src/test/resources/gumTreeMethodSplitter/3.py create mode 100644 src/test/resources/gumTreeMethodSplitter/4.py diff --git a/build.gradle.kts b/build.gradle.kts index 152dadb8..71436212 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -47,9 +47,10 @@ dependencies { implementation(kotlin("stdlib")) // https://mvnrepository.com/artifact/com.github.gumtreediff - api("com.github.gumtreediff", "core", "2.1.0") + api("com.github.gumtreediff", "core", "2.1.2") api("com.github.gumtreediff", "client", "2.1.0") api("com.github.gumtreediff", "gen.jdt", "2.1.0") + api("com.github.gumtreediff", "gen.python", "2.1.2") // https://mvnrepository.com/artifact/io.shiftleft/fuzzyc2cpg api("io.shiftleft", "fuzzyc2cpg_2.13", "1.2.9") diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt new file mode 100644 index 00000000..488bb4eb --- /dev/null +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -0,0 +1,45 @@ +package astminer.examples + +import astminer.common.model.LabeledPathContexts +import astminer.common.model.MethodInfo +import astminer.parse.python.GumTreePythonMethodSplitter +import astminer.parse.python.GumTreePythonNode +import astminer.parse.python.GumTreePythonParser +import astminer.paths.CsvPathStorage +import astminer.paths.PathMiner +import astminer.paths.PathRetrievalSettings +import astminer.paths.toPathContext +import java.io.File + +private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { + val className = methodInfo.enclosingElementName() ?: "" + val methodName = methodInfo.name() ?: "unknown_method" + val parameterTypes = methodInfo.methodParameters.joinToString("|") { it.name() ?: 
"_" } + return "$className.$methodName($parameterTypes)" +} + +fun allPythonMethods() { + val inputDir = "src/test/resources/gumTreeMethodSplitter" + + val miner = PathMiner(PathRetrievalSettings(5, 5)) + val outputDir = "out_examples/allPythonMethods" + val storage = CsvPathStorage(outputDir) + + File(inputDir).forFilesWithSuffix(".py") { file -> + // parse file + val fileNode = GumTreePythonParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix + + // extract method nodes + val methodNodes = GumTreePythonMethodSplitter().splitIntoMethods(fileNode) + + methodNodes.forEach { methodInfo -> + // Retrieve paths from every node individually + val paths = miner.retrievePaths(methodInfo.method.root) + // Retrieve a method identifier + val entityId = "${file.path}::${getCsvFriendlyMethodId(methodInfo)}" + storage.store(LabeledPathContexts(entityId, paths.map { toPathContext(it) })) + } + } + + storage.close() +} diff --git a/src/main/kotlin/astminer/parse/python/GumTreePythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/python/GumTreePythonMethodSplitter.kt new file mode 100644 index 00000000..6673f811 --- /dev/null +++ b/src/main/kotlin/astminer/parse/python/GumTreePythonMethodSplitter.kt @@ -0,0 +1,98 @@ +package astminer.parse.python + +import astminer.common.model.ElementNode +import astminer.common.model.MethodInfo +import astminer.common.model.MethodNode +import astminer.common.model.ParameterNode +import astminer.common.model.TreeMethodSplitter +import astminer.common.preOrder + +class GumTreePythonMethodSplitter : TreeMethodSplitter { + companion object { + private object TypeLabels { + const val classDefinition = "ClassDef" + const val functionDefinition = "FunctionDef" + const val asyncFunctionDefinition = "AsyncFunctionDef" + const val nameLoad = "Name_Load" + const val posOnlyArgs = "posonlyargs" + const val kwOnlyArgs = "kwonlyargs" + const val arguments = "arguments" + const val args = "args" + const val arg = "arg" + + const val 
body = "body" + const val returnTypeLabel = "Return" + const val constantType = "Constant-" + + val methodDefinitions = listOf(functionDefinition, asyncFunctionDefinition) + val funcArgsTypesNodes = listOf(args, posOnlyArgs, kwOnlyArgs) + } + } + + override fun splitIntoMethods(root: GumTreePythonNode): Collection> { + val methodRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.getTypeLabel()) } + return methodRoots.map { collectMethodInfo(it as GumTreePythonNode) } + } + + private fun collectMethodInfo(methodNode: GumTreePythonNode): MethodInfo { + val methodReturnType = getElementType(methodNode) // no methods return types for current parser + val methodName = getElementName(methodNode) + + val classRoot = getEnclosingClass(methodNode) + val className = classRoot?.let { getElementName(it) } + + val parameters = getParameters(methodNode) + + return MethodInfo( + MethodNode(methodNode, methodReturnType, methodName), + ElementNode(classRoot, className), + parameters + ) + } + + private fun getElementName(node: GumTreePythonNode) = node + + private fun getElementType(node: GumTreePythonNode): GumTreePythonNode? { + if (node.getTypeLabel() == TypeLabels.arg) { + return node.getChildOfType(TypeLabels.nameLoad) as GumTreePythonNode? + } + // if return statement has "Constant-`Type`" return value => function type is `Type` + if (TypeLabels.methodDefinitions.contains(node.getTypeLabel())) { + return node.getChildOfType(TypeLabels.body)?.getChildOfType(TypeLabels.returnTypeLabel)?.let { + it.getChildren().firstOrNull { child -> + child.getTypeLabel().startsWith(TypeLabels.constantType) + } as GumTreePythonNode? + } + } + return null + } + + private fun getEnclosingClass(node: GumTreePythonNode): GumTreePythonNode? { + if (node.getTypeLabel() == TypeLabels.classDefinition) { + return node + } + val parentNode = node.getParent() as? 
GumTreePythonNode + return parentNode?.let { getEnclosingClass(it) } + } + + private fun getParameters(methodNode: GumTreePythonNode): List> { + val params = methodNode.getChildrenOfType(TypeLabels.arguments).flatMap { + it.getChildren() + }.filter { + TypeLabels.funcArgsTypesNodes.contains(it.getTypeLabel()) + }.flatMap { + it.getChildren() + }.filter { + it.getTypeLabel() == TypeLabels.arg + } + + return params.map { + val node = it as GumTreePythonNode + ParameterNode( + node, + getElementType(node), + getElementName(node) + ) + }.toList() + } +} diff --git a/src/main/kotlin/astminer/parse/python/GumTreePythonNode.kt b/src/main/kotlin/astminer/parse/python/GumTreePythonNode.kt new file mode 100644 index 00000000..e696f21a --- /dev/null +++ b/src/main/kotlin/astminer/parse/python/GumTreePythonNode.kt @@ -0,0 +1,45 @@ +package astminer.parse.python + +import astminer.common.model.Node +import com.github.gumtreediff.tree.ITree +import com.github.gumtreediff.tree.TreeContext + +class GumTreePythonNode(val wrappedNode: ITree, val context: TreeContext, val parent: GumTreePythonNode?) : Node { + private val metadata: MutableMap = HashMap() + + override fun getMetadata(key: String): Any? { + return metadata[key] + } + + override fun setMetadata(key: String, value: Any) { + metadata[key] = value + } + + override fun isLeaf(): Boolean { + return childrenList.isEmpty() + } + + private val childrenList: MutableList by lazy { + wrappedNode.children.map { GumTreePythonNode(it, context, this) }.toMutableList() + } + + override fun getTypeLabel(): String { + return context.getTypeLabel(wrappedNode) + } + + override fun getChildren(): List { + return childrenList + } + + override fun getParent(): Node? 
{ + return parent + } + + override fun getToken(): String { + return wrappedNode.label + } + + override fun removeChildrenOfType(typeLabel: String) { + childrenList.removeIf { it.getTypeLabel() == typeLabel } + } +} diff --git a/src/main/kotlin/astminer/parse/python/GumTreePythonParser.kt b/src/main/kotlin/astminer/parse/python/GumTreePythonParser.kt new file mode 100644 index 00000000..842f1f10 --- /dev/null +++ b/src/main/kotlin/astminer/parse/python/GumTreePythonParser.kt @@ -0,0 +1,24 @@ +package astminer.parse.python + +import astminer.common.model.Parser +import com.github.gumtreediff.client.Run +import com.github.gumtreediff.gen.python.PythonTreeGenerator +import com.github.gumtreediff.tree.TreeContext +import java.io.InputStream +import java.io.InputStreamReader + +class GumTreePythonParser : Parser { + init { + Run.initGenerators() + } + + override fun parseInputStream(content: InputStream): GumTreePythonNode { + val context = PythonTreeGenerator().generate(InputStreamReader(content)) + return wrapGumTreeNode(context) + } +} + +fun wrapGumTreeNode(treeContext: TreeContext): GumTreePythonNode { + return GumTreePythonNode(treeContext.root, treeContext, null) +} + diff --git a/src/test/resources/gumTreeMethodSplitter/1.py b/src/test/resources/gumTreeMethodSplitter/1.py new file mode 100644 index 00000000..0fd6c75d --- /dev/null +++ b/src/test/resources/gumTreeMethodSplitter/1.py @@ -0,0 +1,44 @@ +from typing import Dict, List, Union + + +def no_args_func(): + """ + :return: None + """ + return None + + +def with_args_no_typed(a, b, c, d = 42): + return a, b, c + + +def with_typed_args(a: int, z: str): + return None + + +def with_typed_return_no_args() -> str: + x: int = 42 + return "str" + + +def full_typed(filename: str) -> str: + """ + :param filename: path to file + :return: string with file content + """ + with open(filename, 'rt') as f: + content = f.read() + return content + + +def func_dif_args_typed_return(a, b, /, c, d, *, e, f) -> int: + """ + 
python doc + """ + return 42 + + +JsonNodeType = Dict[str, Union[str, List[int]]] + +def complex_args_full_typed(node: JsonNodeType) -> JsonNodeType: + return node diff --git a/src/test/resources/gumTreeMethodSplitter/2.py b/src/test/resources/gumTreeMethodSplitter/2.py new file mode 100644 index 00000000..3b487363 --- /dev/null +++ b/src/test/resources/gumTreeMethodSplitter/2.py @@ -0,0 +1,43 @@ +class A: + def __init__(self): + """ + outer init + """ + pass + + + def __add__(self, other): + pass + + + def foo(self, x): + return x * x + + + def foo_typed(self, x: int, y: int) -> int: + return x * y + + + class B: + def __init__(self): + """ + inner init + """ + pass + + + def __get__(self, instance, owner): + pass + + + def foo_typed(self, x: int, y: int) -> int: + return x + y + + + class C: + + def __init__(self): + pass + + def bar_typed(self, x: int) -> int: + return x diff --git a/src/test/resources/gumTreeMethodSplitter/3.py b/src/test/resources/gumTreeMethodSplitter/3.py new file mode 100644 index 00000000..254dc280 --- /dev/null +++ b/src/test/resources/gumTreeMethodSplitter/3.py @@ -0,0 +1,16 @@ + +async def async_simple_no_typed(gh, original_issue, branch, backport_pr_number): + """ + async doc + """ + pass + + +@router.register("pull_request", action="opened") +@router.register("pull_request", action="edited") +async def async_schrecklich_typed(event: str, x: int , *args, **kwargs) -> int: + + def inner(): + pass + + return 42 diff --git a/src/test/resources/gumTreeMethodSplitter/4.py b/src/test/resources/gumTreeMethodSplitter/4.py new file mode 100644 index 00000000..e13209ba --- /dev/null +++ b/src/test/resources/gumTreeMethodSplitter/4.py @@ -0,0 +1,22 @@ +def foo(a: int): + + def foo_1(b): + + def foo_2(c): + return None + + return foo_2(b) + + + def bar_1(b: int, c: int) -> int: + + def bar_2(d: int, e: int) -> int: + return 42 + + return bar_2(b, c) + + + foo_1(a) + bar_1(a, a) + + return None From 03fb818758f8fbf23a25795046a1c63aee8762e6 Mon Sep 
17 00:00:00 2001 From: Ivan Rybin Date: Sun, 7 Mar 2021 13:25:22 +0300 Subject: [PATCH 028/308] include *args and **kwargs in parameters list --- .../parse/python/GumTreePythonMethodSplitter.kt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/main/kotlin/astminer/parse/python/GumTreePythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/python/GumTreePythonMethodSplitter.kt index 6673f811..01571c92 100644 --- a/src/main/kotlin/astminer/parse/python/GumTreePythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/python/GumTreePythonMethodSplitter.kt @@ -17,11 +17,14 @@ class GumTreePythonMethodSplitter : TreeMethodSplitter { const val posOnlyArgs = "posonlyargs" const val kwOnlyArgs = "kwonlyargs" const val arguments = "arguments" + const val vararg = "vararg" + const val kwarg = "kwarg" const val args = "args" const val arg = "arg" const val body = "body" const val returnTypeLabel = "Return" + const val passTypeLabel = "Pass" const val constantType = "Constant-" val methodDefinitions = listOf(functionDefinition, asyncFunctionDefinition) @@ -84,6 +87,14 @@ class GumTreePythonMethodSplitter : TreeMethodSplitter { it.getChildren() }.filter { it.getTypeLabel() == TypeLabels.arg + } as MutableList + + methodNode.getChildrenOfType(TypeLabels.arguments).flatMap { + it.getChildren() + }.filter { + it.getTypeLabel() == TypeLabels.vararg || it.getTypeLabel() == TypeLabels.kwarg + }.forEach { + params.add(it) } return params.map { From 04dfed76606dd2807f3c6a9d4835f92dd2d50be0 Mon Sep 17 00:00:00 2001 From: Ivan Rybin Date: Sun, 7 Mar 2021 13:26:34 +0300 Subject: [PATCH 029/308] no exceptions for invalid files --- .../kotlin/astminer/parse/python/GumTreePythonParser.kt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/astminer/parse/python/GumTreePythonParser.kt b/src/main/kotlin/astminer/parse/python/GumTreePythonParser.kt index 842f1f10..c6d791fc 100644 --- 
a/src/main/kotlin/astminer/parse/python/GumTreePythonParser.kt +++ b/src/main/kotlin/astminer/parse/python/GumTreePythonParser.kt @@ -12,13 +12,14 @@ class GumTreePythonParser : Parser { Run.initGenerators() } - override fun parseInputStream(content: InputStream): GumTreePythonNode { + override fun parseInputStream(content: InputStream): GumTreePythonNode? = try { val context = PythonTreeGenerator().generate(InputStreamReader(content)) - return wrapGumTreeNode(context) + wrapGumTreeNode(context) + } catch (e: Exception) { + null } } fun wrapGumTreeNode(treeContext: TreeContext): GumTreePythonNode { return GumTreePythonNode(treeContext.root, treeContext, null) } - From 08225506ab6b4a4a8f30d713fe959f2ed7b88b0c Mon Sep 17 00:00:00 2001 From: Ivan Rybin Date: Sun, 7 Mar 2021 13:27:52 +0300 Subject: [PATCH 030/308] tests for python parser and method splitting --- .../parse/python/GumTreeMethodSplitterTest.kt | 169 ++++++++++++++++++ .../parse/python/GumTreePythonParserTets.kt | 50 ++++++ 2 files changed, 219 insertions(+) create mode 100644 src/test/kotlin/astminer/parse/python/GumTreeMethodSplitterTest.kt create mode 100644 src/test/kotlin/astminer/parse/python/GumTreePythonParserTets.kt diff --git a/src/test/kotlin/astminer/parse/python/GumTreeMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/python/GumTreeMethodSplitterTest.kt new file mode 100644 index 00000000..303e66fe --- /dev/null +++ b/src/test/kotlin/astminer/parse/python/GumTreeMethodSplitterTest.kt @@ -0,0 +1,169 @@ +package astminer.parse.python + +import astminer.common.model.MethodInfo +import org.junit.Test +import java.io.File +import kotlin.test.assertEquals +import kotlin.test.assertNotNull + +class GumTreeMethodSplitterTest { + private fun parse(filename: String): GumTreePythonNode? 
= + GumTreePythonParser().parseInputStream(File(filename).inputStream()) + + private fun splitMethods(filename: String): Collection> = parse(filename)?.let { + GumTreePythonMethodSplitter().splitIntoMethods(it) + } ?: emptyList() + + private fun createPath(file: String) = "src/test/resources/gumTreeMethodSplitter/$file" + + @Test + fun methodsCountTest() { + assertEquals(7, splitMethods(createPath("1.py")).size) + assertEquals(9, splitMethods(createPath("2.py")).size) + assertEquals(3, splitMethods(createPath("3.py")).size) + assertEquals(5, splitMethods(createPath("4.py")).size) + } + + @Test + fun funcNamesTest() { + val realNames = setOf( + "no_args_func", "with_args_no_typed", "with_typed_args", + "with_typed_return_no_args", "full_typed", + "func_dif_args_typed_return", "complex_args_full_typed" + ) + val methodInfos = splitMethods(createPath("1.py")) + val parsedNames = methodInfos.map { it.name() }.toSet() + assertEquals(realNames, parsedNames) + } + + @Test + fun methodInfoTest1TypedArgs() { + val methodInfos = splitMethods(createPath("1.py")) + val method = methodInfos.firstOrNull { it.name() == "complex_args_full_typed" } + assertNotNull(method) + with(method) { + assertEquals("complex_args_full_typed", name()) + assertEquals(null, this.method.returnTypeNode) + assertEquals(1, methodParameters.size) + assertEquals(listOf("node"), methodParameters.map { it.name() }.toList()) + assertEquals(listOf("JsonNodeType"), methodParameters.map { it.returnType() }.toList()) + } + } + + @Test + fun methodInfoTest2ManyArgs() { + val methodInfos = splitMethods(createPath("1.py")) + val method = methodInfos.firstOrNull { it.name() == "func_dif_args_typed_return" } + assertNotNull(method) + with(method) { + assertEquals("func_dif_args_typed_return", name()) + assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) + assertEquals(6, methodParameters.size) + assertEquals(listOf("a", "b", "c", "d", "e", "f"), methodParameters.map { it.name() }.toList()) + 
assertEquals(emptyList(), methodParameters.mapNotNull { it.returnType() }.toList()) + } + } + + @Test + fun methodInfoTest3EnclosingClass() { + val methodInfos = splitMethods(createPath("2.py")) + val method = methodInfos.firstOrNull { it.name() == "foo_typed" } + assertNotNull(method) + with(method) { + assertEquals("foo_typed", name()) + assertEquals("A", enclosingElementName()) + assertEquals(null, this.method.returnTypeNode) + assertEquals(3, methodParameters.size) + assertEquals(listOf("self", "x", "y"), methodParameters.map { it.name() }.toList()) + assertEquals(listOf(null, "int", "int"), methodParameters.map { it.returnType() }.toList()) + } + } + + @Test + fun methodInfoTest4EnclosingClass() { + val methodInfos = splitMethods(createPath("2.py")) + val method = methodInfos.firstOrNull { it.name() == "bar_typed" } + assertNotNull(method) + with(method) { + assertEquals("bar_typed", name()) + assertEquals("C", enclosingElementName()) + assertEquals(null, this.method.returnTypeNode) + assertEquals(2, methodParameters.size) + assertEquals(listOf("self", "x"), methodParameters.map { it.name() }.toList()) + assertEquals(listOf(null, "int"), methodParameters.map { it.returnType() }.toList()) + } + } + + @Test + fun methodInfoTest5AsyncDef() { + val methodInfos = splitMethods(createPath("3.py")) + val method = methodInfos.firstOrNull { it.name() == "async_schrecklich_typed" } + assertNotNull(method) + with(method) { + assertEquals("async_schrecklich_typed", name()) + assertEquals("AsyncFunctionDef", this.method.root.getTypeLabel()) + assertEquals(null, enclosingElementName()) + assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) + assertEquals(4, methodParameters.size) + assertEquals(listOf("event", "x", "args", "kwargs"), methodParameters.map { it.name() }.toList()) + assertEquals(listOf("str", "int", null, null), methodParameters.map { it.returnType() }.toList()) + } + } + + @Test + fun methodInfoTest6Doc() { + val methodInfos = 
splitMethods(createPath("3.py")) + val method = methodInfos.firstOrNull { it.name() == "async_simple_no_typed" } + assertNotNull(method) + with(method) { + assertEquals("async_simple_no_typed", name()) + assertEquals("AsyncFunctionDef", this.method.root.getTypeLabel()) + assertEquals(null, enclosingElementName()) + assertEquals( + "\n async doc\n ", + this.method.root.getChildOfType("body") + ?.getChildOfType("Expr") + ?.getChildOfType("Constant-str") + ?.getToken() + ) + assertEquals(4, methodParameters.size) + assertEquals( + listOf("gh", "original_issue", "branch", "backport_pr_number"), + methodParameters.map { it.name() }.toList() + ) + assertEquals(listOf(null, null, null, null), methodParameters.map { it.returnType() }.toList()) + } + } + + @Test + fun methodInfoTest7InnerFunc() { + val methodInfos = splitMethods(createPath("4.py")) + val method = methodInfos.firstOrNull { it.name() == "foo_2" } + assertNotNull(method) + with(method) { + assertEquals("foo_2", name()) + assertEquals("foo_1", method.method.root.parent?.wrappedNode?.parent?.label) + assertEquals(null, enclosingElementName()) + assertEquals("Constant-NoneType", this.method.returnTypeNode?.getTypeLabel()) + assertEquals(1, methodParameters.size) + assertEquals(listOf("c"), methodParameters.map { it.name() }.toList()) + assertEquals(listOf(null), methodParameters.map { it.returnType() }.toList()) + } + } + + @Test + fun methodInfoTest8InnerFunc() { + val methodInfos = splitMethods(createPath("4.py")) + val method = methodInfos.firstOrNull { it.name() == "bar_2" } + assertNotNull(method) + with(method) { + assertEquals("bar_2", name()) + assertEquals("bar_1", method.method.root.parent?.wrappedNode?.parent?.label) + assertEquals(null, enclosingElementName()) + assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) + assertEquals(2, methodParameters.size) + assertEquals(listOf("d", "e"), methodParameters.map { it.name() }.toList()) + assertEquals(listOf("int", "int"), 
methodParameters.map { it.returnType() }.toList()) + } + } +} diff --git a/src/test/kotlin/astminer/parse/python/GumTreePythonParserTets.kt b/src/test/kotlin/astminer/parse/python/GumTreePythonParserTets.kt new file mode 100644 index 00000000..33eeb673 --- /dev/null +++ b/src/test/kotlin/astminer/parse/python/GumTreePythonParserTets.kt @@ -0,0 +1,50 @@ +package astminer.parse.python + +import org.junit.After +import org.junit.Before +import org.junit.Test +import java.io.File +import kotlin.test.assertFalse +import kotlin.test.assertNotNull +import kotlin.test.assertNull +import kotlin.test.assertTrue + +class GumTreePythonParserTest { + private val parser: GumTreePythonParser = GumTreePythonParser() + private val testFolder = File(".python_parser_test_tmp") + private val testFile = testFolder.resolve("test_file.py") + + @Before + fun mkdir() { + testFolder.mkdirs() + testFile.createNewFile() + } + + @After + fun rmdir() { + testFolder.deleteRecursively() + } + + @Test(expected = Test.None::class) + fun emptyFile() { + val node = parser.parseInputStream(testFile.inputStream()) + assertNotNull(node) + assertTrue(node.wrappedNode.children.isEmpty()) + } + + @Test(expected = Test.None::class) + fun invalidCode() { + testFile.writeText("INVALID PYTHON CODE") + val node = parser.parseInputStream(testFile.inputStream()) + assertNull(node) + } + + @Test(expected = Test.None::class) + fun goodFile() { + val node = parser.parseInputStream( + File("src/test/resources/gumTreeMethodSplitter/1.py").inputStream() + ) + assertNotNull(node) + assertFalse(node.wrappedNode.children.isEmpty()) + } +} From 65313725732497057ec0f758308feadceafcbbb6 Mon Sep 17 00:00:00 2001 From: Ivan Rybin Date: Tue, 16 Mar 2021 10:36:52 +0300 Subject: [PATCH 031/308] CI config update --- .circleci/config.yml | 31 +++++++++++++++++-- ...rserTets.kt => GumTreePythonParserTest.kt} | 0 2 files changed, 29 insertions(+), 2 deletions(-) rename src/test/kotlin/astminer/parse/python/{GumTreePythonParserTets.kt 
=> GumTreePythonParserTest.kt} (100%) diff --git a/.circleci/config.yml b/.circleci/config.yml index 29c651f9..68780800 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,12 +3,38 @@ jobs: # build with machine executor build: machine: - enabled: true + image: ubuntu-1604:202101-01 working_directory: ~/astminer steps: - checkout: path: ~/astminer - - run: ./gradlew build --stacktrace + - run: + name: Installing Python 3.8 + command: | + sudo apt-get install software-properties-common + sudo add-apt-repository ppa:deadsnakes/ppa + sudo apt-get update + sudo apt-get install python3.8 + sudo apt-get -y install python3-pip + ls -l /usr/bin/python* + python3 --version + - run: + name: Installing backend GumTree parser for Python + # for using pythonparser we need /tmp folder in PATH variable + command: | + pwd + git clone https://github.com/JetBrains-Research/pythonparser pythonparser_tmp_dir + pip3 install -r pythonparser_tmp_dir/requirements.txt + mv pythonparser_tmp_dir/src/main/python/pythonparser/pythonparser_3.py /tmp/pythonparser + chmod +x /tmp/pythonparser + ls -l /tmp + - run: + name: Build and tests + command: | + export PATH=$PATH:/tmp + echo $PATH + ./gradlew build --stacktrace + ./gradlew test --stacktrace # release release: @@ -22,6 +48,7 @@ jobs: path: ~/astminer - run: ./gradlew bintrayUpload "-PbranchName=$GIT_BRANCH" + release-dev: machine: enabled: true diff --git a/src/test/kotlin/astminer/parse/python/GumTreePythonParserTets.kt b/src/test/kotlin/astminer/parse/python/GumTreePythonParserTest.kt similarity index 100% rename from src/test/kotlin/astminer/parse/python/GumTreePythonParserTets.kt rename to src/test/kotlin/astminer/parse/python/GumTreePythonParserTest.kt From fa1fa9a6eb4fb1f09e613e2041c281f13d53f0c4 Mon Sep 17 00:00:00 2001 From: Ivan Rybin Date: Tue, 16 Mar 2021 15:38:19 +0300 Subject: [PATCH 032/308] CI config fixes --- .circleci/config.yml | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) 
diff --git a/.circleci/config.yml b/.circleci/config.yml index 68780800..7b5f08c8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,37 +3,28 @@ jobs: # build with machine executor build: machine: - image: ubuntu-1604:202101-01 + image: ubuntu-2004:202010-01 working_directory: ~/astminer steps: - checkout: path: ~/astminer - - run: - name: Installing Python 3.8 - command: | - sudo apt-get install software-properties-common - sudo add-apt-repository ppa:deadsnakes/ppa - sudo apt-get update - sudo apt-get install python3.8 - sudo apt-get -y install python3-pip - ls -l /usr/bin/python* - python3 --version - run: name: Installing backend GumTree parser for Python - # for using pythonparser we need /tmp folder in PATH variable command: | - pwd git clone https://github.com/JetBrains-Research/pythonparser pythonparser_tmp_dir pip3 install -r pythonparser_tmp_dir/requirements.txt mv pythonparser_tmp_dir/src/main/python/pythonparser/pythonparser_3.py /tmp/pythonparser chmod +x /tmp/pythonparser - ls -l /tmp - run: - name: Build and tests + # for using pythonparser we need /tmp folder in PATH variable + name: Build command: | export PATH=$PATH:/tmp - echo $PATH ./gradlew build --stacktrace + - run: + name: Test + command: | + export PATH=$PATH:/tmp ./gradlew test --stacktrace # release @@ -90,4 +81,4 @@ workflows: filters: branches: only: - - master-dev + - master-dev \ No newline at end of file From 7d0cc750c41046a5a1c70ac4387342c0c9118344 Mon Sep 17 00:00:00 2001 From: Ivan Rybin Date: Sun, 21 Mar 2021 13:02:23 +0300 Subject: [PATCH 033/308] fix python parser type for label extractor --- .../kotlin/astminer/cli/LabelExtractors.kt | 29 ++++++++++++++----- .../astminer/examples/AllJavaMethods.kt | 4 +-- ...litter.kt => GumTreeJavaMethodSplitter.kt} | 2 +- ...st.kt => GumTreeJavaMethodSplitterTest.kt} | 4 +-- ...st.kt => GumTreeJavaMethodSplitterTest.kt} | 2 +- 5 files changed, 28 insertions(+), 13 deletions(-) rename 
src/main/kotlin/astminer/parse/java/{GumTreeMethodSplitter.kt => GumTreeJavaMethodSplitter.kt} (97%) rename src/test/kotlin/astminer/parse/java/{GumTreeMethodSplitterTest.kt => GumTreeJavaMethodSplitterTest.kt} (96%) rename src/test/kotlin/astminer/parse/python/{GumTreeMethodSplitterTest.kt => GumTreeJavaMethodSplitterTest.kt} (99%) diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index cb035e9c..c418a48b 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -12,7 +12,9 @@ import astminer.parse.antlr.python.PythonMethodSplitter import astminer.parse.cpp.FuzzyMethodSplitter import astminer.parse.cpp.FuzzyNode import astminer.parse.java.GumTreeJavaNode -import astminer.parse.java.GumTreeMethodSplitter +import astminer.parse.java.GumTreeJavaMethodSplitter +import astminer.parse.python.GumTreePythonMethodSplitter +import astminer.parse.python.GumTreePythonNode import java.io.File @@ -42,7 +44,8 @@ abstract class FileLabelExtractor : LabelExtractor { abstract class MethodLabelExtractor( open val filterPredicates: Collection = emptyList(), - open val javaParser: String = "gumtree" + open val javaParser: String = "gumtree", + open val pythonParser: String = "antlr" ) : LabelExtractor { override fun toLabeledData( @@ -61,7 +64,7 @@ abstract class MethodLabelExtractor( "java" -> { when (javaParser) { "gumtree" -> { - val methodSplitter = GumTreeMethodSplitter() + val methodSplitter = GumTreeJavaMethodSplitter() methodSplitter.splitIntoMethods(root as GumTreeJavaNode) } "antlr" -> { @@ -74,8 +77,19 @@ abstract class MethodLabelExtractor( } } "py" -> { - val methodSplitter = PythonMethodSplitter() - methodSplitter.splitIntoMethods(root as SimpleNode) + when (pythonParser) { + "gumtree" -> { + val methodSplitter = GumTreePythonMethodSplitter() + methodSplitter.splitIntoMethods(root as GumTreePythonNode) + } + "antlr" -> { + val methodSplitter = 
PythonMethodSplitter() + methodSplitter.splitIntoMethods(root as SimpleNode) + } + else -> { + throw UnsupportedOperationException("Unsupported parser $pythonParser") + } + } } "js" -> { val methodSplitter = JavaScriptMethodSplitter() @@ -111,8 +125,9 @@ class FolderExtractor : FileLabelExtractor() { class MethodNameExtractor( val hideMethodNames: Boolean = false, override val filterPredicates: Collection = emptyList(), - override val javaParser: String = "gumtree" -) : MethodLabelExtractor(filterPredicates, javaParser) { + override val javaParser: String = "gumtree", + override val pythonParser: String = "antlr" +) : MethodLabelExtractor(filterPredicates, javaParser, pythonParser) { override fun extractLabel(methodInfo: MethodInfo, filePath: String): String? { val methodNameNode = methodInfo.method.nameNode ?: return null diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 19ca4f23..7454480c 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -4,7 +4,7 @@ import astminer.common.model.LabeledPathContexts import astminer.common.model.MethodInfo import astminer.parse.java.GumTreeJavaNode import astminer.parse.java.GumTreeJavaParser -import astminer.parse.java.GumTreeMethodSplitter +import astminer.parse.java.GumTreeJavaMethodSplitter import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import astminer.paths.CsvPathStorage @@ -34,7 +34,7 @@ fun allJavaMethods() { val fileNode = GumTreeJavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix //extract method nodes - val methodNodes = GumTreeMethodSplitter().splitIntoMethods(fileNode) + val methodNodes = GumTreeJavaMethodSplitter().splitIntoMethods(fileNode) methodNodes.forEach { methodInfo -> //Retrieve paths from every node individually diff --git a/src/main/kotlin/astminer/parse/java/GumTreeMethodSplitter.kt 
b/src/main/kotlin/astminer/parse/java/GumTreeJavaMethodSplitter.kt similarity index 97% rename from src/main/kotlin/astminer/parse/java/GumTreeMethodSplitter.kt rename to src/main/kotlin/astminer/parse/java/GumTreeJavaMethodSplitter.kt index a66fc261..6571610c 100644 --- a/src/main/kotlin/astminer/parse/java/GumTreeMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/java/GumTreeJavaMethodSplitter.kt @@ -5,7 +5,7 @@ import astminer.common.preOrder private fun GumTreeJavaNode.isTypeNode() = getTypeLabel().endsWith("Type") -class GumTreeMethodSplitter : TreeMethodSplitter { +class GumTreeJavaMethodSplitter : TreeMethodSplitter { companion object { private object TypeLabels { diff --git a/src/test/kotlin/astminer/parse/java/GumTreeMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/java/GumTreeJavaMethodSplitterTest.kt similarity index 96% rename from src/test/kotlin/astminer/parse/java/GumTreeMethodSplitterTest.kt rename to src/test/kotlin/astminer/parse/java/GumTreeJavaMethodSplitterTest.kt index a59e6a8c..3670c3ac 100644 --- a/src/test/kotlin/astminer/parse/java/GumTreeMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/java/GumTreeJavaMethodSplitterTest.kt @@ -11,10 +11,10 @@ private fun createTree(filename: String): GumTreeJavaNode { } private fun createAndSplitTree(filename: String): Collection> { - return GumTreeMethodSplitter().splitIntoMethods(createTree(filename)) + return GumTreeJavaMethodSplitter().splitIntoMethods(createTree(filename)) } -class GumTreeMethodSplitterTest { +class GumTreeJavaMethodSplitterTest { @Test fun testMethodExtraction1() { val methodInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/1.java") diff --git a/src/test/kotlin/astminer/parse/python/GumTreeMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/python/GumTreeJavaMethodSplitterTest.kt similarity index 99% rename from src/test/kotlin/astminer/parse/python/GumTreeMethodSplitterTest.kt rename to 
src/test/kotlin/astminer/parse/python/GumTreeJavaMethodSplitterTest.kt index 303e66fe..608a5b9e 100644 --- a/src/test/kotlin/astminer/parse/python/GumTreeMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/python/GumTreeJavaMethodSplitterTest.kt @@ -6,7 +6,7 @@ import java.io.File import kotlin.test.assertEquals import kotlin.test.assertNotNull -class GumTreeMethodSplitterTest { +class GumTreeJavaMethodSplitterTest { private fun parse(filename: String): GumTreePythonNode? = GumTreePythonParser().parseInputStream(File(filename).inputStream()) From f7d8b52c534774b0746a3eba7b31bdd2cea8d163 Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 25 Mar 2021 18:20:30 +0300 Subject: [PATCH 034/308] language handlers added --- .../kotlin/astminer/parse/AntlrHandler.kt | 35 +++++++++++++++++++ .../kotlin/astminer/parse/GumtreeHandler.kt | 31 ++++++++++++++++ src/main/kotlin/astminer/parse/factory.kt | 26 ++++++++++++++ .../kotlin/astminer/parse/handlerModel.kt | 10 ++++++ 4 files changed, 102 insertions(+) create mode 100644 src/main/kotlin/astminer/parse/AntlrHandler.kt create mode 100644 src/main/kotlin/astminer/parse/GumtreeHandler.kt create mode 100644 src/main/kotlin/astminer/parse/factory.kt create mode 100644 src/main/kotlin/astminer/parse/handlerModel.kt diff --git a/src/main/kotlin/astminer/parse/AntlrHandler.kt b/src/main/kotlin/astminer/parse/AntlrHandler.kt new file mode 100644 index 00000000..71f2a867 --- /dev/null +++ b/src/main/kotlin/astminer/parse/AntlrHandler.kt @@ -0,0 +1,35 @@ +package astminer.parse + +import astminer.common.model.MethodInfo +import astminer.common.model.Node +import astminer.common.model.TreeMethodSplitter +import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.java.JavaMethodSplitter +import astminer.parse.antlr.java.JavaParser +import astminer.parse.antlr.javascript.JavaScriptMethodSplitter +import astminer.parse.antlr.javascript.JavaScriptParser +import astminer.parse.antlr.python.PythonMethodSplitter +import 
astminer.parse.antlr.python.PythonParser + +abstract class AntlrLanguageHandler : LanguageHandler { + abstract val splitter: TreeMethodSplitter + override fun splitIntoMethods(root: Node): Collection> { + require(root is SimpleNode) { "Wrong node type" } + return splitter.splitIntoMethods(root) + } +} + +class AntlrJavaHandler : AntlrLanguageHandler() { + override val parser = JavaParser() + override val splitter = JavaMethodSplitter() +} + +class AntlrPythonHandler : AntlrLanguageHandler() { + override val parser = PythonParser() + override val splitter = PythonMethodSplitter() +} + +class AntlrJavascriptHandler : AntlrLanguageHandler() { + override val parser = JavaScriptParser() + override val splitter = JavaScriptMethodSplitter() +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/GumtreeHandler.kt b/src/main/kotlin/astminer/parse/GumtreeHandler.kt new file mode 100644 index 00000000..c2871f06 --- /dev/null +++ b/src/main/kotlin/astminer/parse/GumtreeHandler.kt @@ -0,0 +1,31 @@ +package astminer.parse + +import astminer.common.model.MethodInfo +import astminer.common.model.Node +import astminer.parse.java.GumTreeJavaNode +import astminer.parse.java.GumTreeJavaParser +import astminer.parse.java.GumTreeMethodSplitter +import astminer.parse.python.GumTreePythonMethodSplitter +import astminer.parse.python.GumTreePythonNode +import astminer.parse.python.GumTreePythonParser + +abstract class GumTreeHandler : LanguageHandler + +class JavaGumtreeHandler() : GumTreeHandler() { + override val parser = GumTreeJavaParser() + private val splitter = GumTreeMethodSplitter() + + override fun splitIntoMethods(root: Node): Collection> { + require(root is GumTreeJavaNode) { "Wrong node type" } + return splitter.splitIntoMethods(root) + } +} + +class PythonGumTreeHandler : GumTreeHandler() { + override val parser = GumTreePythonParser() + private val splitter = GumTreePythonMethodSplitter() + override fun splitIntoMethods(root: Node): Collection> { + 
require(root is GumTreePythonNode) { "Wrong node type" } + return splitter.splitIntoMethods(root) + } +} diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt new file mode 100644 index 00000000..ed2f3a43 --- /dev/null +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -0,0 +1,26 @@ +package astminer.parse + +fun getLanguageHandler(extension: String, parserType: String): LanguageHandler { + return when (parserType) { + "gumtree" -> getGumtreeHandler(extension) + "antlr" -> getAntlrHandler(extension) + else -> throw UnsupportedOperationException() + } +} + +private fun getGumtreeHandler(extension: String): GumTreeHandler { + return when (extension) { + "java" -> JavaGumtreeHandler() + "python" -> PythonGumTreeHandler() + else -> throw UnsupportedOperationException() + } +} + +private fun getAntlrHandler(extension: String): AntlrLanguageHandler { + return when (extension) { + "java" -> AntlrJavaHandler() + "javascript" -> AntlrJavascriptHandler() + "python" -> AntlrPythonHandler() + else -> throw UnsupportedOperationException() + } +} diff --git a/src/main/kotlin/astminer/parse/handlerModel.kt b/src/main/kotlin/astminer/parse/handlerModel.kt new file mode 100644 index 00000000..0b3fae44 --- /dev/null +++ b/src/main/kotlin/astminer/parse/handlerModel.kt @@ -0,0 +1,10 @@ +package astminer.parse + +import astminer.common.model.MethodInfo +import astminer.common.model.Node +import astminer.common.model.Parser + +interface LanguageHandler { + fun splitIntoMethods(root: Node): Collection> + val parser: Parser +} From fa33bafb0d1a419a19eb5b8585d062661198df5f Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 25 Mar 2021 18:29:38 +0300 Subject: [PATCH 035/308] fuzzy support added --- .../kotlin/astminer/parse/FuzzyHandler.kt | 21 +++++++++++++++++++ src/main/kotlin/astminer/parse/factory.kt | 8 +++++++ 2 files changed, 29 insertions(+) create mode 100644 src/main/kotlin/astminer/parse/FuzzyHandler.kt diff --git 
a/src/main/kotlin/astminer/parse/FuzzyHandler.kt b/src/main/kotlin/astminer/parse/FuzzyHandler.kt new file mode 100644 index 00000000..1865b61b --- /dev/null +++ b/src/main/kotlin/astminer/parse/FuzzyHandler.kt @@ -0,0 +1,21 @@ +package astminer.parse + +import astminer.common.model.MethodInfo +import astminer.common.model.Node +import astminer.common.model.TreeMethodSplitter +import astminer.parse.cpp.FuzzyCppParser +import astminer.parse.cpp.FuzzyMethodSplitter +import astminer.parse.cpp.FuzzyNode + +abstract class FuzzyHandler : LanguageHandler { + abstract val splitter: TreeMethodSplitter + override fun splitIntoMethods(root: Node): Collection> { + require(root is FuzzyNode) { "Wrong node type" } + return splitter.splitIntoMethods(root) + } +} + +class CppFuzzyHandler : FuzzyHandler() { + override val splitter = FuzzyMethodSplitter() + override val parser = FuzzyCppParser() +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt index ed2f3a43..43fc6929 100644 --- a/src/main/kotlin/astminer/parse/factory.kt +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -4,6 +4,7 @@ fun getLanguageHandler(extension: String, parserType: String): LanguageHandler { return when (parserType) { "gumtree" -> getGumtreeHandler(extension) "antlr" -> getAntlrHandler(extension) + "fuzzy" -> getFuzzyHandler(extension) else -> throw UnsupportedOperationException() } } @@ -24,3 +25,10 @@ private fun getAntlrHandler(extension: String): AntlrLanguageHandler { else -> throw UnsupportedOperationException() } } + +private fun getFuzzyHandler(extension: String): FuzzyHandler { + return when(extension) { + "c","cpp" -> CppFuzzyHandler() + else -> throw UnsupportedOperationException() + } +} \ No newline at end of file From 3bf99d26fe74a877b027e7581c07b17c842b69da Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 28 Mar 2021 11:33:39 +0300 Subject: [PATCH 036/308] gumtree java method splitter added in handler --- 
src/main/kotlin/astminer/parse/GumtreeHandler.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/astminer/parse/GumtreeHandler.kt b/src/main/kotlin/astminer/parse/GumtreeHandler.kt index c2871f06..d6645606 100644 --- a/src/main/kotlin/astminer/parse/GumtreeHandler.kt +++ b/src/main/kotlin/astminer/parse/GumtreeHandler.kt @@ -4,7 +4,7 @@ import astminer.common.model.MethodInfo import astminer.common.model.Node import astminer.parse.java.GumTreeJavaNode import astminer.parse.java.GumTreeJavaParser -import astminer.parse.java.GumTreeMethodSplitter +import astminer.parse.java.GumTreeJavaMethodSplitter import astminer.parse.python.GumTreePythonMethodSplitter import astminer.parse.python.GumTreePythonNode import astminer.parse.python.GumTreePythonParser @@ -13,7 +13,7 @@ abstract class GumTreeHandler : LanguageHandler class JavaGumtreeHandler() : GumTreeHandler() { override val parser = GumTreeJavaParser() - private val splitter = GumTreeMethodSplitter() + private val splitter = GumTreeJavaMethodSplitter() override fun splitIntoMethods(root: Node): Collection> { require(root is GumTreeJavaNode) { "Wrong node type" } From e37a4accb35cbdc3844c0ded854822cf1bc3a8b3 Mon Sep 17 00:00:00 2001 From: illided Date: Mon, 29 Mar 2021 13:39:31 +0300 Subject: [PATCH 037/308] factory added and handlers refactored --- .../kotlin/astminer/parse/AntlrHandler.kt | 39 +++++++++--------- .../kotlin/astminer/parse/FuzzyHandler.kt | 14 +++---- .../kotlin/astminer/parse/GumtreeHandler.kt | 31 +++++++------- src/main/kotlin/astminer/parse/factory.kt | 32 ++++++++------- .../kotlin/astminer/parse/handlerModel.kt | 40 ++++++++++++++++--- 5 files changed, 92 insertions(+), 64 deletions(-) diff --git a/src/main/kotlin/astminer/parse/AntlrHandler.kt b/src/main/kotlin/astminer/parse/AntlrHandler.kt index 71f2a867..90dbc7f0 100644 --- a/src/main/kotlin/astminer/parse/AntlrHandler.kt +++ b/src/main/kotlin/astminer/parse/AntlrHandler.kt @@ -1,8 +1,6 @@ package 
astminer.parse -import astminer.common.model.MethodInfo -import astminer.common.model.Node -import astminer.common.model.TreeMethodSplitter +import astminer.common.model.ParseResult import astminer.parse.antlr.SimpleNode import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser @@ -10,26 +8,31 @@ import astminer.parse.antlr.javascript.JavaScriptMethodSplitter import astminer.parse.antlr.javascript.JavaScriptParser import astminer.parse.antlr.python.PythonMethodSplitter import astminer.parse.antlr.python.PythonParser +import java.io.File -abstract class AntlrLanguageHandler : LanguageHandler { - abstract val splitter: TreeMethodSplitter - override fun splitIntoMethods(root: Node): Collection> { - require(root is SimpleNode) { "Wrong node type" } - return splitter.splitIntoMethods(root) +object AntlrJavaHandlerFactory : HandlerFactory { + override fun createHandler(file: File) = AntlrJavaHandler(file) + + class AntlrJavaHandler(file: File) : LanguageHandler() { + override val parseResult: ParseResult = JavaParser().parseFile(file) + override val splitter = JavaMethodSplitter() } } -class AntlrJavaHandler : AntlrLanguageHandler() { - override val parser = JavaParser() - override val splitter = JavaMethodSplitter() -} +object AntlrPythonHandlerFactory : HandlerFactory { + override fun createHandler(file: File) = AntlrPythonHandler(file) -class AntlrPythonHandler : AntlrLanguageHandler() { - override val parser = PythonParser() - override val splitter = PythonMethodSplitter() + class AntlrPythonHandler(file: File) : LanguageHandler() { + override val parseResult: ParseResult = PythonParser().parseFile(file) + override val splitter = PythonMethodSplitter() + } } -class AntlrJavascriptHandler : AntlrLanguageHandler() { - override val parser = JavaScriptParser() - override val splitter = JavaScriptMethodSplitter() +object AntlrJavascriptHandlerFactory : HandlerFactory { + override fun createHandler(file: File) = 
AntlrJavascriptHandler(file) + + class AntlrJavascriptHandler(file: File) : LanguageHandler() { + override val parseResult: ParseResult = JavaScriptParser().parseFile(file) + override val splitter = JavaScriptMethodSplitter() + } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/FuzzyHandler.kt b/src/main/kotlin/astminer/parse/FuzzyHandler.kt index 1865b61b..e09ed1f1 100644 --- a/src/main/kotlin/astminer/parse/FuzzyHandler.kt +++ b/src/main/kotlin/astminer/parse/FuzzyHandler.kt @@ -2,20 +2,18 @@ package astminer.parse import astminer.common.model.MethodInfo import astminer.common.model.Node +import astminer.common.model.ParseResult import astminer.common.model.TreeMethodSplitter import astminer.parse.cpp.FuzzyCppParser import astminer.parse.cpp.FuzzyMethodSplitter import astminer.parse.cpp.FuzzyNode +import java.io.File -abstract class FuzzyHandler : LanguageHandler { - abstract val splitter: TreeMethodSplitter - override fun splitIntoMethods(root: Node): Collection> { - require(root is FuzzyNode) { "Wrong node type" } - return splitter.splitIntoMethods(root) - } +object CppFuzzyHandlerFactory: HandlerFactory { + override fun createHandler(file: File): LanguageHandler = CppFuzzyHandler(file) } -class CppFuzzyHandler : FuzzyHandler() { +class CppFuzzyHandler(file: File) : LanguageHandler() { override val splitter = FuzzyMethodSplitter() - override val parser = FuzzyCppParser() + override val parseResult: ParseResult = FuzzyCppParser().parseFile(file) } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/GumtreeHandler.kt b/src/main/kotlin/astminer/parse/GumtreeHandler.kt index d6645606..a8e3c065 100644 --- a/src/main/kotlin/astminer/parse/GumtreeHandler.kt +++ b/src/main/kotlin/astminer/parse/GumtreeHandler.kt @@ -1,31 +1,28 @@ package astminer.parse -import astminer.common.model.MethodInfo -import astminer.common.model.Node +import astminer.common.model.ParseResult import astminer.parse.java.GumTreeJavaNode import 
astminer.parse.java.GumTreeJavaParser import astminer.parse.java.GumTreeJavaMethodSplitter import astminer.parse.python.GumTreePythonMethodSplitter import astminer.parse.python.GumTreePythonNode import astminer.parse.python.GumTreePythonParser +import java.io.File -abstract class GumTreeHandler : LanguageHandler +object JavaGumtreeHandlerFactory : HandlerFactory { + override fun createHandler(file: File): LanguageHandler = JavaGumtreeHandler(file) -class JavaGumtreeHandler() : GumTreeHandler() { - override val parser = GumTreeJavaParser() - private val splitter = GumTreeJavaMethodSplitter() - - override fun splitIntoMethods(root: Node): Collection> { - require(root is GumTreeJavaNode) { "Wrong node type" } - return splitter.splitIntoMethods(root) + class JavaGumtreeHandler(file: File) : LanguageHandler() { + override val splitter = GumTreeJavaMethodSplitter() + override val parseResult: ParseResult = GumTreeJavaParser().parseFile(file) } } -class PythonGumTreeHandler : GumTreeHandler() { - override val parser = GumTreePythonParser() - private val splitter = GumTreePythonMethodSplitter() - override fun splitIntoMethods(root: Node): Collection> { - require(root is GumTreePythonNode) { "Wrong node type" } - return splitter.splitIntoMethods(root) +object PythonGumTreeHandlerFactory : HandlerFactory { + override fun createHandler(file: File): LanguageHandler = PythonGumTreeHandler(file) + + class PythonGumTreeHandler(file: File) : LanguageHandler() { + override val splitter = GumTreePythonMethodSplitter() + override val parseResult: ParseResult = GumTreePythonParser().parseFile(file) } -} +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt index 43fc6929..31798aef 100644 --- a/src/main/kotlin/astminer/parse/factory.kt +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -1,34 +1,36 @@ package astminer.parse -fun getLanguageHandler(extension: String, parserType: String): LanguageHandler { +import 
java.io.File + +fun getHandlerFactory(extension: String, parserType: String): HandlerFactory { return when (parserType) { - "gumtree" -> getGumtreeHandler(extension) - "antlr" -> getAntlrHandler(extension) - "fuzzy" -> getFuzzyHandler(extension) + "gumtree" -> getGumtreeHandlerFactory(extension) + "antlr" -> getAntlrHandlerFactory(extension) + "fuzzy" -> getFuzzyHandlerFactory(extension) else -> throw UnsupportedOperationException() } } -private fun getGumtreeHandler(extension: String): GumTreeHandler { +private fun getGumtreeHandlerFactory(extension: String): HandlerFactory { return when (extension) { - "java" -> JavaGumtreeHandler() - "python" -> PythonGumTreeHandler() + "java" -> JavaGumtreeHandlerFactory + "python" -> PythonGumTreeHandlerFactory else -> throw UnsupportedOperationException() } } -private fun getAntlrHandler(extension: String): AntlrLanguageHandler { +private fun getAntlrHandlerFactory(extension: String): HandlerFactory { return when (extension) { - "java" -> AntlrJavaHandler() - "javascript" -> AntlrJavascriptHandler() - "python" -> AntlrPythonHandler() + "java" -> AntlrJavaHandlerFactory + "javascript" -> AntlrJavascriptHandlerFactory + "python" -> AntlrPythonHandlerFactory else -> throw UnsupportedOperationException() } } -private fun getFuzzyHandler(extension: String): FuzzyHandler { - return when(extension) { - "c","cpp" -> CppFuzzyHandler() +private fun getFuzzyHandlerFactory(extension: String): HandlerFactory { + return when (extension) { + "c", "cpp" -> CppFuzzyHandlerFactory else -> throw UnsupportedOperationException() } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/handlerModel.kt b/src/main/kotlin/astminer/parse/handlerModel.kt index 0b3fae44..979ad7bd 100644 --- a/src/main/kotlin/astminer/parse/handlerModel.kt +++ b/src/main/kotlin/astminer/parse/handlerModel.kt @@ -1,10 +1,38 @@ package astminer.parse -import astminer.common.model.MethodInfo -import astminer.common.model.Node -import 
astminer.common.model.Parser +import astminer.common.model.* +import astminer.common.preOrder +import astminer.common.setNormalizedToken +import astminer.common.splitToSubtokens +import java.io.File -interface LanguageHandler { - fun splitIntoMethods(root: Node): Collection> - val parser: Parser +interface HandlerFactory { + fun createHandler(file: File): LanguageHandler +} + +abstract class LanguageHandler { + abstract val parseResult: ParseResult + protected abstract val splitter: TreeMethodSplitter + + fun splitIntoMethods(): Collection> { + val root = parseResult.root ?: return emptyList() + return splitter.splitIntoMethods(root) + } + + fun normalizeParseResult(splitTokens: Boolean): LanguageHandler { + parseResult.root?.preOrder()?.forEach { node -> processNodeToken(node, splitTokens) } + return this + } + + private fun processNodeToken(node: Node, splitToken: Boolean) { + if (splitToken) { + node.setNormalizedToken(separateToken(node.getToken())) + } else { + node.setNormalizedToken() + } + } + + private fun separateToken(token: String, separator: CharSequence = "|"): String { + return splitToSubtokens(token).joinToString(separator) + } } From 57ba9474ffd57cf6430deeeee0549eed55e71b83 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 30 Mar 2021 18:11:23 +0300 Subject: [PATCH 038/308] parsers and splitters moved according to the hierarchy --- .../java/astminer/examples/AllJavaFiles.java | 2 +- src/main/kotlin/astminer/cli/LabelExtractors.kt | 12 ++++++------ .../kotlin/astminer/cli/PathContextsExtractor.kt | 4 ++-- .../kotlin/astminer/cli/ProjectPreprocessor.kt | 2 +- src/main/kotlin/astminer/cli/utils.kt | 4 ++-- src/main/kotlin/astminer/examples/AllCppFiles.kt | 2 +- .../astminer/examples/AllJavaFilesGumTree.kt | 2 +- .../kotlin/astminer/examples/AllJavaMethods.kt | 6 +++--- .../kotlin/astminer/examples/AllPythonMethods.kt | 6 +++--- .../astminer/examples/FeatureExtraction.kt | 2 +- src/main/kotlin/astminer/parse/FuzzyHandler.kt | 9 +++------ 
.../astminer/parse/{ => antlr}/AntlrHandler.kt | 5 +++-- src/main/kotlin/astminer/parse/factory.kt | 6 +++++- .../parse/{ => fuzzy}/cpp/FuzzyCppParser.kt | 5 ++--- .../parse/{ => fuzzy}/cpp/FuzzyMethodSplitter.kt | 2 +- .../astminer/parse/{ => fuzzy}/cpp/FuzzyNode.kt | 2 +- .../astminer/parse/{ => fuzzy}/cpp/utils.kt | 2 +- .../parse/{ => gumtree}/GumtreeHandler.kt | 16 +++++++++------- .../java/GumTreeJavaMethodSplitter.kt | 2 +- .../parse/{ => gumtree}/java/GumTreeJavaNode.kt | 2 +- .../{ => gumtree}/java/GumTreeJavaParser.kt | 3 ++- .../python/GumTreePythonMethodSplitter.kt | 2 +- .../{ => gumtree}/python/GumTreePythonNode.kt | 2 +- .../{ => gumtree}/python/GumTreePythonParser.kt | 3 ++- .../astminer/parse/cpp/FuzzyCppParserTest.kt | 2 ++ .../parse/cpp/FuzzyMethodSplitterTest.kt | 3 +++ .../java/GumTreeJavaMethodSplitterTest.kt | 3 ++- .../{ => gumtree}/java/GumTreeJavaParserTest.kt | 2 +- .../python/GumTreeJavaMethodSplitterTest.kt | 3 ++- .../python/GumTreePythonParserTest.kt | 3 ++- 30 files changed, 66 insertions(+), 53 deletions(-) rename src/main/kotlin/astminer/parse/{ => antlr}/AntlrHandler.kt (93%) rename src/main/kotlin/astminer/parse/{ => fuzzy}/cpp/FuzzyCppParser.kt (98%) rename src/main/kotlin/astminer/parse/{ => fuzzy}/cpp/FuzzyMethodSplitter.kt (98%) rename src/main/kotlin/astminer/parse/{ => fuzzy}/cpp/FuzzyNode.kt (97%) rename src/main/kotlin/astminer/parse/{ => fuzzy}/cpp/utils.kt (96%) rename src/main/kotlin/astminer/parse/{ => gumtree}/GumtreeHandler.kt (66%) rename src/main/kotlin/astminer/parse/{ => gumtree}/java/GumTreeJavaMethodSplitter.kt (98%) rename src/main/kotlin/astminer/parse/{ => gumtree}/java/GumTreeJavaNode.kt (97%) rename src/main/kotlin/astminer/parse/{ => gumtree}/java/GumTreeJavaParser.kt (88%) rename src/main/kotlin/astminer/parse/{ => gumtree}/python/GumTreePythonMethodSplitter.kt (99%) rename src/main/kotlin/astminer/parse/{ => gumtree}/python/GumTreePythonNode.kt (96%) rename src/main/kotlin/astminer/parse/{ => 
gumtree}/python/GumTreePythonParser.kt (88%) rename src/test/kotlin/astminer/parse/{ => gumtree}/java/GumTreeJavaMethodSplitterTest.kt (97%) rename src/test/kotlin/astminer/parse/{ => gumtree}/java/GumTreeJavaParserTest.kt (96%) rename src/test/kotlin/astminer/parse/{ => gumtree}/python/GumTreeJavaMethodSplitterTest.kt (98%) rename src/test/kotlin/astminer/parse/{ => gumtree}/python/GumTreePythonParserTest.kt (93%) diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 26789b59..17c6d32e 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -1,7 +1,7 @@ package astminer.examples; import astminer.common.model.*; -import astminer.parse.java.GumTreeJavaParser; +import astminer.parse.gumtree.java.GumTreeJavaParser; import astminer.paths.*; import java.io.FileInputStream; diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index c418a48b..54398e0c 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -9,12 +9,12 @@ import astminer.parse.antlr.SimpleNode import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.javascript.JavaScriptMethodSplitter import astminer.parse.antlr.python.PythonMethodSplitter -import astminer.parse.cpp.FuzzyMethodSplitter -import astminer.parse.cpp.FuzzyNode -import astminer.parse.java.GumTreeJavaNode -import astminer.parse.java.GumTreeJavaMethodSplitter -import astminer.parse.python.GumTreePythonMethodSplitter -import astminer.parse.python.GumTreePythonNode +import astminer.parse.fuzzy.cpp.FuzzyMethodSplitter +import astminer.parse.fuzzy.cpp.FuzzyNode +import astminer.parse.gumtree.java.GumTreeJavaNode +import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter +import astminer.parse.gumtree.python.GumTreePythonMethodSplitter +import 
astminer.parse.gumtree.python.GumTreePythonNode import java.io.File diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index ff8d4d61..ebb5ca36 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -6,8 +6,8 @@ import astminer.common.model.* import astminer.parse.antlr.java.JavaParser import astminer.parse.antlr.javascript.JavaScriptParser import astminer.parse.antlr.python.PythonParser -import astminer.parse.cpp.FuzzyCppParser -import astminer.parse.java.GumTreeJavaParser +import astminer.parse.fuzzy.cpp.FuzzyCppParser +import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.paths.Code2VecPathStorage import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings diff --git a/src/main/kotlin/astminer/cli/ProjectPreprocessor.kt b/src/main/kotlin/astminer/cli/ProjectPreprocessor.kt index 52b6f5f2..1dfa5ca7 100644 --- a/src/main/kotlin/astminer/cli/ProjectPreprocessor.kt +++ b/src/main/kotlin/astminer/cli/ProjectPreprocessor.kt @@ -1,6 +1,6 @@ package astminer.cli -import astminer.parse.cpp.FuzzyCppParser +import astminer.parse.fuzzy.cpp.FuzzyCppParser import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.option import com.github.ajalt.clikt.parameters.options.required diff --git a/src/main/kotlin/astminer/cli/utils.kt b/src/main/kotlin/astminer/cli/utils.kt index 815e4b5e..6567528a 100644 --- a/src/main/kotlin/astminer/cli/utils.kt +++ b/src/main/kotlin/astminer/cli/utils.kt @@ -2,8 +2,8 @@ package astminer.cli import astminer.parse.antlr.java.JavaParser import astminer.parse.antlr.python.PythonParser -import astminer.parse.cpp.FuzzyCppParser -import astminer.parse.java.GumTreeJavaParser +import astminer.parse.fuzzy.cpp.FuzzyCppParser +import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.common.model.Node import 
astminer.common.model.ParseResult import astminer.common.model.Parser diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index 9396f598..a302731e 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -4,7 +4,7 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.common.model.LabeledPathContexts -import astminer.parse.cpp.FuzzyCppParser +import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import astminer.paths.CsvPathStorage diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index 0c34108b..1309e7ca 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -1,7 +1,7 @@ package astminer.examples import astminer.common.model.LabeledPathContexts -import astminer.parse.java.GumTreeJavaParser +import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import astminer.paths.CsvPathStorage diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 7454480c..beb03f1d 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -2,9 +2,9 @@ package astminer.examples import astminer.common.model.LabeledPathContexts import astminer.common.model.MethodInfo -import astminer.parse.java.GumTreeJavaNode -import astminer.parse.java.GumTreeJavaParser -import astminer.parse.java.GumTreeJavaMethodSplitter +import astminer.parse.gumtree.java.GumTreeJavaNode +import astminer.parse.gumtree.java.GumTreeJavaParser +import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter import 
astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import astminer.paths.CsvPathStorage diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 488bb4eb..3e991560 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -2,9 +2,9 @@ package astminer.examples import astminer.common.model.LabeledPathContexts import astminer.common.model.MethodInfo -import astminer.parse.python.GumTreePythonMethodSplitter -import astminer.parse.python.GumTreePythonNode -import astminer.parse.python.GumTreePythonParser +import astminer.parse.gumtree.python.GumTreePythonMethodSplitter +import astminer.parse.gumtree.python.GumTreePythonNode +import astminer.parse.gumtree.python.GumTreePythonParser import astminer.paths.CsvPathStorage import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings diff --git a/src/main/kotlin/astminer/examples/FeatureExtraction.kt b/src/main/kotlin/astminer/examples/FeatureExtraction.kt index 318088ff..786ff840 100644 --- a/src/main/kotlin/astminer/examples/FeatureExtraction.kt +++ b/src/main/kotlin/astminer/examples/FeatureExtraction.kt @@ -2,7 +2,7 @@ package astminer.examples import astminer.common.numberOfLines import astminer.featureextraction.* -import astminer.parse.java.GumTreeJavaParser +import astminer.parse.gumtree.java.GumTreeJavaParser import java.io.File diff --git a/src/main/kotlin/astminer/parse/FuzzyHandler.kt b/src/main/kotlin/astminer/parse/FuzzyHandler.kt index e09ed1f1..5b7eb3f7 100644 --- a/src/main/kotlin/astminer/parse/FuzzyHandler.kt +++ b/src/main/kotlin/astminer/parse/FuzzyHandler.kt @@ -1,12 +1,9 @@ package astminer.parse -import astminer.common.model.MethodInfo -import astminer.common.model.Node import astminer.common.model.ParseResult -import astminer.common.model.TreeMethodSplitter -import astminer.parse.cpp.FuzzyCppParser -import 
astminer.parse.cpp.FuzzyMethodSplitter -import astminer.parse.cpp.FuzzyNode +import astminer.parse.fuzzy.cpp.FuzzyCppParser +import astminer.parse.fuzzy.cpp.FuzzyMethodSplitter +import astminer.parse.fuzzy.cpp.FuzzyNode import java.io.File object CppFuzzyHandlerFactory: HandlerFactory { diff --git a/src/main/kotlin/astminer/parse/AntlrHandler.kt b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt similarity index 93% rename from src/main/kotlin/astminer/parse/AntlrHandler.kt rename to src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt index 90dbc7f0..34740c48 100644 --- a/src/main/kotlin/astminer/parse/AntlrHandler.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt @@ -1,7 +1,8 @@ -package astminer.parse +package astminer.parse.antlr import astminer.common.model.ParseResult -import astminer.parse.antlr.SimpleNode +import astminer.parse.HandlerFactory +import astminer.parse.LanguageHandler import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser import astminer.parse.antlr.javascript.JavaScriptMethodSplitter diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt index 31798aef..fbc1d608 100644 --- a/src/main/kotlin/astminer/parse/factory.kt +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -1,6 +1,10 @@ package astminer.parse -import java.io.File +import astminer.parse.antlr.AntlrJavaHandlerFactory +import astminer.parse.antlr.AntlrJavascriptHandlerFactory +import astminer.parse.antlr.AntlrPythonHandlerFactory +import astminer.parse.gumtree.JavaGumtreeHandlerFactory +import astminer.parse.gumtree.PythonGumTreeHandlerFactory fun getHandlerFactory(extension: String, parserType: String): HandlerFactory { return when (parserType) { diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt similarity index 98% rename from src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt rename to 
src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt index e8cacb76..a5c18b36 100644 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt @@ -1,4 +1,4 @@ -package astminer.parse.cpp +package astminer.parse.fuzzy.cpp import astminer.common.model.ParseResult import astminer.common.model.Parser @@ -9,7 +9,6 @@ import io.shiftleft.codepropertygraph.generated.NodeTypes import io.shiftleft.fuzzyc2cpg.FuzzyC2Cpg import overflowdb.Edge import overflowdb.Node -import overflowdb.Element import scala.Option import scala.collection.immutable.Set import java.io.File @@ -102,7 +101,7 @@ class FuzzyCppParser : Parser { /** * Convert [cpg][io.shiftleft.codepropertygraph.Cpg] created by fuzzyc2cpg - * to list of [FuzzyNode][astminer.parse.cpp.FuzzyNode]. + * to list of [FuzzyNode][astminer.parse.fuzzy.cpp.FuzzyNode]. * Cpg may contain graphs for several files, in that case several ASTs will be created. * @param cpg to be converted * @param filePath to the parsed file that will be used if parsing failed diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyMethodSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt similarity index 98% rename from src/main/kotlin/astminer/parse/cpp/FuzzyMethodSplitter.kt rename to src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt index 9e351c72..b6f387cd 100644 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt @@ -1,4 +1,4 @@ -package astminer.parse.cpp +package astminer.parse.fuzzy.cpp import astminer.common.* import astminer.common.model.* diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyNode.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt similarity index 97% rename from src/main/kotlin/astminer/parse/cpp/FuzzyNode.kt rename to src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt index 490803c9..574c5258 100644 --- 
a/src/main/kotlin/astminer/parse/cpp/FuzzyNode.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt @@ -1,4 +1,4 @@ -package astminer.parse.cpp +package astminer.parse.fuzzy.cpp import astminer.common.model.Node import com.google.common.collect.TreeMultiset diff --git a/src/main/kotlin/astminer/parse/cpp/utils.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt similarity index 96% rename from src/main/kotlin/astminer/parse/cpp/utils.kt rename to src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt index 5eade8d4..fc117afa 100644 --- a/src/main/kotlin/astminer/parse/cpp/utils.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt @@ -1,4 +1,4 @@ -package astminer.parse.cpp +package astminer.parse.fuzzy.cpp import java.io.File import java.util.concurrent.TimeUnit diff --git a/src/main/kotlin/astminer/parse/GumtreeHandler.kt b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt similarity index 66% rename from src/main/kotlin/astminer/parse/GumtreeHandler.kt rename to src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt index a8e3c065..9ac399ed 100644 --- a/src/main/kotlin/astminer/parse/GumtreeHandler.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt @@ -1,12 +1,14 @@ -package astminer.parse +package astminer.parse.gumtree import astminer.common.model.ParseResult -import astminer.parse.java.GumTreeJavaNode -import astminer.parse.java.GumTreeJavaParser -import astminer.parse.java.GumTreeJavaMethodSplitter -import astminer.parse.python.GumTreePythonMethodSplitter -import astminer.parse.python.GumTreePythonNode -import astminer.parse.python.GumTreePythonParser +import astminer.parse.HandlerFactory +import astminer.parse.LanguageHandler +import astminer.parse.gumtree.java.GumTreeJavaNode +import astminer.parse.gumtree.java.GumTreeJavaParser +import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter +import astminer.parse.gumtree.python.GumTreePythonMethodSplitter +import astminer.parse.gumtree.python.GumTreePythonNode +import 
astminer.parse.gumtree.python.GumTreePythonParser import java.io.File object JavaGumtreeHandlerFactory : HandlerFactory { diff --git a/src/main/kotlin/astminer/parse/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt similarity index 98% rename from src/main/kotlin/astminer/parse/java/GumTreeJavaMethodSplitter.kt rename to src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt index 6571610c..cc4c5ae1 100644 --- a/src/main/kotlin/astminer/parse/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt @@ -1,4 +1,4 @@ -package astminer.parse.java +package astminer.parse.gumtree.java import astminer.common.model.* import astminer.common.preOrder diff --git a/src/main/kotlin/astminer/parse/java/GumTreeJavaNode.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaNode.kt similarity index 97% rename from src/main/kotlin/astminer/parse/java/GumTreeJavaNode.kt rename to src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaNode.kt index d8d4e163..d5c5bdc7 100644 --- a/src/main/kotlin/astminer/parse/java/GumTreeJavaNode.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaNode.kt @@ -1,4 +1,4 @@ -package astminer.parse.java +package astminer.parse.gumtree.java import com.github.gumtreediff.tree.ITree import com.github.gumtreediff.tree.TreeContext diff --git a/src/main/kotlin/astminer/parse/java/GumTreeJavaParser.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt similarity index 88% rename from src/main/kotlin/astminer/parse/java/GumTreeJavaParser.kt rename to src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt index 2112f5cd..a0b20be3 100644 --- a/src/main/kotlin/astminer/parse/java/GumTreeJavaParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt @@ -1,9 +1,10 @@ -package astminer.parse.java +package astminer.parse.gumtree.java import 
com.github.gumtreediff.client.Run import com.github.gumtreediff.gen.jdt.JdtTreeGenerator import com.github.gumtreediff.tree.TreeContext import astminer.common.model.Parser +import astminer.parse.gumtree.java.GumTreeJavaNode import java.io.InputStream import java.io.InputStreamReader diff --git a/src/main/kotlin/astminer/parse/python/GumTreePythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt similarity index 99% rename from src/main/kotlin/astminer/parse/python/GumTreePythonMethodSplitter.kt rename to src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt index 01571c92..89282acd 100644 --- a/src/main/kotlin/astminer/parse/python/GumTreePythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt @@ -1,4 +1,4 @@ -package astminer.parse.python +package astminer.parse.gumtree.python import astminer.common.model.ElementNode import astminer.common.model.MethodInfo diff --git a/src/main/kotlin/astminer/parse/python/GumTreePythonNode.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonNode.kt similarity index 96% rename from src/main/kotlin/astminer/parse/python/GumTreePythonNode.kt rename to src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonNode.kt index e696f21a..82d7743a 100644 --- a/src/main/kotlin/astminer/parse/python/GumTreePythonNode.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonNode.kt @@ -1,4 +1,4 @@ -package astminer.parse.python +package astminer.parse.gumtree.python import astminer.common.model.Node import com.github.gumtreediff.tree.ITree diff --git a/src/main/kotlin/astminer/parse/python/GumTreePythonParser.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt similarity index 88% rename from src/main/kotlin/astminer/parse/python/GumTreePythonParser.kt rename to src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt index c6d791fc..2591bd2a 100644 --- 
a/src/main/kotlin/astminer/parse/python/GumTreePythonParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt @@ -1,6 +1,7 @@ -package astminer.parse.python +package astminer.parse.gumtree.python import astminer.common.model.Parser +import astminer.parse.gumtree.python.GumTreePythonNode import com.github.gumtreediff.client.Run import com.github.gumtreediff.gen.python.PythonTreeGenerator import com.github.gumtreediff.tree.TreeContext diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index fb508fe3..95a32049 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -2,6 +2,8 @@ package astminer.parse.cpp import astminer.common.getProjectFilesWithExtension import astminer.examples.forFilesWithSuffix +import astminer.parse.fuzzy.cpp.FuzzyCppParser +import astminer.parse.fuzzy.cpp.FuzzyNode import org.junit.Assert import org.junit.Test import java.io.File diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index aebff769..7190776f 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -1,6 +1,9 @@ package astminer.parse.cpp import astminer.common.model.MethodInfo +import astminer.parse.fuzzy.cpp.FuzzyCppParser +import astminer.parse.fuzzy.cpp.FuzzyMethodSplitter +import astminer.parse.fuzzy.cpp.FuzzyNode import org.junit.Test import kotlin.test.assertEquals import java.io.File diff --git a/src/test/kotlin/astminer/parse/java/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt similarity index 97% rename from src/test/kotlin/astminer/parse/java/GumTreeJavaMethodSplitterTest.kt rename to 
src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt index 3670c3ac..fc8c3c07 100644 --- a/src/test/kotlin/astminer/parse/java/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt @@ -1,6 +1,7 @@ -package astminer.parse.java +package astminer.parse.gumtree.java import astminer.common.model.MethodInfo +import astminer.parse.gumtree.java.GumTreeJavaNode import org.junit.Test import java.io.File import kotlin.test.assertEquals diff --git a/src/test/kotlin/astminer/parse/java/GumTreeJavaParserTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt similarity index 96% rename from src/test/kotlin/astminer/parse/java/GumTreeJavaParserTest.kt rename to src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt index 04e359e1..5ee1b47d 100644 --- a/src/test/kotlin/astminer/parse/java/GumTreeJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt @@ -1,4 +1,4 @@ -package astminer.parse.java +package astminer.parse.gumtree.java import astminer.common.getProjectFilesWithExtension import astminer.common.model.Node diff --git a/src/test/kotlin/astminer/parse/python/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt similarity index 98% rename from src/test/kotlin/astminer/parse/python/GumTreeJavaMethodSplitterTest.kt rename to src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt index 608a5b9e..7cf3ed6a 100644 --- a/src/test/kotlin/astminer/parse/python/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt @@ -1,6 +1,7 @@ -package astminer.parse.python +package astminer.parse.gumtree.python import astminer.common.model.MethodInfo +import astminer.parse.gumtree.python.GumTreePythonParser import org.junit.Test import java.io.File import kotlin.test.assertEquals diff --git 
a/src/test/kotlin/astminer/parse/python/GumTreePythonParserTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt similarity index 93% rename from src/test/kotlin/astminer/parse/python/GumTreePythonParserTest.kt rename to src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt index 33eeb673..11c464bc 100644 --- a/src/test/kotlin/astminer/parse/python/GumTreePythonParserTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt @@ -1,5 +1,6 @@ -package astminer.parse.python +package astminer.parse.gumtree.python +import astminer.parse.gumtree.python.GumTreePythonParser import org.junit.After import org.junit.Before import org.junit.Test From b4420623fe0bb68f6b945096a7796460027c9916 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 30 Mar 2021 21:55:03 +0300 Subject: [PATCH 039/308] gumtree nodes merged --- .../kotlin/astminer/cli/LabelExtractors.kt | 7 ++- .../astminer/examples/AllJavaMethods.kt | 4 +- .../astminer/examples/AllPythonMethods.kt | 4 +- .../GumTreeJavaNode.kt => GumTreeNode.kt} | 13 +++--- .../astminer/parse/gumtree/GumtreeHandler.kt | 14 +++--- .../gumtree/java/GumTreeJavaMethodSplitter.kt | 29 ++++++------ .../parse/gumtree/java/GumTreeJavaParser.kt | 10 ++--- .../python/GumTreePythonMethodSplitter.kt | 27 +++++------ .../parse/gumtree/python/GumTreePythonNode.kt | 45 ------------------- .../gumtree/python/GumTreePythonParser.kt | 10 ++--- .../java/GumTreeJavaMethodSplitterTest.kt | 8 ++-- .../python/GumTreeJavaMethodSplitterTest.kt | 6 +-- .../gumtree/python/GumTreePythonParserTest.kt | 1 - 13 files changed, 65 insertions(+), 113 deletions(-) rename src/main/kotlin/astminer/parse/gumtree/{java/GumTreeJavaNode.kt => GumTreeNode.kt} (69%) delete mode 100644 src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonNode.kt diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 54398e0c..6f21cefc 100644 --- 
a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -11,10 +11,9 @@ import astminer.parse.antlr.javascript.JavaScriptMethodSplitter import astminer.parse.antlr.python.PythonMethodSplitter import astminer.parse.fuzzy.cpp.FuzzyMethodSplitter import astminer.parse.fuzzy.cpp.FuzzyNode -import astminer.parse.gumtree.java.GumTreeJavaNode +import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter import astminer.parse.gumtree.python.GumTreePythonMethodSplitter -import astminer.parse.gumtree.python.GumTreePythonNode import java.io.File @@ -65,7 +64,7 @@ abstract class MethodLabelExtractor( when (javaParser) { "gumtree" -> { val methodSplitter = GumTreeJavaMethodSplitter() - methodSplitter.splitIntoMethods(root as GumTreeJavaNode) + methodSplitter.splitIntoMethods(root as GumTreeNode) } "antlr" -> { val methodSplitter = JavaMethodSplitter() @@ -80,7 +79,7 @@ abstract class MethodLabelExtractor( when (pythonParser) { "gumtree" -> { val methodSplitter = GumTreePythonMethodSplitter() - methodSplitter.splitIntoMethods(root as GumTreePythonNode) + methodSplitter.splitIntoMethods(root as GumTreeNode) } "antlr" -> { val methodSplitter = PythonMethodSplitter() diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index beb03f1d..74bc6842 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -2,7 +2,7 @@ package astminer.examples import astminer.common.model.LabeledPathContexts import astminer.common.model.MethodInfo -import astminer.parse.gumtree.java.GumTreeJavaNode +import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter import astminer.paths.PathMiner @@ -12,7 +12,7 @@ import astminer.paths.toPathContext import java.io.File -private fun 
getCsvFriendlyMethodId(methodInfo: MethodInfo): String { +private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { val className = methodInfo.enclosingElementName() ?: "" val methodName = methodInfo.name() ?: "unknown_method" val parameterTypes = methodInfo.methodParameters.joinToString("|") { it.name() ?: "_" } diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 3e991560..1b24b8b6 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -3,7 +3,7 @@ package astminer.examples import astminer.common.model.LabeledPathContexts import astminer.common.model.MethodInfo import astminer.parse.gumtree.python.GumTreePythonMethodSplitter -import astminer.parse.gumtree.python.GumTreePythonNode +import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.python.GumTreePythonParser import astminer.paths.CsvPathStorage import astminer.paths.PathMiner @@ -11,7 +11,7 @@ import astminer.paths.PathRetrievalSettings import astminer.paths.toPathContext import java.io.File -private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { +private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { val className = methodInfo.enclosingElementName() ?: "" val methodName = methodInfo.name() ?: "unknown_method" val parameterTypes = methodInfo.methodParameters.joinToString("|") { it.name() ?: "_" } diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaNode.kt b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt similarity index 69% rename from src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaNode.kt rename to src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt index d5c5bdc7..c03dda2a 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaNode.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt @@ -1,10 +1,10 @@ -package astminer.parse.gumtree.java +package 
astminer.parse.gumtree +import astminer.common.model.Node import com.github.gumtreediff.tree.ITree import com.github.gumtreediff.tree.TreeContext -import astminer.common.model.Node -class GumTreeJavaNode(val wrappedNode: ITree, val context: TreeContext, val parent: GumTreeJavaNode?) : Node { +class GumTreeNode(val wrappedNode: ITree, val context: TreeContext, val parent: GumTreeNode?): Node { private val metadata: MutableMap = HashMap() override fun getMetadata(key: String): Any? { @@ -19,8 +19,8 @@ class GumTreeJavaNode(val wrappedNode: ITree, val context: TreeContext, val pare return childrenList.isEmpty() } - private val childrenList: MutableList by lazy { - wrappedNode.children.map { GumTreeJavaNode(it, context, this) }.toMutableList() + private val childrenList: MutableList by lazy { + wrappedNode.children.map { GumTreeNode(it, context, this) }.toMutableList() } override fun getTypeLabel(): String { @@ -40,7 +40,6 @@ class GumTreeJavaNode(val wrappedNode: ITree, val context: TreeContext, val pare } override fun removeChildrenOfType(typeLabel: String) { - childrenList.removeIf { it.getTypeLabel() == typeLabel } + childrenList.removeIf{ it.getTypeLabel() == typeLabel} } - } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt index 9ac399ed..06c889a9 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt @@ -3,28 +3,26 @@ package astminer.parse.gumtree import astminer.common.model.ParseResult import astminer.parse.HandlerFactory import astminer.parse.LanguageHandler -import astminer.parse.gumtree.java.GumTreeJavaNode import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter import astminer.parse.gumtree.python.GumTreePythonMethodSplitter -import astminer.parse.gumtree.python.GumTreePythonNode import 
astminer.parse.gumtree.python.GumTreePythonParser import java.io.File object JavaGumtreeHandlerFactory : HandlerFactory { - override fun createHandler(file: File): LanguageHandler = JavaGumtreeHandler(file) + override fun createHandler(file: File): LanguageHandler = JavaGumtreeHandler(file) - class JavaGumtreeHandler(file: File) : LanguageHandler() { + class JavaGumtreeHandler(file: File) : LanguageHandler() { override val splitter = GumTreeJavaMethodSplitter() - override val parseResult: ParseResult = GumTreeJavaParser().parseFile(file) + override val parseResult: ParseResult = GumTreeJavaParser().parseFile(file) } } object PythonGumTreeHandlerFactory : HandlerFactory { - override fun createHandler(file: File): LanguageHandler = PythonGumTreeHandler(file) + override fun createHandler(file: File): LanguageHandler = PythonGumTreeHandler(file) - class PythonGumTreeHandler(file: File) : LanguageHandler() { + class PythonGumTreeHandler(file: File) : LanguageHandler() { override val splitter = GumTreePythonMethodSplitter() - override val parseResult: ParseResult = GumTreePythonParser().parseFile(file) + override val parseResult: ParseResult = GumTreePythonParser().parseFile(file) } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt index cc4c5ae1..ce57cfef 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt @@ -2,10 +2,11 @@ package astminer.parse.gumtree.java import astminer.common.model.* import astminer.common.preOrder +import astminer.parse.gumtree.GumTreeNode -private fun GumTreeJavaNode.isTypeNode() = getTypeLabel().endsWith("Type") +private fun GumTreeNode.isTypeNode() = getTypeLabel().endsWith("Type") -class GumTreeJavaMethodSplitter : TreeMethodSplitter { +class GumTreeJavaMethodSplitter : TreeMethodSplitter { 
companion object { private object TypeLabels { @@ -16,12 +17,12 @@ class GumTreeJavaMethodSplitter : TreeMethodSplitter { } } - override fun splitIntoMethods(root: GumTreeJavaNode): Collection> { + override fun splitIntoMethods(root: GumTreeNode): Collection> { val methodRoots = root.preOrder().filter { it.getTypeLabel() == TypeLabels.methodDeclaration } - return methodRoots.map { collectMethodInfo(it as GumTreeJavaNode) } + return methodRoots.map { collectMethodInfo(it as GumTreeNode) } } - private fun collectMethodInfo(methodNode: GumTreeJavaNode): MethodInfo { + private fun collectMethodInfo(methodNode: GumTreeNode): MethodInfo { val methodReturnType = getElementType(methodNode) val methodName = getElementName(methodNode) @@ -37,33 +38,33 @@ class GumTreeJavaMethodSplitter : TreeMethodSplitter { ) } - private fun getElementName(node: GumTreeJavaNode) = node.getChildren().map { - it as GumTreeJavaNode + private fun getElementName(node: GumTreeNode) = node.getChildren().map { + it as GumTreeNode }.firstOrNull { it.getTypeLabel() == TypeLabels.simpleName } - private fun getElementType(node: GumTreeJavaNode) = node.getChildren().map { - it as GumTreeJavaNode + private fun getElementType(node: GumTreeNode) = node.getChildren().map { + it as GumTreeNode }.firstOrNull { it.isTypeNode() } - private fun getEnclosingClass(node: GumTreeJavaNode): GumTreeJavaNode? { + private fun getEnclosingClass(node: GumTreeNode): GumTreeNode? { if (node.getTypeLabel() == TypeLabels.typeDeclaration) { return node } - val parentNode = node.getParent() as? GumTreeJavaNode + val parentNode = node.getParent() as? 
GumTreeNode return parentNode?.let { getEnclosingClass(it) } } - private fun getParameters(methodNode: GumTreeJavaNode): List> { + private fun getParameters(methodNode: GumTreeNode): List> { val params = methodNode.getChildren().filter { it.getTypeLabel() == TypeLabels.singleVariableDeclaration } return params.map { - val node = it as GumTreeJavaNode - ParameterNode( + val node = it as GumTreeNode + ParameterNode( node, getElementType(node), getElementName(node) diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt index a0b20be3..d1b97a45 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt @@ -4,21 +4,21 @@ import com.github.gumtreediff.client.Run import com.github.gumtreediff.gen.jdt.JdtTreeGenerator import com.github.gumtreediff.tree.TreeContext import astminer.common.model.Parser -import astminer.parse.gumtree.java.GumTreeJavaNode +import astminer.parse.gumtree.GumTreeNode import java.io.InputStream import java.io.InputStreamReader -class GumTreeJavaParser : Parser { +class GumTreeJavaParser : Parser { init { Run.initGenerators() } - override fun parseInputStream(content: InputStream): GumTreeJavaNode? { + override fun parseInputStream(content: InputStream): GumTreeNode? 
{ val treeContext = JdtTreeGenerator().generate(InputStreamReader(content)) return wrapGumTreeNode(treeContext) } } -fun wrapGumTreeNode(treeContext: TreeContext): GumTreeJavaNode { - return GumTreeJavaNode(treeContext.root, treeContext, null) +fun wrapGumTreeNode(treeContext: TreeContext): GumTreeNode { + return GumTreeNode(treeContext.root, treeContext, null) } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt index 89282acd..3aca70b6 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt @@ -6,8 +6,9 @@ import astminer.common.model.MethodNode import astminer.common.model.ParameterNode import astminer.common.model.TreeMethodSplitter import astminer.common.preOrder +import astminer.parse.gumtree.GumTreeNode -class GumTreePythonMethodSplitter : TreeMethodSplitter { +class GumTreePythonMethodSplitter : TreeMethodSplitter { companion object { private object TypeLabels { const val classDefinition = "ClassDef" @@ -32,12 +33,12 @@ class GumTreePythonMethodSplitter : TreeMethodSplitter { } } - override fun splitIntoMethods(root: GumTreePythonNode): Collection> { + override fun splitIntoMethods(root: GumTreeNode): Collection> { val methodRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.getTypeLabel()) } - return methodRoots.map { collectMethodInfo(it as GumTreePythonNode) } + return methodRoots.map { collectMethodInfo(it as GumTreeNode) } } - private fun collectMethodInfo(methodNode: GumTreePythonNode): MethodInfo { + private fun collectMethodInfo(methodNode: GumTreeNode): MethodInfo { val methodReturnType = getElementType(methodNode) // no methods return types for current parser val methodName = getElementName(methodNode) @@ -53,32 +54,32 @@ class GumTreePythonMethodSplitter : 
TreeMethodSplitter { ) } - private fun getElementName(node: GumTreePythonNode) = node + private fun getElementName(node: GumTreeNode) = node - private fun getElementType(node: GumTreePythonNode): GumTreePythonNode? { + private fun getElementType(node: GumTreeNode): GumTreeNode? { if (node.getTypeLabel() == TypeLabels.arg) { - return node.getChildOfType(TypeLabels.nameLoad) as GumTreePythonNode? + return node.getChildOfType(TypeLabels.nameLoad) as GumTreeNode? } // if return statement has "Constant-`Type`" return value => function type is `Type` if (TypeLabels.methodDefinitions.contains(node.getTypeLabel())) { return node.getChildOfType(TypeLabels.body)?.getChildOfType(TypeLabels.returnTypeLabel)?.let { it.getChildren().firstOrNull { child -> child.getTypeLabel().startsWith(TypeLabels.constantType) - } as GumTreePythonNode? + } as GumTreeNode? } } return null } - private fun getEnclosingClass(node: GumTreePythonNode): GumTreePythonNode? { + private fun getEnclosingClass(node: GumTreeNode): GumTreeNode? { if (node.getTypeLabel() == TypeLabels.classDefinition) { return node } - val parentNode = node.getParent() as? GumTreePythonNode + val parentNode = node.getParent() as? 
GumTreeNode return parentNode?.let { getEnclosingClass(it) } } - private fun getParameters(methodNode: GumTreePythonNode): List> { + private fun getParameters(methodNode: GumTreeNode): List> { val params = methodNode.getChildrenOfType(TypeLabels.arguments).flatMap { it.getChildren() }.filter { @@ -98,8 +99,8 @@ class GumTreePythonMethodSplitter : TreeMethodSplitter { } return params.map { - val node = it as GumTreePythonNode - ParameterNode( + val node = it as GumTreeNode + ParameterNode( node, getElementType(node), getElementName(node) diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonNode.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonNode.kt deleted file mode 100644 index 82d7743a..00000000 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonNode.kt +++ /dev/null @@ -1,45 +0,0 @@ -package astminer.parse.gumtree.python - -import astminer.common.model.Node -import com.github.gumtreediff.tree.ITree -import com.github.gumtreediff.tree.TreeContext - -class GumTreePythonNode(val wrappedNode: ITree, val context: TreeContext, val parent: GumTreePythonNode?) : Node { - private val metadata: MutableMap = HashMap() - - override fun getMetadata(key: String): Any? { - return metadata[key] - } - - override fun setMetadata(key: String, value: Any) { - metadata[key] = value - } - - override fun isLeaf(): Boolean { - return childrenList.isEmpty() - } - - private val childrenList: MutableList by lazy { - wrappedNode.children.map { GumTreePythonNode(it, context, this) }.toMutableList() - } - - override fun getTypeLabel(): String { - return context.getTypeLabel(wrappedNode) - } - - override fun getChildren(): List { - return childrenList - } - - override fun getParent(): Node? 
{ - return parent - } - - override fun getToken(): String { - return wrappedNode.label - } - - override fun removeChildrenOfType(typeLabel: String) { - childrenList.removeIf { it.getTypeLabel() == typeLabel } - } -} diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt index 2591bd2a..0f5ad777 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt @@ -1,19 +1,19 @@ package astminer.parse.gumtree.python import astminer.common.model.Parser -import astminer.parse.gumtree.python.GumTreePythonNode +import astminer.parse.gumtree.GumTreeNode import com.github.gumtreediff.client.Run import com.github.gumtreediff.gen.python.PythonTreeGenerator import com.github.gumtreediff.tree.TreeContext import java.io.InputStream import java.io.InputStreamReader -class GumTreePythonParser : Parser { +class GumTreePythonParser : Parser { init { Run.initGenerators() } - override fun parseInputStream(content: InputStream): GumTreePythonNode? = try { + override fun parseInputStream(content: InputStream): GumTreeNode? 
= try { val context = PythonTreeGenerator().generate(InputStreamReader(content)) wrapGumTreeNode(context) } catch (e: Exception) { @@ -21,6 +21,6 @@ class GumTreePythonParser : Parser { } } -fun wrapGumTreeNode(treeContext: TreeContext): GumTreePythonNode { - return GumTreePythonNode(treeContext.root, treeContext, null) +fun wrapGumTreeNode(treeContext: TreeContext): GumTreeNode { + return GumTreeNode(treeContext.root, treeContext, null) } diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt index fc8c3c07..76f9868f 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt @@ -1,17 +1,17 @@ package astminer.parse.gumtree.java import astminer.common.model.MethodInfo -import astminer.parse.gumtree.java.GumTreeJavaNode +import astminer.parse.gumtree.GumTreeNode import org.junit.Test import java.io.File import kotlin.test.assertEquals -private fun createTree(filename: String): GumTreeJavaNode { +private fun createTree(filename: String): GumTreeNode { val parser = GumTreeJavaParser() - return parser.parseInputStream(File(filename).inputStream()) as GumTreeJavaNode + return parser.parseInputStream(File(filename).inputStream()) as GumTreeNode } -private fun createAndSplitTree(filename: String): Collection> { +private fun createAndSplitTree(filename: String): Collection> { return GumTreeJavaMethodSplitter().splitIntoMethods(createTree(filename)) } diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt index 7cf3ed6a..6e1c6025 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt @@ -1,17 +1,17 @@ package 
astminer.parse.gumtree.python import astminer.common.model.MethodInfo -import astminer.parse.gumtree.python.GumTreePythonParser +import astminer.parse.gumtree.GumTreeNode import org.junit.Test import java.io.File import kotlin.test.assertEquals import kotlin.test.assertNotNull class GumTreeJavaMethodSplitterTest { - private fun parse(filename: String): GumTreePythonNode? = + private fun parse(filename: String): GumTreeNode? = GumTreePythonParser().parseInputStream(File(filename).inputStream()) - private fun splitMethods(filename: String): Collection> = parse(filename)?.let { + private fun splitMethods(filename: String): Collection> = parse(filename)?.let { GumTreePythonMethodSplitter().splitIntoMethods(it) } ?: emptyList() diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt index 11c464bc..48be6f7d 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt @@ -1,6 +1,5 @@ package astminer.parse.gumtree.python -import astminer.parse.gumtree.python.GumTreePythonParser import org.junit.After import org.junit.Before import org.junit.Test From 16abd3a4c6f345d804f22ed504c2b747630aff21 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 30 Mar 2021 21:59:36 +0300 Subject: [PATCH 040/308] some renaming --- src/main/kotlin/astminer/parse/FuzzyHandler.kt | 11 ++++++----- src/main/kotlin/astminer/parse/factory.kt | 10 +++++----- .../kotlin/astminer/parse/gumtree/GumtreeHandler.kt | 4 ++-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/main/kotlin/astminer/parse/FuzzyHandler.kt b/src/main/kotlin/astminer/parse/FuzzyHandler.kt index 5b7eb3f7..1edcd030 100644 --- a/src/main/kotlin/astminer/parse/FuzzyHandler.kt +++ b/src/main/kotlin/astminer/parse/FuzzyHandler.kt @@ -6,11 +6,12 @@ import astminer.parse.fuzzy.cpp.FuzzyMethodSplitter import 
astminer.parse.fuzzy.cpp.FuzzyNode import java.io.File -object CppFuzzyHandlerFactory: HandlerFactory { +object FuzzyCppHandler : HandlerFactory { override fun createHandler(file: File): LanguageHandler = CppFuzzyHandler(file) -} -class CppFuzzyHandler(file: File) : LanguageHandler() { - override val splitter = FuzzyMethodSplitter() - override val parseResult: ParseResult = FuzzyCppParser().parseFile(file) + + class CppFuzzyHandler(file: File) : LanguageHandler() { + override val splitter = FuzzyMethodSplitter() + override val parseResult: ParseResult = FuzzyCppParser().parseFile(file) + } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt index fbc1d608..40aafccc 100644 --- a/src/main/kotlin/astminer/parse/factory.kt +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -3,8 +3,8 @@ package astminer.parse import astminer.parse.antlr.AntlrJavaHandlerFactory import astminer.parse.antlr.AntlrJavascriptHandlerFactory import astminer.parse.antlr.AntlrPythonHandlerFactory -import astminer.parse.gumtree.JavaGumtreeHandlerFactory -import astminer.parse.gumtree.PythonGumTreeHandlerFactory +import astminer.parse.gumtree.GumtreeJavaHandlerFactory +import astminer.parse.gumtree.GumtreePythonHandlerFactory fun getHandlerFactory(extension: String, parserType: String): HandlerFactory { return when (parserType) { @@ -17,8 +17,8 @@ fun getHandlerFactory(extension: String, parserType: String): HandlerFactory { private fun getGumtreeHandlerFactory(extension: String): HandlerFactory { return when (extension) { - "java" -> JavaGumtreeHandlerFactory - "python" -> PythonGumTreeHandlerFactory + "java" -> GumtreeJavaHandlerFactory + "python" -> GumtreePythonHandlerFactory else -> throw UnsupportedOperationException() } } @@ -34,7 +34,7 @@ private fun getAntlrHandlerFactory(extension: String): HandlerFactory { private fun getFuzzyHandlerFactory(extension: String): HandlerFactory { return when (extension) { - "c", 
"cpp" -> CppFuzzyHandlerFactory + "c", "cpp" -> FuzzyCppHandler else -> throw UnsupportedOperationException() } } diff --git a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt index 06c889a9..c3809145 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt @@ -9,7 +9,7 @@ import astminer.parse.gumtree.python.GumTreePythonMethodSplitter import astminer.parse.gumtree.python.GumTreePythonParser import java.io.File -object JavaGumtreeHandlerFactory : HandlerFactory { +object GumtreeJavaHandlerFactory : HandlerFactory { override fun createHandler(file: File): LanguageHandler = JavaGumtreeHandler(file) class JavaGumtreeHandler(file: File) : LanguageHandler() { @@ -18,7 +18,7 @@ object JavaGumtreeHandlerFactory : HandlerFactory { } } -object PythonGumTreeHandlerFactory : HandlerFactory { +object GumtreePythonHandlerFactory : HandlerFactory { override fun createHandler(file: File): LanguageHandler = PythonGumTreeHandler(file) class PythonGumTreeHandler(file: File) : LanguageHandler() { From 0eb2b39c28b09f22f3801c0cc235cbbf9a53caa4 Mon Sep 17 00:00:00 2001 From: furetur Date: Wed, 31 Mar 2021 21:11:38 +0500 Subject: [PATCH 041/308] refactored storages with ugly solution for token normalization --- .../java/astminer/examples/AllJavaFiles.java | 22 +++++-------- .../kotlin/astminer/cli/Code2VecExtractor.kt | 33 +++++++++---------- .../astminer/cli/PathContextsExtractor.kt | 30 ++++++++--------- src/main/kotlin/astminer/cli/ProjectParser.kt | 19 +++++------ .../astminer/common/model/ParsingModel.kt | 2 +- .../astminer/examples/AllJavaMethods.kt | 16 ++++----- 6 files changed, 55 insertions(+), 67 deletions(-) diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 26789b59..7cc06779 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ 
b/src/main/java/astminer/examples/AllJavaFiles.java @@ -2,14 +2,15 @@ import astminer.common.model.*; import astminer.parse.java.GumTreeJavaParser; -import astminer.paths.*; +import astminer.storage.CountingPathStorage; +import astminer.storage.CountingPathStorageConfig; +import astminer.storage.CsvPathStorage; +import astminer.storage.LabellingResult; import java.io.FileInputStream; import java.io.IOException; import java.nio.file.*; import java.nio.file.attribute.BasicFileAttributes; -import java.util.Collection; -import java.util.stream.Collectors; //Retrieve paths from Java files, using a GumTree parser. public class AllJavaFiles { @@ -17,8 +18,8 @@ public class AllJavaFiles { private static final String OUTPUT_FOLDER = "out_examples/allJavaFiles_GumTree_java"; public static void runExample() { - final PathMiner miner = new PathMiner(new PathRetrievalSettings(5,5)); - final CountingPathStorage pathStorage = new CsvPathStorage(OUTPUT_FOLDER, Long.MAX_VALUE, Long.MAX_VALUE); + final CountingPathStorageConfig config = new CountingPathStorageConfig(5, 5, false, Long.MAX_VALUE, Long.MAX_VALUE, Integer.MAX_VALUE); + final CountingPathStorage pathStorage = new CsvPathStorage(OUTPUT_FOLDER, config); final Path inputFolder = Paths.get(INPUT_FOLDER); @@ -29,14 +30,9 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) thro if (fileTree == null) { return FileVisitResult.CONTINUE; } - final Collection paths = miner.retrievePaths(fileTree); - final Collection pathContexts = paths - .stream() - .map(node -> - PathUtilKt.toPathContext(node, (Node::getToken)) - ).collect(Collectors.toList()); - - pathStorage.store(new LabeledPathContexts<>(file.toAbsolutePath().toString(), pathContexts)); + + String filePath = file.toAbsolutePath().toString(); + pathStorage.store(new LabellingResult<>(fileTree, filePath, filePath)); return FileVisitResult.CONTINUE; } diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt 
b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt index 26b3e263..53a446df 100644 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt @@ -1,14 +1,11 @@ package astminer.cli import astminer.common.getProjectFilesWithExtension -import astminer.common.getNormalizedToken -import astminer.common.model.LabeledPathContexts import astminer.common.model.Node import astminer.common.model.ParseResult -import astminer.paths.Code2VecPathStorage -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.toPathContext +import astminer.storage.Code2VecPathStorage +import astminer.storage.CountingPathStorageConfig +import astminer.storage.toLabellingResult import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* import com.github.ajalt.clikt.parameters.types.int @@ -121,32 +118,32 @@ class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null private fun extractFromTree( parseResult: ParseResult, - miner: PathMiner, storage: Code2VecPathStorage, labelExtractor: LabelExtractor ) { val labeledParseResults = labelExtractor.toLabeledData(parseResult) // Retrieve paths from every node individually - labeledParseResults.forEach { (root, label) -> - val paths = miner.retrievePaths(root).take(maxPathContexts) - storage.store(LabeledPathContexts(label, paths.map { - toPathContext(it) { node -> - node.getNormalizedToken() - } - })) + labeledParseResults.forEach { + storage.store(it.toLabellingResult(parseResult.filePath)) } } private fun extract(labelExtractor: LabelExtractor) { val outputDir = File(outputDirName) + val storageConfig = CountingPathStorageConfig( + maxPathLength, + maxPathWidth, + true, + maxTokens, + maxPaths, + maxPathContexts + ) for (extension in extensions) { - val miner = PathMiner(PathRetrievalSettings(maxPathLength, maxPathWidth)) - val outputDirForLanguage = outputDir.resolve(extension) 
outputDirForLanguage.mkdir() // Choose type of storage - val storage = Code2VecPathStorage(outputDirForLanguage.path, maxPaths, maxTokens) + val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig) // Choose type of parser val parser = getParser( extension, @@ -156,7 +153,7 @@ class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null parser.parseFiles(getProjectFilesWithExtension(File(projectRoot), extension)) { normalizeParseResult(it, isTokenSplitted) // Retrieve labeled data - extractFromTree(it, miner, storage, labelExtractor) + extractFromTree(it, storage, labelExtractor) } // Save stored data on disk storage.close() diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index ff8d4d61..96cf4dd9 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -1,6 +1,5 @@ package astminer.cli -import astminer.common.getNormalizedToken import astminer.common.getProjectFilesWithExtension import astminer.common.model.* import astminer.parse.antlr.java.JavaParser @@ -8,10 +7,9 @@ import astminer.parse.antlr.javascript.JavaScriptParser import astminer.parse.antlr.python.PythonParser import astminer.parse.cpp.FuzzyCppParser import astminer.parse.java.GumTreeJavaParser -import astminer.paths.Code2VecPathStorage -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.toPathContext +import astminer.storage.Code2VecPathStorage +import astminer.storage.CountingPathStorageConfig +import astminer.storage.toLabellingResult import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* import com.github.ajalt.clikt.parameters.types.int @@ -87,7 +85,7 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? 
= private fun getParser(extension: String): Parser { if (extension == "java") { - return when(javaParser) { + return when (javaParser) { "gumtree" -> GumTreeJavaParser() "antlr" -> JavaParser() else -> throw IllegalArgumentException("javaParser should be `antlr` or `gumtree`, not $javaParser") @@ -103,25 +101,27 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? = private fun extractPathContexts(labelExtractor: LabelExtractor) { val outputDir = File(outputDirName) + val storageConfig = CountingPathStorageConfig( + maxPathLength, + maxPathWidth, + true, + maxTokens, + maxPaths, + maxPathContexts + ) for (extension in extensions) { - val miner = PathMiner(PathRetrievalSettings(maxPathLength, maxPathWidth)) val parser = getParser(extension) val outputDirForLanguage = outputDir.resolve(extension) outputDirForLanguage.mkdir() - val storage = Code2VecPathStorage(outputDirForLanguage.path, maxPaths, maxTokens) + val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig) val files = getProjectFilesWithExtension(File(projectRoot), extension) parser.parseFiles(files) { parseResult -> normalizeParseResult(parseResult, splitTokens = true) val labeledParseResults = labelExtractor.toLabeledData(parseResult) - labeledParseResults.forEach { (root, label) -> - val paths = miner.retrievePaths(root).take(maxPathContexts) - storage.store(LabeledPathContexts(label, paths.map { astPath -> - toPathContext(astPath) { node -> - node.getNormalizedToken() - } - })) + labeledParseResults.forEach { + storage.store(it.toLabellingResult(parseResult.filePath)) } } diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt index 7941f1b7..ba220260 100644 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ b/src/main/kotlin/astminer/cli/ProjectParser.kt @@ -1,10 +1,11 @@ package astminer.cli -import astminer.ast.CsvAstStorage -import astminer.ast.DotAstStorage +import astminer.storage.CsvAstStorage 
+import astminer.storage.DotAstStorage import astminer.common.getProjectFilesWithExtension -import astminer.common.model.AstStorage import astminer.common.preOrder +import astminer.storage.Storage +import astminer.storage.toLabellingResult import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* import com.github.ajalt.clikt.parameters.types.int @@ -99,7 +100,7 @@ class ProjectParser(private val customLabelExtractor: LabelExtractor? = null) : ).flag(default = false) - private fun getStorage(storageType: String, directoryPath: String): AstStorage { + private fun getStorage(storageType: String, directoryPath: String): Storage { return when (storageType) { "csv" -> CsvAstStorage(directoryPath) "dot" -> DotAstStorage(directoryPath) @@ -126,14 +127,12 @@ class ProjectParser(private val customLabelExtractor: LabelExtractor? = null) : parser.parseFiles(filesToParse) { parseResult -> normalizeParseResult(parseResult, isTokenSplitted) val labeledParseResults = labelExtractor.toLabeledData(parseResult) - labeledParseResults.forEach { (root, label) -> - root.preOrder().forEach { node -> + labeledParseResults.forEach { labeled -> + labeled.root.preOrder().forEach { node -> excludeNodes.forEach { node.removeChildrenOfType(it) } } - root.apply { - // Save AST as it is or process it to extract features / path-based representations - storage.store(root, label, parseResult.filePath) - } + // Save AST as it is or process it to extract features / path-based representations + storage.store(labeled.toLabellingResult(parseResult.filePath)) } } // Save stored data on disk diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index c5aba214..5231bc6e 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -51,7 +51,7 @@ interface Parser { * @param files files to parse * @param handleResult handler to invoke 
on each file parse result */ - fun parseFiles(files: List, handleResult: (ParseResult) -> Any) { + fun parseFiles(files: List, handleResult: (ParseResult) -> Any?) { files.forEach { handleResult(parseFile(it)) } } } diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 7454480c..a849f65b 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -1,14 +1,12 @@ package astminer.examples -import astminer.common.model.LabeledPathContexts import astminer.common.model.MethodInfo import astminer.parse.java.GumTreeJavaNode import astminer.parse.java.GumTreeJavaParser import astminer.parse.java.GumTreeJavaMethodSplitter -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.CsvPathStorage -import astminer.paths.toPathContext +import astminer.storage.CsvPathStorage +import astminer.storage.CountingPathStorageConfig +import astminer.storage.LabellingResult import java.io.File @@ -25,9 +23,8 @@ private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): Str fun allJavaMethods() { val inputDir = "src/test/resources/gumTreeMethodSplitter" - val miner = PathMiner(PathRetrievalSettings(5, 5)) val outputDir = "out_examples/allJavaMethods" - val storage = CsvPathStorage(outputDir) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5)) File(inputDir).forFilesWithSuffix(".java") { file -> //parse file @@ -37,11 +34,10 @@ fun allJavaMethods() { val methodNodes = GumTreeJavaMethodSplitter().splitIntoMethods(fileNode) methodNodes.forEach { methodInfo -> - //Retrieve paths from every node individually - val paths = miner.retrievePaths(methodInfo.method.root) //Retrieve a method identifier val entityId = "${file.path}::${getCsvFriendlyMethodId(methodInfo)}" - storage.store(LabeledPathContexts(entityId, paths.map { toPathContext(it) })) + val labelingResult = LabellingResult(fileNode, 
entityId, file.path) + storage.store(labelingResult) } } From 7ccfd3ed0f6c0481ad0ed92a61ffb120b93632f8 Mon Sep 17 00:00:00 2001 From: furetur Date: Wed, 31 Mar 2021 21:13:27 +0500 Subject: [PATCH 042/308] refactored storages with ugly solution for token normalization (commited all the lost files) --- .../kotlin/astminer/examples/AllCppFiles.kt | 16 +-- .../kotlin/astminer/examples/AllJavaAst.kt | 12 +- .../kotlin/astminer/examples/AllJavaFiles.kt | 14 +-- .../astminer/examples/AllJavaFilesGumTree.kt | 22 ++-- .../astminer/examples/AllJavaScriptFiles.kt | 22 ++-- .../astminer/examples/AllPythonFiles.kt | 22 ++-- .../astminer/examples/AllPythonMethods.kt | 17 ++- .../astminer/examples/Code2VecJavaMethods.kt | 12 +- .../astminer/paths/CountingPathStorage.kt | 61 --------- .../{paths => storage}/Code2VecPathStorage.kt | 8 +- .../astminer/storage/CountingPathStorage.kt | 116 ++++++++++++++++++ .../{ast => storage}/CsvAstStorage.kt | 23 ++-- .../{paths => storage}/CsvPathStorage.kt | 9 +- .../{ast => storage}/DotAstStorage.kt | 33 ++--- .../kotlin/astminer/storage/LabelingResult.kt | 16 +++ src/main/kotlin/astminer/storage/Storage.kt | 10 ++ src/test/kotlin/astminer/common/TestUtils.kt | 3 + .../{ast => storage}/CsvAstStorageTest.kt | 5 +- .../{ast => storage}/DotAstStorageTest.kt | 5 +- 19 files changed, 249 insertions(+), 177 deletions(-) delete mode 100644 src/main/kotlin/astminer/paths/CountingPathStorage.kt rename src/main/kotlin/astminer/{paths => storage}/Code2VecPathStorage.kt (58%) create mode 100644 src/main/kotlin/astminer/storage/CountingPathStorage.kt rename src/main/kotlin/astminer/{ast => storage}/CsvAstStorage.kt (69%) rename src/main/kotlin/astminer/{paths => storage}/CsvPathStorage.kt (60%) rename src/main/kotlin/astminer/{ast => storage}/DotAstStorage.kt (73%) create mode 100644 src/main/kotlin/astminer/storage/LabelingResult.kt create mode 100644 src/main/kotlin/astminer/storage/Storage.kt rename src/test/kotlin/astminer/{ast => 
storage}/CsvAstStorageTest.kt (73%) rename src/test/kotlin/astminer/{ast => storage}/DotAstStorageTest.kt (95%) diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index 9396f598..5468255f 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -3,21 +3,18 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension -import astminer.common.model.LabeledPathContexts import astminer.parse.cpp.FuzzyCppParser -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.CsvPathStorage -import astminer.paths.toPathContext +import astminer.storage.CsvPathStorage +import astminer.storage.CountingPathStorageConfig +import astminer.storage.labeledWithFilePath import java.io.File // Retrieve paths from .cpp preprocessed files, using a fuzzyc2cpg parser. fun allCppFiles() { val inputDir = File("src/test/resources/examples/cpp") - val miner = PathMiner(PathRetrievalSettings(5, 5)) val outputDir = "out_examples/allCppFiles" - val storage = CsvPathStorage(outputDir) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5, false)) val parser = FuzzyCppParser() val preprocOutputFolder = File("preprocessed") @@ -26,9 +23,8 @@ fun allCppFiles() { val files = getProjectFilesWithExtension(preprocOutputFolder, "cpp") parser.parseFiles(files) { parseResult -> - if (parseResult.root != null) { - val paths = miner.retrievePaths(parseResult.root) - storage.store(LabeledPathContexts(parseResult.filePath, paths.map { toPathContext(it) })) + parseResult.labeledWithFilePath()?.let { + storage.store(it) } } diff --git a/src/main/kotlin/astminer/examples/AllJavaAst.kt b/src/main/kotlin/astminer/examples/AllJavaAst.kt index 42c4f91a..a45295ab 100644 --- a/src/main/kotlin/astminer/examples/AllJavaAst.kt +++ b/src/main/kotlin/astminer/examples/AllJavaAst.kt @@ -1,7 +1,9 @@ package astminer.examples 
-import astminer.ast.CsvAstStorage +import astminer.common.getProjectFilesWithExtension +import astminer.storage.CsvAstStorage import astminer.parse.antlr.java.JavaParser +import astminer.storage.labeledWithFilePath import java.io.File // Retrieve ASTs from Java files, using a generated parser. @@ -10,9 +12,11 @@ fun allJavaAsts() { val storage = CsvAstStorage("out_examples/allJavaAstsAntlr") - File(folder).forFilesWithSuffix(".java") { file -> - val node = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix - storage.store(node, label = file.path) + val files = getProjectFilesWithExtension(File(folder), "java") + JavaParser().parseFiles(files) { parseResult -> + parseResult.labeledWithFilePath()?.let { + storage.store(it) + } } storage.close() diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index e51f95bf..448ed6ab 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -1,25 +1,21 @@ package astminer.examples -import astminer.common.model.LabeledPathContexts import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.CsvPathStorage -import astminer.paths.toPathContext +import astminer.storage.CsvPathStorage +import astminer.storage.CountingPathStorageConfig +import astminer.storage.labeledWithFilePath import java.io.File //Retrieve paths from Java files, using a generated parser. 
fun allJavaFiles() { val inputDir = "src/test/resources/examples/" - val miner = PathMiner(PathRetrievalSettings(5, 5)) val outputDir = "out_examples/allJavaFilesAntlr" - val storage = CsvPathStorage(outputDir) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5, false)) File(inputDir).forFilesWithSuffix("11.java") { file -> val node = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix - val paths = miner.retrievePaths(node) node.prettyPrint() JavaMethodSplitter().splitIntoMethods(node).forEach { println(it.name()) @@ -29,7 +25,7 @@ fun allJavaFiles() { println("${parameters.name()} ${parameters.returnType()}") } } - storage.store(LabeledPathContexts(file.path, paths.map { toPathContext(it) })) + storage.store(node.labeledWithFilePath(file.path)) } storage.close() diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index 0c34108b..d83c1a73 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -1,26 +1,24 @@ package astminer.examples -import astminer.common.model.LabeledPathContexts +import astminer.common.getProjectFilesWithExtension import astminer.parse.java.GumTreeJavaParser -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.CsvPathStorage -import astminer.paths.toPathContext +import astminer.storage.CsvPathStorage +import astminer.storage.CountingPathStorageConfig +import astminer.storage.labeledWithFilePath import java.io.File //Retrieve paths from Java files, using a GumTree parser. 
fun allJavaFilesGumTree() { val inputDir = "src/test/resources/gumTreeMethodSplitter/" - val miner = PathMiner(PathRetrievalSettings(5, 5)) val outputDir = "out_examples/allJavaFilesGumTree" - val storage = CsvPathStorage(outputDir) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5, false)) - File(inputDir).forFilesWithSuffix(".java") { file -> - val node = GumTreeJavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix - val paths = miner.retrievePaths(node) - - storage.store(LabeledPathContexts(file.path, paths.map { toPathContext(it) })) + val files = getProjectFilesWithExtension(File(inputDir), "java") + GumTreeJavaParser().parseFiles(files) { parseResult -> + parseResult.labeledWithFilePath()?.let { + storage.store(it) + } } storage.close() diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index 9b9a23c5..406ff953 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -1,25 +1,23 @@ package astminer.examples -import astminer.common.model.LabeledPathContexts +import astminer.common.getProjectFilesWithExtension import astminer.parse.antlr.javascript.JavaScriptParser -import astminer.paths.CsvPathStorage -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.toPathContext +import astminer.storage.CsvPathStorage +import astminer.storage.CountingPathStorageConfig +import astminer.storage.labeledWithFilePath import java.io.File fun allJavaScriptFiles() { val folder = "src/test/resources/examples" val outputDir = "out_examples/allJavaScriptFilesAntlr" - val miner = PathMiner(PathRetrievalSettings(5, 5)) - val storage = CsvPathStorage(outputDir) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5)) - File(folder).forFilesWithSuffix(".js") {file -> - val node = 
JavaScriptParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix - val paths = miner.retrievePaths(node) - - storage.store(LabeledPathContexts(file.path, paths.map { toPathContext(it) })) + val files = getProjectFilesWithExtension(File(folder), "js") + JavaScriptParser().parseFiles(files) { parseResult -> + parseResult.labeledWithFilePath()?.let { + storage.store(it) + } } storage.close() diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index 01e1e111..6fa96eca 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -1,26 +1,24 @@ package astminer.examples -import astminer.common.model.LabeledPathContexts +import astminer.common.getProjectFilesWithExtension import astminer.parse.antlr.python.PythonParser -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.CsvPathStorage -import astminer.paths.toPathContext +import astminer.storage.CsvPathStorage +import astminer.storage.CountingPathStorageConfig +import astminer.storage.labeledWithFilePath import java.io.File fun allPythonFiles() { val inputDir = "src/test/resources/examples/" - val miner = PathMiner(PathRetrievalSettings(5, 5)) val outputDir = "out_examples/allPythonFiles" - val storage = CsvPathStorage(outputDir) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5, false)) - File(inputDir).forFilesWithSuffix(".py") { file -> - val node = PythonParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix - val paths = miner.retrievePaths(node) - - storage.store(LabeledPathContexts(file.path, paths.map { toPathContext(it) })) + val files = getProjectFilesWithExtension(File(inputDir), "py") + PythonParser().parseFiles(files) { parseResult -> + parseResult.labeledWithFilePath()?.let { + storage.store(it) + } } storage.close() diff --git 
a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 488bb4eb..5046214e 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -1,14 +1,12 @@ package astminer.examples -import astminer.common.model.LabeledPathContexts import astminer.common.model.MethodInfo import astminer.parse.python.GumTreePythonMethodSplitter import astminer.parse.python.GumTreePythonNode import astminer.parse.python.GumTreePythonParser -import astminer.paths.CsvPathStorage -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.toPathContext +import astminer.storage.CsvPathStorage +import astminer.storage.CountingPathStorageConfig +import astminer.storage.LabellingResult import java.io.File private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { @@ -21,9 +19,8 @@ private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): S fun allPythonMethods() { val inputDir = "src/test/resources/gumTreeMethodSplitter" - val miner = PathMiner(PathRetrievalSettings(5, 5)) val outputDir = "out_examples/allPythonMethods" - val storage = CsvPathStorage(outputDir) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5, false)) File(inputDir).forFilesWithSuffix(".py") { file -> // parse file @@ -33,11 +30,11 @@ fun allPythonMethods() { val methodNodes = GumTreePythonMethodSplitter().splitIntoMethods(fileNode) methodNodes.forEach { methodInfo -> - // Retrieve paths from every node individually - val paths = miner.retrievePaths(methodInfo.method.root) // Retrieve a method identifier val entityId = "${file.path}::${getCsvFriendlyMethodId(methodInfo)}" - storage.store(LabeledPathContexts(entityId, paths.map { toPathContext(it) })) + val labelingResult = LabellingResult(fileNode, entityId, file.path) + // Retrieve paths from each method individually and store them + storage.store(labelingResult) } } diff 
--git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index ffc0f565..07dcb71b 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -1,10 +1,12 @@ package astminer.examples import astminer.common.* -import astminer.common.model.LabeledPathContexts import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser import astminer.paths.* +import astminer.storage.Code2VecPathStorage +import astminer.storage.CountingPathStorageConfig +import astminer.storage.LabellingResult import java.io.File @@ -14,9 +16,8 @@ fun code2vecJavaMethods() { val folder = "src/test/resources/code2vecPathMining" val outputDir = "out_examples/code2vecPathMining" - val miner = PathMiner(PathRetrievalSettings(5, 5)) - val storage = Code2VecPathStorage(outputDir) + val storage = Code2VecPathStorage(outputDir, CountingPathStorageConfig(5, 5)) File(folder).forFilesWithSuffix(".java") { file -> //parse file @@ -32,9 +33,8 @@ fun code2vecJavaMethods() { methodRoot.preOrder().forEach { it.setNormalizedToken() } methodNameNode.setNormalizedToken("METHOD_NAME") - // Retrieve paths from every node individually - val paths = miner.retrievePaths(methodRoot) - storage.store(LabeledPathContexts(label, paths.map { toPathContext(it) { node -> node.getNormalizedToken() } })) + // Retrieve paths from every node individually and store them + storage.store(LabellingResult(methodRoot, label, file.absolutePath)) } } diff --git a/src/main/kotlin/astminer/paths/CountingPathStorage.kt b/src/main/kotlin/astminer/paths/CountingPathStorage.kt deleted file mode 100644 index c671b138..00000000 --- a/src/main/kotlin/astminer/paths/CountingPathStorage.kt +++ /dev/null @@ -1,61 +0,0 @@ -package astminer.paths - -import astminer.common.model.* -import astminer.common.storage.* -import java.io.File -import java.io.PrintWriter - -abstract class 
CountingPathStorage(override val directoryPath: String, - override val tokensLimit: Long, - override val pathsLimit: Long -) : PathStorage { - - protected val tokensMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() - protected val orientedNodeTypesMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() - protected val pathsMap: RankedIncrementalIdStorage> = RankedIncrementalIdStorage() - - private val pathsFile: File - private val labeledPathContextIdsWriter: PrintWriter - - init { - File(directoryPath).mkdirs() - pathsFile = File("$directoryPath/path_contexts.csv") - pathsFile.createNewFile() - labeledPathContextIdsWriter = PrintWriter(pathsFile) - } - - abstract fun pathContextIdsToString(pathContextIds: List, label: LabelType): String - - private fun dumpPathContexts(labeledPathContextIds: LabeledPathContextIds) { - val pathContextIdsString = labeledPathContextIds.pathContexts.filter { - tokensMap.getIdRank(it.startTokenId) <= tokensLimit && - tokensMap.getIdRank(it.endTokenId) <= tokensLimit && - pathsMap.getIdRank(it.pathId) <= pathsLimit - } - labeledPathContextIdsWriter.println(pathContextIdsToString(pathContextIdsString, labeledPathContextIds.label)) - } - - private fun storePathContext(pathContext: PathContext): PathContextId { - val startTokenId = tokensMap.record(pathContext.startToken) - val endTokenId = tokensMap.record(pathContext.endToken) - val orientedNodesIds = pathContext.orientedNodeTypes.map { orientedNodeTypesMap.record(it) } - val pathId = pathsMap.record(orientedNodesIds) - return PathContextId(startTokenId, pathId, endTokenId) - } - - override fun store(labeledPathContexts: LabeledPathContexts) { - val labeledPathContextIds = LabeledPathContextIds( - labeledPathContexts.label, - labeledPathContexts.pathContexts.map { storePathContext(it) } - ) - dumpPathContexts(labeledPathContextIds) - } - - override fun close() { - dumpIdStorageToCsv(tokensMap, "token", tokenToCsvString, File("$directoryPath/tokens.csv"), tokensLimit) 
- dumpIdStorageToCsv(orientedNodeTypesMap, "node_type", orientedNodeToCsvString, File("$directoryPath/node_types.csv"), Long.MAX_VALUE) - dumpIdStorageToCsv(pathsMap, "path", pathToCsvString, File("$directoryPath/paths.csv"), pathsLimit) - - labeledPathContextIdsWriter.close() - } -} diff --git a/src/main/kotlin/astminer/paths/Code2VecPathStorage.kt b/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt similarity index 58% rename from src/main/kotlin/astminer/paths/Code2VecPathStorage.kt rename to src/main/kotlin/astminer/storage/Code2VecPathStorage.kt index 595b459a..09417241 100644 --- a/src/main/kotlin/astminer/paths/Code2VecPathStorage.kt +++ b/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt @@ -1,11 +1,9 @@ -package astminer.paths +package astminer.storage import astminer.common.model.PathContextId -class Code2VecPathStorage(outputFolderPath: String, - pathsLimit: Long = Long.MAX_VALUE, - tokensLimit: Long = Long.MAX_VALUE -) : CountingPathStorage(outputFolderPath, pathsLimit, tokensLimit) { +class Code2VecPathStorage(outputDirectoryPath: String, config: CountingPathStorageConfig) : + CountingPathStorage(outputDirectoryPath, config) { override fun pathContextIdsToString(pathContextIds: List, label: String): String { val joinedPathContexts = pathContextIds.joinToString(" ") { pathContextId -> diff --git a/src/main/kotlin/astminer/storage/CountingPathStorage.kt b/src/main/kotlin/astminer/storage/CountingPathStorage.kt new file mode 100644 index 00000000..8dd3b5a0 --- /dev/null +++ b/src/main/kotlin/astminer/storage/CountingPathStorage.kt @@ -0,0 +1,116 @@ +package astminer.storage + +import astminer.common.getNormalizedToken +import astminer.common.model.* +import astminer.common.storage.* +import astminer.paths.PathMiner +import astminer.paths.PathRetrievalSettings +import astminer.paths.toPathContext +import java.io.File +import java.io.PrintWriter + +// TODO: finish the documentation +/** + * Config for CountingPathStorage which contains several 
hyperparameters. + * @property maxTokens ?? + * @property maxPaths ?? + * @property maxPathContextsPerEntity The maximum number of path contexts that should be extracted from LabeledParseResult. + * In other words, the maximum number of path contexts to save from each file/method (depending on granularity) + * @property maxPathLength The maximum length of a single path (based on the formal math definition of path length) + * @property maxPathWidth The maximum width of a single path (based on the formal math definition of path width) + */ +data class CountingPathStorageConfig( + val maxPathLength: Int, + val maxPathWidth: Int, + val normalizeToken: Boolean = true, // TODO: discuss this + val maxTokens: Long = Long.MAX_VALUE, + val maxPaths: Long = Long.MAX_VALUE, + val maxPathContextsPerEntity: Int = Int.MAX_VALUE +) + +/** + * abstract Base class + */ +abstract class CountingPathStorage( + final override val outputDirectoryPath: String, + private val config: CountingPathStorageConfig +) : Storage { + + private val pathMiner = PathMiner(PathRetrievalSettings(config.maxPathLength, config.maxPathWidth)) + + private val tokensMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() + private val orientedNodeTypesMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() + private val pathsMap: RankedIncrementalIdStorage> = RankedIncrementalIdStorage() + + private val pathsFile: File + private val labeledPathContextIdsWriter: PrintWriter + + init { + File(outputDirectoryPath).mkdirs() + pathsFile = File("$outputDirectoryPath/path_contexts.csv") + pathsFile.createNewFile() + labeledPathContextIdsWriter = PrintWriter(pathsFile) + } + + abstract fun pathContextIdsToString(pathContextIds: List, label: String): String + + private fun dumpPathContexts(labeledPathContextIds: LabeledPathContextIds) { + val pathContextIdsString = labeledPathContextIds.pathContexts.filter { + tokensMap.getIdRank(it.startTokenId) <= config.maxTokens && + 
tokensMap.getIdRank(it.endTokenId) <= config.maxTokens && + pathsMap.getIdRank(it.pathId) <= config.maxPaths + } + labeledPathContextIdsWriter.println(pathContextIdsToString(pathContextIdsString, labeledPathContextIds.label)) + } + + private fun storePathContext(pathContext: PathContext): PathContextId { + val startTokenId = tokensMap.record(pathContext.startToken) + val endTokenId = tokensMap.record(pathContext.endToken) + val orientedNodesIds = pathContext.orientedNodeTypes.map { orientedNodeTypesMap.record(it) } + val pathId = pathsMap.record(orientedNodesIds) + return PathContextId(startTokenId, pathId, endTokenId) + } + + private fun retrieveLabeledPathContexts(labellingResult: LabellingResult): LabeledPathContexts { + val paths = pathMiner.retrievePaths(labellingResult.root).take(config.maxPathContextsPerEntity) + return LabeledPathContexts(labellingResult.label, paths.map { astPath -> + toPathContext(astPath) { node -> + // TODO: maybe this whole hassle is not needed + if (config.normalizeToken) { + node.getNormalizedToken() + } else { + node.getToken() + } + } + }) + } + + override fun store(labellingResult: LabellingResult) { + val labeledPathContexts = retrieveLabeledPathContexts(labellingResult) + val labeledPathContextIds = LabeledPathContextIds( + labeledPathContexts.label, + labeledPathContexts.pathContexts.map { storePathContext(it) } + ) + dumpPathContexts(labeledPathContextIds) + } + + override fun close() { + dumpIdStorageToCsv( + tokensMap, + "token", + tokenToCsvString, + File("$outputDirectoryPath/tokens.csv"), + config.maxTokens + ) + dumpIdStorageToCsv( + orientedNodeTypesMap, + "node_type", + orientedNodeToCsvString, + File("$outputDirectoryPath/node_types.csv"), + Long.MAX_VALUE + ) + dumpIdStorageToCsv(pathsMap, "path", pathToCsvString, File("$outputDirectoryPath/paths.csv"), config.maxPaths) + + labeledPathContextIdsWriter.close() + } +} diff --git a/src/main/kotlin/astminer/ast/CsvAstStorage.kt 
b/src/main/kotlin/astminer/storage/CsvAstStorage.kt similarity index 69% rename from src/main/kotlin/astminer/ast/CsvAstStorage.kt rename to src/main/kotlin/astminer/storage/CsvAstStorage.kt index 6dd30223..f404d399 100644 --- a/src/main/kotlin/astminer/ast/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/CsvAstStorage.kt @@ -1,6 +1,5 @@ -package astminer.ast +package astminer.storage -import astminer.common.model.AstStorage import astminer.common.model.Node import astminer.common.preOrder import astminer.common.storage.* @@ -11,7 +10,7 @@ import java.io.PrintWriter * Stores multiple ASTs by their roots and saves them in .csv format. * Output consists of 3 .csv files: with node types, with tokens and with ASTs. */ -class CsvAstStorage(override val directoryPath: String) : AstStorage { +class CsvAstStorage(override val outputDirectoryPath: String) : Storage { private val tokensMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() private val nodeTypesMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() @@ -19,24 +18,24 @@ class CsvAstStorage(override val directoryPath: String) : AstStorage { private val astsOutputStream: PrintWriter init { - File(directoryPath).mkdirs() - val astsFile = File("$directoryPath/asts.csv") + File(outputDirectoryPath).mkdirs() + val astsFile = File("$outputDirectoryPath/asts.csv") astsFile.createNewFile() astsOutputStream = PrintWriter(astsFile) astsOutputStream.write("id,ast\n") } - override fun store(root: Node, label: String, filePath: String) { - for (node in root.preOrder()) { + override fun store(labellingResult: LabellingResult) { + for (node in labellingResult.root.preOrder()) { tokensMap.record(node.getToken()) nodeTypesMap.record(node.getTypeLabel()) } - dumpAst(root, label) + dumpAst(labellingResult.root, labellingResult.label) } override fun close() { - dumpTokenStorage(File("$directoryPath/tokens.csv")) - dumpNodeTypesStorage(File("$directoryPath/node_types.csv")) + 
dumpTokenStorage(File("$outputDirectoryPath/tokens.csv")) + dumpNodeTypesStorage(File("$outputDirectoryPath/node_types.csv")) astsOutputStream.close() } @@ -55,7 +54,7 @@ class CsvAstStorage(override val directoryPath: String) : AstStorage { internal fun astString(node: Node): String { return "${tokensMap.getId(node.getToken())} ${nodeTypesMap.getId(node.getTypeLabel())}{${ - node.getChildren().joinToString(separator = "", transform = ::astString) + node.getChildren().joinToString(separator = "", transform = ::astString) }}" } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/paths/CsvPathStorage.kt b/src/main/kotlin/astminer/storage/CsvPathStorage.kt similarity index 60% rename from src/main/kotlin/astminer/paths/CsvPathStorage.kt rename to src/main/kotlin/astminer/storage/CsvPathStorage.kt index 05f5d7cf..60e1ae93 100644 --- a/src/main/kotlin/astminer/paths/CsvPathStorage.kt +++ b/src/main/kotlin/astminer/storage/CsvPathStorage.kt @@ -1,12 +1,9 @@ -package astminer.paths +package astminer.storage import astminer.common.model.PathContextId -class CsvPathStorage(outputFolderPath: String, - pathsLimit: Long = Long.MAX_VALUE, - tokensLimit: Long = Long.MAX_VALUE -) : CountingPathStorage(outputFolderPath, pathsLimit, tokensLimit) { - +class CsvPathStorage(outputDirectoryPath: String, config: CountingPathStorageConfig) : + CountingPathStorage(outputDirectoryPath, config) { override fun pathContextIdsToString(pathContextIds: List, label: String): String { val joinedPathContexts = pathContextIds.joinToString(";") { pathContextId -> "${pathContextId.startTokenId} ${pathContextId.pathId} ${pathContextId.endTokenId}" diff --git a/src/main/kotlin/astminer/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/DotAstStorage.kt similarity index 73% rename from src/main/kotlin/astminer/ast/DotAstStorage.kt rename to src/main/kotlin/astminer/storage/DotAstStorage.kt index 876ea2bb..de398bb8 100644 --- a/src/main/kotlin/astminer/ast/DotAstStorage.kt +++ 
b/src/main/kotlin/astminer/storage/DotAstStorage.kt @@ -1,7 +1,6 @@ -package astminer.ast +package astminer.storage import astminer.common.getNormalizedToken -import astminer.common.model.AstStorage import astminer.common.model.Node import astminer.common.preOrder import astminer.common.storage.RankedIncrementalIdStorage @@ -12,7 +11,7 @@ import java.io.PrintWriter * Stores multiple ASTs in dot format (https://en.wikipedia.org/wiki/DOT_(graph_description_language)) * Output consist of separate .dot files for each AST and one full description in .csv format */ -class DotAstStorage(override val directoryPath: String) : AstStorage { +class DotAstStorage(override val outputDirectoryPath: String) : Storage { internal data class FilePath(val parentPath: String, val fileName: String) @@ -22,24 +21,30 @@ class DotAstStorage(override val directoryPath: String) : AstStorage { private var index: Long = 0 init { - File(directoryPath).mkdirs() - astDirectoryPath = File(directoryPath, "asts") + File(outputDirectoryPath).mkdirs() + astDirectoryPath = File(outputDirectoryPath, "asts") astDirectoryPath.mkdirs() - val descriptionFile = File(directoryPath, "description.csv") + val descriptionFile = File(outputDirectoryPath, "description.csv") descriptionFile.createNewFile() descriptionFileStream = PrintWriter(descriptionFile) descriptionFileStream.write("dot_file,source_file,label,node_id,token,type\n") } - override fun store(root: Node, label: String, filePath: String) { + override fun store(labellingResult: LabellingResult) { // Use filename as a label for ast // TODO: save full signature for method - val normalizedLabel = normalizeAstLabel(label) - val normalizedFilepath = normalizeFilepath(filePath) - val nodesMap = dumpAst(root, File(astDirectoryPath, astFilenameFormat.format(index)), normalizedLabel) + val normalizedLabel = normalizeAstLabel(labellingResult.label) + val normalizedFilepath = normalizeFilepath(labellingResult.filePath) + val nodesMap = 
dumpAst(labellingResult.root, File(astDirectoryPath, astFilenameFormat.format(index)), normalizedLabel) val nodeDescriptionFormat = "${astFilenameFormat.format(index)},$normalizedFilepath,$normalizedLabel,%d,%s,%s" - for (node in root.preOrder()) { - descriptionFileStream.write(nodeDescriptionFormat.format(nodesMap.getId(node) - 1, node.getNormalizedToken(), node.getTypeLabel()) + "\n") + for (node in labellingResult.root.preOrder()) { + descriptionFileStream.write( + nodeDescriptionFormat.format( + nodesMap.getId(node) - 1, + node.getNormalizedToken(), + node.getTypeLabel() + ) + "\n" + ) } ++index } @@ -48,7 +53,7 @@ class DotAstStorage(override val directoryPath: String) : AstStorage { descriptionFileStream.close() } - private fun dumpAst(root: Node, file: File, astName: String) : RankedIncrementalIdStorage { + private fun dumpAst(root: Node, file: File, astName: String): RankedIncrementalIdStorage { val nodesMap = RankedIncrementalIdStorage() // dot parsers (e.g. pydot) can't parse graph/digraph if its name is "graph" val fixedAstName = if (astName == "graph" || astName == "digraph") "_$astName" else astName @@ -59,7 +64,7 @@ class DotAstStorage(override val directoryPath: String) : AstStorage { val rootId = nodesMap.record(node) - 1 val childrenIds = node.getChildren().map { nodesMap.record(it) - 1 } out.println( - "$rootId -- {${childrenIds.joinToString(" ") { it.toString() }}};" + "$rootId -- {${childrenIds.joinToString(" ") { it.toString() }}};" ) } diff --git a/src/main/kotlin/astminer/storage/LabelingResult.kt b/src/main/kotlin/astminer/storage/LabelingResult.kt new file mode 100644 index 00000000..2632ca1e --- /dev/null +++ b/src/main/kotlin/astminer/storage/LabelingResult.kt @@ -0,0 +1,16 @@ +package astminer.storage + +import astminer.cli.LabeledParseResult +import astminer.common.model.Node +import astminer.common.model.ParseResult + +data class LabellingResult(val root: T, val label: String, val filePath: String) + +fun 
LabeledParseResult.toLabellingResult(filePath: String) = + LabellingResult(this.root, this.label, filePath) + +fun ParseResult.labeledWith(label: String) = this.root?.let { LabellingResult(it, label, this.filePath) } + +fun ParseResult.labeledWithFilePath() = this.labeledWith(this.filePath) + +fun T.labeledWithFilePath(filePath: String) = LabellingResult(this, filePath, filePath) diff --git a/src/main/kotlin/astminer/storage/Storage.kt b/src/main/kotlin/astminer/storage/Storage.kt new file mode 100644 index 00000000..5ef1a057 --- /dev/null +++ b/src/main/kotlin/astminer/storage/Storage.kt @@ -0,0 +1,10 @@ +package astminer.storage + +import astminer.common.model.Node + +interface Storage { + val outputDirectoryPath: String + + fun store(labellingResult: LabellingResult) + fun close() +} diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index cbeaeed1..32793abb 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -1,6 +1,7 @@ package astminer.common import astminer.common.model.Node +import astminer.storage.LabellingResult class DummyNode(val data: String, val childrenList: MutableList) : Node { @@ -59,3 +60,5 @@ fun createSmallTree(): DummyNode { return node1 } + +fun T.labeledWith(label: String) = LabellingResult(this, label, "") diff --git a/src/test/kotlin/astminer/ast/CsvAstStorageTest.kt b/src/test/kotlin/astminer/storage/CsvAstStorageTest.kt similarity index 73% rename from src/test/kotlin/astminer/ast/CsvAstStorageTest.kt rename to src/test/kotlin/astminer/storage/CsvAstStorageTest.kt index 5f4398cf..1fa88d04 100644 --- a/src/test/kotlin/astminer/ast/CsvAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/CsvAstStorageTest.kt @@ -1,6 +1,7 @@ -package astminer.ast +package astminer.storage import astminer.common.createSmallTree +import astminer.common.labeledWith import org.junit.Assert import org.junit.Test @@ -10,7 +11,7 @@ class 
CsvAstStorageTest { fun testAstString() { val root = createSmallTree() val storage = CsvAstStorage(".") - storage.store(root, "entityId") + storage.store(root.labeledWith("entityId")) Assert.assertEquals(storage.astString(root), "1 1{2 2{}3 3{4 4{}}}") } diff --git a/src/test/kotlin/astminer/ast/DotAstStorageTest.kt b/src/test/kotlin/astminer/storage/DotAstStorageTest.kt similarity index 95% rename from src/test/kotlin/astminer/ast/DotAstStorageTest.kt rename to src/test/kotlin/astminer/storage/DotAstStorageTest.kt index d98ce4f2..a192dc5f 100644 --- a/src/test/kotlin/astminer/ast/DotAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/DotAstStorageTest.kt @@ -1,6 +1,7 @@ -package astminer.ast +package astminer.storage import astminer.common.createSmallTree +import astminer.common.labeledWith import org.junit.Test import java.io.File import kotlin.test.assertEquals @@ -11,7 +12,7 @@ class DotAstStorageTest { fun testDotStorageOnSmallTree() { val root = createSmallTree() val storage = DotAstStorage("test_examples") - storage.store(root, "entityId") + storage.store(root.labeledWith("entityId")) storage.close() From 55bc1ba61a33153c1628bba6e30ab6fce825f4c5 Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 1 Apr 2021 16:12:07 +0500 Subject: [PATCH 043/308] refactor token normalization and token splitting into a new entity 'TokenProcessor' --- .../java/astminer/examples/AllJavaFiles.java | 9 +++----- .../kotlin/astminer/cli/Code2VecExtractor.kt | 14 ++++++++----- .../astminer/cli/PathContextsExtractor.kt | 5 +++-- .../kotlin/astminer/examples/AllCppFiles.kt | 7 +++---- .../kotlin/astminer/examples/AllJavaFiles.kt | 3 ++- .../astminer/examples/AllJavaFilesGumTree.kt | 3 ++- .../astminer/examples/AllJavaMethods.kt | 3 ++- .../astminer/examples/AllJavaScriptFiles.kt | 3 ++- .../astminer/examples/AllPythonFiles.kt | 3 ++- .../astminer/examples/AllPythonMethods.kt | 3 ++- .../astminer/storage/Code2VecPathStorage.kt | 8 +++++-- .../astminer/storage/CountingPathStorage.kt 
| 21 ++++++++++--------- .../kotlin/astminer/storage/CsvPathStorage.kt | 11 ++++++++-- .../astminer/storage/TokenProcessors.kt | 14 +++++++++++++ 14 files changed, 70 insertions(+), 37 deletions(-) create mode 100644 src/main/kotlin/astminer/storage/TokenProcessors.kt diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 7cc06779..64cf29e6 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -2,10 +2,7 @@ import astminer.common.model.*; import astminer.parse.java.GumTreeJavaParser; -import astminer.storage.CountingPathStorage; -import astminer.storage.CountingPathStorageConfig; -import astminer.storage.CsvPathStorage; -import astminer.storage.LabellingResult; +import astminer.storage.*; import java.io.FileInputStream; import java.io.IOException; @@ -18,8 +15,8 @@ public class AllJavaFiles { private static final String OUTPUT_FOLDER = "out_examples/allJavaFiles_GumTree_java"; public static void runExample() { - final CountingPathStorageConfig config = new CountingPathStorageConfig(5, 5, false, Long.MAX_VALUE, Long.MAX_VALUE, Integer.MAX_VALUE); - final CountingPathStorage pathStorage = new CsvPathStorage(OUTPUT_FOLDER, config); + final CountingPathStorageConfig config = new CountingPathStorageConfig(5, 5, Long.MAX_VALUE, Long.MAX_VALUE, Integer.MAX_VALUE); + final CountingPathStorage pathStorage = new CsvPathStorage(OUTPUT_FOLDER, config, TokenProcessorsKt.getIdentityTokenProcessor()); final Path inputFolder = Paths.get(INPUT_FOLDER); diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt index 53a446df..c3e121c6 100644 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt @@ -3,9 +3,7 @@ package astminer.cli import astminer.common.getProjectFilesWithExtension import astminer.common.model.Node import 
astminer.common.model.ParseResult -import astminer.storage.Code2VecPathStorage -import astminer.storage.CountingPathStorageConfig -import astminer.storage.toLabellingResult +import astminer.storage.* import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* import com.github.ajalt.clikt.parameters.types.int @@ -134,7 +132,6 @@ class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null val storageConfig = CountingPathStorageConfig( maxPathLength, maxPathWidth, - true, maxTokens, maxPaths, maxPathContexts @@ -142,8 +139,14 @@ class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null for (extension in extensions) { val outputDirForLanguage = outputDir.resolve(extension) outputDirForLanguage.mkdir() + // Choose how to process tokens + val tokenProcessor = if (isTokenSplitted) { + splitTokenProcessor + } else { + code2vecTokenProcessor + } // Choose type of storage - val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig) + val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig, tokenProcessor) // Choose type of parser val parser = getParser( extension, @@ -151,6 +154,7 @@ class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? 
= null ) // Parse project one file at a time parser.parseFiles(getProjectFilesWithExtension(File(projectRoot), extension)) { + // TODO: might not be needed normalizeParseResult(it, isTokenSplitted) // Retrieve labeled data extractFromTree(it, storage, labelExtractor) diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index 96cf4dd9..27b7011a 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -9,6 +9,7 @@ import astminer.parse.cpp.FuzzyCppParser import astminer.parse.java.GumTreeJavaParser import astminer.storage.Code2VecPathStorage import astminer.storage.CountingPathStorageConfig +import astminer.storage.splitTokenProcessor import astminer.storage.toLabellingResult import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* @@ -104,7 +105,6 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? = val storageConfig = CountingPathStorageConfig( maxPathLength, maxPathWidth, - true, maxTokens, maxPaths, maxPathContexts @@ -114,10 +114,11 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? 
= val outputDirForLanguage = outputDir.resolve(extension) outputDirForLanguage.mkdir() - val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig) + val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig, splitTokenProcessor) val files = getProjectFilesWithExtension(File(projectRoot), extension) parser.parseFiles(files) { parseResult -> + // TODO: might not be needed normalizeParseResult(parseResult, splitTokens = true) val labeledParseResults = labelExtractor.toLabeledData(parseResult) labeledParseResults.forEach { diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index 5468255f..d0491f70 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -3,10 +3,9 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension +import astminer.common.model.Node import astminer.parse.cpp.FuzzyCppParser -import astminer.storage.CsvPathStorage -import astminer.storage.CountingPathStorageConfig -import astminer.storage.labeledWithFilePath +import astminer.storage.* import java.io.File // Retrieve paths from .cpp preprocessed files, using a fuzzyc2cpg parser. 
@@ -14,7 +13,7 @@ fun allCppFiles() { val inputDir = File("src/test/resources/examples/cpp") val outputDir = "out_examples/allCppFiles" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5, false)) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5)) val parser = FuzzyCppParser() val preprocOutputFolder = File("preprocessed") diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index 448ed6ab..98c9f87b 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -4,6 +4,7 @@ import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser import astminer.storage.CsvPathStorage import astminer.storage.CountingPathStorageConfig +import astminer.storage.identityTokenProcessor import astminer.storage.labeledWithFilePath import java.io.File @@ -12,7 +13,7 @@ fun allJavaFiles() { val inputDir = "src/test/resources/examples/" val outputDir = "out_examples/allJavaFilesAntlr" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5, false)) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5)) File(inputDir).forFilesWithSuffix("11.java") { file -> val node = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index d83c1a73..300a491c 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -4,6 +4,7 @@ import astminer.common.getProjectFilesWithExtension import astminer.parse.java.GumTreeJavaParser import astminer.storage.CsvPathStorage import astminer.storage.CountingPathStorageConfig +import astminer.storage.identityTokenProcessor import astminer.storage.labeledWithFilePath import java.io.File @@ -12,7 +13,7 @@ 
fun allJavaFilesGumTree() { val inputDir = "src/test/resources/gumTreeMethodSplitter/" val outputDir = "out_examples/allJavaFilesGumTree" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5, false)) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5)) val files = getProjectFilesWithExtension(File(inputDir), "java") GumTreeJavaParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index a849f65b..8f6045b1 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -7,6 +7,7 @@ import astminer.parse.java.GumTreeJavaMethodSplitter import astminer.storage.CsvPathStorage import astminer.storage.CountingPathStorageConfig import astminer.storage.LabellingResult +import astminer.storage.splitTokenProcessor import java.io.File @@ -24,7 +25,7 @@ fun allJavaMethods() { val inputDir = "src/test/resources/gumTreeMethodSplitter" val outputDir = "out_examples/allJavaMethods" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5)) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5), splitTokenProcessor) File(inputDir).forFilesWithSuffix(".java") { file -> //parse file diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index 406ff953..9327835e 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -5,13 +5,14 @@ import astminer.parse.antlr.javascript.JavaScriptParser import astminer.storage.CsvPathStorage import astminer.storage.CountingPathStorageConfig import astminer.storage.labeledWithFilePath +import astminer.storage.splitTokenProcessor import java.io.File fun allJavaScriptFiles() { val folder = "src/test/resources/examples" val outputDir = 
"out_examples/allJavaScriptFilesAntlr" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5)) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5), splitTokenProcessor) val files = getProjectFilesWithExtension(File(folder), "js") JavaScriptParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index 6fa96eca..50488162 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -5,6 +5,7 @@ import astminer.parse.antlr.python.PythonParser import astminer.storage.CsvPathStorage import astminer.storage.CountingPathStorageConfig import astminer.storage.labeledWithFilePath +import astminer.storage.splitTokenProcessor import java.io.File @@ -12,7 +13,7 @@ fun allPythonFiles() { val inputDir = "src/test/resources/examples/" val outputDir = "out_examples/allPythonFiles" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5, false)) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5), splitTokenProcessor) val files = getProjectFilesWithExtension(File(inputDir), "py") PythonParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 5046214e..1b354551 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -7,6 +7,7 @@ import astminer.parse.python.GumTreePythonParser import astminer.storage.CsvPathStorage import astminer.storage.CountingPathStorageConfig import astminer.storage.LabellingResult +import astminer.storage.identityTokenProcessor import java.io.File private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { @@ -20,7 +21,7 @@ fun allPythonMethods() { val inputDir = "src/test/resources/gumTreeMethodSplitter" val outputDir 
= "out_examples/allPythonMethods" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5, false)) + val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5)) File(inputDir).forFilesWithSuffix(".py") { file -> // parse file diff --git a/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt b/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt index 09417241..d80af83e 100644 --- a/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt +++ b/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt @@ -2,8 +2,12 @@ package astminer.storage import astminer.common.model.PathContextId -class Code2VecPathStorage(outputDirectoryPath: String, config: CountingPathStorageConfig) : - CountingPathStorage(outputDirectoryPath, config) { +class Code2VecPathStorage( + outputDirectoryPath: String, + config: CountingPathStorageConfig, + tokenProcessor: TokenProcessor = code2vecTokenProcessor +) : + CountingPathStorage(outputDirectoryPath, config, tokenProcessor) { override fun pathContextIdsToString(pathContextIds: List, label: String): String { val joinedPathContexts = pathContextIds.joinToString(" ") { pathContextId -> diff --git a/src/main/kotlin/astminer/storage/CountingPathStorage.kt b/src/main/kotlin/astminer/storage/CountingPathStorage.kt index 8dd3b5a0..3f78a333 100644 --- a/src/main/kotlin/astminer/storage/CountingPathStorage.kt +++ b/src/main/kotlin/astminer/storage/CountingPathStorage.kt @@ -22,18 +22,24 @@ import java.io.PrintWriter data class CountingPathStorageConfig( val maxPathLength: Int, val maxPathWidth: Int, - val normalizeToken: Boolean = true, // TODO: discuss this val maxTokens: Long = Long.MAX_VALUE, val maxPaths: Long = Long.MAX_VALUE, val maxPathContextsPerEntity: Int = Int.MAX_VALUE ) +enum class TokenProcessing { + Split, + LeaveUnchanged, + Code2VecNormalize +} + /** * abstract Base class */ abstract class CountingPathStorage( final override val outputDirectoryPath: String, - private val config: 
CountingPathStorageConfig + private val config: CountingPathStorageConfig, + private val tokenProcessor: TokenProcessor ) : Storage { private val pathMiner = PathMiner(PathRetrievalSettings(config.maxPathLength, config.maxPathWidth)) @@ -54,6 +60,8 @@ abstract class CountingPathStorage( abstract fun pathContextIdsToString(pathContextIds: List, label: String): String + private fun Node.getProcessedToken(): String = this.run(tokenProcessor) + private fun dumpPathContexts(labeledPathContextIds: LabeledPathContextIds) { val pathContextIdsString = labeledPathContextIds.pathContexts.filter { tokensMap.getIdRank(it.startTokenId) <= config.maxTokens && @@ -74,14 +82,7 @@ abstract class CountingPathStorage( private fun retrieveLabeledPathContexts(labellingResult: LabellingResult): LabeledPathContexts { val paths = pathMiner.retrievePaths(labellingResult.root).take(config.maxPathContextsPerEntity) return LabeledPathContexts(labellingResult.label, paths.map { astPath -> - toPathContext(astPath) { node -> - // TODO: maybe this whole hassle is not needed - if (config.normalizeToken) { - node.getNormalizedToken() - } else { - node.getToken() - } - } + toPathContext(astPath) { node -> node.getProcessedToken() } }) } diff --git a/src/main/kotlin/astminer/storage/CsvPathStorage.kt b/src/main/kotlin/astminer/storage/CsvPathStorage.kt index 60e1ae93..90526afd 100644 --- a/src/main/kotlin/astminer/storage/CsvPathStorage.kt +++ b/src/main/kotlin/astminer/storage/CsvPathStorage.kt @@ -1,9 +1,16 @@ package astminer.storage +import astminer.cli.separateToken +import astminer.common.model.Node import astminer.common.model.PathContextId -class CsvPathStorage(outputDirectoryPath: String, config: CountingPathStorageConfig) : - CountingPathStorage(outputDirectoryPath, config) { +class CsvPathStorage( + outputDirectoryPath: String, + config: CountingPathStorageConfig, + tokenProcessor: TokenProcessor = identityTokenProcessor +) : + CountingPathStorage(outputDirectoryPath, config, 
tokenProcessor) { + override fun pathContextIdsToString(pathContextIds: List, label: String): String { val joinedPathContexts = pathContextIds.joinToString(";") { pathContextId -> "${pathContextId.startTokenId} ${pathContextId.pathId} ${pathContextId.endTokenId}" diff --git a/src/main/kotlin/astminer/storage/TokenProcessors.kt b/src/main/kotlin/astminer/storage/TokenProcessors.kt new file mode 100644 index 00000000..5fb875f3 --- /dev/null +++ b/src/main/kotlin/astminer/storage/TokenProcessors.kt @@ -0,0 +1,14 @@ +package astminer.storage + +import astminer.cli.separateToken +import astminer.common.DEFAULT_TOKEN +import astminer.common.model.Node +import astminer.common.normalizeToken + +typealias TokenProcessor = (Node) -> String + +val splitTokenProcessor: TokenProcessor = { node -> separateToken(node.getToken()) } + +val identityTokenProcessor: TokenProcessor = { node -> node.getToken() } + +val code2vecTokenProcessor: TokenProcessor = { node -> normalizeToken(node.getToken(), DEFAULT_TOKEN) } From 3e4af25a37c85641c3b8b07a8565112474fa7bbd Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 1 Apr 2021 16:32:24 +0500 Subject: [PATCH 044/308] added documentation for refactored and new entities --- .../astminer/storage/CountingPathStorage.kt | 21 +++++++++---------- .../kotlin/astminer/storage/CsvPathStorage.kt | 2 -- .../kotlin/astminer/storage/LabelingResult.kt | 6 ++++++ .../astminer/storage/TokenProcessors.kt | 10 +++++++++ 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/main/kotlin/astminer/storage/CountingPathStorage.kt b/src/main/kotlin/astminer/storage/CountingPathStorage.kt index 3f78a333..0c6a71e8 100644 --- a/src/main/kotlin/astminer/storage/CountingPathStorage.kt +++ b/src/main/kotlin/astminer/storage/CountingPathStorage.kt @@ -1,6 +1,5 @@ package astminer.storage -import astminer.common.getNormalizedToken import astminer.common.model.* import astminer.common.storage.* import astminer.paths.PathMiner @@ -11,13 +10,13 @@ import 
java.io.PrintWriter // TODO: finish the documentation /** - * Config for CountingPathStorage which contains several hyperparameters. + * Config for CountingPathStorage which contains all hyperparameters for path extraction. + * @property maxPathLength The maximum length of a single path (based on the formal math definition of path length) + * @property maxPathWidth The maximum width of a single path (based on the formal math definition of path width) * @property maxTokens ?? * @property maxPaths ?? * @property maxPathContextsPerEntity The maximum number of path contexts that should be extracted from LabeledParseResult. * In other words, the maximum number of path contexts to save from each file/method (depending on granularity) - * @property maxPathLength The maximum length of a single path (based on the formal math definition of path length) - * @property maxPathWidth The maximum width of a single path (based on the formal math definition of path width) */ data class CountingPathStorageConfig( val maxPathLength: Int, @@ -27,14 +26,11 @@ data class CountingPathStorageConfig( val maxPathContextsPerEntity: Int = Int.MAX_VALUE ) -enum class TokenProcessing { - Split, - LeaveUnchanged, - Code2VecNormalize -} - /** - * abstract Base class + * Base class for all path storages. Extracts paths from given LabellingResult and stores it in a specified format. + * @property outputDirectoryPath The path to the output directory. + * @property config The config that contains hyperparameters for path extraction. + * @property tokenProcessor The token processor that is used to extract tokens from nodes. */ abstract class CountingPathStorage( final override val outputDirectoryPath: String, @@ -86,6 +82,9 @@ abstract class CountingPathStorage( }) } + /** + * Extract paths from [labellingResult] and store them in the specified format. 
+ */ override fun store(labellingResult: LabellingResult) { val labeledPathContexts = retrieveLabeledPathContexts(labellingResult) val labeledPathContextIds = LabeledPathContextIds( diff --git a/src/main/kotlin/astminer/storage/CsvPathStorage.kt b/src/main/kotlin/astminer/storage/CsvPathStorage.kt index 90526afd..321ca798 100644 --- a/src/main/kotlin/astminer/storage/CsvPathStorage.kt +++ b/src/main/kotlin/astminer/storage/CsvPathStorage.kt @@ -1,7 +1,5 @@ package astminer.storage -import astminer.cli.separateToken -import astminer.common.model.Node import astminer.common.model.PathContextId class CsvPathStorage( diff --git a/src/main/kotlin/astminer/storage/LabelingResult.kt b/src/main/kotlin/astminer/storage/LabelingResult.kt index 2632ca1e..68e3ba8b 100644 --- a/src/main/kotlin/astminer/storage/LabelingResult.kt +++ b/src/main/kotlin/astminer/storage/LabelingResult.kt @@ -4,6 +4,12 @@ import astminer.cli.LabeledParseResult import astminer.common.model.Node import astminer.common.model.ParseResult +/** + * An AST subtree with a label and the path of the source file. + * @property root The root of the AST subtree. + * @property label Any label for this subtree. + * @property filePath The path to the source file where the AST is from. + */ data class LabellingResult(val root: T, val label: String, val filePath: String) fun LabeledParseResult.toLabellingResult(filePath: String) = diff --git a/src/main/kotlin/astminer/storage/TokenProcessors.kt b/src/main/kotlin/astminer/storage/TokenProcessors.kt index 5fb875f3..03dbc9b5 100644 --- a/src/main/kotlin/astminer/storage/TokenProcessors.kt +++ b/src/main/kotlin/astminer/storage/TokenProcessors.kt @@ -5,10 +5,20 @@ import astminer.common.DEFAULT_TOKEN import astminer.common.model.Node import astminer.common.normalizeToken +/** + * A function that should calculate a node's token. 
+ */ typealias TokenProcessor = (Node) -> String val splitTokenProcessor: TokenProcessor = { node -> separateToken(node.getToken()) } +/** + * Returns the original unchanged token. + * Works like the identity function id: x --> x, hence the name. + */ val identityTokenProcessor: TokenProcessor = { node -> node.getToken() } +/** + * Processes the token according to the original code2vec implementation in order to match their behavior. + */ val code2vecTokenProcessor: TokenProcessor = { node -> normalizeToken(node.getToken(), DEFAULT_TOKEN) } From d70df00c75a5aff449d8dad72ffe982762254067 Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 4 Apr 2021 19:01:45 +0300 Subject: [PATCH 045/308] files moved and node renamed --- .../kotlin/astminer/cli/LabelExtractors.kt | 8 +-- .../astminer/parse/antlr/AntlrHandler.kt | 12 ++-- .../antlr/{SimpleNode.kt => AntlrNode.kt} | 4 +- .../kotlin/astminer/parse/antlr/AntlrUtil.kt | 28 ++++---- .../parse/antlr/java/JavaMethodSplitter.kt | 38 +++++----- .../astminer/parse/antlr/java/JavaParser.kt | 6 +- .../javascript/JavaScriptMethodSplitter.kt | 70 +++++++++---------- .../antlr/javascript/JavaScriptParser.kt | 6 +- .../antlr/python/PythonMethodSplitter.kt | 28 ++++---- .../parse/antlr/python/PythonParser.kt | 6 +- .../parse/{ => fuzzy/cpp}/FuzzyHandler.kt | 0 .../kotlin/astminer/cli/LabelExtractorTest.kt | 12 ++-- .../antlr/java/JavaMethodSplitterTest.kt | 4 +- .../JavaScriptMethodSplitterTest.kt | 6 +- .../antlr/python/PythonMethodSplitterTest.kt | 4 +- .../astminer/paths/PathWorkerTestUtil.kt | 8 +-- 16 files changed, 120 insertions(+), 120 deletions(-) rename src/main/kotlin/astminer/parse/antlr/{SimpleNode.kt => AntlrNode.kt} (87%) rename src/main/kotlin/astminer/parse/{ => fuzzy/cpp}/FuzzyHandler.kt (100%) diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 6f21cefc..14a48c9f 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ 
b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -5,7 +5,7 @@ import astminer.common.model.Node import astminer.common.model.ParseResult import astminer.common.preOrder import astminer.common.setNormalizedToken -import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.javascript.JavaScriptMethodSplitter import astminer.parse.antlr.python.PythonMethodSplitter @@ -68,7 +68,7 @@ abstract class MethodLabelExtractor( } "antlr" -> { val methodSplitter = JavaMethodSplitter() - methodSplitter.splitIntoMethods(root as SimpleNode) + methodSplitter.splitIntoMethods(root as AntlrNode) } else -> { throw UnsupportedOperationException("Unsupported parser $javaParser") @@ -83,7 +83,7 @@ abstract class MethodLabelExtractor( } "antlr" -> { val methodSplitter = PythonMethodSplitter() - methodSplitter.splitIntoMethods(root as SimpleNode) + methodSplitter.splitIntoMethods(root as AntlrNode) } else -> { throw UnsupportedOperationException("Unsupported parser $pythonParser") @@ -92,7 +92,7 @@ abstract class MethodLabelExtractor( } "js" -> { val methodSplitter = JavaScriptMethodSplitter() - methodSplitter.splitIntoMethods(root as SimpleNode) + methodSplitter.splitIntoMethods(root as AntlrNode) } else -> throw UnsupportedOperationException("Unsupported extension $fileExtension") }.filter { methodInfo -> diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt index 34740c48..062efe17 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt @@ -14,8 +14,8 @@ import java.io.File object AntlrJavaHandlerFactory : HandlerFactory { override fun createHandler(file: File) = AntlrJavaHandler(file) - class AntlrJavaHandler(file: File) : LanguageHandler() { - override val parseResult: ParseResult = JavaParser().parseFile(file) + class AntlrJavaHandler(file: File) : 
LanguageHandler() { + override val parseResult: ParseResult = JavaParser().parseFile(file) override val splitter = JavaMethodSplitter() } } @@ -23,8 +23,8 @@ object AntlrJavaHandlerFactory : HandlerFactory { object AntlrPythonHandlerFactory : HandlerFactory { override fun createHandler(file: File) = AntlrPythonHandler(file) - class AntlrPythonHandler(file: File) : LanguageHandler() { - override val parseResult: ParseResult = PythonParser().parseFile(file) + class AntlrPythonHandler(file: File) : LanguageHandler() { + override val parseResult: ParseResult = PythonParser().parseFile(file) override val splitter = PythonMethodSplitter() } } @@ -32,8 +32,8 @@ object AntlrPythonHandlerFactory : HandlerFactory { object AntlrJavascriptHandlerFactory : HandlerFactory { override fun createHandler(file: File) = AntlrJavascriptHandler(file) - class AntlrJavascriptHandler(file: File) : LanguageHandler() { - override val parseResult: ParseResult = JavaScriptParser().parseFile(file) + class AntlrJavascriptHandler(file: File) : LanguageHandler() { + override val parseResult: ParseResult = JavaScriptParser().parseFile(file) override val splitter = JavaScriptMethodSplitter() } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/SimpleNode.kt b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt similarity index 87% rename from src/main/kotlin/astminer/parse/antlr/SimpleNode.kt rename to src/main/kotlin/astminer/parse/antlr/AntlrNode.kt index 3bf196b6..5f068fc7 100644 --- a/src/main/kotlin/astminer/parse/antlr/SimpleNode.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt @@ -2,14 +2,14 @@ package astminer.parse.antlr import astminer.common.model.Node -class SimpleNode(private val typeLabel: String, private var parent: Node?, private var token: String?) : Node { +class AntlrNode(private val typeLabel: String, private var parent: Node?, private var token: String?) 
: Node { private val metadata: MutableMap = HashMap() private var children: MutableList = mutableListOf() fun setChildren(newChildren: List) { children = newChildren.toMutableList() - children.forEach { (it as SimpleNode).setParent(this) } + children.forEach { (it as AntlrNode).setParent(this) } } fun setParent(newParent: Node?) { diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 80cb6ab8..9ac2530e 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -6,13 +6,13 @@ import org.antlr.v4.runtime.Vocabulary import org.antlr.v4.runtime.tree.ErrorNode import org.antlr.v4.runtime.tree.TerminalNode -fun convertAntlrTree(tree: ParserRuleContext, ruleNames: Array, vocabulary: Vocabulary): SimpleNode { +fun convertAntlrTree(tree: ParserRuleContext, ruleNames: Array, vocabulary: Vocabulary): AntlrNode { return compressTree(convertRuleContext(tree, ruleNames, null, vocabulary)) } -private fun convertRuleContext(ruleContext: ParserRuleContext, ruleNames: Array, parent: Node?, vocabulary: Vocabulary): SimpleNode { +private fun convertRuleContext(ruleContext: ParserRuleContext, ruleNames: Array, parent: Node?, vocabulary: Vocabulary): AntlrNode { val typeLabel = ruleNames[ruleContext.ruleIndex] - val currentNode = SimpleNode(typeLabel, parent, null) + val currentNode = AntlrNode(typeLabel, parent, null) val children: MutableList = ArrayList() ruleContext.children?.forEach { @@ -31,22 +31,22 @@ private fun convertRuleContext(ruleContext: ParserRuleContext, ruleNames: Array< return currentNode } -private fun convertTerminal(terminalNode: TerminalNode, parent: Node?, vocabulary: Vocabulary): SimpleNode { - return SimpleNode(vocabulary.getSymbolicName(terminalNode.symbol.type), parent, terminalNode.symbol.text) +private fun convertTerminal(terminalNode: TerminalNode, parent: Node?, vocabulary: Vocabulary): AntlrNode { + return 
AntlrNode(vocabulary.getSymbolicName(terminalNode.symbol.type), parent, terminalNode.symbol.text) } -private fun convertErrorNode(errorNode: ErrorNode, parent: Node?): SimpleNode { - return SimpleNode("Error", parent, errorNode.text) +private fun convertErrorNode(errorNode: ErrorNode, parent: Node?): AntlrNode { + return AntlrNode("Error", parent, errorNode.text) } /** * Remove intermediate nodes that have a single child. */ -fun simplifyTree(tree: SimpleNode): SimpleNode { +fun simplifyTree(tree: AntlrNode): AntlrNode { return if (tree.getChildren().size == 1) { - simplifyTree(tree.getChildren().first() as SimpleNode) + simplifyTree(tree.getChildren().first() as AntlrNode) } else { - tree.setChildren(tree.getChildren().map { simplifyTree(it as SimpleNode) }.toMutableList()) + tree.setChildren(tree.getChildren().map { simplifyTree(it as AntlrNode) }.toMutableList()) tree } } @@ -54,10 +54,10 @@ fun simplifyTree(tree: SimpleNode): SimpleNode { /** * Compress paths of intermediate nodes that have a single child into individual nodes. 
*/ -fun compressTree(root: SimpleNode): SimpleNode { +fun compressTree(root: AntlrNode): AntlrNode { return if (root.getChildren().size == 1) { - val child = compressTree(root.getChildren().first() as SimpleNode) - val compressedNode = SimpleNode( + val child = compressTree(root.getChildren().first() as AntlrNode) + val compressedNode = AntlrNode( root.getTypeLabel() + "|" + child.getTypeLabel(), root.getParent(), child.getToken() @@ -65,7 +65,7 @@ fun compressTree(root: SimpleNode): SimpleNode { compressedNode.setChildren(child.getChildren()) compressedNode } else { - root.setChildren(root.getChildren().map { compressTree(it as SimpleNode) }.toMutableList()) + root.setChildren(root.getChildren().map { compressTree(it as AntlrNode) }.toMutableList()) root } } diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt index 48b1577c..1bb8e6fe 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt @@ -2,10 +2,10 @@ package astminer.parse.antlr.java import astminer.common.* import astminer.common.model.* -import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.decompressTypeLabel -class JavaMethodSplitter : TreeMethodSplitter { +class JavaMethodSplitter : TreeMethodSplitter { companion object { private const val METHOD_NODE = "methodDeclaration" private const val METHOD_RETURN_TYPE_NODE = "typeTypeOrVoid" @@ -21,23 +21,23 @@ class JavaMethodSplitter : TreeMethodSplitter { private const val PARAMETER_NAME_NODE = "variableDeclaratorId" } - override fun splitIntoMethods(root: SimpleNode): Collection> { + override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { decompressTypeLabel(it.getTypeLabel()).last() == METHOD_NODE } - return methodRoots.map { collectMethodInfo(it as SimpleNode) } + return 
methodRoots.map { collectMethodInfo(it as AntlrNode) } } - private fun collectMethodInfo(methodNode: SimpleNode): MethodInfo { - val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) as? SimpleNode - val methodReturnTypeNode = methodNode.getChildOfType(METHOD_RETURN_TYPE_NODE) as? SimpleNode + private fun collectMethodInfo(methodNode: AntlrNode): MethodInfo { + val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) as? AntlrNode + val methodReturnTypeNode = methodNode.getChildOfType(METHOD_RETURN_TYPE_NODE) as? AntlrNode methodReturnTypeNode?.setToken(collectParameterToken(methodReturnTypeNode)) val classRoot = getEnclosingClass(methodNode) - val className = classRoot?.getChildOfType(CLASS_NAME_NODE) as? SimpleNode + val className = classRoot?.getChildOfType(CLASS_NAME_NODE) as? AntlrNode - val parametersRoot = methodNode.getChildOfType(METHOD_PARAMETER_NODE) as? SimpleNode - val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) as? SimpleNode + val parametersRoot = methodNode.getChildOfType(METHOD_PARAMETER_NODE) as? AntlrNode + val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) as? AntlrNode val parametersList = when { innerParametersRoot != null -> getListOfParameters(innerParametersRoot) @@ -52,18 +52,18 @@ class JavaMethodSplitter : TreeMethodSplitter { ) } - private fun getEnclosingClass(node: SimpleNode): SimpleNode? { + private fun getEnclosingClass(node: AntlrNode): AntlrNode? { if (decompressTypeLabel(node.getTypeLabel()).last() == CLASS_DECLARATION_NODE) { return node } - val parentNode = node.getParent() as? SimpleNode + val parentNode = node.getParent() as? 
AntlrNode if (parentNode != null) { return getEnclosingClass(parentNode) } return null } - private fun getListOfParameters(parametersRoot: SimpleNode): List> { + private fun getListOfParameters(parametersRoot: AntlrNode): List> { if (METHOD_SINGLE_PARAMETER_NODE.contains(decompressTypeLabel(parametersRoot.getTypeLabel()).last())) { return listOf(getParameterInfoFromNode(parametersRoot)) } @@ -71,26 +71,26 @@ class JavaMethodSplitter : TreeMethodSplitter { val firstType = decompressTypeLabel(it.getTypeLabel()).first() METHOD_SINGLE_PARAMETER_NODE.contains(firstType) }.map { - getParameterInfoFromNode(it as SimpleNode) + getParameterInfoFromNode(it as AntlrNode) } } - private fun getParameterInfoFromNode(parameterRoot: SimpleNode): ParameterNode { - val returnTypeNode = parameterRoot.getChildOfType(PARAMETER_RETURN_TYPE_NODE) as? SimpleNode + private fun getParameterInfoFromNode(parameterRoot: AntlrNode): ParameterNode { + val returnTypeNode = parameterRoot.getChildOfType(PARAMETER_RETURN_TYPE_NODE) as? AntlrNode returnTypeNode?.setToken(collectParameterToken(returnTypeNode)) return ParameterNode( parameterRoot, returnTypeNode, - parameterRoot.getChildOfType(PARAMETER_NAME_NODE) as? SimpleNode + parameterRoot.getChildOfType(PARAMETER_NAME_NODE) as? 
AntlrNode ) } - private fun collectParameterToken(parameterRoot: SimpleNode): String { + private fun collectParameterToken(parameterRoot: AntlrNode): String { if (parameterRoot.isLeaf()) { return parameterRoot.getToken() } return parameterRoot.getChildren().joinToString(separator = "") { child -> - collectParameterToken(child as SimpleNode) + collectParameterToken(child as AntlrNode) } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt index 2dac7c70..c2a56e4f 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt @@ -1,7 +1,7 @@ package astminer.parse.antlr.java import astminer.common.model.Parser -import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.convertAntlrTree import org.antlr.v4.runtime.CommonTokenStream import me.vovak.antlr.parser.Java8Lexer @@ -10,8 +10,8 @@ import org.antlr.v4.runtime.CharStreams import java.io.InputStream import java.lang.Exception -class JavaParser : Parser { - override fun parseInputStream(content: InputStream): SimpleNode? { +class JavaParser : Parser { + override fun parseInputStream(content: InputStream): AntlrNode? 
{ return try { val lexer = Java8Lexer(CharStreams.fromStream(content)) lexer.removeErrorListeners() diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt index 58d74fbc..7b5b0eba 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt @@ -2,26 +2,26 @@ package astminer.parse.antlr.javascript import astminer.common.model.* import astminer.common.preOrder -import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.decompressTypeLabel /** * Get all methods (in JavaScript there are divided into functions, arrow functions and methods) and information * about their names, enclosing elements and parameters. */ -class JavaScriptMethodSplitter : TreeMethodSplitter { +class JavaScriptMethodSplitter : TreeMethodSplitter { companion object { private const val METHOD_NODE = "methodDefinition" private const val ARROW_NODE = "ARROW" private const val FUNCTION_NODE = "Function" } - override fun splitIntoMethods(root: SimpleNode): Collection> { + override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots: List = root.preOrder().map { node -> when { - node.isArrowElement() -> ArrowElement(node as SimpleNode) - node.isFunctionElement() -> FunctionElement(node as SimpleNode) - node.isMethodElement() -> MethodElement(node as SimpleNode) + node.isArrowElement() -> ArrowElement(node as AntlrNode) + node.isFunctionElement() -> FunctionElement(node as AntlrNode) + node.isMethodElement() -> MethodElement(node as AntlrNode) else -> null } }.filterNotNull() @@ -37,7 +37,7 @@ class JavaScriptMethodSplitter : TreeMethodSplitter { /** Base class for describing JavaScript methods, functions or arrow functions. 
*/ -abstract class JavaScriptElement(private val element: SimpleNode) { +abstract class JavaScriptElement(private val element: AntlrNode) { companion object { private val ENCLOSING_ELEMENT_NODES = listOf("functionDeclaration", "variableDeclaration", "classDeclaration", "methodDefinition") private const val ENCLOSING_ELEMENT_NAME_NODE = "Identifier" @@ -50,8 +50,8 @@ abstract class JavaScriptElement(private val element: SimpleNode) { * Gets [element]'s information about its root, name, enclosing elements and list of parameters. * @return element info */ - fun getElementInfo() : MethodInfo { - val enclosingRoot = getEnclosingElementRoot(element.getParent() as SimpleNode) + fun getElementInfo() : MethodInfo { + val enclosingRoot = getEnclosingElementRoot(element.getParent() as AntlrNode) return MethodInfo( MethodNode(element, null, getElementName()), ElementNode(enclosingRoot, getEnclosingElementName(enclosingRoot)), @@ -64,11 +64,11 @@ abstract class JavaScriptElement(private val element: SimpleNode) { * @param node for checking if it is root of enclosing element * @return root of enclosing element */ - open fun getEnclosingElementRoot(node: SimpleNode?): SimpleNode? { + open fun getEnclosingElementRoot(node: AntlrNode?): AntlrNode? { if (node == null || decompressTypeLabel(node.getTypeLabel()).intersect(ENCLOSING_ELEMENT_NODES).isNotEmpty()) { return node } - return getEnclosingElementRoot(node.getParent() as? SimpleNode) + return getEnclosingElementRoot(node.getParent() as? AntlrNode) } /** @@ -76,10 +76,10 @@ abstract class JavaScriptElement(private val element: SimpleNode) { * @param enclosingRoot - root of enclosing element * @return name node of enclosing element */ - open fun getEnclosingElementName(enclosingRoot: SimpleNode?) : SimpleNode? { + open fun getEnclosingElementName(enclosingRoot: AntlrNode?) : AntlrNode? { return enclosingRoot?.getChildren()?.firstOrNull { decompressTypeLabel(it.getTypeLabel()).last() == ENCLOSING_ELEMENT_NAME_NODE - } as? 
SimpleNode + } as? AntlrNode } /** @@ -87,7 +87,7 @@ abstract class JavaScriptElement(private val element: SimpleNode) { * @param parameterRoot - parent node of all parameter's nodes * @return list of [element]'s parameters */ - open fun getElementParametersList(parameterRoot: SimpleNode?): List> { + open fun getElementParametersList(parameterRoot: AntlrNode?): List> { return when { parameterRoot == null -> emptyList() parameterRoot.hasLastLabel(PARAMETER_NAME_NODE) -> listOf(ParameterNode(parameterRoot, null, parameterRoot)) @@ -101,11 +101,11 @@ abstract class JavaScriptElement(private val element: SimpleNode) { return decompressTypeLabel(getTypeLabel()).last() == typeLabel } - private fun SimpleNode.getItOrChildrenOfType(typeLabel: String) : List { + private fun AntlrNode.getItOrChildrenOfType(typeLabel: String) : List { return if (hasLastLabel(typeLabel)) { listOf(this) } else { - this.getChildrenOfType(typeLabel).mapNotNull { it as? SimpleNode } + this.getChildrenOfType(typeLabel).mapNotNull { it as? AntlrNode } } } @@ -113,72 +113,72 @@ abstract class JavaScriptElement(private val element: SimpleNode) { * Gets name of [element]. * @return [element]'s name node */ - abstract fun getElementName(): SimpleNode? + abstract fun getElementName(): AntlrNode? /** * Gets parent node of all [element]'s parameter nodes. * @return parameters' parent node */ - abstract fun getElementParametersRoot(): SimpleNode? + abstract fun getElementParametersRoot(): AntlrNode? } -class ArrowElement(private val element: SimpleNode) : JavaScriptElement(element) { +class ArrowElement(private val element: AntlrNode) : JavaScriptElement(element) { companion object { private const val ARROW_NAME_NODE = "Identifier" private const val ARROW_PARAMETER_NODE = "arrowFunctionParameters" private const val ARROW_PARAMETER_INNER_NODE = "formalParameterList" } - override fun getElementName(): SimpleNode? { + override fun getElementName(): AntlrNode? 
{ return element.getChildren().firstOrNull { it.getTypeLabel() == ARROW_NAME_NODE - } as? SimpleNode + } as? AntlrNode } - override fun getElementParametersRoot(): SimpleNode? { - val parameterRoot = element.getChildOfType(ARROW_PARAMETER_NODE) as? SimpleNode - return parameterRoot?.getChildOfType(ARROW_PARAMETER_INNER_NODE) as? SimpleNode ?: parameterRoot + override fun getElementParametersRoot(): AntlrNode? { + val parameterRoot = element.getChildOfType(ARROW_PARAMETER_NODE) as? AntlrNode + return parameterRoot?.getChildOfType(ARROW_PARAMETER_INNER_NODE) as? AntlrNode ?: parameterRoot } } -class FunctionElement(private val element: SimpleNode) : JavaScriptElement(element) { +class FunctionElement(private val element: AntlrNode) : JavaScriptElement(element) { companion object { private const val FUNCTION_NAME_NODE = "Identifier" private const val FUNCTION_PARAMETER_NODE = "formalParameterList" } - override fun getElementName(): SimpleNode? { + override fun getElementName(): AntlrNode? { return element.getChildren().firstOrNull { it.getTypeLabel() == FUNCTION_NAME_NODE - } as? SimpleNode + } as? AntlrNode } - override fun getElementParametersRoot(): SimpleNode? { - return element.getChildOfType(FUNCTION_PARAMETER_NODE) as? SimpleNode + override fun getElementParametersRoot(): AntlrNode? { + return element.getChildOfType(FUNCTION_PARAMETER_NODE) as? AntlrNode } } -class MethodElement(private val element: SimpleNode) : JavaScriptElement(element) { +class MethodElement(private val element: AntlrNode) : JavaScriptElement(element) { companion object { private val METHOD_GETTERS_SETTERS = listOf("getter", "setter") private const val METHOD_NAME_NODE = "identifierName" private const val METHOD_PARAMETER_NODE = "formalParameterList" } - override fun getElementName(): SimpleNode? { + override fun getElementName(): AntlrNode? { val methodNameParent = element.getChildren().firstOrNull { METHOD_GETTERS_SETTERS.contains(it.getTypeLabel()) - } as? SimpleNode ?: element + } as? 
AntlrNode ?: element return methodNameParent.getChildren().firstOrNull { decompressTypeLabel(it.getTypeLabel()).contains(METHOD_NAME_NODE) - } as? SimpleNode + } as? AntlrNode } - override fun getElementParametersRoot(): SimpleNode? { - return element.getChildOfType(METHOD_PARAMETER_NODE) as? SimpleNode + override fun getElementParametersRoot(): AntlrNode? { + return element.getChildOfType(METHOD_PARAMETER_NODE) as? AntlrNode } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt index 44ac5555..d832b98b 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt @@ -1,7 +1,7 @@ package astminer.parse.antlr.javascript import astminer.common.model.Parser -import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.convertAntlrTree import me.vovak.antlr.parser.JavaScriptLexer import me.vovak.antlr.parser.JavaScriptParser @@ -10,8 +10,8 @@ import org.antlr.v4.runtime.CommonTokenStream import java.io.InputStream import java.lang.Exception -class JavaScriptParser : Parser { - override fun parseInputStream(content: InputStream): SimpleNode? { +class JavaScriptParser : Parser { + override fun parseInputStream(content: InputStream): AntlrNode? 
{ return try { val lexer = JavaScriptLexer(CharStreams.fromStream(content)) lexer.removeErrorListeners() diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt index 746f52a2..553dbbe0 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt @@ -2,11 +2,11 @@ package astminer.parse.antlr.python import astminer.common.* import astminer.common.model.* -import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.decompressTypeLabel -class PythonMethodSplitter : TreeMethodSplitter { +class PythonMethodSplitter : TreeMethodSplitter { companion object { private const val METHOD_NODE = "funcdef" @@ -21,21 +21,21 @@ class PythonMethodSplitter : TreeMethodSplitter { private const val PARAMETER_NAME_NODE = "NAME" } - override fun splitIntoMethods(root: SimpleNode): Collection> { + override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { decompressTypeLabel(it.getTypeLabel()).last() == METHOD_NODE } - return methodRoots.map { collectMethodInfo(it as SimpleNode) } + return methodRoots.map { collectMethodInfo(it as AntlrNode) } } - private fun collectMethodInfo(methodNode: SimpleNode): MethodInfo { - val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) as? SimpleNode + private fun collectMethodInfo(methodNode: AntlrNode): MethodInfo { + val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) as? AntlrNode val classRoot = getEnclosingClass(methodNode) - val className = classRoot?.getChildOfType(CLASS_NAME_NODE) as? SimpleNode + val className = classRoot?.getChildOfType(CLASS_NAME_NODE) as? AntlrNode - val parametersRoot = methodNode.getChildOfType(METHOD_PARAMETER_NODE) as? SimpleNode - val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) as? 
SimpleNode + val parametersRoot = methodNode.getChildOfType(METHOD_PARAMETER_NODE) as? AntlrNode + val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) as? AntlrNode val parametersList = when { innerParametersRoot != null -> getListOfParameters(innerParametersRoot) @@ -50,26 +50,26 @@ class PythonMethodSplitter : TreeMethodSplitter { ) } - private fun getEnclosingClass(node: SimpleNode): SimpleNode? { + private fun getEnclosingClass(node: AntlrNode): AntlrNode? { if (decompressTypeLabel(node.getTypeLabel()).last() == CLASS_DECLARATION_NODE) { return node } - val parentNode = node.getParent() as? SimpleNode + val parentNode = node.getParent() as? AntlrNode if (parentNode != null) { return getEnclosingClass(parentNode) } return null } - private fun getListOfParameters(parameterRoot: SimpleNode): List> { + private fun getListOfParameters(parameterRoot: AntlrNode): List> { if (decompressTypeLabel(parameterRoot.getTypeLabel()).last() == PARAMETER_NAME_NODE) { return listOf(ParameterNode(parameterRoot, null, parameterRoot)) } return parameterRoot.getChildrenOfType(METHOD_SINGLE_PARAMETER_NODE).map { if (decompressTypeLabel(it.getTypeLabel()).last() == PARAMETER_NAME_NODE) { - ParameterNode(it as SimpleNode, null, it) + ParameterNode(it as AntlrNode, null, it) } else { - ParameterNode(it as SimpleNode, null, it.getChildOfType(PARAMETER_NAME_NODE) as SimpleNode) + ParameterNode(it as AntlrNode, null, it.getChildOfType(PARAMETER_NAME_NODE) as AntlrNode) } } } diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt index 4c1e021a..19156e5c 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt @@ -3,15 +3,15 @@ package astminer.parse.antlr.python import me.vovak.antlr.parser.Python3Lexer import me.vovak.antlr.parser.Python3Parser import astminer.common.model.Parser -import 
astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.convertAntlrTree import org.antlr.v4.runtime.CharStreams import org.antlr.v4.runtime.CommonTokenStream import java.io.InputStream import java.lang.Exception -class PythonParser : Parser { - override fun parseInputStream(content: InputStream): SimpleNode? { +class PythonParser : Parser { + override fun parseInputStream(content: InputStream): AntlrNode? { return try { val lexer = Python3Lexer(CharStreams.fromStream(content)) lexer.removeErrorListeners() diff --git a/src/main/kotlin/astminer/parse/FuzzyHandler.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt similarity index 100% rename from src/main/kotlin/astminer/parse/FuzzyHandler.kt rename to src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt diff --git a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt index 5e3c44be..9259b033 100644 --- a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt +++ b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt @@ -5,7 +5,7 @@ import astminer.common.model.ElementNode import astminer.common.model.MethodInfo import astminer.common.model.MethodNode import astminer.common.model.ParseResult -import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import org.junit.Test import kotlin.test.assertEquals import kotlin.test.assertTrue @@ -17,7 +17,7 @@ internal class LabelExtractorTest { private const val FOLDER = "folder" private const val FILENAME = "file.txt" private const val METHOD_NAME = "method" - private val DUMMY_ROOT = SimpleNode("", null, null) + private val DUMMY_ROOT = AntlrNode("", null, null) } @Test @@ -60,8 +60,8 @@ internal class LabelExtractorTest { @Test fun testMethodNameExtractor() { - val nameNode = SimpleNode("", DUMMY_ROOT, METHOD_NAME) - val methodInfo = MethodInfo( + val nameNode = AntlrNode("", DUMMY_ROOT, METHOD_NAME) + val methodInfo = MethodInfo( 
MethodNode(DUMMY_ROOT, null, nameNode), ElementNode(null, null), emptyList() @@ -75,8 +75,8 @@ internal class LabelExtractorTest { @Test fun testMethodNameExtractorHide() { - val nameNode = SimpleNode("", DUMMY_ROOT, METHOD_NAME) - val methodInfo = MethodInfo( + val nameNode = AntlrNode("", DUMMY_ROOT, METHOD_NAME) + val methodInfo = MethodInfo( MethodNode(DUMMY_ROOT, null, nameNode), ElementNode(null, null), emptyList() diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt index 4e89ffef..58f707ff 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt @@ -1,7 +1,7 @@ package astminer.parse.antlr.java import astminer.common.model.MethodInfo -import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import org.junit.Test import kotlin.test.assertEquals import java.io.File @@ -15,7 +15,7 @@ class JavaMethodSplitterTest { val parser = JavaParser() } - var methodInfos: Collection> = listOf() + var methodInfos: Collection> = listOf() @BeforeTest fun parseTree() { diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt index ee814f8b..2e29d9d6 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt @@ -1,7 +1,7 @@ package astminer.parse.antlr.javascript import astminer.common.model.MethodInfo -import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import org.junit.Test import java.io.File import kotlin.test.BeforeTest @@ -17,7 +17,7 @@ class JavaScriptMethodSplitterTest { val parser = JavaScriptParser() } - var methodInfos: Collection> = listOf() + var methodInfos: Collection> = listOf() 
@BeforeTest fun parseTree() { @@ -43,7 +43,7 @@ class JavaScriptMethodSplitterTest { } } - fun MethodInfo.getJsonInfo(): String { + fun MethodInfo.getJsonInfo(): String { return "info : {" + "name : ${name()}, " + "args : ${methodParameters.map { it.name() }.joinToString(", ")}, " + diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt index 2a668002..6391e041 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt @@ -1,7 +1,7 @@ package astminer.parse.antlr.python import astminer.common.model.MethodInfo -import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import org.junit.Test import kotlin.test.assertEquals import java.io.File @@ -16,7 +16,7 @@ class PythonMethodSplitterTest { val parser = PythonParser() } - var methodInfos: Collection> = listOf() + var methodInfos: Collection> = listOf() @BeforeTest fun parseTree() { diff --git a/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt b/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt index c195e54c..44877b87 100644 --- a/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt +++ b/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt @@ -3,14 +3,14 @@ package astminer.paths import astminer.common.model.ASTPath import astminer.common.model.Node import astminer.common.postOrder -import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import org.junit.Assert -fun simpleNode(number: Int, parent: Node?): SimpleNode { - return SimpleNode("$number", parent, "node_$number") +fun simpleNode(number: Int, parent: Node?): AntlrNode { + return AntlrNode("$number", parent, "node_$number") } -fun simpleNodes(numbers: List, parent: Node?): List { +fun simpleNodes(numbers: List, parent: Node?): List { return numbers.map { simpleNode(it, parent) } } From 
c55426b609907845d9fcd03c49d26b2831f2c219 Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 4 Apr 2021 20:27:05 +0300 Subject: [PATCH 046/308] get and set metadata removed --- src/main/kotlin/astminer/common/TreeUtil.kt | 6 +++--- .../astminer/common/model/ParsingModel.kt | 3 +-- .../kotlin/astminer/parse/antlr/AntlrNode.kt | 20 ++++++------------- .../kotlin/astminer/parse/antlr/AntlrUtil.kt | 2 +- .../astminer/parse/gumtree/GumTreeNode.kt | 10 +--------- src/main/kotlin/astminer/paths/PathWorker.kt | 4 ++-- src/test/kotlin/astminer/common/TestUtils.kt | 8 +------- 7 files changed, 15 insertions(+), 38 deletions(-) diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index 53d2127e..f94ae475 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -42,17 +42,17 @@ const val DEFAULT_TOKEN = "EMPTY_TOKEN" * Set normalized token for a node with default normalizing function. */ fun Node.setNormalizedToken() { - setMetadata(NORMALIZED_TOKEN_KEY, normalizeToken(getToken(), DEFAULT_TOKEN)) + metadata[NORMALIZED_TOKEN_KEY] = normalizeToken(getToken(), DEFAULT_TOKEN) } /** * Set normalized token to a custom value. 
*/ fun Node.setNormalizedToken(normalizedToken: String) { - setMetadata(NORMALIZED_TOKEN_KEY, normalizedToken) + metadata[NORMALIZED_TOKEN_KEY] = normalizedToken } -fun Node.getNormalizedToken(): String = getMetadata(NORMALIZED_TOKEN_KEY)?.toString() ?: DEFAULT_TOKEN +fun Node.getNormalizedToken(): String = metadata[NORMALIZED_TOKEN_KEY]?.toString() ?: DEFAULT_TOKEN /** * The function was adopted from the original code2vec implementation in order to match their behavior: diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index c5aba214..5cd10e6d 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -11,8 +11,7 @@ interface Node { fun getToken(): String fun isLeaf(): Boolean - fun getMetadata(key: String): Any? - fun setMetadata(key: String, value: Any) + val metadata: MutableMap fun prettyPrint(indent: Int = 0, indentSymbol: String = "--") { repeat(indent) { print(indentSymbol) } diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt index 5f068fc7..f08bcdb8 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt @@ -3,16 +3,16 @@ package astminer.parse.antlr import astminer.common.model.Node class AntlrNode(private val typeLabel: String, private var parent: Node?, private var token: String?) : Node { - private val metadata: MutableMap = HashMap() + override val metadata: MutableMap = HashMap() - private var children: MutableList = mutableListOf() + private var children: MutableList = mutableListOf() - fun setChildren(newChildren: List) { + fun setChildren(newChildren: List) { children = newChildren.toMutableList() - children.forEach { (it as AntlrNode).setParent(this) } + children.forEach { it.setParent(this) } } - fun setParent(newParent: Node?) { + private fun setParent(newParent: Node?) 
{ parent = newParent } @@ -20,7 +20,7 @@ class AntlrNode(private val typeLabel: String, private var parent: Node?, privat return typeLabel } - override fun getChildren(): MutableList { + override fun getChildren(): List { return children } @@ -40,14 +40,6 @@ class AntlrNode(private val typeLabel: String, private var parent: Node?, privat return children.isEmpty() } - override fun getMetadata(key: String): Any? { - return metadata[key] - } - - override fun setMetadata(key: String, value: Any) { - metadata[key] = value - } - override fun getChildrenOfType(typeLabel: String) = getChildren().filter { decompressTypeLabel(it.getTypeLabel()).firstOrNull() == typeLabel } diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 9ac2530e..ed4c6819 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -13,7 +13,7 @@ fun convertAntlrTree(tree: ParserRuleContext, ruleNames: Array, vocabula private fun convertRuleContext(ruleContext: ParserRuleContext, ruleNames: Array, parent: Node?, vocabulary: Vocabulary): AntlrNode { val typeLabel = ruleNames[ruleContext.ruleIndex] val currentNode = AntlrNode(typeLabel, parent, null) - val children: MutableList = ArrayList() + val children: MutableList = ArrayList() ruleContext.children?.forEach { if (it is TerminalNode) { diff --git a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt index c03dda2a..cdcf917d 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt @@ -5,15 +5,7 @@ import com.github.gumtreediff.tree.ITree import com.github.gumtreediff.tree.TreeContext class GumTreeNode(val wrappedNode: ITree, val context: TreeContext, val parent: GumTreeNode?): Node { - private val metadata: MutableMap = HashMap() - - override fun getMetadata(key: String): Any? 
{ - return metadata[key] - } - - override fun setMetadata(key: String, value: Any) { - metadata[key] = value - } + override val metadata: MutableMap = HashMap() override fun isLeaf(): Boolean { return childrenList.isEmpty() diff --git a/src/main/kotlin/astminer/paths/PathWorker.kt b/src/main/kotlin/astminer/paths/PathWorker.kt index ad1bceff..76da549e 100644 --- a/src/main/kotlin/astminer/paths/PathWorker.kt +++ b/src/main/kotlin/astminer/paths/PathWorker.kt @@ -12,10 +12,10 @@ class PathWorker { private const val PATH_PIECES_KEY = "path_pieces" private fun Node.setPathPieces(pathPieces: List) { - this.setMetadata(PATH_PIECES_KEY, pathPieces) + this.metadata[PATH_PIECES_KEY] = pathPieces } - private fun Node.getPathPieces(): List? = this.getMetadata(PATH_PIECES_KEY) as List? + private fun Node.getPathPieces(): List? = this.metadata[PATH_PIECES_KEY] as List? } fun retrievePaths(tree: Node) = retrievePaths(tree, null, null) diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index cbeaeed1..a0fbe314 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -4,13 +4,7 @@ import astminer.common.model.Node class DummyNode(val data: String, val childrenList: MutableList) : Node { - override fun setMetadata(key: String, value: Any) { - - } - - override fun getMetadata(key: String): Any? 
{ - return null - } + override val metadata: MutableMap = hashMapOf() override fun isLeaf(): Boolean { return childrenList.isEmpty() From bfbaf6c278c29ffe8905f809f88888008c549ac8 Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 4 Apr 2021 20:53:43 +0300 Subject: [PATCH 047/308] fuzzy support --- .../kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt | 4 ++-- src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt | 10 +--------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt index a5c18b36..67633eea 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt @@ -175,7 +175,7 @@ class FuzzyCppParser : Parser { val node = FuzzyNode(v.property(replaceableNodeKey.key).toString(), token, order) v.propertyKeys().forEach { k -> val property = v.property(k) ?: return@forEach - node.setMetadata(k, property.toString()) + node.metadata[k] = property.toString() } return node } @@ -191,7 +191,7 @@ class FuzzyCppParser : Parser { return@forEach } } - node.setMetadata(k, property) + node.metadata[k]= property } return node } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt index 574c5258..bc2f3243 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt @@ -11,7 +11,7 @@ import com.google.common.collect.TreeMultiset */ class FuzzyNode(private val typeLabel: String, private val token: String?, order: Int?) : Node { private val order = order ?: -1 - private val metadata: MutableMap = HashMap() + override val metadata: MutableMap = HashMap() private var parent: Node? 
= null private var children = TreeMultiset.create(compareBy( { it.order }, @@ -47,14 +47,6 @@ class FuzzyNode(private val typeLabel: String, private val token: String?, order return children.isEmpty() } - override fun getMetadata(key: String): Any? { - return metadata[key] - } - - override fun setMetadata(key: String, value: Any) { - metadata[key] = value - } - private fun setParent(node: Node) { parent = node } From 328a4e807aaeb71d76d49f61f649ffeaecb828a1 Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 4 Apr 2021 21:01:56 +0300 Subject: [PATCH 048/308] node test class refactored --- .../astminer/featureextraction/TreeFeatureTestUtil.kt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt index c0558bd6..faa0e572 100644 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt +++ b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt @@ -5,7 +5,7 @@ import astminer.common.model.Node class PrettyNode(private val type: String, private val token: String) : Node { private var children: MutableList = ArrayList() private var parent: PrettyNode? = null - private val metadata: MutableMap = HashMap() + override val metadata: MutableMap = HashMap() override fun getChildren(): MutableList = children @@ -34,10 +34,6 @@ class PrettyNode(private val type: String, private val token: String) : Node { override fun isLeaf(): Boolean = children.isEmpty() - override fun getMetadata(key: String): Any? 
= metadata[key] - - override fun setMetadata(key: String, value: Any) = metadata.set(key, value) - override fun getTypeLabel(): String = type override fun removeChildrenOfType(typeLabel: String) { From 9025ac458f0f832a1b3d108af78ab0c121bc7f1b Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 4 Apr 2021 21:05:47 +0300 Subject: [PATCH 049/308] unnecessary casts removed --- src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt | 8 ++++---- .../astminer/parse/antlr/java/JavaMethodSplitter.kt | 4 ++-- .../parse/antlr/javascript/JavaScriptMethodSplitter.kt | 10 +++++----- .../parse/antlr/python/PythonMethodSplitter.kt | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index ed4c6819..394feac9 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -44,9 +44,9 @@ private fun convertErrorNode(errorNode: ErrorNode, parent: Node?): AntlrNode { */ fun simplifyTree(tree: AntlrNode): AntlrNode { return if (tree.getChildren().size == 1) { - simplifyTree(tree.getChildren().first() as AntlrNode) + simplifyTree(tree.getChildren().first()) } else { - tree.setChildren(tree.getChildren().map { simplifyTree(it as AntlrNode) }.toMutableList()) + tree.setChildren(tree.getChildren().map { simplifyTree(it) }.toMutableList()) tree } } @@ -56,7 +56,7 @@ fun simplifyTree(tree: AntlrNode): AntlrNode { */ fun compressTree(root: AntlrNode): AntlrNode { return if (root.getChildren().size == 1) { - val child = compressTree(root.getChildren().first() as AntlrNode) + val child = compressTree(root.getChildren().first()) val compressedNode = AntlrNode( root.getTypeLabel() + "|" + child.getTypeLabel(), root.getParent(), @@ -65,7 +65,7 @@ fun compressTree(root: AntlrNode): AntlrNode { compressedNode.setChildren(child.getChildren()) compressedNode } else { - root.setChildren(root.getChildren().map { compressTree(it as 
AntlrNode) }.toMutableList()) + root.setChildren(root.getChildren().map { compressTree(it) }.toMutableList()) root } } diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt index 1bb8e6fe..170e1701 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt @@ -71,7 +71,7 @@ class JavaMethodSplitter : TreeMethodSplitter { val firstType = decompressTypeLabel(it.getTypeLabel()).first() METHOD_SINGLE_PARAMETER_NODE.contains(firstType) }.map { - getParameterInfoFromNode(it as AntlrNode) + getParameterInfoFromNode(it) } } @@ -90,7 +90,7 @@ class JavaMethodSplitter : TreeMethodSplitter { return parameterRoot.getToken() } return parameterRoot.getChildren().joinToString(separator = "") { child -> - collectParameterToken(child as AntlrNode) + collectParameterToken(child) } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt index 7b5b0eba..8182cd73 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt @@ -79,7 +79,7 @@ abstract class JavaScriptElement(private val element: AntlrNode) { open fun getEnclosingElementName(enclosingRoot: AntlrNode?) : AntlrNode? { return enclosingRoot?.getChildren()?.firstOrNull { decompressTypeLabel(it.getTypeLabel()).last() == ENCLOSING_ELEMENT_NAME_NODE - } as? AntlrNode + } } /** @@ -133,7 +133,7 @@ class ArrowElement(private val element: AntlrNode) : JavaScriptElement(element) override fun getElementName(): AntlrNode? { return element.getChildren().firstOrNull { it.getTypeLabel() == ARROW_NAME_NODE - } as? AntlrNode + } } override fun getElementParametersRoot(): AntlrNode? 
{ @@ -152,7 +152,7 @@ class FunctionElement(private val element: AntlrNode) : JavaScriptElement(elemen override fun getElementName(): AntlrNode? { return element.getChildren().firstOrNull { it.getTypeLabel() == FUNCTION_NAME_NODE - } as? AntlrNode + } } override fun getElementParametersRoot(): AntlrNode? { @@ -171,11 +171,11 @@ class MethodElement(private val element: AntlrNode) : JavaScriptElement(element) override fun getElementName(): AntlrNode? { val methodNameParent = element.getChildren().firstOrNull { METHOD_GETTERS_SETTERS.contains(it.getTypeLabel()) - } as? AntlrNode ?: element + } ?: element return methodNameParent.getChildren().firstOrNull { decompressTypeLabel(it.getTypeLabel()).contains(METHOD_NAME_NODE) - } as? AntlrNode + } } override fun getElementParametersRoot(): AntlrNode? { diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt index 553dbbe0..aedd3bcb 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt @@ -67,9 +67,9 @@ class PythonMethodSplitter : TreeMethodSplitter { } return parameterRoot.getChildrenOfType(METHOD_SINGLE_PARAMETER_NODE).map { if (decompressTypeLabel(it.getTypeLabel()).last() == PARAMETER_NAME_NODE) { - ParameterNode(it as AntlrNode, null, it) + ParameterNode(it, null, it) } else { - ParameterNode(it as AntlrNode, null, it.getChildOfType(PARAMETER_NAME_NODE) as AntlrNode) + ParameterNode(it, null, it.getChildOfType(PARAMETER_NAME_NODE) as AntlrNode) } } } From 4a06194574af7de2a4cdd43b65ef750fbcdd4509 Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 5 Apr 2021 22:26:34 +0500 Subject: [PATCH 050/308] renamed CountingPathStorage to PathBasedStorage --- src/main/java/astminer/examples/AllJavaFiles.java | 4 ++-- src/main/kotlin/astminer/cli/Code2VecExtractor.kt | 2 +- src/main/kotlin/astminer/cli/PathContextsExtractor.kt | 4 ++-- 
src/main/kotlin/astminer/examples/AllCppFiles.kt | 3 +-- src/main/kotlin/astminer/examples/AllJavaFiles.kt | 5 ++--- src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt | 5 ++--- src/main/kotlin/astminer/examples/AllJavaMethods.kt | 4 ++-- src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt | 4 ++-- src/main/kotlin/astminer/examples/AllPythonFiles.kt | 4 ++-- src/main/kotlin/astminer/examples/AllPythonMethods.kt | 5 ++--- src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt | 4 ++-- src/main/kotlin/astminer/storage/Code2VecPathStorage.kt | 4 ++-- src/main/kotlin/astminer/storage/CsvPathStorage.kt | 4 ++-- .../storage/{CountingPathStorage.kt => PathBasedStorage.kt} | 6 +++--- 14 files changed, 27 insertions(+), 31 deletions(-) rename src/main/kotlin/astminer/storage/{CountingPathStorage.kt => PathBasedStorage.kt} (97%) diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 64cf29e6..909b9905 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -15,8 +15,8 @@ public class AllJavaFiles { private static final String OUTPUT_FOLDER = "out_examples/allJavaFiles_GumTree_java"; public static void runExample() { - final CountingPathStorageConfig config = new CountingPathStorageConfig(5, 5, Long.MAX_VALUE, Long.MAX_VALUE, Integer.MAX_VALUE); - final CountingPathStorage pathStorage = new CsvPathStorage(OUTPUT_FOLDER, config, TokenProcessorsKt.getIdentityTokenProcessor()); + final PathBasedStorageConfig config = new PathBasedStorageConfig(5, 5, Long.MAX_VALUE, Long.MAX_VALUE, Integer.MAX_VALUE); + final PathBasedStorage pathStorage = new CsvPathStorage(OUTPUT_FOLDER, config, TokenProcessorsKt.getIdentityTokenProcessor()); final Path inputFolder = Paths.get(INPUT_FOLDER); diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt index c3e121c6..2d0c052d 100644 --- 
a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt @@ -129,7 +129,7 @@ class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null private fun extract(labelExtractor: LabelExtractor) { val outputDir = File(outputDirName) - val storageConfig = CountingPathStorageConfig( + val storageConfig = PathBasedStorageConfig( maxPathLength, maxPathWidth, maxTokens, diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index 27b7011a..2522886d 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -8,7 +8,7 @@ import astminer.parse.antlr.python.PythonParser import astminer.parse.cpp.FuzzyCppParser import astminer.parse.java.GumTreeJavaParser import astminer.storage.Code2VecPathStorage -import astminer.storage.CountingPathStorageConfig +import astminer.storage.PathBasedStorageConfig import astminer.storage.splitTokenProcessor import astminer.storage.toLabellingResult import com.github.ajalt.clikt.core.CliktCommand @@ -102,7 +102,7 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? 
= private fun extractPathContexts(labelExtractor: LabelExtractor) { val outputDir = File(outputDirName) - val storageConfig = CountingPathStorageConfig( + val storageConfig = PathBasedStorageConfig( maxPathLength, maxPathWidth, maxTokens, diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index d0491f70..2de6fd36 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -3,7 +3,6 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension -import astminer.common.model.Node import astminer.parse.cpp.FuzzyCppParser import astminer.storage.* import java.io.File @@ -13,7 +12,7 @@ fun allCppFiles() { val inputDir = File("src/test/resources/examples/cpp") val outputDir = "out_examples/allCppFiles" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5)) + val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5)) val parser = FuzzyCppParser() val preprocOutputFolder = File("preprocessed") diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index 98c9f87b..d345884a 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -3,8 +3,7 @@ package astminer.examples import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser import astminer.storage.CsvPathStorage -import astminer.storage.CountingPathStorageConfig -import astminer.storage.identityTokenProcessor +import astminer.storage.PathBasedStorageConfig import astminer.storage.labeledWithFilePath import java.io.File @@ -13,7 +12,7 @@ fun allJavaFiles() { val inputDir = "src/test/resources/examples/" val outputDir = "out_examples/allJavaFilesAntlr" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5)) + val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5)) 
File(inputDir).forFilesWithSuffix("11.java") { file -> val node = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index 300a491c..600b06ac 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -3,8 +3,7 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.java.GumTreeJavaParser import astminer.storage.CsvPathStorage -import astminer.storage.CountingPathStorageConfig -import astminer.storage.identityTokenProcessor +import astminer.storage.PathBasedStorageConfig import astminer.storage.labeledWithFilePath import java.io.File @@ -13,7 +12,7 @@ fun allJavaFilesGumTree() { val inputDir = "src/test/resources/gumTreeMethodSplitter/" val outputDir = "out_examples/allJavaFilesGumTree" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5)) + val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5)) val files = getProjectFilesWithExtension(File(inputDir), "java") GumTreeJavaParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 8f6045b1..397b57fc 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -5,7 +5,7 @@ import astminer.parse.java.GumTreeJavaNode import astminer.parse.java.GumTreeJavaParser import astminer.parse.java.GumTreeJavaMethodSplitter import astminer.storage.CsvPathStorage -import astminer.storage.CountingPathStorageConfig +import astminer.storage.PathBasedStorageConfig import astminer.storage.LabellingResult import astminer.storage.splitTokenProcessor import java.io.File @@ -25,7 +25,7 @@ fun allJavaMethods() { val inputDir = 
"src/test/resources/gumTreeMethodSplitter" val outputDir = "out_examples/allJavaMethods" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5), splitTokenProcessor) + val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5), splitTokenProcessor) File(inputDir).forFilesWithSuffix(".java") { file -> //parse file diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index 9327835e..9157669c 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -3,7 +3,7 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.antlr.javascript.JavaScriptParser import astminer.storage.CsvPathStorage -import astminer.storage.CountingPathStorageConfig +import astminer.storage.PathBasedStorageConfig import astminer.storage.labeledWithFilePath import astminer.storage.splitTokenProcessor import java.io.File @@ -12,7 +12,7 @@ fun allJavaScriptFiles() { val folder = "src/test/resources/examples" val outputDir = "out_examples/allJavaScriptFilesAntlr" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5), splitTokenProcessor) + val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5), splitTokenProcessor) val files = getProjectFilesWithExtension(File(folder), "js") JavaScriptParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index 50488162..d98f8451 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -3,7 +3,7 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.antlr.python.PythonParser import astminer.storage.CsvPathStorage -import astminer.storage.CountingPathStorageConfig +import 
astminer.storage.PathBasedStorageConfig import astminer.storage.labeledWithFilePath import astminer.storage.splitTokenProcessor import java.io.File @@ -13,7 +13,7 @@ fun allPythonFiles() { val inputDir = "src/test/resources/examples/" val outputDir = "out_examples/allPythonFiles" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5), splitTokenProcessor) + val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5), splitTokenProcessor) val files = getProjectFilesWithExtension(File(inputDir), "py") PythonParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 1b354551..745335ab 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -5,9 +5,8 @@ import astminer.parse.python.GumTreePythonMethodSplitter import astminer.parse.python.GumTreePythonNode import astminer.parse.python.GumTreePythonParser import astminer.storage.CsvPathStorage -import astminer.storage.CountingPathStorageConfig +import astminer.storage.PathBasedStorageConfig import astminer.storage.LabellingResult -import astminer.storage.identityTokenProcessor import java.io.File private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { @@ -21,7 +20,7 @@ fun allPythonMethods() { val inputDir = "src/test/resources/gumTreeMethodSplitter" val outputDir = "out_examples/allPythonMethods" - val storage = CsvPathStorage(outputDir, CountingPathStorageConfig(5, 5)) + val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5)) File(inputDir).forFilesWithSuffix(".py") { file -> // parse file diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 07dcb71b..742a4148 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -5,7 
+5,7 @@ import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser import astminer.paths.* import astminer.storage.Code2VecPathStorage -import astminer.storage.CountingPathStorageConfig +import astminer.storage.PathBasedStorageConfig import astminer.storage.LabellingResult import java.io.File @@ -17,7 +17,7 @@ fun code2vecJavaMethods() { val outputDir = "out_examples/code2vecPathMining" - val storage = Code2VecPathStorage(outputDir, CountingPathStorageConfig(5, 5)) + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) File(folder).forFilesWithSuffix(".java") { file -> //parse file diff --git a/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt b/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt index d80af83e..ec8a0b8f 100644 --- a/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt +++ b/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt @@ -4,10 +4,10 @@ import astminer.common.model.PathContextId class Code2VecPathStorage( outputDirectoryPath: String, - config: CountingPathStorageConfig, + config: PathBasedStorageConfig, tokenProcessor: TokenProcessor = code2vecTokenProcessor ) : - CountingPathStorage(outputDirectoryPath, config, tokenProcessor) { + PathBasedStorage(outputDirectoryPath, config, tokenProcessor) { override fun pathContextIdsToString(pathContextIds: List, label: String): String { val joinedPathContexts = pathContextIds.joinToString(" ") { pathContextId -> diff --git a/src/main/kotlin/astminer/storage/CsvPathStorage.kt b/src/main/kotlin/astminer/storage/CsvPathStorage.kt index 321ca798..0f4934f5 100644 --- a/src/main/kotlin/astminer/storage/CsvPathStorage.kt +++ b/src/main/kotlin/astminer/storage/CsvPathStorage.kt @@ -4,10 +4,10 @@ import astminer.common.model.PathContextId class CsvPathStorage( outputDirectoryPath: String, - config: CountingPathStorageConfig, + config: PathBasedStorageConfig, tokenProcessor: TokenProcessor = identityTokenProcessor ) : - 
CountingPathStorage(outputDirectoryPath, config, tokenProcessor) { + PathBasedStorage(outputDirectoryPath, config, tokenProcessor) { override fun pathContextIdsToString(pathContextIds: List, label: String): String { val joinedPathContexts = pathContextIds.joinToString(";") { pathContextId -> diff --git a/src/main/kotlin/astminer/storage/CountingPathStorage.kt b/src/main/kotlin/astminer/storage/PathBasedStorage.kt similarity index 97% rename from src/main/kotlin/astminer/storage/CountingPathStorage.kt rename to src/main/kotlin/astminer/storage/PathBasedStorage.kt index 0c6a71e8..b09986a6 100644 --- a/src/main/kotlin/astminer/storage/CountingPathStorage.kt +++ b/src/main/kotlin/astminer/storage/PathBasedStorage.kt @@ -18,7 +18,7 @@ import java.io.PrintWriter * @property maxPathContextsPerEntity The maximum number of path contexts that should be extracted from LabeledParseResult. * In other words, the maximum number of path contexts to save from each file/method (depending on granularity) */ -data class CountingPathStorageConfig( +data class PathBasedStorageConfig( val maxPathLength: Int, val maxPathWidth: Int, val maxTokens: Long = Long.MAX_VALUE, @@ -32,9 +32,9 @@ data class CountingPathStorageConfig( * @property config The config that contains hyperparameters for path extraction. * @property tokenProcessor The token processor that is used to extract tokens from nodes. 
*/ -abstract class CountingPathStorage( +abstract class PathBasedStorage( final override val outputDirectoryPath: String, - private val config: CountingPathStorageConfig, + private val config: PathBasedStorageConfig, private val tokenProcessor: TokenProcessor ) : Storage { From d59c00724cd8b25753077dc9ea9043e21e5efdde Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 12 Apr 2021 18:59:22 +0500 Subject: [PATCH 051/308] change all limits from Long.MAX_VALUE to null + refactored TokenProcessor.kt --- .../java/astminer/examples/AllJavaFiles.java | 2 +- .../kotlin/astminer/cli/Code2VecExtractor.kt | 4 +- .../astminer/cli/PathContextsExtractor.kt | 7 +--- .../astminer/common/storage/CsvFileUtil.kt | 4 +- .../astminer/examples/AllJavaMethods.kt | 7 +--- .../astminer/examples/AllJavaScriptFiles.kt | 4 +- .../astminer/examples/AllPythonFiles.kt | 4 +- .../astminer/storage/Code2VecPathStorage.kt | 2 +- .../kotlin/astminer/storage/CsvPathStorage.kt | 2 +- .../astminer/storage/PathBasedStorage.kt | 29 ++++++++++----- .../kotlin/astminer/storage/TokenProcessor.kt | 37 +++++++++++++++++++ .../astminer/storage/TokenProcessors.kt | 24 ------------ 12 files changed, 71 insertions(+), 55 deletions(-) create mode 100644 src/main/kotlin/astminer/storage/TokenProcessor.kt delete mode 100644 src/main/kotlin/astminer/storage/TokenProcessors.kt diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 909b9905..e5be2d61 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -16,7 +16,7 @@ public class AllJavaFiles { public static void runExample() { final PathBasedStorageConfig config = new PathBasedStorageConfig(5, 5, Long.MAX_VALUE, Long.MAX_VALUE, Integer.MAX_VALUE); - final PathBasedStorage pathStorage = new CsvPathStorage(OUTPUT_FOLDER, config, TokenProcessorsKt.getIdentityTokenProcessor()); + final PathBasedStorage pathStorage = new 
CsvPathStorage(OUTPUT_FOLDER, config, TokenProcessor.LeaveOriginal); final Path inputFolder = Paths.get(INPUT_FOLDER); diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt index 2d0c052d..99da803f 100644 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt @@ -141,9 +141,9 @@ class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null outputDirForLanguage.mkdir() // Choose how to process tokens val tokenProcessor = if (isTokenSplitted) { - splitTokenProcessor + TokenProcessor.Split } else { - code2vecTokenProcessor + TokenProcessor.Normalize } // Choose type of storage val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig, tokenProcessor) diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index 2522886d..67664433 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -7,10 +7,7 @@ import astminer.parse.antlr.javascript.JavaScriptParser import astminer.parse.antlr.python.PythonParser import astminer.parse.cpp.FuzzyCppParser import astminer.parse.java.GumTreeJavaParser -import astminer.storage.Code2VecPathStorage -import astminer.storage.PathBasedStorageConfig -import astminer.storage.splitTokenProcessor -import astminer.storage.toLabellingResult +import astminer.storage.* import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* import com.github.ajalt.clikt.parameters.types.int @@ -114,7 +111,7 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? 
= val outputDirForLanguage = outputDir.resolve(extension) outputDirForLanguage.mkdir() - val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig, splitTokenProcessor) + val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig, TokenProcessor.Split) val files = getProjectFilesWithExtension(File(projectRoot), extension) parser.parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/common/storage/CsvFileUtil.kt b/src/main/kotlin/astminer/common/storage/CsvFileUtil.kt index 46702d18..bc40c140 100644 --- a/src/main/kotlin/astminer/common/storage/CsvFileUtil.kt +++ b/src/main/kotlin/astminer/common/storage/CsvFileUtil.kt @@ -7,13 +7,13 @@ fun dumpIdStorageToCsv(storage: RankedIncrementalIdStorage, typeHeader: String, csvSerializer: (T) -> String, file: File, - limit: Long = Long.MAX_VALUE) { + limit: Long? = null) { file.printWriter().use { out -> out.println("id,$typeHeader") storage.idPerItem.forEach { val id = it.value val item = it.key - if (storage.getKeyRank(item) <= limit) { + if (limit == null || storage.getKeyRank(item) <= limit) { out.println("$id,${csvSerializer.invoke(item)}") } } diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 397b57fc..7395b3bf 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -4,10 +4,7 @@ import astminer.common.model.MethodInfo import astminer.parse.java.GumTreeJavaNode import astminer.parse.java.GumTreeJavaParser import astminer.parse.java.GumTreeJavaMethodSplitter -import astminer.storage.CsvPathStorage -import astminer.storage.PathBasedStorageConfig -import astminer.storage.LabellingResult -import astminer.storage.splitTokenProcessor +import astminer.storage.* import java.io.File @@ -25,7 +22,7 @@ fun allJavaMethods() { val inputDir = "src/test/resources/gumTreeMethodSplitter" val outputDir = "out_examples/allJavaMethods" - val 
storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5), splitTokenProcessor) + val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) File(inputDir).forFilesWithSuffix(".java") { file -> //parse file diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index 9157669c..2391ae33 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -4,15 +4,15 @@ import astminer.common.getProjectFilesWithExtension import astminer.parse.antlr.javascript.JavaScriptParser import astminer.storage.CsvPathStorage import astminer.storage.PathBasedStorageConfig +import astminer.storage.TokenProcessor import astminer.storage.labeledWithFilePath -import astminer.storage.splitTokenProcessor import java.io.File fun allJavaScriptFiles() { val folder = "src/test/resources/examples" val outputDir = "out_examples/allJavaScriptFilesAntlr" - val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5), splitTokenProcessor) + val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) val files = getProjectFilesWithExtension(File(folder), "js") JavaScriptParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index d98f8451..f7328538 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -4,8 +4,8 @@ import astminer.common.getProjectFilesWithExtension import astminer.parse.antlr.python.PythonParser import astminer.storage.CsvPathStorage import astminer.storage.PathBasedStorageConfig +import astminer.storage.TokenProcessor import astminer.storage.labeledWithFilePath -import astminer.storage.splitTokenProcessor import java.io.File @@ -13,7 +13,7 @@ fun allPythonFiles() { val inputDir = 
"src/test/resources/examples/" val outputDir = "out_examples/allPythonFiles" - val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5), splitTokenProcessor) + val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) val files = getProjectFilesWithExtension(File(inputDir), "py") PythonParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt b/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt index ec8a0b8f..7863f9fb 100644 --- a/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt +++ b/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt @@ -5,7 +5,7 @@ import astminer.common.model.PathContextId class Code2VecPathStorage( outputDirectoryPath: String, config: PathBasedStorageConfig, - tokenProcessor: TokenProcessor = code2vecTokenProcessor + tokenProcessor: TokenProcessor = TokenProcessor.Normalize ) : PathBasedStorage(outputDirectoryPath, config, tokenProcessor) { diff --git a/src/main/kotlin/astminer/storage/CsvPathStorage.kt b/src/main/kotlin/astminer/storage/CsvPathStorage.kt index 0f4934f5..76bf8a7d 100644 --- a/src/main/kotlin/astminer/storage/CsvPathStorage.kt +++ b/src/main/kotlin/astminer/storage/CsvPathStorage.kt @@ -5,7 +5,7 @@ import astminer.common.model.PathContextId class CsvPathStorage( outputDirectoryPath: String, config: PathBasedStorageConfig, - tokenProcessor: TokenProcessor = identityTokenProcessor + tokenProcessor: TokenProcessor = TokenProcessor.LeaveOriginal ) : PathBasedStorage(outputDirectoryPath, config, tokenProcessor) { diff --git a/src/main/kotlin/astminer/storage/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/PathBasedStorage.kt index b09986a6..4675f5c5 100644 --- a/src/main/kotlin/astminer/storage/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/PathBasedStorage.kt @@ -21,9 +21,9 @@ import java.io.PrintWriter data class PathBasedStorageConfig( val maxPathLength: Int, val maxPathWidth: Int, - val 
maxTokens: Long = Long.MAX_VALUE, - val maxPaths: Long = Long.MAX_VALUE, - val maxPathContextsPerEntity: Int = Int.MAX_VALUE + val maxTokens: Long? = null, + val maxPaths: Long? = null, + val maxPathContextsPerEntity: Int? = null ) /** @@ -56,14 +56,18 @@ abstract class PathBasedStorage( abstract fun pathContextIdsToString(pathContextIds: List, label: String): String - private fun Node.getProcessedToken(): String = this.run(tokenProcessor) + private fun Node.getProcessedToken(): String = tokenProcessor.processToken(this) private fun dumpPathContexts(labeledPathContextIds: LabeledPathContextIds) { val pathContextIdsString = labeledPathContextIds.pathContexts.filter { - tokensMap.getIdRank(it.startTokenId) <= config.maxTokens && - tokensMap.getIdRank(it.endTokenId) <= config.maxTokens && - pathsMap.getIdRank(it.pathId) <= config.maxPaths + val isNumberOfTokensValid = config.maxTokens == null || + tokensMap.getIdRank(it.startTokenId) <= config.maxTokens && + tokensMap.getIdRank(it.endTokenId) <= config.maxTokens + val isNumberOfPathsValid = config.maxPaths == null || pathsMap.getIdRank(it.pathId) <= config.maxPaths + + isNumberOfTokensValid && isNumberOfPathsValid } + labeledPathContextIdsWriter.println(pathContextIdsToString(pathContextIdsString, labeledPathContextIds.label)) } @@ -75,8 +79,14 @@ abstract class PathBasedStorage( return PathContextId(startTokenId, pathId, endTokenId) } + private fun retrievePaths(node: Node) = if (config.maxPathContextsPerEntity != null) { + pathMiner.retrievePaths(node).take(config.maxPathContextsPerEntity) + } else { + pathMiner.retrievePaths(node) + } + private fun retrieveLabeledPathContexts(labellingResult: LabellingResult): LabeledPathContexts { - val paths = pathMiner.retrievePaths(labellingResult.root).take(config.maxPathContextsPerEntity) + val paths = retrievePaths(labellingResult.root) return LabeledPathContexts(labellingResult.label, paths.map { astPath -> toPathContext(astPath) { node -> node.getProcessedToken() } }) @@ 
-106,8 +116,7 @@ abstract class PathBasedStorage( orientedNodeTypesMap, "node_type", orientedNodeToCsvString, - File("$outputDirectoryPath/node_types.csv"), - Long.MAX_VALUE + File("$outputDirectoryPath/node_types.csv") ) dumpIdStorageToCsv(pathsMap, "path", pathToCsvString, File("$outputDirectoryPath/paths.csv"), config.maxPaths) diff --git a/src/main/kotlin/astminer/storage/TokenProcessor.kt b/src/main/kotlin/astminer/storage/TokenProcessor.kt new file mode 100644 index 00000000..9b6ff431 --- /dev/null +++ b/src/main/kotlin/astminer/storage/TokenProcessor.kt @@ -0,0 +1,37 @@ +package astminer.storage + +import astminer.cli.separateToken +import astminer.common.DEFAULT_TOKEN +import astminer.common.model.Node +import astminer.common.normalizeToken + +/** + * Each TokenProcessor processes a node's token and returns a new representation of it. + * Before saving a token on the disk one usually processes the token with a TokenProcessor. + */ +enum class TokenProcessor { + /** + * Does not actually process the token, returns the original unchanged token. + * Works like the identity function id: x --> x, hence the name. + */ + LeaveOriginal { + override fun processToken(node: Node): String = node.getToken() + }, + + /** + * Splits the token into subtokens (words). + * For example, "getFull_name" --> "get full name" + */ + Split { + override fun processToken(node: Node): String = separateToken(node.getToken()) + }, + + /** + * Processes the token according to the original code2vec implementation in order to match their behavior. 
+ */ + Normalize { + override fun processToken(node: Node): String = normalizeToken(node.getToken(), DEFAULT_TOKEN) + }; + + abstract fun processToken(node: Node): String +} diff --git a/src/main/kotlin/astminer/storage/TokenProcessors.kt b/src/main/kotlin/astminer/storage/TokenProcessors.kt deleted file mode 100644 index 03dbc9b5..00000000 --- a/src/main/kotlin/astminer/storage/TokenProcessors.kt +++ /dev/null @@ -1,24 +0,0 @@ -package astminer.storage - -import astminer.cli.separateToken -import astminer.common.DEFAULT_TOKEN -import astminer.common.model.Node -import astminer.common.normalizeToken - -/** - * A function that should calculate a node's token. - */ -typealias TokenProcessor = (Node) -> String - -val splitTokenProcessor: TokenProcessor = { node -> separateToken(node.getToken()) } - -/** - * Returns the original unchanged token. - * Works like the identity function id: x --> x, hence the name. - */ -val identityTokenProcessor: TokenProcessor = { node -> node.getToken() } - -/** - * Processes the token according to the original code2vec implementation in order to match their behavior. 
- */ -val code2vecTokenProcessor: TokenProcessor = { node -> normalizeToken(node.getToken(), DEFAULT_TOKEN) } From 93ecf8033fe22cd9da33afe9e4bc0b86e2138e52 Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 12 Apr 2021 19:03:13 +0500 Subject: [PATCH 052/308] reorganized the storage package --- src/main/java/astminer/examples/AllJavaFiles.java | 3 +++ src/main/kotlin/astminer/cli/Code2VecExtractor.kt | 2 ++ src/main/kotlin/astminer/cli/PathContextsExtractor.kt | 2 ++ src/main/kotlin/astminer/cli/ProjectParser.kt | 4 ++-- src/main/kotlin/astminer/examples/AllCppFiles.kt | 2 ++ src/main/kotlin/astminer/examples/AllJavaAst.kt | 2 +- src/main/kotlin/astminer/examples/AllJavaFiles.kt | 4 ++-- src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt | 4 ++-- src/main/kotlin/astminer/examples/AllJavaMethods.kt | 2 ++ src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt | 4 ++-- src/main/kotlin/astminer/examples/AllPythonFiles.kt | 4 ++-- src/main/kotlin/astminer/examples/AllPythonMethods.kt | 4 ++-- src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt | 5 ++--- src/main/kotlin/astminer/storage/{ => ast}/CsvAstStorage.kt | 4 +++- src/main/kotlin/astminer/storage/{ => ast}/DotAstStorage.kt | 4 +++- .../astminer/storage/{ => path}/Code2VecPathStorage.kt | 3 ++- .../kotlin/astminer/storage/{ => path}/CsvPathStorage.kt | 3 ++- .../kotlin/astminer/storage/{ => path}/PathBasedStorage.kt | 5 ++++- .../kotlin/astminer/storage/{ => ast}/CsvAstStorageTest.kt | 3 ++- .../kotlin/astminer/storage/{ => ast}/DotAstStorageTest.kt | 2 +- 20 files changed, 43 insertions(+), 23 deletions(-) rename src/main/kotlin/astminer/storage/{ => ast}/CsvAstStorage.kt (95%) rename src/main/kotlin/astminer/storage/{ => ast}/DotAstStorage.kt (97%) rename src/main/kotlin/astminer/storage/{ => path}/Code2VecPathStorage.kt (90%) rename src/main/kotlin/astminer/storage/{ => path}/CsvPathStorage.kt (89%) rename src/main/kotlin/astminer/storage/{ => path}/PathBasedStorage.kt (97%) rename 
src/test/kotlin/astminer/storage/{ => ast}/CsvAstStorageTest.kt (84%) rename src/test/kotlin/astminer/storage/{ => ast}/DotAstStorageTest.kt (98%) diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index e5be2d61..1fd188ed 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -3,6 +3,9 @@ import astminer.common.model.*; import astminer.parse.java.GumTreeJavaParser; import astminer.storage.*; +import astminer.storage.path.CsvPathStorage; +import astminer.storage.path.PathBasedStorage; +import astminer.storage.path.PathBasedStorageConfig; import java.io.FileInputStream; import java.io.IOException; diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt index 99da803f..f4b0232c 100644 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt @@ -4,6 +4,8 @@ import astminer.common.getProjectFilesWithExtension import astminer.common.model.Node import astminer.common.model.ParseResult import astminer.storage.* +import astminer.storage.path.Code2VecPathStorage +import astminer.storage.path.PathBasedStorageConfig import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* import com.github.ajalt.clikt.parameters.types.int diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index 67664433..b8761687 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -8,6 +8,8 @@ import astminer.parse.antlr.python.PythonParser import astminer.parse.cpp.FuzzyCppParser import astminer.parse.java.GumTreeJavaParser import astminer.storage.* +import astminer.storage.path.Code2VecPathStorage +import astminer.storage.path.PathBasedStorageConfig import 
com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* import com.github.ajalt.clikt.parameters.types.int diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt index ba220260..57b825b1 100644 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ b/src/main/kotlin/astminer/cli/ProjectParser.kt @@ -1,7 +1,7 @@ package astminer.cli -import astminer.storage.CsvAstStorage -import astminer.storage.DotAstStorage +import astminer.storage.ast.CsvAstStorage +import astminer.storage.ast.DotAstStorage import astminer.common.getProjectFilesWithExtension import astminer.common.preOrder import astminer.storage.Storage diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index 2de6fd36..c412c9ef 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -5,6 +5,8 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.cpp.FuzzyCppParser import astminer.storage.* +import astminer.storage.path.CsvPathStorage +import astminer.storage.path.PathBasedStorageConfig import java.io.File // Retrieve paths from .cpp preprocessed files, using a fuzzyc2cpg parser. 
diff --git a/src/main/kotlin/astminer/examples/AllJavaAst.kt b/src/main/kotlin/astminer/examples/AllJavaAst.kt index a45295ab..8bee4fa1 100644 --- a/src/main/kotlin/astminer/examples/AllJavaAst.kt +++ b/src/main/kotlin/astminer/examples/AllJavaAst.kt @@ -1,7 +1,7 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension -import astminer.storage.CsvAstStorage +import astminer.storage.ast.CsvAstStorage import astminer.parse.antlr.java.JavaParser import astminer.storage.labeledWithFilePath import java.io.File diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index d345884a..bc3385d7 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -2,8 +2,8 @@ package astminer.examples import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser -import astminer.storage.CsvPathStorage -import astminer.storage.PathBasedStorageConfig +import astminer.storage.path.CsvPathStorage +import astminer.storage.path.PathBasedStorageConfig import astminer.storage.labeledWithFilePath import java.io.File diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index 600b06ac..f8ae02f6 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -2,8 +2,8 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.java.GumTreeJavaParser -import astminer.storage.CsvPathStorage -import astminer.storage.PathBasedStorageConfig +import astminer.storage.path.CsvPathStorage +import astminer.storage.path.PathBasedStorageConfig import astminer.storage.labeledWithFilePath import java.io.File diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 
7395b3bf..860fe3d5 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -5,6 +5,8 @@ import astminer.parse.java.GumTreeJavaNode import astminer.parse.java.GumTreeJavaParser import astminer.parse.java.GumTreeJavaMethodSplitter import astminer.storage.* +import astminer.storage.path.CsvPathStorage +import astminer.storage.path.PathBasedStorageConfig import java.io.File diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index 2391ae33..92eec691 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -2,8 +2,8 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.antlr.javascript.JavaScriptParser -import astminer.storage.CsvPathStorage -import astminer.storage.PathBasedStorageConfig +import astminer.storage.path.CsvPathStorage +import astminer.storage.path.PathBasedStorageConfig import astminer.storage.TokenProcessor import astminer.storage.labeledWithFilePath import java.io.File diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index f7328538..7bbeb789 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -2,8 +2,8 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.antlr.python.PythonParser -import astminer.storage.CsvPathStorage -import astminer.storage.PathBasedStorageConfig +import astminer.storage.path.CsvPathStorage +import astminer.storage.path.PathBasedStorageConfig import astminer.storage.TokenProcessor import astminer.storage.labeledWithFilePath import java.io.File diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 
745335ab..14a278ac 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -4,8 +4,8 @@ import astminer.common.model.MethodInfo import astminer.parse.python.GumTreePythonMethodSplitter import astminer.parse.python.GumTreePythonNode import astminer.parse.python.GumTreePythonParser -import astminer.storage.CsvPathStorage -import astminer.storage.PathBasedStorageConfig +import astminer.storage.path.CsvPathStorage +import astminer.storage.path.PathBasedStorageConfig import astminer.storage.LabellingResult import java.io.File diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 742a4148..a33c2682 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -3,9 +3,8 @@ package astminer.examples import astminer.common.* import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser -import astminer.paths.* -import astminer.storage.Code2VecPathStorage -import astminer.storage.PathBasedStorageConfig +import astminer.storage.path.Code2VecPathStorage +import astminer.storage.path.PathBasedStorageConfig import astminer.storage.LabellingResult import java.io.File diff --git a/src/main/kotlin/astminer/storage/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt similarity index 95% rename from src/main/kotlin/astminer/storage/CsvAstStorage.kt rename to src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt index f404d399..10fec9a2 100644 --- a/src/main/kotlin/astminer/storage/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -1,8 +1,10 @@ -package astminer.storage +package astminer.storage.ast import astminer.common.model.Node import astminer.common.preOrder import astminer.common.storage.* +import astminer.storage.LabellingResult +import astminer.storage.Storage import 
java.io.File import java.io.PrintWriter diff --git a/src/main/kotlin/astminer/storage/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt similarity index 97% rename from src/main/kotlin/astminer/storage/DotAstStorage.kt rename to src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index de398bb8..468a017b 100644 --- a/src/main/kotlin/astminer/storage/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -1,9 +1,11 @@ -package astminer.storage +package astminer.storage.ast import astminer.common.getNormalizedToken import astminer.common.model.Node import astminer.common.preOrder import astminer.common.storage.RankedIncrementalIdStorage +import astminer.storage.LabellingResult +import astminer.storage.Storage import java.io.File import java.io.PrintWriter diff --git a/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt b/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt similarity index 90% rename from src/main/kotlin/astminer/storage/Code2VecPathStorage.kt rename to src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt index 7863f9fb..51f694dc 100644 --- a/src/main/kotlin/astminer/storage/Code2VecPathStorage.kt +++ b/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt @@ -1,6 +1,7 @@ -package astminer.storage +package astminer.storage.path import astminer.common.model.PathContextId +import astminer.storage.TokenProcessor class Code2VecPathStorage( outputDirectoryPath: String, diff --git a/src/main/kotlin/astminer/storage/CsvPathStorage.kt b/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt similarity index 89% rename from src/main/kotlin/astminer/storage/CsvPathStorage.kt rename to src/main/kotlin/astminer/storage/path/CsvPathStorage.kt index 76bf8a7d..190f1b5b 100644 --- a/src/main/kotlin/astminer/storage/CsvPathStorage.kt +++ b/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt @@ -1,6 +1,7 @@ -package astminer.storage +package astminer.storage.path import 
astminer.common.model.PathContextId +import astminer.storage.TokenProcessor class CsvPathStorage( outputDirectoryPath: String, diff --git a/src/main/kotlin/astminer/storage/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt similarity index 97% rename from src/main/kotlin/astminer/storage/PathBasedStorage.kt rename to src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index 4675f5c5..cab347d3 100644 --- a/src/main/kotlin/astminer/storage/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -1,10 +1,13 @@ -package astminer.storage +package astminer.storage.path import astminer.common.model.* import astminer.common.storage.* import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import astminer.paths.toPathContext +import astminer.storage.LabellingResult +import astminer.storage.Storage +import astminer.storage.TokenProcessor import java.io.File import java.io.PrintWriter diff --git a/src/test/kotlin/astminer/storage/CsvAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt similarity index 84% rename from src/test/kotlin/astminer/storage/CsvAstStorageTest.kt rename to src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt index 1fa88d04..796e76f4 100644 --- a/src/test/kotlin/astminer/storage/CsvAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt @@ -1,7 +1,8 @@ -package astminer.storage +package astminer.storage.ast import astminer.common.createSmallTree import astminer.common.labeledWith +import astminer.storage.ast.CsvAstStorage import org.junit.Assert import org.junit.Test diff --git a/src/test/kotlin/astminer/storage/DotAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt similarity index 98% rename from src/test/kotlin/astminer/storage/DotAstStorageTest.kt rename to src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt index a192dc5f..7fca39e3 100644 --- 
a/src/test/kotlin/astminer/storage/DotAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt @@ -1,4 +1,4 @@ -package astminer.storage +package astminer.storage.ast import astminer.common.createSmallTree import astminer.common.labeledWith From edef56514bc95ac9c7934a1792069ee3ae753608 Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 12 Apr 2021 19:28:00 +0500 Subject: [PATCH 053/308] added TokenProcessorTest.kt --- .../kotlin/astminer/storage/TokenProcessorTest.kt | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 src/test/kotlin/astminer/storage/TokenProcessorTest.kt diff --git a/src/test/kotlin/astminer/storage/TokenProcessorTest.kt b/src/test/kotlin/astminer/storage/TokenProcessorTest.kt new file mode 100644 index 00000000..119de43d --- /dev/null +++ b/src/test/kotlin/astminer/storage/TokenProcessorTest.kt @@ -0,0 +1,13 @@ +package astminer.storage + +import astminer.common.DummyNode +import org.junit.Assert +import org.junit.Test + +internal class TokenProcessorTest { + @Test + fun `test leave original should return the unchanged token`() { + val node = DummyNode("original unchanged token", mutableListOf()) + Assert.assertEquals("original unchanged token", TokenProcessor.LeaveOriginal.processToken(node)) + } +} From 6a2a3d07902451d9cd0280b13bfb13df81e26b30 Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 13 Apr 2021 10:36:15 +0500 Subject: [PATCH 054/308] added filePath to LabeledParseResult and removed LabellingResult --- .../java/astminer/examples/AllJavaFiles.java | 3 ++- .../kotlin/astminer/cli/Code2VecExtractor.kt | 6 ++--- .../kotlin/astminer/cli/LabelExtractors.kt | 20 +++++++++++------ .../astminer/cli/PathContextsExtractor.kt | 4 +--- src/main/kotlin/astminer/cli/ProjectParser.kt | 7 +++--- .../astminer/common/model/ParsingModel.kt | 9 ++++++-- .../kotlin/astminer/examples/AllCppFiles.kt | 5 ++--- .../kotlin/astminer/examples/AllJavaAst.kt | 5 ++--- .../kotlin/astminer/examples/AllJavaFiles.kt | 4 ++-- 
.../astminer/examples/AllJavaFilesGumTree.kt | 5 ++--- .../astminer/examples/AllJavaMethods.kt | 3 ++- .../astminer/examples/AllJavaScriptFiles.kt | 5 ++--- .../astminer/examples/AllPythonFiles.kt | 5 ++--- .../astminer/examples/AllPythonMethods.kt | 4 ++-- .../astminer/examples/Code2VecJavaMethods.kt | 4 ++-- .../kotlin/astminer/storage/LabelingResult.kt | 22 ------------------- src/main/kotlin/astminer/storage/Storage.kt | 10 ++++++++- .../astminer/storage/ast/CsvAstStorage.kt | 8 +++---- .../astminer/storage/ast/DotAstStorage.kt | 12 +++++----- .../astminer/storage/path/PathBasedStorage.kt | 14 ++++++------ src/test/kotlin/astminer/common/TestUtils.kt | 4 ++-- 21 files changed, 74 insertions(+), 85 deletions(-) delete mode 100644 src/main/kotlin/astminer/storage/LabelingResult.kt diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 1fd188ed..b3c434cf 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -1,5 +1,6 @@ package astminer.examples; +import astminer.cli.LabeledResult; import astminer.common.model.*; import astminer.parse.java.GumTreeJavaParser; import astminer.storage.*; @@ -32,7 +33,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) thro } String filePath = file.toAbsolutePath().toString(); - pathStorage.store(new LabellingResult<>(fileTree, filePath, filePath)); + pathStorage.store(new LabeledResult<>(fileTree, filePath, filePath)); return FileVisitResult.CONTINUE; } diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt index f4b0232c..f1e20b3a 100644 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt @@ -123,10 +123,8 @@ class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? 
= null ) { val labeledParseResults = labelExtractor.toLabeledData(parseResult) - // Retrieve paths from every node individually - labeledParseResults.forEach { - storage.store(it.toLabellingResult(parseResult.filePath)) - } + // Retrieve paths from every node individually and store them + storage.store(labeledParseResults) } private fun extract(labelExtractor: LabelExtractor) { diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index c418a48b..d1e68e96 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -18,24 +18,30 @@ import astminer.parse.python.GumTreePythonNode import java.io.File -data class LabeledParseResult(val root: T, val label: String) +/** + * An AST subtree with a label and the path of the source file. + * @property root The root of the AST subtree. + * @property label Any label for this subtree. + * @property filePath The path to the source file where the AST is from. 
+ */ +data class LabeledResult(val root: T, val label: String, val filePath: String) interface LabelExtractor { - fun toLabeledData(parseResult: ParseResult): List> + fun toLabeledData(parseResult: ParseResult): List> } abstract class FileLabelExtractor : LabelExtractor { override fun toLabeledData( parseResult: ParseResult - ): List> { + ): List> { val (root, filePath) = parseResult return if (root == null) { emptyList() } else { val label = extractLabel(root, filePath) ?: return emptyList() - listOf(LabeledParseResult(root, label)) + listOf(LabeledResult(root, label, parseResult.filePath)) } } @@ -50,7 +56,7 @@ abstract class MethodLabelExtractor( override fun toLabeledData( parseResult: ParseResult - ): List> { + ): List> { val (root, filePath) = parseResult if (root == null) { return emptyList() @@ -103,7 +109,7 @@ abstract class MethodLabelExtractor( } return methodInfos.mapNotNull { val label = extractLabel(it, filePath) ?: return@mapNotNull null - LabeledParseResult(it.method.root, label) + LabeledResult(it.method.root, label, filePath) } } @@ -111,7 +117,7 @@ abstract class MethodLabelExtractor( } class FilePathExtractor : FileLabelExtractor() { - override fun extractLabel(root: Node, filePath: String): String? { + override fun extractLabel(root: Node, filePath: String): String { return filePath } } diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index b8761687..855fd951 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -120,9 +120,7 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? 
= // TODO: might not be needed normalizeParseResult(parseResult, splitTokens = true) val labeledParseResults = labelExtractor.toLabeledData(parseResult) - labeledParseResults.forEach { - storage.store(it.toLabellingResult(parseResult.filePath)) - } + storage.store(labeledParseResults) } // Save stored data on disk diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt index 57b825b1..246e2aa4 100644 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ b/src/main/kotlin/astminer/cli/ProjectParser.kt @@ -5,7 +5,6 @@ import astminer.storage.ast.DotAstStorage import astminer.common.getProjectFilesWithExtension import astminer.common.preOrder import astminer.storage.Storage -import astminer.storage.toLabellingResult import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* import com.github.ajalt.clikt.parameters.types.int @@ -127,12 +126,12 @@ class ProjectParser(private val customLabelExtractor: LabelExtractor? 
= null) : parser.parseFiles(filesToParse) { parseResult -> normalizeParseResult(parseResult, isTokenSplitted) val labeledParseResults = labelExtractor.toLabeledData(parseResult) - labeledParseResults.forEach { labeled -> - labeled.root.preOrder().forEach { node -> + labeledParseResults.forEach { labeledParseResult -> + labeledParseResult.root.preOrder().forEach { node -> excludeNodes.forEach { node.removeChildrenOfType(it) } } // Save AST as it is or process it to extract features / path-based representations - storage.store(labeled.toLabellingResult(parseResult.filePath)) + storage.store(labeledParseResult) } } // Save stored data on disk diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 5231bc6e..39184c5f 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -1,5 +1,6 @@ package astminer.common.model +import astminer.cli.LabeledResult import java.io.File import java.io.InputStream @@ -42,7 +43,7 @@ interface Parser { /** * Parse file into an AST. * @param file file to parse - * @return ParseResult instance + * @return ParseResult instance */ fun parseFile(file: File) = ParseResult(parseInputStream(file.inputStream()), file.path) @@ -56,4 +57,8 @@ interface Parser { } } -data class ParseResult(val root: T?, val filePath: String) +data class ParseResult(val root: T?, val filePath: String) { + fun labeledWith(label: String): LabeledResult? = root?.let { LabeledResult(it, label, filePath) } + + fun labeledWithFilePath(): LabeledResult? 
= labeledWith(filePath) +} diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index c412c9ef..1bab184f 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -4,7 +4,6 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.cpp.FuzzyCppParser -import astminer.storage.* import astminer.storage.path.CsvPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File @@ -23,8 +22,8 @@ fun allCppFiles() { val files = getProjectFilesWithExtension(preprocOutputFolder, "cpp") parser.parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { - storage.store(it) + parseResult.labeledWithFilePath()?.let { labeledResult -> + storage.store(labeledResult) } } diff --git a/src/main/kotlin/astminer/examples/AllJavaAst.kt b/src/main/kotlin/astminer/examples/AllJavaAst.kt index 8bee4fa1..675bc62f 100644 --- a/src/main/kotlin/astminer/examples/AllJavaAst.kt +++ b/src/main/kotlin/astminer/examples/AllJavaAst.kt @@ -3,7 +3,6 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.storage.ast.CsvAstStorage import astminer.parse.antlr.java.JavaParser -import astminer.storage.labeledWithFilePath import java.io.File // Retrieve ASTs from Java files, using a generated parser. 
@@ -14,8 +13,8 @@ fun allJavaAsts() { val files = getProjectFilesWithExtension(File(folder), "java") JavaParser().parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { - storage.store(it) + parseResult.labeledWithFilePath()?.let { labeledResult -> + storage.store(labeledResult) } } diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index bc3385d7..d1e2d56a 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -1,10 +1,10 @@ package astminer.examples +import astminer.cli.LabeledResult import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser import astminer.storage.path.CsvPathStorage import astminer.storage.path.PathBasedStorageConfig -import astminer.storage.labeledWithFilePath import java.io.File //Retrieve paths from Java files, using a generated parser. @@ -25,7 +25,7 @@ fun allJavaFiles() { println("${parameters.name()} ${parameters.returnType()}") } } - storage.store(node.labeledWithFilePath(file.path)) + storage.store(LabeledResult(node, file.path, file.path)) } storage.close() diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index f8ae02f6..347ec963 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -4,7 +4,6 @@ import astminer.common.getProjectFilesWithExtension import astminer.parse.java.GumTreeJavaParser import astminer.storage.path.CsvPathStorage import astminer.storage.path.PathBasedStorageConfig -import astminer.storage.labeledWithFilePath import java.io.File //Retrieve paths from Java files, using a GumTree parser. 
@@ -16,8 +15,8 @@ fun allJavaFilesGumTree() { val files = getProjectFilesWithExtension(File(inputDir), "java") GumTreeJavaParser().parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { - storage.store(it) + parseResult.labeledWithFilePath()?.let { labeledResult -> + storage.store(labeledResult) } } diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 860fe3d5..f4842f9e 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -1,5 +1,6 @@ package astminer.examples +import astminer.cli.LabeledResult import astminer.common.model.MethodInfo import astminer.parse.java.GumTreeJavaNode import astminer.parse.java.GumTreeJavaParser @@ -36,7 +37,7 @@ fun allJavaMethods() { methodNodes.forEach { methodInfo -> //Retrieve a method identifier val entityId = "${file.path}::${getCsvFriendlyMethodId(methodInfo)}" - val labelingResult = LabellingResult(fileNode, entityId, file.path) + val labelingResult = LabeledResult(fileNode, entityId, file.path) storage.store(labelingResult) } } diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index 92eec691..f04e75fa 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -5,7 +5,6 @@ import astminer.parse.antlr.javascript.JavaScriptParser import astminer.storage.path.CsvPathStorage import astminer.storage.path.PathBasedStorageConfig import astminer.storage.TokenProcessor -import astminer.storage.labeledWithFilePath import java.io.File fun allJavaScriptFiles() { @@ -16,8 +15,8 @@ fun allJavaScriptFiles() { val files = getProjectFilesWithExtension(File(folder), "js") JavaScriptParser().parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { - storage.store(it) + parseResult.labeledWithFilePath()?.let { 
labeledResult -> + storage.store(labeledResult) } } diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index 7bbeb789..239f2d4b 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -5,7 +5,6 @@ import astminer.parse.antlr.python.PythonParser import astminer.storage.path.CsvPathStorage import astminer.storage.path.PathBasedStorageConfig import astminer.storage.TokenProcessor -import astminer.storage.labeledWithFilePath import java.io.File @@ -17,8 +16,8 @@ fun allPythonFiles() { val files = getProjectFilesWithExtension(File(inputDir), "py") PythonParser().parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { - storage.store(it) + parseResult.labeledWithFilePath()?.let { labeledResult -> + storage.store(labeledResult) } } diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 14a278ac..eec37d96 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -1,12 +1,12 @@ package astminer.examples +import astminer.cli.LabeledResult import astminer.common.model.MethodInfo import astminer.parse.python.GumTreePythonMethodSplitter import astminer.parse.python.GumTreePythonNode import astminer.parse.python.GumTreePythonParser import astminer.storage.path.CsvPathStorage import astminer.storage.path.PathBasedStorageConfig -import astminer.storage.LabellingResult import java.io.File private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { @@ -32,7 +32,7 @@ fun allPythonMethods() { methodNodes.forEach { methodInfo -> // Retrieve a method identifier val entityId = "${file.path}::${getCsvFriendlyMethodId(methodInfo)}" - val labelingResult = LabellingResult(fileNode, entityId, file.path) + val labelingResult = LabeledResult(fileNode, entityId, file.path) // Retrieve 
paths from each method individually and store them storage.store(labelingResult) } diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index a33c2682..553cd735 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -1,11 +1,11 @@ package astminer.examples +import astminer.cli.LabeledResult import astminer.common.* import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig -import astminer.storage.LabellingResult import java.io.File @@ -33,7 +33,7 @@ fun code2vecJavaMethods() { methodNameNode.setNormalizedToken("METHOD_NAME") // Retrieve paths from every node individually and store them - storage.store(LabellingResult(methodRoot, label, file.absolutePath)) + storage.store(LabeledResult(methodRoot, label, file.absolutePath)) } } diff --git a/src/main/kotlin/astminer/storage/LabelingResult.kt b/src/main/kotlin/astminer/storage/LabelingResult.kt deleted file mode 100644 index 68e3ba8b..00000000 --- a/src/main/kotlin/astminer/storage/LabelingResult.kt +++ /dev/null @@ -1,22 +0,0 @@ -package astminer.storage - -import astminer.cli.LabeledParseResult -import astminer.common.model.Node -import astminer.common.model.ParseResult - -/** - * An AST subtree with a label and the path of the source file. - * @property root The root of the AST subtree. - * @property label Any label for this subtree. - * @property filePath The path to the source file where the AST is from. 
- */ -data class LabellingResult(val root: T, val label: String, val filePath: String) - -fun LabeledParseResult.toLabellingResult(filePath: String) = - LabellingResult(this.root, this.label, filePath) - -fun ParseResult.labeledWith(label: String) = this.root?.let { LabellingResult(it, label, this.filePath) } - -fun ParseResult.labeledWithFilePath() = this.labeledWith(this.filePath) - -fun T.labeledWithFilePath(filePath: String) = LabellingResult(this, filePath, filePath) diff --git a/src/main/kotlin/astminer/storage/Storage.kt b/src/main/kotlin/astminer/storage/Storage.kt index 5ef1a057..8153b165 100644 --- a/src/main/kotlin/astminer/storage/Storage.kt +++ b/src/main/kotlin/astminer/storage/Storage.kt @@ -1,10 +1,18 @@ package astminer.storage +import astminer.cli.LabeledResult import astminer.common.model.Node interface Storage { val outputDirectoryPath: String - fun store(labellingResult: LabellingResult) + fun store(labeledResult: LabeledResult) + + fun store(labeledResults: Iterable>) { + for (labeledResult in labeledResults) { + store(labeledResult) + } + } + fun close() } diff --git a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt index 10fec9a2..2e6b8cab 100644 --- a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -1,9 +1,9 @@ package astminer.storage.ast +import astminer.cli.LabeledResult import astminer.common.model.Node import astminer.common.preOrder import astminer.common.storage.* -import astminer.storage.LabellingResult import astminer.storage.Storage import java.io.File import java.io.PrintWriter @@ -27,12 +27,12 @@ class CsvAstStorage(override val outputDirectoryPath: String) : Storage { astsOutputStream.write("id,ast\n") } - override fun store(labellingResult: LabellingResult) { - for (node in labellingResult.root.preOrder()) { + override fun store(labeledResult: LabeledResult) { + for (node in 
labeledResult.root.preOrder()) { tokensMap.record(node.getToken()) nodeTypesMap.record(node.getTypeLabel()) } - dumpAst(labellingResult.root, labellingResult.label) + dumpAst(labeledResult.root, labeledResult.label) } override fun close() { diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index 468a017b..d8026dbf 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -1,10 +1,10 @@ package astminer.storage.ast +import astminer.cli.LabeledResult import astminer.common.getNormalizedToken import astminer.common.model.Node import astminer.common.preOrder import astminer.common.storage.RankedIncrementalIdStorage -import astminer.storage.LabellingResult import astminer.storage.Storage import java.io.File import java.io.PrintWriter @@ -32,14 +32,14 @@ class DotAstStorage(override val outputDirectoryPath: String) : Storage { descriptionFileStream.write("dot_file,source_file,label,node_id,token,type\n") } - override fun store(labellingResult: LabellingResult) { + override fun store(labeledResult: LabeledResult) { // Use filename as a label for ast // TODO: save full signature for method - val normalizedLabel = normalizeAstLabel(labellingResult.label) - val normalizedFilepath = normalizeFilepath(labellingResult.filePath) - val nodesMap = dumpAst(labellingResult.root, File(astDirectoryPath, astFilenameFormat.format(index)), normalizedLabel) + val normalizedLabel = normalizeAstLabel(labeledResult.label) + val normalizedFilepath = normalizeFilepath(labeledResult.filePath) + val nodesMap = dumpAst(labeledResult.root, File(astDirectoryPath, astFilenameFormat.format(index)), normalizedLabel) val nodeDescriptionFormat = "${astFilenameFormat.format(index)},$normalizedFilepath,$normalizedLabel,%d,%s,%s" - for (node in labellingResult.root.preOrder()) { + for (node in labeledResult.root.preOrder()) { descriptionFileStream.write( 
nodeDescriptionFormat.format( nodesMap.getId(node) - 1, diff --git a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index cab347d3..cc54653c 100644 --- a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -1,11 +1,11 @@ package astminer.storage.path +import astminer.cli.LabeledResult import astminer.common.model.* import astminer.common.storage.* import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import astminer.paths.toPathContext -import astminer.storage.LabellingResult import astminer.storage.Storage import astminer.storage.TokenProcessor import java.io.File @@ -88,18 +88,18 @@ abstract class PathBasedStorage( pathMiner.retrievePaths(node) } - private fun retrieveLabeledPathContexts(labellingResult: LabellingResult): LabeledPathContexts { - val paths = retrievePaths(labellingResult.root) - return LabeledPathContexts(labellingResult.label, paths.map { astPath -> + private fun retrieveLabeledPathContexts(labeledResult: LabeledResult): LabeledPathContexts { + val paths = retrievePaths(labeledResult.root) + return LabeledPathContexts(labeledResult.label, paths.map { astPath -> toPathContext(astPath) { node -> node.getProcessedToken() } }) } /** - * Extract paths from [labellingResult] and store them in the specified format. + * Extract paths from [labeledResult] and store them in the specified format. 
*/ - override fun store(labellingResult: LabellingResult) { - val labeledPathContexts = retrieveLabeledPathContexts(labellingResult) + override fun store(labeledResult: LabeledResult) { + val labeledPathContexts = retrieveLabeledPathContexts(labeledResult) val labeledPathContextIds = LabeledPathContextIds( labeledPathContexts.label, labeledPathContexts.pathContexts.map { storePathContext(it) } diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index 32793abb..6d9c1723 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -1,7 +1,7 @@ package astminer.common +import astminer.cli.LabeledResult import astminer.common.model.Node -import astminer.storage.LabellingResult class DummyNode(val data: String, val childrenList: MutableList) : Node { @@ -61,4 +61,4 @@ fun createSmallTree(): DummyNode { return node1 } -fun T.labeledWith(label: String) = LabellingResult(this, label, "") +fun T.labeledWith(label: String) = LabeledResult(this, label, "") From 55e76feae8e46944fb0e7119f05b66f2f5ebd338 Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 13 Apr 2021 11:28:28 +0500 Subject: [PATCH 055/308] added documentation --- .../storage/RankedIncrementalIdStorage.kt | 58 +++++++++++++++---- src/main/kotlin/astminer/storage/Storage.kt | 5 +- src/test/kotlin/astminer/common/TestUtils.kt | 20 +++++++ 3 files changed, 69 insertions(+), 14 deletions(-) diff --git a/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt b/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt index 0f372baf..03990c4d 100644 --- a/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt +++ b/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt @@ -1,52 +1,86 @@ package astminer.common.storage +typealias Id = Long + +/** + * This storage automatically assigns each item an id + * and records how many times each item has been recorded in 
the storage. + * It ranks items by the number of times they have been recorded. + */ class RankedIncrementalIdStorage { private var keyCounter = 0L - val idPerItem: MutableMap = HashMap() - private val idCountMap: MutableMap = HashMap() - private var idCountRanks: Map? = null + val idPerItem: MutableMap = HashMap() + private val idCountMap: MutableMap = HashMap() + private var idCountRanks: Map? = null private fun putAndIncrementKey(item: T): Long { idPerItem[item] = ++keyCounter return keyCounter } - private fun incrementIdCount(id: Long) { + private fun incrementIdCount(id: Id) { idCountMap[id] = idCountMap.getOrDefault(id, 0) + 1 } - fun record(item: T): Long { + /** + * Puts the item into the storage or increments the count of [item] in the storage if it is already present. + * @param item The item to be put in the storage + * @return The id of the recorded item + */ + fun record(item: T): Id { val id = idPerItem[item] ?: putAndIncrementKey(item) incrementIdCount(id) return id } + /** + * Returns the id of the item if the item was recorded with the record(item) method, returns 0 otherwise. + */ fun getId(item: T): Long = idPerItem[item] ?: 0 - fun getIdCount(id: Long) = idCountMap.getOrDefault(id, 0) + /** + * Returns the number of times the item with the provided [id] has been recorded in the storage. + */ + fun getIdCount(id: Id) = idCountMap.getOrDefault(id, 0) - fun lookUpValue(id: Long): T? { + /** + * Returns the item by its [id] + */ + fun lookUpValue(id: Id): T? { return idPerItem.entries.firstOrNull { it.value == id }?.key } + /** + * Returns the rank of the [item] + * @see getIdRank + */ fun getKeyRank(item: T) = getIdRank(getId(item)) - fun getIdRank(id: Long): Long { + /** + * Returns the rank of the item with this [id]. + * The item that has been recorded in the storage the most times has the rank 1, + * the second most recorded item has the rank 2, and so on... 
+ */ + fun getIdRank(id: Id): Long { if (idCountRanks == null) { computeRanks() } return idCountRanks?.get(id) ?: 0 } + /** + * Computes the ranks + * @see getIdRank + */ fun computeRanks() { - val sortedEntries = idCountMap.entries + val sortedIds = idCountMap.entries .sortedBy { it.value } .reversed() .map { it.key } .toList() - val idRankMap = mutableMapOf() - for (i in sortedEntries.indices) { - idRankMap[sortedEntries[i]] = (i + 1).toLong() + val idRankMap = mutableMapOf() + for ((index, id) in sortedIds.withIndex()) { + idRankMap[id] = (index + 1).toLong() } idCountRanks = idRankMap } diff --git a/src/main/kotlin/astminer/storage/Storage.kt b/src/main/kotlin/astminer/storage/Storage.kt index 8153b165..3a306ea7 100644 --- a/src/main/kotlin/astminer/storage/Storage.kt +++ b/src/main/kotlin/astminer/storage/Storage.kt @@ -2,8 +2,9 @@ package astminer.storage import astminer.cli.LabeledResult import astminer.common.model.Node +import java.io.Closeable -interface Storage { +interface Storage : Closeable { val outputDirectoryPath: String fun store(labeledResult: LabeledResult) @@ -14,5 +15,5 @@ interface Storage { } } - fun close() + override fun close() } diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index 6d9c1723..a0b64b38 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -39,6 +39,17 @@ class DummyNode(val data: String, val childrenList: MutableList) : No } +/** + * Returns a small tree. + * Diagram: + * 1 + * / \ + * / \ + * 2 3 + * / | \ / \ + * 4 5 6 7 8 + * + */ fun createDummyTree(): DummyNode { val node4 = DummyNode("4", mutableListOf()) val node5 = DummyNode("5", mutableListOf()) @@ -52,6 +63,15 @@ fun createDummyTree(): DummyNode { return DummyNode("1", mutableListOf(node2, node3)) } +/** + * Returns a small tree. 
+ * Diagram: + * 1 + * / \ + * 2 3 + * \ + * 4 + */ fun createSmallTree(): DummyNode { val node4 = DummyNode("4", mutableListOf()) val node3 = DummyNode("3", mutableListOf(node4)) From 029227e77f6770da06b18014ee1bfca8f403cadd Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 13 Apr 2021 12:48:51 +0500 Subject: [PATCH 056/308] added tests for dot and csv storage --- src/test/kotlin/astminer/common/TestUtils.kt | 17 ++++++ .../astminer/storage/ast/CsvAstStorageTest.kt | 39 ++++++++++++++ .../astminer/storage/ast/DotAstStorageTest.kt | 53 ++++++++++++++++++- 3 files changed, 107 insertions(+), 2 deletions(-) diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index a0b64b38..74094e79 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -81,4 +81,21 @@ fun createSmallTree(): DummyNode { return node1 } +/** + * Creates a bamboo + * Diagram for [size] 3: + * 1 + * \ + * 2 + * \ + * 3 + */ +fun createBamboo(size: Int): DummyNode { + var root = DummyNode(size.toString(), mutableListOf()) + for (i in 1 until size) { + root = DummyNode((size - i).toString(), mutableListOf(root)) + } + return root +} + fun T.labeledWith(label: String) = LabeledResult(this, label, "") diff --git a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt index 796e76f4..ddb0b49e 100644 --- a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt @@ -1,5 +1,7 @@ package astminer.storage.ast +import astminer.common.createBamboo +import astminer.common.createDummyTree import astminer.common.createSmallTree import astminer.common.labeledWith import astminer.storage.ast.CsvAstStorage @@ -7,6 +9,13 @@ import org.junit.Assert import org.junit.Test class CsvAstStorageTest { + private fun generateCorrectAstStringForBamboo(from: Int, to: Int): String { + if (from == 
to) { + return "$from $from{}" + } + val child = generateCorrectAstStringForBamboo(from + 1, to) + return "$from $from{$child}" + } @Test fun testAstString() { @@ -17,4 +26,34 @@ class CsvAstStorageTest { Assert.assertEquals(storage.astString(root), "1 1{2 2{}3 3{4 4{}}}") } + @Test + fun `test ast string for bigger tree`() { + val root = createDummyTree() + val storage = CsvAstStorage(".") + storage.store(root.labeledWith("entityId")) + + val expected = "1 1{2 2{3 3{}4 4{}5 5{}}6 6{7 7{}8 8{}}}" + Assert.assertEquals(expected, storage.astString(root)) + } + + @Test + fun `test ast string for small bamboo`() { + val bamboo = createBamboo(10) + val storage = CsvAstStorage(".") + storage.store(bamboo.labeledWith("entityId")) + + val expected = generateCorrectAstStringForBamboo(1, 10) + Assert.assertEquals(expected, storage.astString(bamboo)) + } + + @Test + fun `test ast string for big bamboo`() { + val bamboo = createBamboo(100) + val storage = CsvAstStorage(".") + storage.store(bamboo.labeledWith("entityId")) + + val expected = generateCorrectAstStringForBamboo(1, 100) + Assert.assertEquals(expected, storage.astString(bamboo)) + } + } \ No newline at end of file diff --git a/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt index 7fca39e3..c8a0014c 100644 --- a/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt @@ -1,12 +1,33 @@ package astminer.storage.ast -import astminer.common.createSmallTree -import astminer.common.labeledWith +import astminer.common.* import org.junit.Test import java.io.File import kotlin.test.assertEquals class DotAstStorageTest { + private fun testOnTree(root: DummyNode, expectedLines: List) { + DotAstStorage("test_examples").use { storage -> + storage.store(root.labeledWith("entityId")) + } + + val storageLines = File(File("test_examples", "asts"), "ast_0.dot").readLines() + + 
File("test_examples").deleteRecursively() + + assertEquals(expectedLines, storageLines) + } + + private fun getBambooLines(size: Int): List { + val lines = mutableListOf() + lines.add("digraph entityId {") + for (i in 0..(size - 2)) { + lines.add("$i -- {${i + 1}};") + } + lines.add("${size - 1} -- {};") + lines.add("}") + return lines + } @Test fun testDotStorageOnSmallTree() { @@ -31,6 +52,34 @@ class DotAstStorageTest { assertEquals(trueLines, storageLines) } + @Test + fun `test dot storage on dummy tree`() { + val trueLines = listOf( + "digraph entityId {", + "0 -- {1 2};", + "1 -- {3 4 5};", + "3 -- {};", + "4 -- {};", + "5 -- {};", + "2 -- {6 7};", + "6 -- {};", + "7 -- {};", + "}" + ) + + testOnTree(createDummyTree(), trueLines) + } + + @Test + fun `test dot storage on small bamboo`() { + testOnTree(createBamboo(10), getBambooLines(10)) + } + + @Test + fun `test dot storage on big bamboo`() { + testOnTree(createBamboo(100), getBambooLines(100)) + } + @Test fun testLabelNormalization() { val label = "some/kind/of/random/path" From 0f0645207340eb0b18b6dbea267c43aea4b98360 Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 13 Apr 2021 13:30:58 +0500 Subject: [PATCH 057/308] removed LeaveOriginal TokenProcessor --- src/main/java/astminer/examples/AllJavaFiles.java | 2 +- src/main/kotlin/astminer/storage/TokenProcessor.kt | 8 -------- .../astminer/storage/path/Code2VecPathStorage.kt | 2 +- .../kotlin/astminer/storage/path/CsvPathStorage.kt | 2 +- .../kotlin/astminer/storage/TokenProcessorTest.kt | 13 ------------- 5 files changed, 3 insertions(+), 24 deletions(-) delete mode 100644 src/test/kotlin/astminer/storage/TokenProcessorTest.kt diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index b3c434cf..2b81745e 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -20,7 +20,7 @@ public class AllJavaFiles { public static void 
runExample() { final PathBasedStorageConfig config = new PathBasedStorageConfig(5, 5, Long.MAX_VALUE, Long.MAX_VALUE, Integer.MAX_VALUE); - final PathBasedStorage pathStorage = new CsvPathStorage(OUTPUT_FOLDER, config, TokenProcessor.LeaveOriginal); + final PathBasedStorage pathStorage = new CsvPathStorage(OUTPUT_FOLDER, config, TokenProcessor.Normalize); final Path inputFolder = Paths.get(INPUT_FOLDER); diff --git a/src/main/kotlin/astminer/storage/TokenProcessor.kt b/src/main/kotlin/astminer/storage/TokenProcessor.kt index 9b6ff431..dc1851e0 100644 --- a/src/main/kotlin/astminer/storage/TokenProcessor.kt +++ b/src/main/kotlin/astminer/storage/TokenProcessor.kt @@ -10,14 +10,6 @@ import astminer.common.normalizeToken * Before saving a token on the disk one usually processes the token with a TokenProcessor. */ enum class TokenProcessor { - /** - * Does not actually process the token, returns the original unchanged token. - * Works like the identity function id: x --> x, hence the name. - */ - LeaveOriginal { - override fun processToken(node: Node): String = node.getToken() - }, - /** * Splits the token into subtokens (words). 
* For example, "getFull_name" --> "get full name" diff --git a/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt b/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt index 51f694dc..dbaa0fd5 100644 --- a/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt +++ b/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt @@ -6,7 +6,7 @@ import astminer.storage.TokenProcessor class Code2VecPathStorage( outputDirectoryPath: String, config: PathBasedStorageConfig, - tokenProcessor: TokenProcessor = TokenProcessor.Normalize + tokenProcessor: TokenProcessor ) : PathBasedStorage(outputDirectoryPath, config, tokenProcessor) { diff --git a/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt b/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt index 190f1b5b..576b388d 100644 --- a/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt +++ b/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt @@ -6,7 +6,7 @@ import astminer.storage.TokenProcessor class CsvPathStorage( outputDirectoryPath: String, config: PathBasedStorageConfig, - tokenProcessor: TokenProcessor = TokenProcessor.LeaveOriginal + tokenProcessor: TokenProcessor ) : PathBasedStorage(outputDirectoryPath, config, tokenProcessor) { diff --git a/src/test/kotlin/astminer/storage/TokenProcessorTest.kt b/src/test/kotlin/astminer/storage/TokenProcessorTest.kt deleted file mode 100644 index 119de43d..00000000 --- a/src/test/kotlin/astminer/storage/TokenProcessorTest.kt +++ /dev/null @@ -1,13 +0,0 @@ -package astminer.storage - -import astminer.common.DummyNode -import org.junit.Assert -import org.junit.Test - -internal class TokenProcessorTest { - @Test - fun `test leave original should return the unchanged token`() { - val node = DummyNode("original unchanged token", mutableListOf()) - Assert.assertEquals("original unchanged token", TokenProcessor.LeaveOriginal.processToken(node)) - } -} From 3ff90ea1ef7c55e37265dff12da68221316ad248 Mon Sep 17 00:00:00 2001 From: furetur Date: 
Tue, 13 Apr 2021 13:39:03 +0500 Subject: [PATCH 058/308] changed remaining MAX_VALUE to null --- src/main/java/astminer/examples/AllJavaFiles.java | 2 +- src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt | 2 +- src/main/kotlin/astminer/storage/path/CsvPathStorage.kt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 2b81745e..032b6c12 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -19,7 +19,7 @@ public class AllJavaFiles { private static final String OUTPUT_FOLDER = "out_examples/allJavaFiles_GumTree_java"; public static void runExample() { - final PathBasedStorageConfig config = new PathBasedStorageConfig(5, 5, Long.MAX_VALUE, Long.MAX_VALUE, Integer.MAX_VALUE); + final PathBasedStorageConfig config = new PathBasedStorageConfig(5, 5, null, null, null); final PathBasedStorage pathStorage = new CsvPathStorage(OUTPUT_FOLDER, config, TokenProcessor.Normalize); final Path inputFolder = Paths.get(INPUT_FOLDER); diff --git a/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt b/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt index dbaa0fd5..51f694dc 100644 --- a/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt +++ b/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt @@ -6,7 +6,7 @@ import astminer.storage.TokenProcessor class Code2VecPathStorage( outputDirectoryPath: String, config: PathBasedStorageConfig, - tokenProcessor: TokenProcessor + tokenProcessor: TokenProcessor = TokenProcessor.Normalize ) : PathBasedStorage(outputDirectoryPath, config, tokenProcessor) { diff --git a/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt b/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt index 576b388d..d57ee370 100644 --- a/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt +++ 
b/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt @@ -6,7 +6,7 @@ import astminer.storage.TokenProcessor class CsvPathStorage( outputDirectoryPath: String, config: PathBasedStorageConfig, - tokenProcessor: TokenProcessor + tokenProcessor: TokenProcessor = TokenProcessor.Normalize ) : PathBasedStorage(outputDirectoryPath, config, tokenProcessor) { From b49205136a41603e5da8a4229039c91b7dbc2e76 Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 16 Apr 2021 14:02:14 +0500 Subject: [PATCH 059/308] introduced technical tokens and added documentation for them --- .../kotlin/astminer/cli/LabelExtractors.kt | 6 ++-- src/main/kotlin/astminer/cli/ProjectParser.kt | 34 +++++++++++-------- src/main/kotlin/astminer/common/TreeUtil.kt | 15 ++++++-- src/main/kotlin/astminer/storage/Storage.kt | 2 -- .../kotlin/astminer/storage/TokenProcessor.kt | 12 +++++-- .../astminer/storage/ast/DotAstStorage.kt | 18 ++++++---- .../astminer/storage/path/PathBasedStorage.kt | 4 +-- .../kotlin/astminer/cli/LabelExtractorTest.kt | 7 ++-- 8 files changed, 62 insertions(+), 36 deletions(-) diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index d1e68e96..94475201 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -4,7 +4,7 @@ import astminer.common.model.MethodInfo import astminer.common.model.Node import astminer.common.model.ParseResult import astminer.common.preOrder -import astminer.common.setNormalizedToken +import astminer.common.setTechnicalToken import astminer.parse.antlr.SimpleNode import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.javascript.JavaScriptMethodSplitter @@ -143,10 +143,10 @@ class MethodNameExtractor( if (hideMethodNames) { methodRoot.preOrder().forEach { node -> if (node.getToken() == methodName) { - node.setNormalizedToken("SELF") + node.setTechnicalToken("SELF") } } - 
methodNameNode.setNormalizedToken("METHOD_NAME") + methodNameNode.setTechnicalToken("METHOD_NAME") } return methodName } diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt index 246e2aa4..b550a6b8 100644 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ b/src/main/kotlin/astminer/cli/ProjectParser.kt @@ -5,6 +5,7 @@ import astminer.storage.ast.DotAstStorage import astminer.common.getProjectFilesWithExtension import astminer.common.preOrder import astminer.storage.Storage +import astminer.storage.TokenProcessor import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* import com.github.ajalt.clikt.parameters.types.int @@ -94,15 +95,18 @@ class ProjectParser(private val customLabelExtractor: LabelExtractor? = null) : ).int().default(-1) val folderLabel: Boolean by option( - "--folder-label", - help = "if passed with file-level granularity, the folder name is used to label paths" + "--folder-label", + help = "if passed with file-level granularity, the folder name is used to label paths" ).flag(default = false) private fun getStorage(storageType: String, directoryPath: String): Storage { return when (storageType) { "csv" -> CsvAstStorage(directoryPath) - "dot" -> DotAstStorage(directoryPath) + "dot" -> DotAstStorage( + directoryPath, + if (isTokenSplitted) TokenProcessor.Split else TokenProcessor.Normalize + ) else -> { throw UnsupportedOperationException("Unsupported AST storage $storageType") } @@ -118,8 +122,8 @@ class ProjectParser(private val customLabelExtractor: LabelExtractor? = null) : val storage = getStorage(astStorageType, outputDirForLanguage.path) // Choose type of parser val parser = getParser( - extension, - javaParser + extension, + javaParser ) // Parse project val filesToParse = getProjectFilesWithExtension(File(projectRoot), extension) @@ -142,16 +146,16 @@ class ProjectParser(private val customLabelExtractor: LabelExtractor? 
= null) : override fun run() { val labelExtractor = customLabelExtractor ?: getLabelExtractor( - granularityLevel, - javaParser, - isMethodNameHide, - excludeModifiers, - excludeAnnotations, - filterConstructors, - maxMethodNameLength, - maxTokenLength, - maxTreeSize, - folderLabel + granularityLevel, + javaParser, + isMethodNameHide, + excludeModifiers, + excludeAnnotations, + filterConstructors, + maxMethodNameLength, + maxTokenLength, + maxTreeSize, + folderLabel ) parsing(labelExtractor) } diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index 53d2127e..2d600af9 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -1,9 +1,9 @@ package astminer.common import astminer.common.model.Node +import astminer.storage.TokenProcessor import java.util.ArrayList - fun Node.postOrderIterator(): Iterator { //TODO implement properly return postOrder().listIterator() @@ -52,7 +52,18 @@ fun Node.setNormalizedToken(normalizedToken: String) { setMetadata(NORMALIZED_TOKEN_KEY, normalizedToken) } -fun Node.getNormalizedToken(): String = getMetadata(NORMALIZED_TOKEN_KEY)?.toString() ?: DEFAULT_TOKEN +/** + * Sets a node's technical token. + * Technical tokens do not have to represent original tokens. + * @see TokenProcessor and how it treats technical tokens + */ +fun Node.setTechnicalToken(token: String) = setMetadata("technical_token", token) + +/** + * Get a node's technical token. + * @see setTechnicalToken for more + */ +fun Node.getTechnicalToken(): String? 
= getMetadata("technical_token")?.toString() /** * The function was adopted from the original code2vec implementation in order to match their behavior: diff --git a/src/main/kotlin/astminer/storage/Storage.kt b/src/main/kotlin/astminer/storage/Storage.kt index 3a306ea7..c7c26032 100644 --- a/src/main/kotlin/astminer/storage/Storage.kt +++ b/src/main/kotlin/astminer/storage/Storage.kt @@ -14,6 +14,4 @@ interface Storage : Closeable { store(labeledResult) } } - - override fun close() } diff --git a/src/main/kotlin/astminer/storage/TokenProcessor.kt b/src/main/kotlin/astminer/storage/TokenProcessor.kt index dc1851e0..003b0465 100644 --- a/src/main/kotlin/astminer/storage/TokenProcessor.kt +++ b/src/main/kotlin/astminer/storage/TokenProcessor.kt @@ -2,17 +2,18 @@ package astminer.storage import astminer.cli.separateToken import astminer.common.DEFAULT_TOKEN +import astminer.common.getTechnicalToken import astminer.common.model.Node import astminer.common.normalizeToken /** - * Each TokenProcessor processes a node's token and returns a new representation of it. + * Each TokenProcessor processes a node's token and returns a new representation of it. *It respects technical tokens*. * Before saving a token on the disk one usually processes the token with a TokenProcessor. */ enum class TokenProcessor { /** * Splits the token into subtokens (words). - * For example, "getFull_name" --> "get full name" + * For example, "getFull_name" --> "get|full|name" */ Split { override fun processToken(node: Node): String = separateToken(node.getToken()) @@ -25,5 +26,10 @@ enum class TokenProcessor { override fun processToken(node: Node): String = normalizeToken(node.getToken(), DEFAULT_TOKEN) }; - abstract fun processToken(node: Node): String + protected abstract fun processToken(node: Node): String + + /** + * Returns technical token, if technical token is set. Returns processed original token otherwise. 
+ */ + fun getPresentableToken(node: Node) = node.getTechnicalToken() ?: processToken(node) } diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index d8026dbf..087b7149 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -1,11 +1,11 @@ package astminer.storage.ast import astminer.cli.LabeledResult -import astminer.common.getNormalizedToken import astminer.common.model.Node import astminer.common.preOrder import astminer.common.storage.RankedIncrementalIdStorage import astminer.storage.Storage +import astminer.storage.TokenProcessor import java.io.File import java.io.PrintWriter @@ -13,7 +13,10 @@ import java.io.PrintWriter * Stores multiple ASTs in dot format (https://en.wikipedia.org/wiki/DOT_(graph_description_language)) * Output consist of separate .dot files for each AST and one full description in .csv format */ -class DotAstStorage(override val outputDirectoryPath: String) : Storage { +class DotAstStorage( + override val outputDirectoryPath: String, + val tokenProcessor: TokenProcessor = TokenProcessor.Normalize +) : Storage { internal data class FilePath(val parentPath: String, val fileName: String) @@ -32,18 +35,21 @@ class DotAstStorage(override val outputDirectoryPath: String) : Storage { descriptionFileStream.write("dot_file,source_file,label,node_id,token,type\n") } + private fun Node.getPresentableToken(): String = tokenProcessor.getPresentableToken(this) + override fun store(labeledResult: LabeledResult) { // Use filename as a label for ast // TODO: save full signature for method val normalizedLabel = normalizeAstLabel(labeledResult.label) val normalizedFilepath = normalizeFilepath(labeledResult.filePath) - val nodesMap = dumpAst(labeledResult.root, File(astDirectoryPath, astFilenameFormat.format(index)), normalizedLabel) + val nodesMap = + dumpAst(labeledResult.root, File(astDirectoryPath, 
astFilenameFormat.format(index)), normalizedLabel) val nodeDescriptionFormat = "${astFilenameFormat.format(index)},$normalizedFilepath,$normalizedLabel,%d,%s,%s" for (node in labeledResult.root.preOrder()) { descriptionFileStream.write( nodeDescriptionFormat.format( nodesMap.getId(node) - 1, - node.getNormalizedToken(), + node.getPresentableToken(), node.getTypeLabel() ) + "\n" ) @@ -77,14 +83,14 @@ class DotAstStorage(override val outputDirectoryPath: String) : Storage { // Label should contain only latin letters, numbers and underscores, other symbols replace with an underscore internal fun normalizeAstLabel(label: String): String = - label.replace("[^A-z^0-9^_]".toRegex(), "_") + label.replace("[^A-z^0-9^_]".toRegex(), "_") /** * Filepath should contain only latin letters, numbers, underscores, hyphens, backslashes and dots * Underscore replace other symbols */ internal fun normalizeFilepath(filepath: String): String = - filepath.replace("[^A-z^0-9^_^\\-^.^/]".toRegex(), "_") + filepath.replace("[^A-z^0-9^_^\\-^.^/]".toRegex(), "_") /** * Split the full path to specified file into the parent's path, and the file name diff --git a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index cc54653c..e998543c 100644 --- a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -59,7 +59,7 @@ abstract class PathBasedStorage( abstract fun pathContextIdsToString(pathContextIds: List, label: String): String - private fun Node.getProcessedToken(): String = tokenProcessor.processToken(this) + private fun Node.getPresentableToken(): String = tokenProcessor.getPresentableToken(this) private fun dumpPathContexts(labeledPathContextIds: LabeledPathContextIds) { val pathContextIdsString = labeledPathContextIds.pathContexts.filter { @@ -91,7 +91,7 @@ abstract class PathBasedStorage( private fun retrieveLabeledPathContexts(labeledResult: 
LabeledResult): LabeledPathContexts { val paths = retrievePaths(labeledResult.root) return LabeledPathContexts(labeledResult.label, paths.map { astPath -> - toPathContext(astPath) { node -> node.getProcessedToken() } + toPathContext(astPath) { node -> node.getPresentableToken() } }) } diff --git a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt index 5e3c44be..0cd7647f 100644 --- a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt +++ b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt @@ -1,6 +1,6 @@ package astminer.cli -import astminer.common.getNormalizedToken +import astminer.common.getTechnicalToken import astminer.common.model.ElementNode import astminer.common.model.MethodInfo import astminer.common.model.MethodNode @@ -8,6 +8,7 @@ import astminer.common.model.ParseResult import astminer.parse.antlr.SimpleNode import org.junit.Test import kotlin.test.assertEquals +import kotlin.test.assertNull import kotlin.test.assertTrue internal class LabelExtractorTest { @@ -70,7 +71,7 @@ internal class LabelExtractorTest { val methodNameExtractor = MethodNameExtractor(false) val label = methodNameExtractor.extractLabel(methodInfo, PATH_STRING) assertEquals(METHOD_NAME, label) - assertEquals(METHOD_NAME, nameNode.getNormalizedToken()) + assertNull(nameNode.getTechnicalToken()) } @Test @@ -85,6 +86,6 @@ internal class LabelExtractorTest { val methodNameExtractor = MethodNameExtractor(true) val label = methodNameExtractor.extractLabel(methodInfo, PATH_STRING) assertEquals(METHOD_NAME, label) - assertEquals("METHOD_NAME", nameNode.getNormalizedToken()) + assertEquals("METHOD_NAME", nameNode.getTechnicalToken()) } } From 639bf920805ea52a3fb130472c23a93bcef3b1d2 Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 16 Apr 2021 14:15:38 +0500 Subject: [PATCH 060/308] removed CsvPathStorage.kt --- .../java/astminer/examples/AllJavaFiles.java | 4 ++-- src/main/kotlin/astminer/common/TreeUtil.kt | 7 +++++-- 
.../kotlin/astminer/examples/AllCppFiles.kt | 4 ++-- .../kotlin/astminer/examples/AllJavaFiles.kt | 4 ++-- .../astminer/examples/AllJavaFilesGumTree.kt | 4 ++-- .../astminer/examples/AllJavaMethods.kt | 4 ++-- .../astminer/examples/AllJavaScriptFiles.kt | 4 ++-- .../astminer/examples/AllPythonFiles.kt | 4 ++-- .../astminer/examples/AllPythonMethods.kt | 4 ++-- .../astminer/storage/path/CsvPathStorage.kt | 19 ------------------- 10 files changed, 21 insertions(+), 37 deletions(-) delete mode 100644 src/main/kotlin/astminer/storage/path/CsvPathStorage.kt diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 032b6c12..8dd45b30 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -4,7 +4,7 @@ import astminer.common.model.*; import astminer.parse.java.GumTreeJavaParser; import astminer.storage.*; -import astminer.storage.path.CsvPathStorage; +import astminer.storage.path.Code2VecPathStorage; import astminer.storage.path.PathBasedStorage; import astminer.storage.path.PathBasedStorageConfig; @@ -20,7 +20,7 @@ public class AllJavaFiles { public static void runExample() { final PathBasedStorageConfig config = new PathBasedStorageConfig(5, 5, null, null, null); - final PathBasedStorage pathStorage = new CsvPathStorage(OUTPUT_FOLDER, config, TokenProcessor.Normalize); + final PathBasedStorage pathStorage = new Code2VecPathStorage(OUTPUT_FOLDER, config, TokenProcessor.Normalize); final Path inputFolder = Paths.get(INPUT_FOLDER); diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index 2d600af9..75bf0df0 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -37,10 +37,12 @@ fun Node.preOrder(): List { const val NORMALIZED_TOKEN_KEY = "normalized_token" const val DEFAULT_TOKEN = "EMPTY_TOKEN" +const val TECHNICAL_TOKEN_KEY = 
"technical_token" /** * Set normalized token for a node with default normalizing function. */ +@Deprecated("use setTechnicalToken or TokenProcessor") fun Node.setNormalizedToken() { setMetadata(NORMALIZED_TOKEN_KEY, normalizeToken(getToken(), DEFAULT_TOKEN)) } @@ -48,6 +50,7 @@ fun Node.setNormalizedToken() { /** * Set normalized token to a custom value. */ +@Deprecated("use setTechnicalToken or TokenProcessor") fun Node.setNormalizedToken(normalizedToken: String) { setMetadata(NORMALIZED_TOKEN_KEY, normalizedToken) } @@ -57,13 +60,13 @@ fun Node.setNormalizedToken(normalizedToken: String) { * Technical tokens do not have to represent original tokens. * @see TokenProcessor and how it treats technical tokens */ -fun Node.setTechnicalToken(token: String) = setMetadata("technical_token", token) +fun Node.setTechnicalToken(token: String) = setMetadata(TECHNICAL_TOKEN_KEY, token) /** * Get a node's technical token. * @see setTechnicalToken for more */ -fun Node.getTechnicalToken(): String? = getMetadata("technical_token")?.toString() +fun Node.getTechnicalToken(): String? 
= getMetadata(TECHNICAL_TOKEN_KEY)?.toString() /** * The function was adopted from the original code2vec implementation in order to match their behavior: diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index 1bab184f..e36e124e 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -4,7 +4,7 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.cpp.FuzzyCppParser -import astminer.storage.path.CsvPathStorage +import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File @@ -13,7 +13,7 @@ fun allCppFiles() { val inputDir = File("src/test/resources/examples/cpp") val outputDir = "out_examples/allCppFiles" - val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5)) + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) val parser = FuzzyCppParser() val preprocOutputFolder = File("preprocessed") diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index d1e2d56a..d135e439 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -3,7 +3,7 @@ package astminer.examples import astminer.cli.LabeledResult import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser -import astminer.storage.path.CsvPathStorage +import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File @@ -12,7 +12,7 @@ fun allJavaFiles() { val inputDir = "src/test/resources/examples/" val outputDir = "out_examples/allJavaFilesAntlr" - val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5)) + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) File(inputDir).forFilesWithSuffix("11.java") 
{ file -> val node = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index 347ec963..771a1e20 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -2,7 +2,7 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.java.GumTreeJavaParser -import astminer.storage.path.CsvPathStorage +import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File @@ -11,7 +11,7 @@ fun allJavaFilesGumTree() { val inputDir = "src/test/resources/gumTreeMethodSplitter/" val outputDir = "out_examples/allJavaFilesGumTree" - val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5)) + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) val files = getProjectFilesWithExtension(File(inputDir), "java") GumTreeJavaParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index f4842f9e..209939c4 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -6,7 +6,7 @@ import astminer.parse.java.GumTreeJavaNode import astminer.parse.java.GumTreeJavaParser import astminer.parse.java.GumTreeJavaMethodSplitter import astminer.storage.* -import astminer.storage.path.CsvPathStorage +import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File @@ -25,7 +25,7 @@ fun allJavaMethods() { val inputDir = "src/test/resources/gumTreeMethodSplitter" val outputDir = "out_examples/allJavaMethods" - val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) + val storage = 
Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) File(inputDir).forFilesWithSuffix(".java") { file -> //parse file diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index f04e75fa..2e7db26c 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -2,16 +2,16 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.antlr.javascript.JavaScriptParser -import astminer.storage.path.CsvPathStorage import astminer.storage.path.PathBasedStorageConfig import astminer.storage.TokenProcessor +import astminer.storage.path.Code2VecPathStorage import java.io.File fun allJavaScriptFiles() { val folder = "src/test/resources/examples" val outputDir = "out_examples/allJavaScriptFilesAntlr" - val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) val files = getProjectFilesWithExtension(File(folder), "js") JavaScriptParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index 239f2d4b..ff8a82b0 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -2,9 +2,9 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.antlr.python.PythonParser -import astminer.storage.path.CsvPathStorage import astminer.storage.path.PathBasedStorageConfig import astminer.storage.TokenProcessor +import astminer.storage.path.Code2VecPathStorage import java.io.File @@ -12,7 +12,7 @@ fun allPythonFiles() { val inputDir = "src/test/resources/examples/" val outputDir = "out_examples/allPythonFiles" - val storage = 
CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) val files = getProjectFilesWithExtension(File(inputDir), "py") PythonParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index eec37d96..956fd236 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -5,7 +5,7 @@ import astminer.common.model.MethodInfo import astminer.parse.python.GumTreePythonMethodSplitter import astminer.parse.python.GumTreePythonNode import astminer.parse.python.GumTreePythonParser -import astminer.storage.path.CsvPathStorage +import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File @@ -20,7 +20,7 @@ fun allPythonMethods() { val inputDir = "src/test/resources/gumTreeMethodSplitter" val outputDir = "out_examples/allPythonMethods" - val storage = CsvPathStorage(outputDir, PathBasedStorageConfig(5, 5)) + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) File(inputDir).forFilesWithSuffix(".py") { file -> // parse file diff --git a/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt b/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt deleted file mode 100644 index d57ee370..00000000 --- a/src/main/kotlin/astminer/storage/path/CsvPathStorage.kt +++ /dev/null @@ -1,19 +0,0 @@ -package astminer.storage.path - -import astminer.common.model.PathContextId -import astminer.storage.TokenProcessor - -class CsvPathStorage( - outputDirectoryPath: String, - config: PathBasedStorageConfig, - tokenProcessor: TokenProcessor = TokenProcessor.Normalize -) : - PathBasedStorage(outputDirectoryPath, config, tokenProcessor) { - - override fun pathContextIdsToString(pathContextIds: List, label: String): String { 
- val joinedPathContexts = pathContextIds.joinToString(";") { pathContextId -> - "${pathContextId.startTokenId} ${pathContextId.pathId} ${pathContextId.endTokenId}" - } - return "$label,$joinedPathContexts" - } -} From 503edae55aa55fe161dfcb9eaeac050beb3278aa Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 16 Apr 2021 15:14:58 +0500 Subject: [PATCH 061/308] removed normalized tokens completely --- .../kotlin/astminer/cli/Code2VecExtractor.kt | 2 - .../astminer/cli/PathContextsExtractor.kt | 2 - src/main/kotlin/astminer/cli/ProjectParser.kt | 1 - src/main/kotlin/astminer/cli/utils.kt | 49 ++++-------- src/main/kotlin/astminer/common/TreeUtil.kt | 17 ---- .../astminer/examples/Code2VecJavaMethods.kt | 3 +- .../kotlin/astminer/storage/TokenProcessor.kt | 6 +- .../kotlin/astminer/cli/LabelExtractorTest.kt | 2 - src/test/kotlin/astminer/common/TestUtils.kt | 5 +- .../astminer/storage/TokenProcessorTest.kt | 78 +++++++++++++++++++ 10 files changed, 102 insertions(+), 63 deletions(-) create mode 100644 src/test/kotlin/astminer/storage/TokenProcessorTest.kt diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt index f1e20b3a..56c290c8 100644 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt @@ -154,8 +154,6 @@ class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? 
= null ) // Parse project one file at a time parser.parseFiles(getProjectFilesWithExtension(File(projectRoot), extension)) { - // TODO: might not be needed - normalizeParseResult(it, isTokenSplitted) // Retrieve labeled data extractFromTree(it, storage, labelExtractor) } diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index 855fd951..b8646bf3 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -117,8 +117,6 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? = val files = getProjectFilesWithExtension(File(projectRoot), extension) parser.parseFiles(files) { parseResult -> - // TODO: might not be needed - normalizeParseResult(parseResult, splitTokens = true) val labeledParseResults = labelExtractor.toLabeledData(parseResult) storage.store(labeledParseResults) } diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt index b550a6b8..cccf4c16 100644 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ b/src/main/kotlin/astminer/cli/ProjectParser.kt @@ -128,7 +128,6 @@ class ProjectParser(private val customLabelExtractor: LabelExtractor? 
= null) : // Parse project val filesToParse = getProjectFilesWithExtension(File(projectRoot), extension) parser.parseFiles(filesToParse) { parseResult -> - normalizeParseResult(parseResult, isTokenSplitted) val labeledParseResults = labelExtractor.toLabeledData(parseResult) labeledParseResults.forEach { labeledParseResult -> labeledParseResult.root.preOrder().forEach { node -> diff --git a/src/main/kotlin/astminer/cli/utils.kt b/src/main/kotlin/astminer/cli/utils.kt index 815e4b5e..92a1635f 100644 --- a/src/main/kotlin/astminer/cli/utils.kt +++ b/src/main/kotlin/astminer/cli/utils.kt @@ -5,16 +5,12 @@ import astminer.parse.antlr.python.PythonParser import astminer.parse.cpp.FuzzyCppParser import astminer.parse.java.GumTreeJavaParser import astminer.common.model.Node -import astminer.common.model.ParseResult import astminer.common.model.Parser -import astminer.common.preOrder -import astminer.common.setNormalizedToken -import astminer.common.splitToSubtokens import astminer.parse.antlr.javascript.JavaScriptParser fun getParser( - extension: String, - javaParser: String + extension: String, + javaParser: String ): Parser { return when (extension) { "java" -> { @@ -36,33 +32,18 @@ fun getParser( } } -fun separateToken(token: String, separator: CharSequence = "|"): String { - return splitToSubtokens(token).joinToString(separator) -} - -fun processNodeToken(node: Node, splitToken: Boolean) { - if (splitToken) { - node.setNormalizedToken(separateToken(node.getToken())) - } else { - node.setNormalizedToken() - } -} - -fun normalizeParseResult(parseResult: ParseResult, splitTokens: Boolean) { - parseResult.root?.preOrder()?.forEach { node -> processNodeToken(node, splitTokens) } -} fun getLabelExtractor( - granularityLevel: String, - javaParser: String, - hideMethodNames: Boolean, - excludeModifiers: List, - excludeAnnotations: List, - filterConstructors: Boolean, - maxMethodNameLength: Int, - maxTokenLength: Int, - maxTreeSize: Int, - useFolderName: Boolean + 
granularityLevel: String, + javaParser: String, + hideMethodNames: Boolean, + excludeModifiers: List, + excludeAnnotations: List, + filterConstructors: Boolean, + maxMethodNameLength: Int, + maxTokenLength: Int, + maxTreeSize: Int, + useFolderName: Boolean ): LabelExtractor { when (granularityLevel) { "file" -> { @@ -74,9 +55,9 @@ fun getLabelExtractor( } "method" -> { val filterPredicates = mutableListOf( - ModifierFilterPredicate(excludeModifiers), AnnotationFilterPredicate(excludeAnnotations), - MethodNameLengthFilterPredicate(maxMethodNameLength), TokenLengthFilterPredicate(maxTokenLength), - TreeSizeFilterPredicate(maxTreeSize) + ModifierFilterPredicate(excludeModifiers), AnnotationFilterPredicate(excludeAnnotations), + MethodNameLengthFilterPredicate(maxMethodNameLength), TokenLengthFilterPredicate(maxTokenLength), + TreeSizeFilterPredicate(maxTreeSize) ) if (filterConstructors) { filterPredicates.add(ConstructorFilterPredicate()) diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index 75bf0df0..0e174f89 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -35,26 +35,9 @@ fun Node.preOrder(): List { return result } -const val NORMALIZED_TOKEN_KEY = "normalized_token" const val DEFAULT_TOKEN = "EMPTY_TOKEN" const val TECHNICAL_TOKEN_KEY = "technical_token" -/** - * Set normalized token for a node with default normalizing function. - */ -@Deprecated("use setTechnicalToken or TokenProcessor") -fun Node.setNormalizedToken() { - setMetadata(NORMALIZED_TOKEN_KEY, normalizeToken(getToken(), DEFAULT_TOKEN)) -} - -/** - * Set normalized token to a custom value. - */ -@Deprecated("use setTechnicalToken or TokenProcessor") -fun Node.setNormalizedToken(normalizedToken: String) { - setMetadata(NORMALIZED_TOKEN_KEY, normalizedToken) -} - /** * Sets a node's technical token. * Technical tokens do not have to represent original tokens. 
diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 553cd735..86246764 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -29,8 +29,7 @@ fun code2vecJavaMethods() { val methodNameNode = methodInfo.method.nameNode ?: return@forEach val methodRoot = methodInfo.method.root val label = splitToSubtokens(methodNameNode.getToken()).joinToString("|") - methodRoot.preOrder().forEach { it.setNormalizedToken() } - methodNameNode.setNormalizedToken("METHOD_NAME") + methodNameNode.setTechnicalToken("METHOD_NAME") // Retrieve paths from every node individually and store them storage.store(LabeledResult(methodRoot, label, file.absolutePath)) diff --git a/src/main/kotlin/astminer/storage/TokenProcessor.kt b/src/main/kotlin/astminer/storage/TokenProcessor.kt index 003b0465..3156b22d 100644 --- a/src/main/kotlin/astminer/storage/TokenProcessor.kt +++ b/src/main/kotlin/astminer/storage/TokenProcessor.kt @@ -1,10 +1,10 @@ package astminer.storage -import astminer.cli.separateToken import astminer.common.DEFAULT_TOKEN import astminer.common.getTechnicalToken import astminer.common.model.Node import astminer.common.normalizeToken +import astminer.common.splitToSubtokens /** * Each TokenProcessor processes a node's token and returns a new representation of it. *It respects technical tokens*. 
@@ -16,6 +16,10 @@ enum class TokenProcessor { * For example, "getFull_name" --> "get|full|name" */ Split { + private fun separateToken(token: String): String { + return splitToSubtokens(token).joinToString("|") + } + override fun processToken(node: Node): String = separateToken(node.getToken()) }, diff --git a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt index 0cd7647f..e9313f63 100644 --- a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt +++ b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt @@ -67,7 +67,6 @@ internal class LabelExtractorTest { ElementNode(null, null), emptyList() ) - processNodeToken(nameNode, false) val methodNameExtractor = MethodNameExtractor(false) val label = methodNameExtractor.extractLabel(methodInfo, PATH_STRING) assertEquals(METHOD_NAME, label) @@ -82,7 +81,6 @@ internal class LabelExtractorTest { ElementNode(null, null), emptyList() ) - processNodeToken(nameNode, false) val methodNameExtractor = MethodNameExtractor(true) val label = methodNameExtractor.extractLabel(methodInfo, PATH_STRING) assertEquals(METHOD_NAME, label) diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index 74094e79..8574daf2 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -5,12 +5,13 @@ import astminer.common.model.Node class DummyNode(val data: String, val childrenList: MutableList) : Node { + private val metadata = mutableMapOf() override fun setMetadata(key: String, value: Any) { - + metadata[key] = value } override fun getMetadata(key: String): Any? 
{ - return null + return metadata[key] } override fun isLeaf(): Boolean { diff --git a/src/test/kotlin/astminer/storage/TokenProcessorTest.kt b/src/test/kotlin/astminer/storage/TokenProcessorTest.kt new file mode 100644 index 00000000..b85a3803 --- /dev/null +++ b/src/test/kotlin/astminer/storage/TokenProcessorTest.kt @@ -0,0 +1,78 @@ +package astminer.storage + +import astminer.common.DEFAULT_TOKEN +import astminer.common.DummyNode +import astminer.common.setTechnicalToken +import org.junit.Assert +import org.junit.Test + + +internal class TokenProcessorTest { + private fun normalizeToken(token: String): String { + val node = DummyNode(token, mutableListOf()) + return TokenProcessor.Normalize.getPresentableToken(node) + } + + private fun splitToken(token: String): String { + val node = DummyNode(token, mutableListOf()) + return TokenProcessor.Split.getPresentableToken(node) + } + + @Test + fun testNormalizeTokenCleaning() { + val token = " Token THAT \n contains Whi\"t,es''pace characters!!!and pu.n.c.t.u.a.tion \n" + val expectedToken = "token" + "that" + "contains" + "whitespace" + "characters" + "and" + "punctuation" + Assert.assertEquals( + "All whitespace characters and punctuation should be removed, keeping only letters", + expectedToken, + normalizeToken(token) + ) + } + + @Test + fun testNormalizeTokenWithoutLetters() { + val token = "* *\n" + val expectedToken = "*_*" + Assert.assertEquals( + "Token without letters have whitespaces replaced with underscores", + expectedToken, + normalizeToken(token) + ) + } + + @Test + fun testNormalizeEmptyToken() { + val token = "\n\n" + val expectedToken = DEFAULT_TOKEN + Assert.assertEquals( + "Token without letters have whitespaces replaced with underscores", + expectedToken, + normalizeToken(token) + ) + } + + @Test + fun testTokenSplit() { + val token = "fun_withReallyLong_And_ComplicatedName" + val expectedToken = "fun|with|really|long|and|complicated|name" + Assert.assertEquals( + "Token with snake, camel and 
combined case should be split into list of its parts", + expectedToken, + splitToken(token) + ) + } + + @Test + fun `test Normalize respects technical token`() { + val node = DummyNode("tokenName", mutableListOf()) + node.setTechnicalToken("technical token") + Assert.assertEquals("technical token", TokenProcessor.Normalize.getPresentableToken(node)) + } + + @Test + fun `test Split respects technical token`() { + val node = DummyNode("tokenName", mutableListOf()) + node.setTechnicalToken("technical token") + Assert.assertEquals("technical token", TokenProcessor.Split.getPresentableToken(node)) + } +} \ No newline at end of file From a72421f958667f5e863f1f375b04036cce4ea679 Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 17 Apr 2021 16:23:41 +0300 Subject: [PATCH 062/308] getChildren of type now overridden in nodes --- .../kotlin/astminer/parse/antlr/AntlrNode.kt | 3 +++ .../parse/antlr/java/JavaMethodSplitter.kt | 14 ++++++------- .../javascript/JavaScriptMethodSplitter.kt | 8 +++---- .../antlr/python/PythonMethodSplitter.kt | 8 +++---- .../astminer/parse/gumtree/GumTreeNode.kt | 15 ++++++++++--- .../gumtree/java/GumTreeJavaMethodSplitter.kt | 21 +++++++++---------- .../python/GumTreePythonMethodSplitter.kt | 9 ++++---- 7 files changed, 44 insertions(+), 34 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt index f08bcdb8..7d316014 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt @@ -44,6 +44,9 @@ class AntlrNode(private val typeLabel: String, private var parent: Node?, privat decompressTypeLabel(it.getTypeLabel()).firstOrNull() == typeLabel } + override fun getChildOfType(typeLabel: String): AntlrNode? 
= + getChildren().firstOrNull { it.getTypeLabel() == typeLabel } + override fun removeChildrenOfType(typeLabel: String) { children.removeIf { it.getTypeLabel() == typeLabel } } diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt index 170e1701..d7724449 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt @@ -29,15 +29,15 @@ class JavaMethodSplitter : TreeMethodSplitter { } private fun collectMethodInfo(methodNode: AntlrNode): MethodInfo { - val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) as? AntlrNode - val methodReturnTypeNode = methodNode.getChildOfType(METHOD_RETURN_TYPE_NODE) as? AntlrNode + val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) + val methodReturnTypeNode = methodNode.getChildOfType(METHOD_RETURN_TYPE_NODE) methodReturnTypeNode?.setToken(collectParameterToken(methodReturnTypeNode)) val classRoot = getEnclosingClass(methodNode) - val className = classRoot?.getChildOfType(CLASS_NAME_NODE) as? AntlrNode + val className = classRoot?.getChildOfType(CLASS_NAME_NODE) - val parametersRoot = methodNode.getChildOfType(METHOD_PARAMETER_NODE) as? AntlrNode - val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) as? AntlrNode + val parametersRoot = methodNode.getChildOfType(METHOD_PARAMETER_NODE) + val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) val parametersList = when { innerParametersRoot != null -> getListOfParameters(innerParametersRoot) @@ -76,12 +76,12 @@ class JavaMethodSplitter : TreeMethodSplitter { } private fun getParameterInfoFromNode(parameterRoot: AntlrNode): ParameterNode { - val returnTypeNode = parameterRoot.getChildOfType(PARAMETER_RETURN_TYPE_NODE) as? 
AntlrNode + val returnTypeNode = parameterRoot.getChildOfType(PARAMETER_RETURN_TYPE_NODE) returnTypeNode?.setToken(collectParameterToken(returnTypeNode)) return ParameterNode( parameterRoot, returnTypeNode, - parameterRoot.getChildOfType(PARAMETER_NAME_NODE) as? AntlrNode + parameterRoot.getChildOfType(PARAMETER_NAME_NODE) ) } diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt index 8182cd73..387ab198 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt @@ -137,8 +137,8 @@ class ArrowElement(private val element: AntlrNode) : JavaScriptElement(element) } override fun getElementParametersRoot(): AntlrNode? { - val parameterRoot = element.getChildOfType(ARROW_PARAMETER_NODE) as? AntlrNode - return parameterRoot?.getChildOfType(ARROW_PARAMETER_INNER_NODE) as? AntlrNode ?: parameterRoot + val parameterRoot = element.getChildOfType(ARROW_PARAMETER_NODE) + return parameterRoot?.getChildOfType(ARROW_PARAMETER_INNER_NODE) ?: parameterRoot } } @@ -156,7 +156,7 @@ class FunctionElement(private val element: AntlrNode) : JavaScriptElement(elemen } override fun getElementParametersRoot(): AntlrNode? { - return element.getChildOfType(FUNCTION_PARAMETER_NODE) as? AntlrNode + return element.getChildOfType(FUNCTION_PARAMETER_NODE) } } @@ -179,6 +179,6 @@ class MethodElement(private val element: AntlrNode) : JavaScriptElement(element) } override fun getElementParametersRoot(): AntlrNode? { - return element.getChildOfType(METHOD_PARAMETER_NODE) as? 
AntlrNode + return element.getChildOfType(METHOD_PARAMETER_NODE) } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt index aedd3bcb..bc62320a 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt @@ -29,13 +29,13 @@ class PythonMethodSplitter : TreeMethodSplitter { } private fun collectMethodInfo(methodNode: AntlrNode): MethodInfo { - val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) as? AntlrNode + val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) val classRoot = getEnclosingClass(methodNode) - val className = classRoot?.getChildOfType(CLASS_NAME_NODE) as? AntlrNode + val className = classRoot?.getChildOfType(CLASS_NAME_NODE) - val parametersRoot = methodNode.getChildOfType(METHOD_PARAMETER_NODE) as? AntlrNode - val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) as? AntlrNode + val parametersRoot = methodNode.getChildOfType(METHOD_PARAMETER_NODE) + val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) val parametersList = when { innerParametersRoot != null -> getListOfParameters(innerParametersRoot) diff --git a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt index cdcf917d..f4da8603 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt @@ -4,7 +4,7 @@ import astminer.common.model.Node import com.github.gumtreediff.tree.ITree import com.github.gumtreediff.tree.TreeContext -class GumTreeNode(val wrappedNode: ITree, val context: TreeContext, val parent: GumTreeNode?): Node { +class GumTreeNode(val wrappedNode: ITree, val context: TreeContext, val parent: GumTreeNode?) 
: Node { override val metadata: MutableMap = HashMap() override fun isLeaf(): Boolean { @@ -19,7 +19,7 @@ class GumTreeNode(val wrappedNode: ITree, val context: TreeContext, val parent: return context.getTypeLabel(wrappedNode) } - override fun getChildren(): List { + override fun getChildren(): List { return childrenList } @@ -32,6 +32,15 @@ class GumTreeNode(val wrappedNode: ITree, val context: TreeContext, val parent: } override fun removeChildrenOfType(typeLabel: String) { - childrenList.removeIf{ it.getTypeLabel() == typeLabel} + childrenList.removeIf { it.getTypeLabel() == typeLabel } + } + + override fun getChildOfType(typeLabel: String): GumTreeNode? = + getChildren().firstOrNull { it.getTypeLabel() == typeLabel } + + override fun getChildrenOfType(typeLabel: String): List { + val children = super.getChildrenOfType(typeLabel) + return children.filterIsInstance() + .apply { if (size != children.size) throw TypeCastException("Node have children of different types") } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt index ce57cfef..b81b88ec 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt @@ -32,20 +32,20 @@ class GumTreeJavaMethodSplitter : TreeMethodSplitter { val parameters = getParameters(methodNode) return MethodInfo( - MethodNode(methodNode, methodReturnType, methodName), - ElementNode(classRoot, className), - parameters + MethodNode(methodNode, methodReturnType, methodName), + ElementNode(classRoot, className), + parameters ) } private fun getElementName(node: GumTreeNode) = node.getChildren().map { - it as GumTreeNode + it }.firstOrNull { it.getTypeLabel() == TypeLabels.simpleName } private fun getElementType(node: GumTreeNode) = node.getChildren().map { - it as GumTreeNode + it }.firstOrNull { 
it.isTypeNode() } @@ -62,12 +62,11 @@ class GumTreeJavaMethodSplitter : TreeMethodSplitter { val params = methodNode.getChildren().filter { it.getTypeLabel() == TypeLabels.singleVariableDeclaration } - return params.map { - val node = it as GumTreeNode - ParameterNode( - node, - getElementType(node), - getElementName(node) + return params.map { node -> + ParameterNode( + node, + getElementType(node), + getElementName(node) ) }.toList() } diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt index 3aca70b6..22cec1bf 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt @@ -58,14 +58,14 @@ class GumTreePythonMethodSplitter : TreeMethodSplitter { private fun getElementType(node: GumTreeNode): GumTreeNode? { if (node.getTypeLabel() == TypeLabels.arg) { - return node.getChildOfType(TypeLabels.nameLoad) as GumTreeNode? + return node.getChildOfType(TypeLabels.nameLoad) } // if return statement has "Constant-`Type`" return value => function type is `Type` if (TypeLabels.methodDefinitions.contains(node.getTypeLabel())) { return node.getChildOfType(TypeLabels.body)?.getChildOfType(TypeLabels.returnTypeLabel)?.let { it.getChildren().firstOrNull { child -> child.getTypeLabel().startsWith(TypeLabels.constantType) - } as GumTreeNode? 
+ } } } return null @@ -98,9 +98,8 @@ class GumTreePythonMethodSplitter : TreeMethodSplitter { params.add(it) } - return params.map { - val node = it as GumTreeNode - ParameterNode( + return params.map {node -> + ParameterNode( node, getElementType(node), getElementName(node) From 45518593eb26164fb459af42fd645459ea809cfa Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 17 Apr 2021 16:46:29 +0300 Subject: [PATCH 063/308] getChildOfType fix --- src/main/kotlin/astminer/parse/antlr/AntlrNode.kt | 2 +- src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt index 7d316014..246808d6 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt @@ -45,7 +45,7 @@ class AntlrNode(private val typeLabel: String, private var parent: Node?, privat } override fun getChildOfType(typeLabel: String): AntlrNode? = - getChildren().firstOrNull { it.getTypeLabel() == typeLabel } + getChildrenOfType(typeLabel).firstOrNull() override fun removeChildrenOfType(typeLabel: String) { children.removeIf { it.getTypeLabel() == typeLabel } diff --git a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt index f4da8603..0e879836 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt @@ -36,7 +36,7 @@ class GumTreeNode(val wrappedNode: ITree, val context: TreeContext, val parent: } override fun getChildOfType(typeLabel: String): GumTreeNode? 
= - getChildren().firstOrNull { it.getTypeLabel() == typeLabel } + getChildrenOfType(typeLabel).firstOrNull() override fun getChildrenOfType(typeLabel: String): List { val children = super.getChildrenOfType(typeLabel) From b37b816fac40e4c70063277c36e1516d0c573153 Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 17 Apr 2021 17:48:32 +0300 Subject: [PATCH 064/308] normalize parse result refactor --- .../kotlin/astminer/cli/Code2VecExtractor.kt | 2 +- .../astminer/cli/PathContextsExtractor.kt | 2 +- src/main/kotlin/astminer/cli/ProjectParser.kt | 2 +- .../model/HandlerModel.kt} | 17 ++------------- .../astminer/common/model/ParsingModel.kt | 21 ++++++++++++++++++- .../astminer/parse/antlr/AntlrHandler.kt | 4 ++-- src/main/kotlin/astminer/parse/factory.kt | 1 + .../astminer/parse/fuzzy/cpp/FuzzyHandler.kt | 2 ++ .../astminer/parse/gumtree/GumtreeHandler.kt | 4 ++-- 9 files changed, 32 insertions(+), 23 deletions(-) rename src/main/kotlin/astminer/{parse/handlerModel.kt => common/model/HandlerModel.kt} (54%) diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt index 26b3e263..5fd38499 100644 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt @@ -154,7 +154,7 @@ class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? 
= null ) // Parse project one file at a time parser.parseFiles(getProjectFilesWithExtension(File(projectRoot), extension)) { - normalizeParseResult(it, isTokenSplitted) + it.normalize(isTokenSplitted) // Retrieve labeled data extractFromTree(it, miner, storage, labelExtractor) } diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index ebb5ca36..95ef479a 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -113,7 +113,7 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? = val files = getProjectFilesWithExtension(File(projectRoot), extension) parser.parseFiles(files) { parseResult -> - normalizeParseResult(parseResult, splitTokens = true) + parseResult.normalize(splitTokens = true) val labeledParseResults = labelExtractor.toLabeledData(parseResult) labeledParseResults.forEach { (root, label) -> val paths = miner.retrievePaths(root).take(maxPathContexts) diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt index 7941f1b7..eb3caa6e 100644 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ b/src/main/kotlin/astminer/cli/ProjectParser.kt @@ -124,7 +124,7 @@ class ProjectParser(private val customLabelExtractor: LabelExtractor? 
= null) : // Parse project val filesToParse = getProjectFilesWithExtension(File(projectRoot), extension) parser.parseFiles(filesToParse) { parseResult -> - normalizeParseResult(parseResult, isTokenSplitted) + parseResult.normalize(isTokenSplitted) val labeledParseResults = labelExtractor.toLabeledData(parseResult) labeledParseResults.forEach { (root, label) -> root.preOrder().forEach { node -> diff --git a/src/main/kotlin/astminer/parse/handlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt similarity index 54% rename from src/main/kotlin/astminer/parse/handlerModel.kt rename to src/main/kotlin/astminer/common/model/HandlerModel.kt index 979ad7bd..020a8f81 100644 --- a/src/main/kotlin/astminer/parse/handlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -1,6 +1,5 @@ -package astminer.parse +package astminer.common.model -import astminer.common.model.* import astminer.common.preOrder import astminer.common.setNormalizedToken import astminer.common.splitToSubtokens @@ -20,19 +19,7 @@ abstract class LanguageHandler { } fun normalizeParseResult(splitTokens: Boolean): LanguageHandler { - parseResult.root?.preOrder()?.forEach { node -> processNodeToken(node, splitTokens) } + parseResult.normalize(splitTokens) return this } - - private fun processNodeToken(node: Node, splitToken: Boolean) { - if (splitToken) { - node.setNormalizedToken(separateToken(node.getToken())) - } else { - node.setNormalizedToken() - } - } - - private fun separateToken(token: String, separator: CharSequence = "|"): String { - return splitToSubtokens(token).joinToString(separator) - } } diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 5cd10e6d..b6eb3476 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -1,5 +1,8 @@ package astminer.common.model +import astminer.common.preOrder +import 
astminer.common.setNormalizedToken +import astminer.common.splitToSubtokens import java.io.File import java.io.InputStream @@ -55,4 +58,20 @@ interface Parser { } } -data class ParseResult(val root: T?, val filePath: String) +data class ParseResult(val root: T?, val filePath: String) { + fun normalize(splitTokens: Boolean) { + this.root?.preOrder()?.forEach { node -> astminer.cli.processNodeToken(node, splitTokens) } + } + + private fun processNodeToken(node: Node, splitToken: Boolean) { + if (splitToken) { + node.setNormalizedToken(separateToken(node.getToken())) + } else { + node.setNormalizedToken() + } + } + + private fun separateToken(token: String, separator: CharSequence = "|"): String { + return splitToSubtokens(token).joinToString(separator) + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt index 062efe17..ad6f6f0b 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt @@ -1,8 +1,8 @@ package astminer.parse.antlr import astminer.common.model.ParseResult -import astminer.parse.HandlerFactory -import astminer.parse.LanguageHandler +import astminer.common.model.HandlerFactory +import astminer.common.model.LanguageHandler import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser import astminer.parse.antlr.javascript.JavaScriptMethodSplitter diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt index 40aafccc..1c9a6613 100644 --- a/src/main/kotlin/astminer/parse/factory.kt +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -1,5 +1,6 @@ package astminer.parse +import astminer.common.model.HandlerFactory import astminer.parse.antlr.AntlrJavaHandlerFactory import astminer.parse.antlr.AntlrJavascriptHandlerFactory import astminer.parse.antlr.AntlrPythonHandlerFactory diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt 
b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt index 1edcd030..ea6b014f 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt @@ -1,5 +1,7 @@ package astminer.parse +import astminer.common.model.HandlerFactory +import astminer.common.model.LanguageHandler import astminer.common.model.ParseResult import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.parse.fuzzy.cpp.FuzzyMethodSplitter diff --git a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt index c3809145..6bb95b27 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt @@ -1,8 +1,8 @@ package astminer.parse.gumtree import astminer.common.model.ParseResult -import astminer.parse.HandlerFactory -import astminer.parse.LanguageHandler +import astminer.common.model.HandlerFactory +import astminer.common.model.LanguageHandler import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter import astminer.parse.gumtree.python.GumTreePythonMethodSplitter From 87d3acd42ef46b485508944b1b5a7330b4f26a51 Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 17 Apr 2021 18:10:45 +0300 Subject: [PATCH 065/308] normalize removed from util --- src/main/kotlin/astminer/cli/utils.kt | 16 ---------------- .../kotlin/astminer/common/model/ParsingModel.kt | 2 +- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/src/main/kotlin/astminer/cli/utils.kt b/src/main/kotlin/astminer/cli/utils.kt index 6567528a..c2ccf3ea 100644 --- a/src/main/kotlin/astminer/cli/utils.kt +++ b/src/main/kotlin/astminer/cli/utils.kt @@ -36,22 +36,6 @@ fun getParser( } } -fun separateToken(token: String, separator: CharSequence = "|"): String { - return splitToSubtokens(token).joinToString(separator) -} - -fun processNodeToken(node: Node, splitToken: Boolean) { - 
if (splitToken) { - node.setNormalizedToken(separateToken(node.getToken())) - } else { - node.setNormalizedToken() - } -} - -fun normalizeParseResult(parseResult: ParseResult, splitTokens: Boolean) { - parseResult.root?.preOrder()?.forEach { node -> processNodeToken(node, splitTokens) } -} - fun getLabelExtractor( granularityLevel: String, javaParser: String, diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index b6eb3476..5e56a572 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -60,7 +60,7 @@ interface Parser { data class ParseResult(val root: T?, val filePath: String) { fun normalize(splitTokens: Boolean) { - this.root?.preOrder()?.forEach { node -> astminer.cli.processNodeToken(node, splitTokens) } + this.root?.preOrder()?.forEach { node -> processNodeToken(node, splitTokens) } } private fun processNodeToken(node: Node, splitToken: Boolean) { From 815cea2ad141e0503a21bf69dd85662fe8847aa1 Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 17 Apr 2021 18:25:34 +0300 Subject: [PATCH 066/308] normalize functions separated --- src/main/kotlin/astminer/cli/utils.kt | 12 ++++++++++++ .../kotlin/astminer/common/model/ParsingModel.kt | 13 +------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/main/kotlin/astminer/cli/utils.kt b/src/main/kotlin/astminer/cli/utils.kt index c2ccf3ea..3f09fdc7 100644 --- a/src/main/kotlin/astminer/cli/utils.kt +++ b/src/main/kotlin/astminer/cli/utils.kt @@ -36,6 +36,18 @@ fun getParser( } } +fun processNodeToken(node: Node, splitToken: Boolean) { + if (splitToken) { + node.setNormalizedToken(separateToken(node.getToken())) + } else { + node.setNormalizedToken() + } +} + +fun separateToken(token: String, separator: CharSequence = "|"): String { + return splitToSubtokens(token).joinToString(separator) +} + fun getLabelExtractor( granularityLevel: String, 
javaParser: String, diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 5e56a572..01e78304 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -1,5 +1,6 @@ package astminer.common.model +import astminer.cli.processNodeToken import astminer.common.preOrder import astminer.common.setNormalizedToken import astminer.common.splitToSubtokens @@ -62,16 +63,4 @@ data class ParseResult(val root: T?, val filePath: String) { fun normalize(splitTokens: Boolean) { this.root?.preOrder()?.forEach { node -> processNodeToken(node, splitTokens) } } - - private fun processNodeToken(node: Node, splitToken: Boolean) { - if (splitToken) { - node.setNormalizedToken(separateToken(node.getToken())) - } else { - node.setNormalizedToken() - } - } - - private fun separateToken(token: String, separator: CharSequence = "|"): String { - return splitToSubtokens(token).joinToString(separator) - } } From 2abcf5f8fefe36b79bc1443da96b3453f60964b9 Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 19 Apr 2021 23:34:29 +0500 Subject: [PATCH 067/308] cleaned up after merge --- .../java/astminer/examples/AllJavaFiles.java | 2 +- .../astminer/cli/PathContextsExtractor.kt | 6 ----- .../astminer/common/model/HandlerModel.kt | 8 ------- .../astminer/common/model/StorageModel.kt | 23 ------------------- .../kotlin/astminer/examples/AllCppFiles.kt | 2 +- .../astminer/examples/AllJavaFilesGumTree.kt | 2 +- .../astminer/examples/AllJavaMethods.kt | 8 +++---- .../astminer/examples/AllPythonMethods.kt | 8 +++---- 8 files changed, 11 insertions(+), 48 deletions(-) delete mode 100644 src/main/kotlin/astminer/common/model/StorageModel.kt diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 8dd45b30..3d69c95d 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ 
b/src/main/java/astminer/examples/AllJavaFiles.java @@ -2,7 +2,7 @@ import astminer.cli.LabeledResult; import astminer.common.model.*; -import astminer.parse.java.GumTreeJavaParser; +import astminer.parse.gumtree.java.GumTreeJavaParser; import astminer.storage.*; import astminer.storage.path.Code2VecPathStorage; import astminer.storage.path.PathBasedStorage; diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index e7720749..2e74aae7 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -5,17 +5,11 @@ import astminer.common.model.* import astminer.parse.antlr.java.JavaParser import astminer.parse.antlr.javascript.JavaScriptParser import astminer.parse.antlr.python.PythonParser -import astminer.parse.cpp.FuzzyCppParser -import astminer.parse.java.GumTreeJavaParser import astminer.storage.* import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.parse.gumtree.java.GumTreeJavaParser -import astminer.paths.Code2VecPathStorage -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.toPathContext import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* import com.github.ajalt.clikt.parameters.types.int diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index 020a8f81..5b843a1d 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -1,8 +1,5 @@ package astminer.common.model -import astminer.common.preOrder -import astminer.common.setNormalizedToken -import astminer.common.splitToSubtokens import java.io.File interface HandlerFactory { @@ -17,9 +14,4 @@ abstract class LanguageHandler { val 
root = parseResult.root ?: return emptyList() return splitter.splitIntoMethods(root) } - - fun normalizeParseResult(splitTokens: Boolean): LanguageHandler { - parseResult.normalize(splitTokens) - return this - } } diff --git a/src/main/kotlin/astminer/common/model/StorageModel.kt b/src/main/kotlin/astminer/common/model/StorageModel.kt deleted file mode 100644 index bd2436c2..00000000 --- a/src/main/kotlin/astminer/common/model/StorageModel.kt +++ /dev/null @@ -1,23 +0,0 @@ -package astminer.common.model - - -/** - * Stores path-contexts and their labels and saves them to directory. - */ -interface PathStorage { - val directoryPath: String - val tokensLimit: Long - val pathsLimit: Long - fun store(labeledPathContexts: LabeledPathContexts) - fun close() -} - -/** - * Stores ASTs in form of their root and saves them to directory. - */ -interface AstStorage { - val directoryPath: String - fun store(root: Node, label: String) = store(root, label, "") - fun store(root: Node, label: String, filePath: String) - fun close() -} diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index e36e124e..55faff40 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -3,7 +3,7 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension -import astminer.parse.cpp.FuzzyCppParser +import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index 771a1e20..937fb8a6 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -1,7 +1,7 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension -import 
astminer.parse.java.GumTreeJavaParser +import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 209939c4..7a748f37 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -2,16 +2,16 @@ package astminer.examples import astminer.cli.LabeledResult import astminer.common.model.MethodInfo -import astminer.parse.java.GumTreeJavaNode -import astminer.parse.java.GumTreeJavaParser -import astminer.parse.java.GumTreeJavaMethodSplitter +import astminer.parse.gumtree.GumTreeNode +import astminer.parse.gumtree.java.GumTreeJavaParser +import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter import astminer.storage.* import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File -private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { +private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { val className = methodInfo.enclosingElementName() ?: "" val methodName = methodInfo.name() ?: "unknown_method" val parameterTypes = methodInfo.methodParameters.joinToString("|") { it.name() ?: "_" } diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 956fd236..2f55d247 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -2,14 +2,14 @@ package astminer.examples import astminer.cli.LabeledResult import astminer.common.model.MethodInfo -import astminer.parse.python.GumTreePythonMethodSplitter -import astminer.parse.python.GumTreePythonNode -import astminer.parse.python.GumTreePythonParser +import 
astminer.parse.gumtree.python.GumTreePythonMethodSplitter +import astminer.parse.gumtree.GumTreeNode +import astminer.parse.gumtree.python.GumTreePythonParser import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File -private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { +private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { val className = methodInfo.enclosingElementName() ?: "" val methodName = methodInfo.name() ?: "unknown_method" val parameterTypes = methodInfo.methodParameters.joinToString("|") { it.name() ?: "_" } From cd11a662260c9990897a0b0a60fd99f2a655ca9f Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 19 Apr 2021 23:42:03 +0500 Subject: [PATCH 068/308] removed one newline --- src/main/java/astminer/examples/AllJavaFiles.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 3d69c95d..9c7a95cb 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -7,7 +7,6 @@ import astminer.storage.path.Code2VecPathStorage; import astminer.storage.path.PathBasedStorage; import astminer.storage.path.PathBasedStorageConfig; - import java.io.FileInputStream; import java.io.IOException; import java.nio.file.*; From 12826a57589b42726a09f8bcbeda7db33801d832 Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 22 Apr 2021 14:36:12 +0300 Subject: [PATCH 069/308] node interface refactored --- .../kotlin/astminer/cli/FilterPredicates.kt | 6 +-- .../kotlin/astminer/cli/LabelExtractors.kt | 2 +- src/main/kotlin/astminer/common/TreeUtil.kt | 4 +- .../astminer/common/model/ParsingModel.kt | 29 +++++------ .../common/model/TreeSplittingModel.kt | 10 ++-- .../astminer/examples/Code2VecJavaMethods.kt | 2 +- .../astminer/featureextraction/TreeFeature.kt | 20 ++++---- .../kotlin/astminer/parse/antlr/AntlrNode.kt | 46 
++++-------------- .../kotlin/astminer/parse/antlr/AntlrUtil.kt | 28 +++++------ .../parse/antlr/java/JavaMethodSplitter.kt | 20 ++++---- .../javascript/JavaScriptMethodSplitter.kt | 30 ++++++------ .../antlr/python/PythonMethodSplitter.kt | 10 ++-- .../parse/fuzzy/cpp/FuzzyMethodSplitter.kt | 6 +-- .../astminer/parse/fuzzy/cpp/FuzzyNode.kt | 48 +++++-------------- .../astminer/parse/gumtree/GumTreeNode.kt | 31 +++--------- .../gumtree/java/GumTreeJavaMethodSplitter.kt | 18 +++---- .../python/GumTreePythonMethodSplitter.kt | 26 +++++----- src/main/kotlin/astminer/paths/PathUtil.kt | 8 ++-- src/main/kotlin/astminer/paths/PathWorker.kt | 4 +- .../kotlin/astminer/storage/TokenProcessor.kt | 4 +- .../astminer/storage/ast/CsvAstStorage.kt | 8 ++-- .../astminer/storage/ast/DotAstStorage.kt | 4 +- src/test/kotlin/astminer/common/TestUtils.kt | 27 +++-------- .../kotlin/astminer/common/TreeUtilTest.kt | 4 +- .../featureextraction/TreeFeatureTestUtil.kt | 36 ++++++-------- .../TreeFeatureTestUtilTest.kt | 14 +++--- .../astminer/parse/antlr/AntrlUtilTest.kt | 2 +- .../JavaScriptMethodSplitterTest.kt | 2 +- .../python/GumTreeJavaMethodSplitterTest.kt | 14 +++--- .../astminer/paths/PathWorkerTestUtil.kt | 10 ++-- .../paths/SampleTreePathWorkerTest.kt | 10 ++-- 31 files changed, 198 insertions(+), 285 deletions(-) diff --git a/src/main/kotlin/astminer/cli/FilterPredicates.kt b/src/main/kotlin/astminer/cli/FilterPredicates.kt index 69e11878..0d39094a 100644 --- a/src/main/kotlin/astminer/cli/FilterPredicates.kt +++ b/src/main/kotlin/astminer/cli/FilterPredicates.kt @@ -10,7 +10,7 @@ abstract class MethodFilterPredicate { fun typeBasedFilterPredicate(root: Node?, nodeType: String, excludeValues: List): Boolean { root?.getChildrenOfType(nodeType)?.forEach { - if (it.getToken() in excludeValues) { + if (it.token in excludeValues) { return false } } @@ -58,7 +58,7 @@ class MethodNameLengthFilterPredicate(private val maxLength: Int) : MethodFilter } val nameNode = 
methodInfo.method.nameNode return if (nameNode != null) { - splitToSubtokens(nameNode.getToken()).size <= maxLength + splitToSubtokens(nameNode.token).size <= maxLength } else { false } @@ -71,7 +71,7 @@ class TokenLengthFilterPredicate(private val maxLength: Int) : MethodFilterPredi return true } methodInfo.method.root.preOrder().forEach { node -> - if (splitToSubtokens(node.getToken()).size > maxLength) { + if (splitToSubtokens(node.token).size > maxLength) { return false } } diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 0a521109..e26eba0d 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -141,7 +141,7 @@ class MethodNameExtractor( if (hideMethodNames) { methodRoot.preOrder().forEach { node -> - if (node.getToken() == methodName) { + if (node.token == methodName) { node.setTechnicalToken("SELF") } } diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index 51b44cca..dcbaa9e7 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -15,13 +15,13 @@ fun Node.preOrderIterator(): Iterator { } fun doTraversePostOrder(node: Node, resultList: MutableList) { - node.getChildren().forEach { doTraversePostOrder(it, resultList) } + node.children.forEach { doTraversePostOrder(it, resultList) } resultList.add(node) } fun doTraversePreOrder(node: Node, resultList: MutableList) { resultList.add(node) - node.getChildren().forEach { doTraversePreOrder(it, resultList) } + node.children.forEach { doTraversePreOrder(it, resultList) } } fun Node.postOrder(): List { diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index cd86b451..140ee28d 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -5,30 
+5,31 @@ import java.io.File import java.io.InputStream -interface Node { - fun getTypeLabel(): String - fun getChildren(): List - fun getParent(): Node? - fun getToken(): String - fun isLeaf(): Boolean +abstract class Node{ + abstract val typeLabel: String + abstract val children: List + abstract val parent: Node? + abstract val token: String - val metadata: MutableMap + val metadata: MutableMap = HashMap() + fun isLeaf() = children.isEmpty() fun prettyPrint(indent: Int = 0, indentSymbol: String = "--") { repeat(indent) { print(indentSymbol) } - print(getTypeLabel()) - if (getToken().isNotEmpty()) { - println(" : ${getToken()}") + print(typeLabel) + if (token.isNotEmpty()) { + println(" : $token") } else { println() } - getChildren().forEach { it.prettyPrint(indent + 1, indentSymbol) } + children.forEach { it.prettyPrint(indent + 1, indentSymbol) } } - fun getChildrenOfType(typeLabel: String) = getChildren().filter { it.getTypeLabel() == typeLabel } - fun getChildOfType(typeLabel: String) = getChildrenOfType(typeLabel).firstOrNull() + open fun getChildrenOfType(typeLabel: String) = children.filter { it.typeLabel == typeLabel } + open fun getChildOfType(typeLabel: String) = getChildrenOfType(typeLabel).firstOrNull() - fun removeChildrenOfType(typeLabel: String) + abstract fun removeChildrenOfType(typeLabel: String) + //TODO(move orders here) } interface Parser { diff --git a/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt b/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt index c75bfb9d..434fc9de 100644 --- a/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt +++ b/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt @@ -20,15 +20,15 @@ class MethodNode( val returnTypeNode: T?, val nameNode: T? ) { - fun name() = nameNode?.getToken() - fun returnType() = returnTypeNode?.getToken() + fun name() = nameNode?.token + fun returnType() = returnTypeNode?.token } class ElementNode( val root: T?, val nameNode: T? 
) { - fun name() = nameNode?.getToken() + fun name() = nameNode?.token } data class ParameterNode( @@ -36,6 +36,6 @@ data class ParameterNode( val returnTypeNode: T?, val nameNode: T? ) { - fun name() = nameNode?.getToken() - fun returnType() = returnTypeNode?.getToken() + fun name() = nameNode?.token + fun returnType() = returnTypeNode?.token } diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 86246764..831967e0 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -28,7 +28,7 @@ fun code2vecJavaMethods() { methods.forEach { methodInfo -> val methodNameNode = methodInfo.method.nameNode ?: return@forEach val methodRoot = methodInfo.method.root - val label = splitToSubtokens(methodNameNode.getToken()).joinToString("|") + val label = splitToSubtokens(methodNameNode.token).joinToString("|") methodNameNode.setTechnicalToken("METHOD_NAME") // Retrieve paths from every node individually and store them diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt index 7a7401f0..1c54b1b4 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt +++ b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt @@ -21,7 +21,7 @@ interface TreeFeature { */ object Depth : TreeFeature { override fun compute(tree: Node): Int { - val max = tree.getChildren().map { compute(it) }.max() ?: 0 + val max = tree.children.map { compute(it) }.max() ?: 0 return max + 1 } } @@ -48,7 +48,7 @@ object BranchingFactor : TreeFeature { */ object NumberOfNodes : TreeFeature { override fun compute(tree: Node): Int { - return tree.getChildren().map { compute(it) }.sum() + 1 + return tree.children.map { compute(it) }.sum() + 1 } } @@ -61,8 +61,8 @@ object Tokens : TreeFeature> { } private fun findTokens(node: Node, tokensList: MutableList): List { - 
node.getChildren().forEach { findTokens(it, tokensList) } - tokensList.add(node.getToken()) + node.children.forEach { findTokens(it, tokensList) } + tokensList.add(node.token) return tokensList } } @@ -76,8 +76,8 @@ object NodeTypes : TreeFeature> { } private fun findNodeTypes(node: Node, nodeTypesList: MutableList): List { - node.getChildren().forEach { findNodeTypes(it, nodeTypesList) } - nodeTypesList.add(node.getTypeLabel()) + node.children.forEach { findNodeTypes(it, nodeTypesList) } + nodeTypesList.add(node.typeLabel) return nodeTypesList } } @@ -94,18 +94,18 @@ object CompressiblePathLengths : TreeFeature> { } private fun Node.isStartingNode() : Boolean { - return this.hasOneChild() && !(this.getParent()?.hasOneChild() ?: false) + return this.hasOneChild() && !(this.parent?.hasOneChild() ?: false) } - private fun Node.hasOneChild() : Boolean = getChildren().size == 1 + private fun Node.hasOneChild() : Boolean = children.size == 1 private fun findPathLengthFromStartingNode(node: Node) : Int { var length = 1 - var next = node.getChildren().first() + var next = node.children.first() while (next.hasOneChild()) { length++ - next = next.getChildren().first() + next = next.children.first() } return length } diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt index 246808d6..9bed6095 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt @@ -2,53 +2,27 @@ package astminer.parse.antlr import astminer.common.model.Node -class AntlrNode(private val typeLabel: String, private var parent: Node?, private var token: String?) : Node { - override val metadata: MutableMap = HashMap() +class AntlrNode(override val typeLabel: String,override var parent: AntlrNode?, token: String?) 
: Node() { - private var children: MutableList = mutableListOf() + override val children: MutableList = mutableListOf() - fun setChildren(newChildren: List) { - children = newChildren.toMutableList() - children.forEach { it.setParent(this) } - } - - private fun setParent(newParent: Node?) { - parent = newParent - } - - override fun getTypeLabel(): String { - return typeLabel - } - - override fun getChildren(): List { - return children - } - - override fun getParent(): Node? { - return parent - } - - override fun getToken(): String { - return token ?: "null" - } - - fun setToken(newToken: String) { - token = newToken - } + override var token: String = token ?: "null" - override fun isLeaf(): Boolean { - return children.isEmpty() + fun replaceChildren(newChildren: List) { + children.clear() + newChildren.forEach { it.parent = this } + children.addAll(newChildren) } - override fun getChildrenOfType(typeLabel: String) = getChildren().filter { - decompressTypeLabel(it.getTypeLabel()).firstOrNull() == typeLabel + override fun getChildrenOfType(typeLabel: String) = children.filter { + decompressTypeLabel(it.typeLabel).firstOrNull() == typeLabel } override fun getChildOfType(typeLabel: String): AntlrNode? 
= getChildrenOfType(typeLabel).firstOrNull() override fun removeChildrenOfType(typeLabel: String) { - children.removeIf { it.getTypeLabel() == typeLabel } + children.removeIf { it.typeLabel == typeLabel } } } diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 394feac9..8c29ae0a 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -10,7 +10,7 @@ fun convertAntlrTree(tree: ParserRuleContext, ruleNames: Array, vocabula return compressTree(convertRuleContext(tree, ruleNames, null, vocabulary)) } -private fun convertRuleContext(ruleContext: ParserRuleContext, ruleNames: Array, parent: Node?, vocabulary: Vocabulary): AntlrNode { +private fun convertRuleContext(ruleContext: ParserRuleContext, ruleNames: Array, parent: AntlrNode?, vocabulary: Vocabulary): AntlrNode { val typeLabel = ruleNames[ruleContext.ruleIndex] val currentNode = AntlrNode(typeLabel, parent, null) val children: MutableList = ArrayList() @@ -26,16 +26,16 @@ private fun convertRuleContext(ruleContext: ParserRuleContext, ruleNames: Array< } children.add(convertRuleContext(it as ParserRuleContext, ruleNames, currentNode, vocabulary)) } - currentNode.setChildren(children) + currentNode.replaceChildren(children) return currentNode } -private fun convertTerminal(terminalNode: TerminalNode, parent: Node?, vocabulary: Vocabulary): AntlrNode { +private fun convertTerminal(terminalNode: TerminalNode, parent: AntlrNode?, vocabulary: Vocabulary): AntlrNode { return AntlrNode(vocabulary.getSymbolicName(terminalNode.symbol.type), parent, terminalNode.symbol.text) } -private fun convertErrorNode(errorNode: ErrorNode, parent: Node?): AntlrNode { +private fun convertErrorNode(errorNode: ErrorNode, parent: AntlrNode?): AntlrNode { return AntlrNode("Error", parent, errorNode.text) } @@ -43,10 +43,10 @@ private fun convertErrorNode(errorNode: ErrorNode, parent: Node?): AntlrNode { * 
Remove intermediate nodes that have a single child. */ fun simplifyTree(tree: AntlrNode): AntlrNode { - return if (tree.getChildren().size == 1) { - simplifyTree(tree.getChildren().first()) + return if (tree.children.size == 1) { + simplifyTree(tree.children.first()) } else { - tree.setChildren(tree.getChildren().map { simplifyTree(it) }.toMutableList()) + tree.replaceChildren(tree.children.map { simplifyTree(it) }.toMutableList()) tree } } @@ -55,17 +55,17 @@ fun simplifyTree(tree: AntlrNode): AntlrNode { * Compress paths of intermediate nodes that have a single child into individual nodes. */ fun compressTree(root: AntlrNode): AntlrNode { - return if (root.getChildren().size == 1) { - val child = compressTree(root.getChildren().first()) + return if (root.children.size == 1) { + val child = compressTree(root.children.first()) val compressedNode = AntlrNode( - root.getTypeLabel() + "|" + child.getTypeLabel(), - root.getParent(), - child.getToken() + root.typeLabel + "|" + child.typeLabel, + root.parent, + child.token ) - compressedNode.setChildren(child.getChildren()) + compressedNode.replaceChildren(child.children) compressedNode } else { - root.setChildren(root.getChildren().map { compressTree(it) }.toMutableList()) + root.replaceChildren(root.children.map { compressTree(it) }.toMutableList()) root } } diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt index d7724449..6d76de4f 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt @@ -23,7 +23,7 @@ class JavaMethodSplitter : TreeMethodSplitter { override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { - decompressTypeLabel(it.getTypeLabel()).last() == METHOD_NODE + decompressTypeLabel(it.typeLabel).last() == METHOD_NODE } return methodRoots.map { collectMethodInfo(it as AntlrNode) } } 
@@ -31,7 +31,7 @@ class JavaMethodSplitter : TreeMethodSplitter { private fun collectMethodInfo(methodNode: AntlrNode): MethodInfo { val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) val methodReturnTypeNode = methodNode.getChildOfType(METHOD_RETURN_TYPE_NODE) - methodReturnTypeNode?.setToken(collectParameterToken(methodReturnTypeNode)) + methodReturnTypeNode?.let { it.token = collectParameterToken(it) } val classRoot = getEnclosingClass(methodNode) val className = classRoot?.getChildOfType(CLASS_NAME_NODE) @@ -53,10 +53,10 @@ class JavaMethodSplitter : TreeMethodSplitter { } private fun getEnclosingClass(node: AntlrNode): AntlrNode? { - if (decompressTypeLabel(node.getTypeLabel()).last() == CLASS_DECLARATION_NODE) { + if (decompressTypeLabel(node.typeLabel).last() == CLASS_DECLARATION_NODE) { return node } - val parentNode = node.getParent() as? AntlrNode + val parentNode = node.parent if (parentNode != null) { return getEnclosingClass(parentNode) } @@ -64,11 +64,11 @@ class JavaMethodSplitter : TreeMethodSplitter { } private fun getListOfParameters(parametersRoot: AntlrNode): List> { - if (METHOD_SINGLE_PARAMETER_NODE.contains(decompressTypeLabel(parametersRoot.getTypeLabel()).last())) { + if (METHOD_SINGLE_PARAMETER_NODE.contains(decompressTypeLabel(parametersRoot.typeLabel).last())) { return listOf(getParameterInfoFromNode(parametersRoot)) } - return parametersRoot.getChildren().filter { - val firstType = decompressTypeLabel(it.getTypeLabel()).first() + return parametersRoot.children.filter { + val firstType = decompressTypeLabel(it.typeLabel).first() METHOD_SINGLE_PARAMETER_NODE.contains(firstType) }.map { getParameterInfoFromNode(it) @@ -77,7 +77,7 @@ class JavaMethodSplitter : TreeMethodSplitter { private fun getParameterInfoFromNode(parameterRoot: AntlrNode): ParameterNode { val returnTypeNode = parameterRoot.getChildOfType(PARAMETER_RETURN_TYPE_NODE) - returnTypeNode?.setToken(collectParameterToken(returnTypeNode)) + returnTypeNode?.let { 
it.token = collectParameterToken(it) } return ParameterNode( parameterRoot, returnTypeNode, @@ -87,9 +87,9 @@ class JavaMethodSplitter : TreeMethodSplitter { private fun collectParameterToken(parameterRoot: AntlrNode): String { if (parameterRoot.isLeaf()) { - return parameterRoot.getToken() + return parameterRoot.token } - return parameterRoot.getChildren().joinToString(separator = "") { child -> + return parameterRoot.children.joinToString(separator = "") { child -> collectParameterToken(child) } } diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt index 387ab198..ccfc4cd5 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt @@ -31,7 +31,7 @@ class JavaScriptMethodSplitter : TreeMethodSplitter { private fun Node.isArrowElement() = this.getChildOfType(ARROW_NODE) != null private fun Node.isFunctionElement() = this.getChildOfType(FUNCTION_NODE) != null - private fun Node.isMethodElement() = decompressTypeLabel(this.getTypeLabel()).last() == METHOD_NODE + private fun Node.isMethodElement() = decompressTypeLabel(this.typeLabel).last() == METHOD_NODE } /** @@ -51,7 +51,7 @@ abstract class JavaScriptElement(private val element: AntlrNode) { * @return element info */ fun getElementInfo() : MethodInfo { - val enclosingRoot = getEnclosingElementRoot(element.getParent() as AntlrNode) + val enclosingRoot = getEnclosingElementRoot(element.parent as AntlrNode) return MethodInfo( MethodNode(element, null, getElementName()), ElementNode(enclosingRoot, getEnclosingElementName(enclosingRoot)), @@ -65,10 +65,10 @@ abstract class JavaScriptElement(private val element: AntlrNode) { * @return root of enclosing element */ open fun getEnclosingElementRoot(node: AntlrNode?): AntlrNode? 
{ - if (node == null || decompressTypeLabel(node.getTypeLabel()).intersect(ENCLOSING_ELEMENT_NODES).isNotEmpty()) { + if (node == null || decompressTypeLabel(node.typeLabel).intersect(ENCLOSING_ELEMENT_NODES).isNotEmpty()) { return node } - return getEnclosingElementRoot(node.getParent() as? AntlrNode) + return getEnclosingElementRoot(node.parent) } /** @@ -77,8 +77,8 @@ abstract class JavaScriptElement(private val element: AntlrNode) { * @return name node of enclosing element */ open fun getEnclosingElementName(enclosingRoot: AntlrNode?) : AntlrNode? { - return enclosingRoot?.getChildren()?.firstOrNull { - decompressTypeLabel(it.getTypeLabel()).last() == ENCLOSING_ELEMENT_NAME_NODE + return enclosingRoot?.children?.firstOrNull { + decompressTypeLabel(it.typeLabel).last() == ENCLOSING_ELEMENT_NAME_NODE } } @@ -98,7 +98,7 @@ abstract class JavaScriptElement(private val element: AntlrNode) { } private fun Node.hasLastLabel(typeLabel: String): Boolean { - return decompressTypeLabel(getTypeLabel()).last() == typeLabel + return decompressTypeLabel(typeLabel).last() == typeLabel } private fun AntlrNode.getItOrChildrenOfType(typeLabel: String) : List { @@ -131,8 +131,8 @@ class ArrowElement(private val element: AntlrNode) : JavaScriptElement(element) } override fun getElementName(): AntlrNode? { - return element.getChildren().firstOrNull { - it.getTypeLabel() == ARROW_NAME_NODE + return element.children.firstOrNull { + it.typeLabel == ARROW_NAME_NODE } } @@ -150,8 +150,8 @@ class FunctionElement(private val element: AntlrNode) : JavaScriptElement(elemen } override fun getElementName(): AntlrNode? { - return element.getChildren().firstOrNull { - it.getTypeLabel() == FUNCTION_NAME_NODE + return element.children.firstOrNull { + it.typeLabel == FUNCTION_NAME_NODE } } @@ -169,12 +169,12 @@ class MethodElement(private val element: AntlrNode) : JavaScriptElement(element) } override fun getElementName(): AntlrNode? 
{ - val methodNameParent = element.getChildren().firstOrNull { - METHOD_GETTERS_SETTERS.contains(it.getTypeLabel()) + val methodNameParent = element.children.firstOrNull { + METHOD_GETTERS_SETTERS.contains(it.typeLabel) } ?: element - return methodNameParent.getChildren().firstOrNull { - decompressTypeLabel(it.getTypeLabel()).contains(METHOD_NAME_NODE) + return methodNameParent.children.firstOrNull { + decompressTypeLabel(it.typeLabel).contains(METHOD_NAME_NODE) } } diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt index bc62320a..a6baacca 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt @@ -23,7 +23,7 @@ class PythonMethodSplitter : TreeMethodSplitter { override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { - decompressTypeLabel(it.getTypeLabel()).last() == METHOD_NODE + decompressTypeLabel(it.typeLabel).last() == METHOD_NODE } return methodRoots.map { collectMethodInfo(it as AntlrNode) } } @@ -51,10 +51,10 @@ class PythonMethodSplitter : TreeMethodSplitter { } private fun getEnclosingClass(node: AntlrNode): AntlrNode? { - if (decompressTypeLabel(node.getTypeLabel()).last() == CLASS_DECLARATION_NODE) { + if (decompressTypeLabel(node.typeLabel).last() == CLASS_DECLARATION_NODE) { return node } - val parentNode = node.getParent() as? 
AntlrNode + val parentNode = node.parent if (parentNode != null) { return getEnclosingClass(parentNode) } @@ -62,11 +62,11 @@ class PythonMethodSplitter : TreeMethodSplitter { } private fun getListOfParameters(parameterRoot: AntlrNode): List> { - if (decompressTypeLabel(parameterRoot.getTypeLabel()).last() == PARAMETER_NAME_NODE) { + if (decompressTypeLabel(parameterRoot.typeLabel).last() == PARAMETER_NAME_NODE) { return listOf(ParameterNode(parameterRoot, null, parameterRoot)) } return parameterRoot.getChildrenOfType(METHOD_SINGLE_PARAMETER_NODE).map { - if (decompressTypeLabel(it.getTypeLabel()).last() == PARAMETER_NAME_NODE) { + if (decompressTypeLabel(it.typeLabel).last() == PARAMETER_NAME_NODE) { ParameterNode(it, null, it) } else { ParameterNode(it, null, it.getChildOfType(PARAMETER_NAME_NODE) as AntlrNode) diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt index b6f387cd..80afc833 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt @@ -20,7 +20,7 @@ class FuzzyMethodSplitter : TreeMethodSplitter { } override fun splitIntoMethods(root: FuzzyNode): Collection> { - val methodRoots = root.preOrder().filter { it.getTypeLabel() == METHOD_NODE } + val methodRoots = root.preOrder().filter { it.typeLabel == METHOD_NODE } return methodRoots.map { collectMethodInfo(it as FuzzyNode) } } @@ -50,10 +50,10 @@ class FuzzyMethodSplitter : TreeMethodSplitter { } private fun getEnclosingClass(node: FuzzyNode): FuzzyNode? { - if (node.getTypeLabel() == CLASS_DECLARATION_NODE) { + if (node.typeLabel == CLASS_DECLARATION_NODE) { return node } - val parentNode = node.getParent() as? FuzzyNode + val parentNode = node.parent as? 
FuzzyNode if (parentNode != null) { return getEnclosingClass(parentNode) } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt index bc2f3243..6aaae4cb 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt @@ -9,49 +9,25 @@ import com.google.common.collect.TreeMultiset * @param token - node's token * @param order - node's order, which used to express the ordering of children in the AST when it matters */ -class FuzzyNode(private val typeLabel: String, private val token: String?, order: Int?) : Node { +class FuzzyNode(override val typeLabel: String,token: String?, order: Int?) : Node() { private val order = order ?: -1 - override val metadata: MutableMap = HashMap() - private var parent: Node? = null - private var children = TreeMultiset.create(compareBy( - { it.order }, - { System.identityHashCode(it) } + override var parent: Node? = null + private val childrenMultiset = TreeMultiset.create(compareBy( + { it.order }, + { System.identityHashCode(it) } )) - fun getOrder(): Int { - return order - } - - fun addChild(node: FuzzyNode) { - children.add(node) - node.setParent(this) - } - - override fun getTypeLabel(): String { - return typeLabel - } - - override fun getChildren(): List { - return children.toList() - } - - override fun getParent(): Node? 
{ - return parent - } + override val children + get() = childrenMultiset.toList() - override fun getToken(): String { - return token ?: "null" - } + override var token: String = token ?: "null" - override fun isLeaf(): Boolean { - return children.isEmpty() - } - - private fun setParent(node: Node) { - parent = node + fun addChild(node: FuzzyNode) { + childrenMultiset.add(node) + node.parent = this } override fun removeChildrenOfType(typeLabel: String) { - children.removeIf { it.getTypeLabel() == typeLabel } + childrenMultiset.removeIf { it.typeLabel == typeLabel } } } diff --git a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt index 0e879836..7753a891 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt @@ -4,35 +4,18 @@ import astminer.common.model.Node import com.github.gumtreediff.tree.ITree import com.github.gumtreediff.tree.TreeContext -class GumTreeNode(val wrappedNode: ITree, val context: TreeContext, val parent: GumTreeNode?) : Node { - override val metadata: MutableMap = HashMap() +class GumTreeNode(val wrappedNode: ITree, val context: TreeContext,override var parent: GumTreeNode?) : Node() { + override val typeLabel: String + get() = context.getTypeLabel(wrappedNode) - override fun isLeaf(): Boolean { - return childrenList.isEmpty() - } - - private val childrenList: MutableList by lazy { + override val children: MutableList by lazy { wrappedNode.children.map { GumTreeNode(it, context, this) }.toMutableList() } - - override fun getTypeLabel(): String { - return context.getTypeLabel(wrappedNode) - } - - override fun getChildren(): List { - return childrenList - } - - override fun getParent(): Node? 
{ - return parent - } - - override fun getToken(): String { - return wrappedNode.label - } + override val token: String + get() = wrappedNode.label override fun removeChildrenOfType(typeLabel: String) { - childrenList.removeIf { it.getTypeLabel() == typeLabel } + children.removeIf { it.typeLabel == typeLabel } } override fun getChildOfType(typeLabel: String): GumTreeNode? = diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt index b81b88ec..74e27e8f 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt @@ -4,7 +4,7 @@ import astminer.common.model.* import astminer.common.preOrder import astminer.parse.gumtree.GumTreeNode -private fun GumTreeNode.isTypeNode() = getTypeLabel().endsWith("Type") +private fun GumTreeNode.isTypeNode() = typeLabel.endsWith("Type") class GumTreeJavaMethodSplitter : TreeMethodSplitter { @@ -18,7 +18,7 @@ class GumTreeJavaMethodSplitter : TreeMethodSplitter { } override fun splitIntoMethods(root: GumTreeNode): Collection> { - val methodRoots = root.preOrder().filter { it.getTypeLabel() == TypeLabels.methodDeclaration } + val methodRoots = root.preOrder().filter { it.typeLabel == TypeLabels.methodDeclaration } return methodRoots.map { collectMethodInfo(it as GumTreeNode) } } @@ -38,29 +38,29 @@ class GumTreeJavaMethodSplitter : TreeMethodSplitter { ) } - private fun getElementName(node: GumTreeNode) = node.getChildren().map { + private fun getElementName(node: GumTreeNode) = node.children.map { it }.firstOrNull { - it.getTypeLabel() == TypeLabels.simpleName + it.typeLabel == TypeLabels.simpleName } - private fun getElementType(node: GumTreeNode) = node.getChildren().map { + private fun getElementType(node: GumTreeNode) = node.children.map { it }.firstOrNull { it.isTypeNode() } private fun getEnclosingClass(node: GumTreeNode): 
GumTreeNode? { - if (node.getTypeLabel() == TypeLabels.typeDeclaration) { + if (node.typeLabel == TypeLabels.typeDeclaration) { return node } - val parentNode = node.getParent() as? GumTreeNode + val parentNode = node.parent return parentNode?.let { getEnclosingClass(it) } } private fun getParameters(methodNode: GumTreeNode): List> { - val params = methodNode.getChildren().filter { - it.getTypeLabel() == TypeLabels.singleVariableDeclaration + val params = methodNode.children.filter { + it.typeLabel == TypeLabels.singleVariableDeclaration } return params.map { node -> ParameterNode( diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt index 22cec1bf..5d472cf8 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt @@ -34,7 +34,7 @@ class GumTreePythonMethodSplitter : TreeMethodSplitter { } override fun splitIntoMethods(root: GumTreeNode): Collection> { - val methodRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.getTypeLabel()) } + val methodRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.typeLabel) } return methodRoots.map { collectMethodInfo(it as GumTreeNode) } } @@ -57,14 +57,14 @@ class GumTreePythonMethodSplitter : TreeMethodSplitter { private fun getElementName(node: GumTreeNode) = node private fun getElementType(node: GumTreeNode): GumTreeNode? 
{ - if (node.getTypeLabel() == TypeLabels.arg) { + if (node.typeLabel == TypeLabels.arg) { return node.getChildOfType(TypeLabels.nameLoad) } // if return statement has "Constant-`Type`" return value => function type is `Type` - if (TypeLabels.methodDefinitions.contains(node.getTypeLabel())) { + if (TypeLabels.methodDefinitions.contains(node.typeLabel)) { return node.getChildOfType(TypeLabels.body)?.getChildOfType(TypeLabels.returnTypeLabel)?.let { - it.getChildren().firstOrNull { child -> - child.getTypeLabel().startsWith(TypeLabels.constantType) + it.children.firstOrNull { child -> + child.typeLabel.startsWith(TypeLabels.constantType) } } } @@ -72,28 +72,28 @@ class GumTreePythonMethodSplitter : TreeMethodSplitter { } private fun getEnclosingClass(node: GumTreeNode): GumTreeNode? { - if (node.getTypeLabel() == TypeLabels.classDefinition) { + if (node.typeLabel == TypeLabels.classDefinition) { return node } - val parentNode = node.getParent() as? GumTreeNode + val parentNode = node.parent return parentNode?.let { getEnclosingClass(it) } } private fun getParameters(methodNode: GumTreeNode): List> { val params = methodNode.getChildrenOfType(TypeLabels.arguments).flatMap { - it.getChildren() + it.children }.filter { - TypeLabels.funcArgsTypesNodes.contains(it.getTypeLabel()) + TypeLabels.funcArgsTypesNodes.contains(it.typeLabel) }.flatMap { - it.getChildren() + it.children }.filter { - it.getTypeLabel() == TypeLabels.arg + it.typeLabel == TypeLabels.arg } as MutableList methodNode.getChildrenOfType(TypeLabels.arguments).flatMap { - it.getChildren() + it.children }.filter { - it.getTypeLabel() == TypeLabels.vararg || it.getTypeLabel() == TypeLabels.kwarg + it.typeLabel == TypeLabels.vararg || it.typeLabel == TypeLabels.kwarg }.forEach { params.add(it) } diff --git a/src/main/kotlin/astminer/paths/PathUtil.kt b/src/main/kotlin/astminer/paths/PathUtil.kt index f342c560..9e4f598c 100644 --- a/src/main/kotlin/astminer/paths/PathUtil.kt +++ 
b/src/main/kotlin/astminer/paths/PathUtil.kt @@ -2,11 +2,11 @@ package astminer.paths import astminer.common.model.* -fun toPathContext(path: ASTPath, getToken: (Node) -> String = { node -> node.getToken() }): PathContext { +fun toPathContext(path: ASTPath, getToken: (Node) -> String = { node -> node.token }): PathContext { val startToken = getToken(path.upwardNodes.first()) val endToken = getToken(path.downwardNodes.last()) - val astNodes = path.upwardNodes.map { OrientedNodeType(it.getTypeLabel(), Direction.UP) } + - OrientedNodeType(path.topNode.getTypeLabel(), Direction.TOP) + - path.downwardNodes.map { OrientedNodeType(it.getTypeLabel(), Direction.DOWN) } + val astNodes = path.upwardNodes.map { OrientedNodeType(it.typeLabel, Direction.UP) } + + OrientedNodeType(path.topNode.typeLabel, Direction.TOP) + + path.downwardNodes.map { OrientedNodeType(it.typeLabel, Direction.DOWN) } return PathContext(startToken, astNodes, endToken) } diff --git a/src/main/kotlin/astminer/paths/PathWorker.kt b/src/main/kotlin/astminer/paths/PathWorker.kt index 76da549e..fc95ed8e 100644 --- a/src/main/kotlin/astminer/paths/PathWorker.kt +++ b/src/main/kotlin/astminer/paths/PathWorker.kt @@ -59,11 +59,11 @@ class PathWorker { val paths: MutableList = ArrayList() iterator.forEach { currentNode -> if (currentNode.isLeaf()) { - if (currentNode.getToken().isNotEmpty()) { + if (currentNode.token.isNotEmpty()) { currentNode.setPathPieces(listOf(listOf(currentNode))) } } else { - val pathPiecesPerChild = currentNode.getChildren().map { it.getPathPieces() } + val pathPiecesPerChild = currentNode.children.map { it.getPathPieces() } val currentNodePathPieces = updatePathPieces(currentNode, pathPiecesPerChild, maxLength) val currentNodePaths = collapsePiecesToPaths(currentNode, pathPiecesPerChild, maxLength, maxWidth) diff --git a/src/main/kotlin/astminer/storage/TokenProcessor.kt b/src/main/kotlin/astminer/storage/TokenProcessor.kt index 3156b22d..dc418db2 100644 --- 
a/src/main/kotlin/astminer/storage/TokenProcessor.kt +++ b/src/main/kotlin/astminer/storage/TokenProcessor.kt @@ -20,14 +20,14 @@ enum class TokenProcessor { return splitToSubtokens(token).joinToString("|") } - override fun processToken(node: Node): String = separateToken(node.getToken()) + override fun processToken(node: Node): String = separateToken(node.token) }, /** * Processes the token according to the original code2vec implementation in order to match their behavior. */ Normalize { - override fun processToken(node: Node): String = normalizeToken(node.getToken(), DEFAULT_TOKEN) + override fun processToken(node: Node): String = normalizeToken(node.token, DEFAULT_TOKEN) }; protected abstract fun processToken(node: Node): String diff --git a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt index 2e6b8cab..099af188 100644 --- a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -29,8 +29,8 @@ class CsvAstStorage(override val outputDirectoryPath: String) : Storage { override fun store(labeledResult: LabeledResult) { for (node in labeledResult.root.preOrder()) { - tokensMap.record(node.getToken()) - nodeTypesMap.record(node.getTypeLabel()) + tokensMap.record(node.token) + nodeTypesMap.record(node.typeLabel) } dumpAst(labeledResult.root, labeledResult.label) } @@ -55,8 +55,8 @@ class CsvAstStorage(override val outputDirectoryPath: String) : Storage { } internal fun astString(node: Node): String { - return "${tokensMap.getId(node.getToken())} ${nodeTypesMap.getId(node.getTypeLabel())}{${ - node.getChildren().joinToString(separator = "", transform = ::astString) + return "${tokensMap.getId(node.token)} ${nodeTypesMap.getId(node.typeLabel)}{${ + node.children.joinToString(separator = "", transform = ::astString) }}" } } diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt 
index 087b7149..c709341f 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -50,7 +50,7 @@ class DotAstStorage( nodeDescriptionFormat.format( nodesMap.getId(node) - 1, node.getPresentableToken(), - node.getTypeLabel() + node.typeLabel ) + "\n" ) } @@ -70,7 +70,7 @@ class DotAstStorage( out.println("digraph $fixedAstName {") for (node in root.preOrder()) { val rootId = nodesMap.record(node) - 1 - val childrenIds = node.getChildren().map { nodesMap.record(it) - 1 } + val childrenIds = node.children.map { nodesMap.record(it) - 1 } out.println( "$rootId -- {${childrenIds.joinToString(" ") { it.toString() }}};" ) diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index b28e7f02..6122f576 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -4,31 +4,16 @@ import astminer.cli.LabeledResult import astminer.common.model.Node -class DummyNode(val data: String, val childrenList: MutableList) : Node { - override val metadata: MutableMap = hashMapOf() +class DummyNode(override val typeLabel: String, override val children: MutableList) : Node() { - override fun isLeaf(): Boolean { - return childrenList.isEmpty() - } - - override fun getTypeLabel(): String { - return data - } - - override fun getChildren(): List { - return childrenList - } - - override fun getParent(): Node? { - TODO("not implemented") //To change body of created functions use File | Settings | File Templates. - } + //TODO("not implemented") + override val parent: Node? 
= null - override fun getToken(): String { - return data - } + override val token: String + get() = typeLabel override fun removeChildrenOfType(typeLabel: String) { - childrenList.removeIf { it.getTypeLabel() == typeLabel } + children.removeIf { it.typeLabel == typeLabel } } } diff --git a/src/test/kotlin/astminer/common/TreeUtilTest.kt b/src/test/kotlin/astminer/common/TreeUtilTest.kt index 8834a931..d6dbd410 100644 --- a/src/test/kotlin/astminer/common/TreeUtilTest.kt +++ b/src/test/kotlin/astminer/common/TreeUtilTest.kt @@ -7,7 +7,7 @@ class TreeUtilTest { @Test fun testPostOrder() { val root = createDummyTree() - val dataList = root.postOrderIterator().asSequence().map { it.getTypeLabel() } + val dataList = root.postOrderIterator().asSequence().map { it.typeLabel } Assert.assertArrayEquals(arrayOf("4", "5", "6", "2", "7", "8", "3", "1"), dataList.toList().toTypedArray()) } @@ -15,7 +15,7 @@ class TreeUtilTest { @Test fun testPreOrder() { val root = createDummyTree() - val dataList = root.preOrderIterator().asSequence().map { it.getTypeLabel() } + val dataList = root.preOrderIterator().asSequence().map { it.typeLabel } Assert.assertArrayEquals(arrayOf("1", "2", "4", "5", "6", "3", "7", "8"), dataList.toList().toTypedArray()) } diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt index faa0e572..56d358f6 100644 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt +++ b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt @@ -2,42 +2,36 @@ package astminer.featureextraction import astminer.common.model.Node -class PrettyNode(private val type: String, private val token: String) : Node { - private var children: MutableList = ArrayList() - private var parent: PrettyNode? 
= null - override val metadata: MutableMap = HashMap() - - override fun getChildren(): MutableList = children +class PrettyNode(override val typeLabel: String, override val token: String) : Node() { + override var children: MutableList = ArrayList() + override var parent: PrettyNode? = null + set(value) { + value?.addChild(this) + field = value + } - override fun getParent(): PrettyNode? = parent fun addChild(node: PrettyNode) = children.add(node) - fun setParent(node: PrettyNode?) { + /*fun setParent(node: PrettyNode?) { node?.addChild(this) parent = node - } + }*/ fun toPrettyString(indent: Int = 0, indentSymbol: String = "--") : String = with(StringBuilder()) { repeat(indent) { append(indentSymbol) } - append(getTypeLabel()) - if (getToken().isNotEmpty()) { - appendln(" : ${getToken()}") + append(typeLabel) + if (token.isNotEmpty()) { + appendln(" : $token") } else { appendln() } - getChildren().forEach { append(it.toPrettyString(indent + 1, indentSymbol)) } + children.forEach { append(it.toPrettyString(indent + 1, indentSymbol)) } toString() } - override fun getToken(): String = token - - override fun isLeaf(): Boolean = children.isEmpty() - - override fun getTypeLabel(): String = type - override fun removeChildrenOfType(typeLabel: String) { - children.removeIf { it.getTypeLabel() == typeLabel } + children.removeIf { it.typeLabel == typeLabel } } } @@ -47,7 +41,7 @@ fun restoreFromPrettyPrint(prettyPrintedTree: String, indentSymbol: String = "-- val tree = prettyPrintedTree.lines().map { s -> val (node, indent) = restorePrintedNode(s, indentSymbol) lastNodeByIndent[indent] = node - node.setParent(lastNodeByIndent[indent - 1]) + node.parent = lastNodeByIndent[indent - 1] node } return tree.first() diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt index 2ca6fce2..26081e93 100644 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt +++ 
b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt @@ -16,13 +16,13 @@ class TreeFeatureTestUtilTest { val node6 = PrettyNode("6", "g") val node7 = PrettyNode("7", "h") - node1.setParent(node0) - node2.setParent(node0) - node3.setParent(node0) - node4.setParent(node1) - node5.setParent(node4) - node6.setParent(node1) - node7.setParent(node3) + node1.parent = node0 + node2.parent = node0 + node3.parent = node0 + node4.parent = node1 + node5.parent = node4 + node6.parent = node1 + node7.parent = node3 val prettyTree = node0.toPrettyString() val restoredTree = restoreFromPrettyPrint(prettyTree)!! diff --git a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt index 189158ed..6bede722 100644 --- a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt @@ -16,7 +16,7 @@ class AntrlUtilTest { val node = parser.parseInputStream(FileInputStream(file)) var adoptedNodesSize = 0 node?.preOrder()?.forEach { node -> - adoptedNodesSize += node.getChildren().filter { it.getParent() != node }.size + adoptedNodesSize += node.children.filter { it.parent != node }.size } Assert.assertEquals("There should be no children with different parent", 0, adoptedNodesSize) } diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt index 2e29d9d6..596ebdc5 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt @@ -47,7 +47,7 @@ class JavaScriptMethodSplitterTest { return "info : {" + "name : ${name()}, " + "args : ${methodParameters.map { it.name() }.joinToString(", ")}, " + - "enclosing element : ${enclosingElement.root?.getTypeLabel()?.getEnclosingElementType()}, " + + "enclosing element : 
${enclosingElement.root?.typeLabel?.getEnclosingElementType()}, " + "enclosing element name : ${enclosingElementName()}" + "}" } diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt index 6e1c6025..eaed3251 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt @@ -58,7 +58,7 @@ class GumTreeJavaMethodSplitterTest { assertNotNull(method) with(method) { assertEquals("func_dif_args_typed_return", name()) - assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) + assertEquals("Constant-int", this.method.returnTypeNode?.typeLabel) assertEquals(6, methodParameters.size) assertEquals(listOf("a", "b", "c", "d", "e", "f"), methodParameters.map { it.name() }.toList()) assertEquals(emptyList(), methodParameters.mapNotNull { it.returnType() }.toList()) @@ -102,9 +102,9 @@ class GumTreeJavaMethodSplitterTest { assertNotNull(method) with(method) { assertEquals("async_schrecklich_typed", name()) - assertEquals("AsyncFunctionDef", this.method.root.getTypeLabel()) + assertEquals("AsyncFunctionDef", this.method.root.typeLabel) assertEquals(null, enclosingElementName()) - assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) + assertEquals("Constant-int", this.method.returnTypeNode?.typeLabel) assertEquals(4, methodParameters.size) assertEquals(listOf("event", "x", "args", "kwargs"), methodParameters.map { it.name() }.toList()) assertEquals(listOf("str", "int", null, null), methodParameters.map { it.returnType() }.toList()) @@ -118,14 +118,14 @@ class GumTreeJavaMethodSplitterTest { assertNotNull(method) with(method) { assertEquals("async_simple_no_typed", name()) - assertEquals("AsyncFunctionDef", this.method.root.getTypeLabel()) + assertEquals("AsyncFunctionDef", this.method.root.typeLabel) 
assertEquals(null, enclosingElementName()) assertEquals( "\n async doc\n ", this.method.root.getChildOfType("body") ?.getChildOfType("Expr") ?.getChildOfType("Constant-str") - ?.getToken() + ?.token ) assertEquals(4, methodParameters.size) assertEquals( @@ -145,7 +145,7 @@ class GumTreeJavaMethodSplitterTest { assertEquals("foo_2", name()) assertEquals("foo_1", method.method.root.parent?.wrappedNode?.parent?.label) assertEquals(null, enclosingElementName()) - assertEquals("Constant-NoneType", this.method.returnTypeNode?.getTypeLabel()) + assertEquals("Constant-NoneType", this.method.returnTypeNode?.typeLabel) assertEquals(1, methodParameters.size) assertEquals(listOf("c"), methodParameters.map { it.name() }.toList()) assertEquals(listOf(null), methodParameters.map { it.returnType() }.toList()) @@ -161,7 +161,7 @@ class GumTreeJavaMethodSplitterTest { assertEquals("bar_2", name()) assertEquals("bar_1", method.method.root.parent?.wrappedNode?.parent?.label) assertEquals(null, enclosingElementName()) - assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) + assertEquals("Constant-int", this.method.returnTypeNode?.typeLabel) assertEquals(2, methodParameters.size) assertEquals(listOf("d", "e"), methodParameters.map { it.name() }.toList()) assertEquals(listOf("int", "int"), methodParameters.map { it.returnType() }.toList()) diff --git a/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt b/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt index 44877b87..d8d7bdf0 100644 --- a/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt +++ b/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt @@ -6,15 +6,15 @@ import astminer.common.postOrder import astminer.parse.antlr.AntlrNode import org.junit.Assert -fun simpleNode(number: Int, parent: Node?): AntlrNode { +fun simpleNode(number: Int, parent: AntlrNode?): AntlrNode { return AntlrNode("$number", parent, "node_$number") } -fun simpleNodes(numbers: List, parent: Node?): List { +fun simpleNodes(numbers: 
List, parent: AntlrNode?): List { return numbers.map { simpleNode(it, parent) } } -fun getParentStack(node: Node): List = (node.getParent()?.let { getParentStack(it) } ?: emptyList()) + node +fun getParentStack(node: Node): List = (node.parent?.let { getParentStack(it) } ?: emptyList()) + node fun getAllPathCharacteristics(root: Node): Collection> { val leaves = root.postOrder().filter { it.isLeaf() } @@ -28,8 +28,8 @@ fun getAllPathCharacteristics(root: Node): Collection> { var rightDepth = rightStack.size leftStack.zip(rightStack).zipWithNext { (left1, right1), (left2, right2) -> if (left1 == right1 && left2 != right2) { - val leftIndex = left1.getChildren().indexOf(left2) - val rightIndex = left1.getChildren().indexOf(right2) + val leftIndex = left1.children.indexOf(left2) + val rightIndex = left1.children.indexOf(right2) allPathCharacteristics.add(Pair(rightIndex - leftIndex, leftDepth + rightDepth - 1)) return@zipWithNext } diff --git a/src/test/kotlin/astminer/paths/SampleTreePathWorkerTest.kt b/src/test/kotlin/astminer/paths/SampleTreePathWorkerTest.kt index 682baf97..29fd1932 100644 --- a/src/test/kotlin/astminer/paths/SampleTreePathWorkerTest.kt +++ b/src/test/kotlin/astminer/paths/SampleTreePathWorkerTest.kt @@ -8,21 +8,21 @@ class SampleTreePathWorkerTest : PathWorkerTestBase() { val rootChildren = simpleNodes(listOf(2, 3), root) val (node2, node3) = rootChildren - root.setChildren(rootChildren) + root.replaceChildren(rootChildren) val node2Children = simpleNodes(listOf(4, 5), node2) val (_, node5) = node2Children - node2.setChildren(node2Children) + node2.replaceChildren(node2Children) val node3Children = simpleNodes(listOf(6, 7, 8), node3) val (_, node7, _) = node3Children - node3.setChildren(node3Children) + node3.replaceChildren(node3Children) val node5Children = simpleNodes(listOf(9, 10, 11), node5) - node5.setChildren(node5Children) + node5.replaceChildren(node5Children) val node7Children = simpleNodes(listOf(12, 13), node7) - 
node7.setChildren(node7Children) + node7.replaceChildren(node7Children) return root } From 4ac571a5e71cc38f0054c060ce2b106c687b322f Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 22 Apr 2021 20:05:44 +0500 Subject: [PATCH 070/308] committed unfinished garbage --- .../java/astminer/examples/AllJavaFiles.java | 5 +- .../antlr/parser/JavaScriptBaseLexer.java | 4 + .../kotlin/astminer/cli/FilterPredicates.kt | 92 ++++++------------- .../kotlin/astminer/cli/LabelExtractors.kt | 31 +++---- src/main/kotlin/astminer/cli/utils.kt | 6 +- .../astminer/common/model/FunctionInfo.kt | 45 +++++++++ .../astminer/common/model/HandlerModel.kt | 2 +- .../common/model/TreeSplittingModel.kt | 4 +- .../kotlin/astminer/examples/AllJavaFiles.kt | 10 +- .../astminer/examples/AllJavaMethods.kt | 10 +- .../astminer/examples/AllPythonMethods.kt | 10 +- .../astminer/examples/Code2VecJavaMethods.kt | 13 +-- .../parse/antlr/java/JavaMethodSplitter.kt | 5 +- .../javascript/JavaScriptMethodSplitter.kt | 6 +- .../antlr/python/PythonMethodSplitter.kt | 5 +- .../parse/fuzzy/cpp/FuzzyMethodSplitter.kt | 5 +- .../gumtree/java/GumTreeJavaMethodSplitter.kt | 5 +- .../python/GumTreePythonMethodSplitter.kt | 11 +-- .../kotlin/astminer/cli/LabelExtractorTest.kt | 68 ++++++++------ .../antlr/java/JavaMethodSplitterTest.kt | 56 +++++------ .../JavaScriptMethodSplitterTest.kt | 20 ++-- .../antlr/python/PythonMethodSplitterTest.kt | 15 +-- 22 files changed, 226 insertions(+), 202 deletions(-) create mode 100644 src/main/kotlin/astminer/common/model/FunctionInfo.kt diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 9c7a95cb..5f705206 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -7,6 +7,8 @@ import astminer.storage.path.Code2VecPathStorage; import astminer.storage.path.PathBasedStorage; import astminer.storage.path.PathBasedStorageConfig; +import 
org.jetbrains.annotations.NotNull; + import java.io.FileInputStream; import java.io.IOException; import java.nio.file.*; @@ -24,8 +26,9 @@ public static void runExample() { final Path inputFolder = Paths.get(INPUT_FOLDER); FileVisitor fileVisitor = new SimpleFileVisitor() { + @NotNull @Override - public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) throws IOException { + public FileVisitResult visitFile(@NotNull Path file, BasicFileAttributes attributes) throws IOException { Node fileTree = new GumTreeJavaParser().parseInputStream(new FileInputStream(file.toFile())); if (fileTree == null) { return FileVisitResult.CONTINUE; diff --git a/src/main/java/me/vovak/antlr/parser/JavaScriptBaseLexer.java b/src/main/java/me/vovak/antlr/parser/JavaScriptBaseLexer.java index a0f0ef49..1e48aa18 100644 --- a/src/main/java/me/vovak/antlr/parser/JavaScriptBaseLexer.java +++ b/src/main/java/me/vovak/antlr/parser/JavaScriptBaseLexer.java @@ -2,6 +2,8 @@ import me.vovak.antlr.parser.JavaScriptLexer; import org.antlr.v4.runtime.*; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; import java.util.Stack; @@ -15,8 +17,10 @@ public abstract class JavaScriptBaseLexer extends Lexer * Stores values of nested modes. 
By default mode is strict or * defined externally (useStrictDefault) */ + @NotNull private Stack scopeStrictModes = new Stack(); + @Nullable private Token lastToken = null; /** * Default value of strict mode diff --git a/src/main/kotlin/astminer/cli/FilterPredicates.kt b/src/main/kotlin/astminer/cli/FilterPredicates.kt index 69e11878..754dee06 100644 --- a/src/main/kotlin/astminer/cli/FilterPredicates.kt +++ b/src/main/kotlin/astminer/cli/FilterPredicates.kt @@ -1,89 +1,55 @@ package astminer.cli -import astminer.common.model.MethodInfo +import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.preOrder import astminer.common.splitToSubtokens -abstract class MethodFilterPredicate { - open fun isFiltered(methodInfo: MethodInfo): Boolean = false - - fun typeBasedFilterPredicate(root: Node?, nodeType: String, excludeValues: List): Boolean { - root?.getChildrenOfType(nodeType)?.forEach { - if (it.getToken() in excludeValues) { - return false - } - } - return true - } +interface MethodFilter { + fun isFiltered(functionInfo: FunctionInfo): Boolean } -class ModifierFilterPredicate(private val excludeModifiers: List) : - MethodFilterPredicate() { - - // TODO: add other parsers - - private fun gumTreeModifierFilter(root: Node?) : Boolean = - typeBasedFilterPredicate(root, "Modifier", excludeModifiers) - - override fun isFiltered(methodInfo: MethodInfo): Boolean = - gumTreeModifierFilter(methodInfo.method.root) +class ModifierFilterPredicate(private val excludeModifiers: List) : MethodFilter { + override fun isFiltered(functionInfo: FunctionInfo): Boolean = + !excludeModifiers.any { modifier -> modifier in functionInfo.modifiers } } -class AnnotationFilterPredicate(private val excludeAnnotations: List) : - MethodFilterPredicate() { - - // TODO: add other parsers - - private fun gumTreeAnnotationFilter(root: Node?) 
: Boolean = - typeBasedFilterPredicate( - root?.getChildOfType("MarkerAnnotation"), "SimpleName", excludeAnnotations - ) - - override fun isFiltered(methodInfo: MethodInfo): Boolean = - gumTreeAnnotationFilter(methodInfo.method.root) +class AnnotationFilterPredicate(private val excludeAnnotations: List) : MethodFilter { + override fun isFiltered(functionInfo: FunctionInfo): Boolean = + !excludeAnnotations.any { annotation -> annotation in functionInfo.annotations } } -class ConstructorFilterPredicate : MethodFilterPredicate() { - - override fun isFiltered(methodInfo: MethodInfo): Boolean { - return methodInfo.name() != methodInfo.enclosingElementName() - } +object ConstructorFilterPredicate : MethodFilter { + override fun isFiltered(functionInfo: FunctionInfo) = !functionInfo.isConstructor } -class MethodNameLengthFilterPredicate(private val maxLength: Int) : MethodFilterPredicate() { - override fun isFiltered(methodInfo: MethodInfo): Boolean { - if (maxLength == -1) { - return true - } - val nameNode = methodInfo.method.nameNode - return if (nameNode != null) { - splitToSubtokens(nameNode.getToken()).size <= maxLength +class MethodNameWordsNumberFilter(private val maxWordsNumber: Int) : MethodFilter { + override fun isFiltered(functionInfo: FunctionInfo): Boolean { + return if (maxWordsNumber == -1) { + true } else { - false + val name = functionInfo.name + name != null && splitToSubtokens(name).size <= maxWordsNumber } } } -class TokenLengthFilterPredicate(private val maxLength: Int) : MethodFilterPredicate() { - override fun isFiltered(methodInfo: MethodInfo): Boolean { - if (maxLength == -1) { - return true - } - methodInfo.method.root.preOrder().forEach { node -> - if (splitToSubtokens(node.getToken()).size > maxLength) { - return false - } +class MethodAnyNodeWordsNumberFilter(private val maxWordsNumber: Int) : MethodFilter { + override fun isFiltered(functionInfo: FunctionInfo): Boolean { + return if (maxWordsNumber == -1) { + true + } else { + 
!functionInfo.root.preOrder().any { node -> splitToSubtokens(node.getToken()).size > maxWordsNumber } } - return true } } -class TreeSizeFilterPredicate(private val maxSize: Int) : MethodFilterPredicate() { - override fun isFiltered(methodInfo: MethodInfo): Boolean { - if (maxSize == -1) { - return true +class TreeSizeFilterPredicate(private val maxSize: Int) : MethodFilter { + override fun isFiltered(functionInfo: FunctionInfo): Boolean { + return if (maxSize == -1) { + true + } else { + functionInfo.root.preOrder().size <= maxSize } - return methodInfo.method.root.preOrder().size <= maxSize } } diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 0a521109..df2b2a92 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -1,8 +1,8 @@ package astminer.cli -import astminer.common.model.MethodInfo import astminer.common.model.Node import astminer.common.model.ParseResult +import astminer.common.model.FunctionInfo import astminer.common.preOrder import astminer.common.setTechnicalToken import astminer.parse.antlr.AntlrNode @@ -48,7 +48,7 @@ abstract class FileLabelExtractor : LabelExtractor { } abstract class MethodLabelExtractor( - open val filterPredicates: Collection = emptyList(), + open val filterPredicates: Collection = emptyList(), open val javaParser: String = "gumtree", open val pythonParser: String = "antlr" ) : LabelExtractor { @@ -108,11 +108,11 @@ abstract class MethodLabelExtractor( } return methodInfos.mapNotNull { val label = extractLabel(it, filePath) ?: return@mapNotNull null - LabeledResult(it.method.root, label, filePath) + LabeledResult(it.root, label, filePath) } } - abstract fun extractLabel(methodInfo: MethodInfo, filePath: String): String? + abstract fun extractLabel(functionInfo: FunctionInfo, filePath: String): String? 
} class FilePathExtractor : FileLabelExtractor() { @@ -128,25 +128,20 @@ class FolderExtractor : FileLabelExtractor() { } class MethodNameExtractor( - val hideMethodNames: Boolean = false, - override val filterPredicates: Collection = emptyList(), + override val filterPredicates: Collection = emptyList(), override val javaParser: String = "gumtree", override val pythonParser: String = "antlr" ) : MethodLabelExtractor(filterPredicates, javaParser, pythonParser) { - override fun extractLabel(methodInfo: MethodInfo, filePath: String): String? { - val methodNameNode = methodInfo.method.nameNode ?: return null - val methodRoot = methodInfo.method.root - val methodName = methodInfo.name() ?: return null - - if (hideMethodNames) { - methodRoot.preOrder().forEach { node -> - if (node.getToken() == methodName) { - node.setTechnicalToken("SELF") - } + override fun extractLabel(functionInfo: FunctionInfo, filePath: String): String? { + val name = functionInfo.name ?: return null + functionInfo.root.preOrder().forEach { node -> + if (node.getToken() == name) { + node.setTechnicalToken("SELF") } - methodNameNode.setTechnicalToken("METHOD_NAME") } - return methodName + functionInfo.nameNode?.setTechnicalToken("METHOD_NAME") + // TODO: for some reason it is not normalized, check if something is wrong. 
Maybe storages normalize the label + return name } } diff --git a/src/main/kotlin/astminer/cli/utils.kt b/src/main/kotlin/astminer/cli/utils.kt index a069d726..44265aff 100644 --- a/src/main/kotlin/astminer/cli/utils.kt +++ b/src/main/kotlin/astminer/cli/utils.kt @@ -56,13 +56,13 @@ fun getLabelExtractor( "method" -> { val filterPredicates = mutableListOf( ModifierFilterPredicate(excludeModifiers), AnnotationFilterPredicate(excludeAnnotations), - MethodNameLengthFilterPredicate(maxMethodNameLength), TokenLengthFilterPredicate(maxTokenLength), + MethodNameWordsNumberFilter(maxMethodNameLength), MethodAnyNodeWordsNumberFilter(maxTokenLength), TreeSizeFilterPredicate(maxTreeSize) ) if (filterConstructors) { - filterPredicates.add(ConstructorFilterPredicate()) + filterPredicates.add(ConstructorFilterPredicate) } - return MethodNameExtractor(hideMethodNames, filterPredicates, javaParser) + return MethodNameExtractor(filterPredicates, javaParser) } } throw UnsupportedOperationException("Unsupported granularity level $granularityLevel") diff --git a/src/main/kotlin/astminer/common/model/FunctionInfo.kt b/src/main/kotlin/astminer/common/model/FunctionInfo.kt new file mode 100644 index 00000000..80238fee --- /dev/null +++ b/src/main/kotlin/astminer/common/model/FunctionInfo.kt @@ -0,0 +1,45 @@ +package astminer.common.model + +class MethodInfoPropertyNotImplementedException(propertyName: String) : + UnsupportedOperationException( + "The property $propertyName of MethodInfo for this language and parser type is not implemented yet. " + + "Consider implementing it." + ) + +private fun notImplemented(propertyName: String): Nothing = throw MethodInfoPropertyNotImplementedException(propertyName) + +interface FunctionInfo { + val nameNode: T? + get() = notImplemented("nameNode") + val name: String? 
+ get() = nameNode?.getToken() + val root: T + get() = notImplemented("root") + val annotations: List + get() = notImplemented("annotations") + val modifiers: List + get() = notImplemented("modifiers") + val parameters: List + get() = notImplemented("parameters") + val returnType: String? + get() = notImplemented("returnType") + + // is null because can be only from a small set like {variableDeclaration, classDeclaration..} + // for instance it cannot be the root of the tree + // thats why it is probably called "element" and not "node" + val enclosingElement: T? + get() = notImplemented("enclosingNode") + val enclosingElementName: String? + get() = notImplemented("enclosingElementName") + val className: String? + get() = notImplemented("className") + val isConstructor: Boolean + get() = notImplemented("isConstructor") +} + +data class MethodInfoParameter(val name: String, val type: String?) + +// TODO: should be removed +class DummyFunctionInfo : FunctionInfo + +fun dummyMethodInfos() = listOf(DummyFunctionInfo()) diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index 5b843a1d..6f156ba3 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -10,7 +10,7 @@ abstract class LanguageHandler { abstract val parseResult: ParseResult protected abstract val splitter: TreeMethodSplitter - fun splitIntoMethods(): Collection> { + fun splitIntoMethods(): Collection> { val root = parseResult.root ?: return emptyList() return splitter.splitIntoMethods(root) } diff --git a/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt b/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt index c75bfb9d..568b44bc 100644 --- a/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt +++ b/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt @@ -1,7 +1,7 @@ package astminer.common.model interface TreeMethodSplitter { - fun 
splitIntoMethods(root: T): Collection> + fun splitIntoMethods(root: T): Collection> } class MethodInfo( @@ -18,7 +18,7 @@ class MethodInfo( class MethodNode( val root: T, val returnTypeNode: T?, - val nameNode: T? + val nameNode: T? // why is it nullable?? ) { fun name() = nameNode?.getToken() fun returnType() = returnTypeNode?.getToken() diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index d135e439..69f059e1 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -18,11 +18,11 @@ fun allJavaFiles() { val node = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix node.prettyPrint() JavaMethodSplitter().splitIntoMethods(node).forEach { - println(it.name()) - println(it.returnType()) - println(it.enclosingElementName()) - it.methodParameters.forEach { parameters -> - println("${parameters.name()} ${parameters.returnType()}") + println(it.name) + println(it.returnType) + println(it.className) + it.parameters.forEach { parameter -> + println("${parameter.name} ${parameter.type}") } } storage.store(LabeledResult(node, file.path, file.path)) diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 7a748f37..05f26779 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -1,7 +1,7 @@ package astminer.examples import astminer.cli.LabeledResult -import astminer.common.model.MethodInfo +import astminer.common.model.FunctionInfo import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter @@ -11,10 +11,10 @@ import astminer.storage.path.PathBasedStorageConfig import java.io.File -private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { - val className = 
methodInfo.enclosingElementName() ?: "" - val methodName = methodInfo.name() ?: "unknown_method" - val parameterTypes = methodInfo.methodParameters.joinToString("|") { it.name() ?: "_" } +private fun getCsvFriendlyMethodId(functionInfo: FunctionInfo): String { + val className = functionInfo.className ?: "" + val methodName = functionInfo.name + val parameterTypes = functionInfo.parameters.joinToString("|") { it.name } return "$className.$methodName($parameterTypes)" } diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 2f55d247..fa755ed0 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -1,7 +1,7 @@ package astminer.examples import astminer.cli.LabeledResult -import astminer.common.model.MethodInfo +import astminer.common.model.FunctionInfo import astminer.parse.gumtree.python.GumTreePythonMethodSplitter import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.python.GumTreePythonParser @@ -9,10 +9,10 @@ import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File -private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { - val className = methodInfo.enclosingElementName() ?: "" - val methodName = methodInfo.name() ?: "unknown_method" - val parameterTypes = methodInfo.methodParameters.joinToString("|") { it.name() ?: "_" } +private fun getCsvFriendlyMethodId(functionInfo: FunctionInfo): String { + val className = functionInfo.className ?: "" + val methodName = functionInfo.name + val parameterTypes = functionInfo.parameters.joinToString("|") { it.name } return "$className.$methodName($parameterTypes)" } diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 86246764..60a3d7af 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ 
b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -1,6 +1,7 @@ package astminer.examples import astminer.cli.LabeledResult +import astminer.cli.MethodNameExtractor import astminer.common.* import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser @@ -25,14 +26,14 @@ fun code2vecJavaMethods() { //extract method nodes val methods = JavaMethodSplitter().splitIntoMethods(fileNode) - methods.forEach { methodInfo -> - val methodNameNode = methodInfo.method.nameNode ?: return@forEach - val methodRoot = methodInfo.method.root - val label = splitToSubtokens(methodNameNode.getToken()).joinToString("|") - methodNameNode.setTechnicalToken("METHOD_NAME") + val labelExtractor = MethodNameExtractor() + methods.forEach { methodInfo -> + val label = labelExtractor.extractLabel(methodInfo, file.absolutePath) ?: return@forEach + // TODO: this is ugly maybe label should be normalized by default + val normalizedLabel = splitToSubtokens(label).joinToString("|") // Retrieve paths from every node individually and store them - storage.store(LabeledResult(methodRoot, label, file.absolutePath)) + storage.store(LabeledResult(methodInfo.root, normalizedLabel, file.absolutePath)) } } diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt index d7724449..e48a3af9 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt @@ -21,11 +21,12 @@ class JavaMethodSplitter : TreeMethodSplitter { private const val PARAMETER_NAME_NODE = "variableDeclaratorId" } - override fun splitIntoMethods(root: AntlrNode): Collection> { + override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { decompressTypeLabel(it.getTypeLabel()).last() == METHOD_NODE } - return methodRoots.map { collectMethodInfo(it as AntlrNode) } + return dummyMethodInfos() 
+// return methodRoots.map { collectMethodInfo(it as AntlrNode) } } private fun collectMethodInfo(methodNode: AntlrNode): MethodInfo { diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt index 387ab198..22e3b592 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt @@ -16,7 +16,7 @@ class JavaScriptMethodSplitter : TreeMethodSplitter { private const val FUNCTION_NODE = "Function" } - override fun splitIntoMethods(root: AntlrNode): Collection> { + override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots: List = root.preOrder().map { node -> when { node.isArrowElement() -> ArrowElement(node as AntlrNode) @@ -25,8 +25,8 @@ class JavaScriptMethodSplitter : TreeMethodSplitter { else -> null } }.filterNotNull() - - return methodRoots.map { it.getElementInfo() } + return dummyMethodInfos() +// return methodRoots.map { it.getElementInfo() } } private fun Node.isArrowElement() = this.getChildOfType(ARROW_NODE) != null diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt index bc62320a..61e58b34 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt @@ -21,11 +21,12 @@ class PythonMethodSplitter : TreeMethodSplitter { private const val PARAMETER_NAME_NODE = "NAME" } - override fun splitIntoMethods(root: AntlrNode): Collection> { + override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { decompressTypeLabel(it.getTypeLabel()).last() == METHOD_NODE } - return methodRoots.map { collectMethodInfo(it as AntlrNode) } + return dummyMethodInfos() +// return methodRoots.map { 
collectMethodInfo(it as AntlrNode) } } private fun collectMethodInfo(methodNode: AntlrNode): MethodInfo { diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt index b6f387cd..59cc769d 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt @@ -19,9 +19,10 @@ class FuzzyMethodSplitter : TreeMethodSplitter { private const val PARAMETER_TYPE_NODE = "TYPE_FULL_NAME" } - override fun splitIntoMethods(root: FuzzyNode): Collection> { + override fun splitIntoMethods(root: FuzzyNode): Collection> { val methodRoots = root.preOrder().filter { it.getTypeLabel() == METHOD_NODE } - return methodRoots.map { collectMethodInfo(it as FuzzyNode) } + return dummyMethodInfos() +// return methodRoots.map { collectMethodInfo(it as FuzzyNode) } } private fun collectMethodInfo(methodNode: FuzzyNode): MethodInfo { diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt index b81b88ec..2f212f4e 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt @@ -17,9 +17,10 @@ class GumTreeJavaMethodSplitter : TreeMethodSplitter { } } - override fun splitIntoMethods(root: GumTreeNode): Collection> { + override fun splitIntoMethods(root: GumTreeNode): Collection> { val methodRoots = root.preOrder().filter { it.getTypeLabel() == TypeLabels.methodDeclaration } - return methodRoots.map { collectMethodInfo(it as GumTreeNode) } + return dummyMethodInfos() +// return methodRoots.map { collectMethodInfo(it as GumTreeNode) } } private fun collectMethodInfo(methodNode: GumTreeNode): MethodInfo { diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt 
b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt index 22cec1bf..6c1a0cb9 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt @@ -1,10 +1,6 @@ package astminer.parse.gumtree.python -import astminer.common.model.ElementNode -import astminer.common.model.MethodInfo -import astminer.common.model.MethodNode -import astminer.common.model.ParameterNode -import astminer.common.model.TreeMethodSplitter +import astminer.common.model.* import astminer.common.preOrder import astminer.parse.gumtree.GumTreeNode @@ -33,9 +29,10 @@ class GumTreePythonMethodSplitter : TreeMethodSplitter { } } - override fun splitIntoMethods(root: GumTreeNode): Collection> { + override fun splitIntoMethods(root: GumTreeNode): Collection> { val methodRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.getTypeLabel()) } - return methodRoots.map { collectMethodInfo(it as GumTreeNode) } + return dummyMethodInfos() +// return methodRoots.map { collectMethodInfo(it as GumTreeNode) } } private fun collectMethodInfo(methodNode: GumTreeNode): MethodInfo { diff --git a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt index e431dec2..845f7597 100644 --- a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt +++ b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt @@ -1,14 +1,11 @@ package astminer.cli import astminer.common.getTechnicalToken -import astminer.common.model.ElementNode -import astminer.common.model.MethodInfo -import astminer.common.model.MethodNode -import astminer.common.model.ParseResult +import astminer.common.model.* import astminer.parse.antlr.AntlrNode +import org.junit.Before import org.junit.Test import kotlin.test.assertEquals -import kotlin.test.assertNull import kotlin.test.assertTrue internal class LabelExtractorTest { @@ -18,7 +15,18 @@ internal class 
LabelExtractorTest { private const val FOLDER = "folder" private const val FILENAME = "file.txt" private const val METHOD_NAME = "method" - private val DUMMY_ROOT = AntlrNode("", null, null) + } + + private var dummyRoot = AntlrNode("", null, null) + + private fun makeMethodInfo(nameNode: AntlrNode) = object : FunctionInfo { + override val root: AntlrNode = dummyRoot + override val nameNode: AntlrNode = nameNode + } + + @Before + fun setUp() { + dummyRoot = AntlrNode("", null, null) } @Test @@ -32,11 +40,11 @@ internal class LabelExtractorTest { @Test fun testNonEmptyFilePathExtractor() { val labelExtractor = FilePathExtractor() - val nonEmptyParseResult = ParseResult(DUMMY_ROOT, PATH_STRING) + val nonEmptyParseResult = ParseResult(dummyRoot, PATH_STRING) val labeledParseResults = labelExtractor.toLabeledData(nonEmptyParseResult) assertEquals(1, labeledParseResults.size) val (root, label) = labeledParseResults[0] - assertEquals(DUMMY_ROOT, root) + assertEquals(dummyRoot, root) assertEquals(PATH_STRING, label) } @@ -51,39 +59,39 @@ internal class LabelExtractorTest { @Test fun testNonEmptyFolderExtractor() { val labelExtractor = FolderExtractor() - val nonEmptyParseResult = ParseResult(DUMMY_ROOT, PATH_STRING) + val nonEmptyParseResult = ParseResult(dummyRoot, PATH_STRING) val labeledParseResults = labelExtractor.toLabeledData(nonEmptyParseResult) assertEquals(1, labeledParseResults.size) val (root, label) = labeledParseResults[0] - assertEquals(DUMMY_ROOT, root) + assertEquals(dummyRoot, root) assertEquals(FOLDER, label) } @Test - fun testMethodNameExtractor() { - val nameNode = AntlrNode("", DUMMY_ROOT, METHOD_NAME) - val methodInfo = MethodInfo( - MethodNode(DUMMY_ROOT, null, nameNode), - ElementNode(null, null), - emptyList() - ) - val methodNameExtractor = MethodNameExtractor(false) - val label = methodNameExtractor.extractLabel(methodInfo, PATH_STRING) + fun `test method name extractor extracts correct method name`() { + val nameNode = AntlrNode("", dummyRoot, 
METHOD_NAME) +// val methodInfo = MethodInfo( +// MethodNode(dummyRoot, null, nameNode), +// ElementNode(null, null), +// emptyList() +// ) + val method = makeMethodInfo(nameNode) + val methodNameExtractor = MethodNameExtractor() + val label = methodNameExtractor.extractLabel(method, PATH_STRING) assertEquals(METHOD_NAME, label) - assertNull(nameNode.getTechnicalToken()) } @Test - fun testMethodNameExtractorHide() { - val nameNode = AntlrNode("", DUMMY_ROOT, METHOD_NAME) - val methodInfo = MethodInfo( - MethodNode(DUMMY_ROOT, null, nameNode), - ElementNode(null, null), - emptyList() - ) - val methodNameExtractor = MethodNameExtractor(true) - val label = methodNameExtractor.extractLabel(methodInfo, PATH_STRING) - assertEquals(METHOD_NAME, label) + fun `test method name extractor hides method name with technical token`() { + val nameNode = AntlrNode("", dummyRoot, METHOD_NAME) +// val methodInfo = MethodInfo( +// MethodNode(dummyRoot, null, nameNode), +// ElementNode(null, null), +// emptyList() +// ) + val methodInfo = makeMethodInfo(nameNode) + val methodNameExtractor = MethodNameExtractor() + methodNameExtractor.extractLabel(methodInfo, PATH_STRING) assertEquals("METHOD_NAME", nameNode.getTechnicalToken()) } } diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt index 58f707ff..ed3cf492 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt @@ -1,6 +1,6 @@ package astminer.parse.antlr.java -import astminer.common.model.MethodInfo +import astminer.common.model.FunctionInfo import astminer.parse.antlr.AntlrNode import org.junit.Test import kotlin.test.assertEquals @@ -15,89 +15,89 @@ class JavaMethodSplitterTest { val parser = JavaParser() } - var methodInfos: Collection> = listOf() + var functionInfos: Collection> = listOf() @BeforeTest fun parseTree() { val testTree = 
parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.java").inputStream()) assertNotNull(testTree) - methodInfos = methodSplitter.splitIntoMethods(testTree) + functionInfos = methodSplitter.splitIntoMethods(testTree) } @Test fun testValidSplitting() { - assertEquals(N_FUNCTIONS, methodInfos.size, "Test file contains $N_FUNCTIONS methods") + assertEquals(N_FUNCTIONS, functionInfos.size, "Test file contains $N_FUNCTIONS methods") } @Test fun testReturnVoid() { - val methodVoid = methodInfos.find { it.name() == "functionReturningVoid" } + val methodVoid = functionInfos.find { it.name == "functionReturningVoid" } assertNotNull(methodVoid) - assertEquals( "void", methodVoid.returnType()) + assertEquals( "void", methodVoid.returnType) } @Test fun testReturnInt() { - val methodInt = methodInfos.find { it.name() == "functionReturningInt" } + val methodInt = functionInfos.find { it.name == "functionReturningInt" } assertNotNull(methodInt) - assertEquals( "int", methodInt.returnType()) + assertEquals( "int", methodInt.returnType) } @Test fun testReturnStrings() { - val methodStrings = methodInfos.find { it.name() == "functionReturningStrings" } + val methodStrings = functionInfos.find { it.name == "functionReturningStrings" } assertNotNull(methodStrings) - assertEquals( "String[]", methodStrings.returnType()) + assertEquals( "String[]", methodStrings.returnType) } @Test fun testReturnClass() { - val methodClass = methodInfos.find { it.name() == "functionReturningClass" } + val methodClass = functionInfos.find { it.name == "functionReturningClass" } assertNotNull(methodClass) - assertEquals( "Class1", methodClass.returnType()) + assertEquals( "Class1", methodClass.returnType) } @Test fun testFunctionInClass() { - val methodClass = methodInfos.find { it.name() == "functionInClass1" } + val methodClass = functionInfos.find { it.name == "functionInClass1" } assertNotNull(methodClass) - assertEquals( "Class1", methodClass.enclosingElementName()) + 
assertEquals( "Class1", methodClass.className) } @Test fun testFunctionInNestedClass() { - val methodClass = methodInfos.find { it.name() == "functionInClass2" } + val methodClass = functionInfos.find { it.name == "functionInClass2" } assertNotNull(methodClass) - assertEquals( "Class2", methodClass.enclosingElementName()) + assertEquals( "Class2", methodClass.className) } @Test fun testNoParameters() { - val methodNoParameters = methodInfos.find { it.name() == "functionWithNoParameters" } + val methodNoParameters = functionInfos.find { it.name == "functionWithNoParameters" } assertNotNull(methodNoParameters) - assertEquals(0, methodNoParameters.methodParameters.size) + assertEquals(0, methodNoParameters.parameters.size) } @Test fun testOneParameter() { - val methodOneParameter = methodInfos.find { it.name() == "functionWithOneParameter" } + val methodOneParameter = functionInfos.find { it.name == "functionWithOneParameter" } assertNotNull(methodOneParameter) - assertEquals(1, methodOneParameter.methodParameters.size) - val parameter = methodOneParameter.methodParameters[0] - assertEquals("p1", parameter.name()) - assertEquals("int", parameter.returnType()) + assertEquals(1, methodOneParameter.parameters.size) + val parameter = methodOneParameter.parameters[0] + assertEquals("p1", parameter.name) + assertEquals("int", parameter.type) } @Test fun testThreeParameters() { - val methodThreeParameters = methodInfos.find { it.name() == "functionWithThreeParameters" } + val methodThreeParameters = functionInfos.find { it.name == "functionWithThreeParameters" } assertNotNull(methodThreeParameters) - assertEquals(3, methodThreeParameters.methodParameters.size) + assertEquals(3, methodThreeParameters.parameters.size) val methodTypes = listOf("Class", "String[][]", "int[]") for (i in 0 until 3) { - val parameter = methodThreeParameters.methodParameters[i] - assertEquals("p${i + 1}", parameter.name()) - assertEquals(methodTypes[i], parameter.returnType()) + val parameter = 
methodThreeParameters.parameters[i] + assertEquals("p${i + 1}", parameter.name) + assertEquals(methodTypes[i], parameter.type) } } } \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt index 2e29d9d6..fc9ef6b3 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt @@ -1,6 +1,6 @@ package astminer.parse.antlr.javascript -import astminer.common.model.MethodInfo +import astminer.common.model.FunctionInfo import astminer.parse.antlr.AntlrNode import org.junit.Test import java.io.File @@ -17,18 +17,18 @@ class JavaScriptMethodSplitterTest { val parser = JavaScriptParser() } - var methodInfos: Collection> = listOf() + var functionInfos: Collection> = listOf() @BeforeTest fun parseTree() { val testTree = parser.parseInputStream(File(testFilePath).inputStream()) assertNotNull(testTree) - methodInfos = methodSplitter.splitIntoMethods(testTree) + functionInfos = methodSplitter.splitIntoMethods(testTree) } @Test fun testValidSplitting() { - assertEquals(N_METHODS, methodInfos.size, "Test file contains $N_METHODS methods") + assertEquals(N_METHODS, functionInfos.size, "Test file contains $N_METHODS methods") } @Test @@ -43,16 +43,16 @@ class JavaScriptMethodSplitterTest { } } - fun MethodInfo.getJsonInfo(): String { + fun FunctionInfo.getJsonInfo(): String { return "info : {" + - "name : ${name()}, " + - "args : ${methodParameters.map { it.name() }.joinToString(", ")}, " + - "enclosing element : ${enclosingElement.root?.getTypeLabel()?.getEnclosingElementType()}, " + - "enclosing element name : ${enclosingElementName()}" + + "name : ${name}, " + + "args : ${parameters.map { it.name }.joinToString(", ")}, " + + "enclosing element : ${enclosingElement?.getTypeLabel()?.getEnclosingElementType()}, " + + 
"enclosing element name : ${className}" + "}" } - val actualJsonInfos = methodInfos.map { it.getJsonInfo() }.sorted() + val actualJsonInfos = functionInfos.map { it.getJsonInfo() }.sorted() val text = File(testFilePath).readText() val expectedJsonInfos = Regex("info : \\{.*\\}").findAll(text).toList().map { it.value }.sorted() diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt index 6391e041..bb673852 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt @@ -1,5 +1,6 @@ package astminer.parse.antlr.python +import astminer.common.model.FunctionInfo import astminer.common.model.MethodInfo import astminer.parse.antlr.AntlrNode import org.junit.Test @@ -16,7 +17,7 @@ class PythonMethodSplitterTest { val parser = PythonParser() } - var methodInfos: Collection> = listOf() + var methodInfos: Collection> = listOf() @BeforeTest fun parseTree() { @@ -32,35 +33,35 @@ class PythonMethodSplitterTest { @Test fun testFunctionNotInClass() { - val methodClass = methodInfos.find { it.name() == "funWithNoClass" } + val methodClass = methodInfos.find { it.name == "funWithNoClass" } assertNotNull(methodClass) assertNull(methodClass.enclosingElement.root) } @Test fun testFunctionInClass() { - val methodClass = methodInfos.find { it.name() == "funInClass1" } + val methodClass = methodInfos.find { it.name == "funInClass1" } assertNotNull(methodClass) assertEquals( "Class1", methodClass.enclosingElementName()) } @Test fun testFunctionInNestedClass() { - val methodClass = methodInfos.find { it.name() == "funInClass2" } + val methodClass = methodInfos.find { it.name == "funInClass2" } assertNotNull(methodClass) assertEquals( "Class2", methodClass.enclosingElementName()) } @Test fun testNoParameters() { - val methodNoParameters = methodInfos.find { it.name() == 
"functionWithNoParameters" } + val methodNoParameters = methodInfos.find { it.name == "functionWithNoParameters" } assertNotNull(methodNoParameters) assertEquals(0, methodNoParameters.methodParameters.size) } @Test fun testOneParameter() { - val methodOneParameter = methodInfos.find { it.name() == "functionWithOneParameter" } + val methodOneParameter = methodInfos.find { it.name == "functionWithOneParameter" } assertNotNull(methodOneParameter) assertEquals(1, methodOneParameter.methodParameters.size) val parameter = methodOneParameter.methodParameters[0] @@ -69,7 +70,7 @@ class PythonMethodSplitterTest { @Test fun testThreeParameters() { - val methodThreeParameters = methodInfos.find { it.name() == "functionWithThreeParameters" } + val methodThreeParameters = methodInfos.find { it.name == "functionWithThreeParameters" } assertNotNull(methodThreeParameters) assertEquals(3, methodThreeParameters.methodParameters.size) for (i in 0 until 3) { From 5b2abd3e39631a7991e40dabea8dded39d4cf7d9 Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 22 Apr 2021 18:26:08 +0300 Subject: [PATCH 071/308] js splitter bug fixed --- .../astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt index ccfc4cd5..b933165f 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt @@ -98,7 +98,7 @@ abstract class JavaScriptElement(private val element: AntlrNode) { } private fun Node.hasLastLabel(typeLabel: String): Boolean { - return decompressTypeLabel(typeLabel).last() == typeLabel + return decompressTypeLabel(this.typeLabel).last() == typeLabel } private fun AntlrNode.getItOrChildrenOfType(typeLabel: String) : List { From 4f922af41a893c9b6cb44df505bfb828913c6b39 
Mon Sep 17 00:00:00 2001 From: furetur Date: Sat, 24 Apr 2021 23:12:10 +0500 Subject: [PATCH 072/308] it finally compiles --- build.gradle.kts | 8 ++ .../astminer/common/model/FunctionInfo.kt | 2 + .../antlr/python/PythonMethodSplitterTest.kt | 20 +-- .../parse/cpp/FuzzyMethodSplitterTest.kt | 55 ++++---- .../java/GumTreeJavaMethodSplitterTest.kt | 53 ++++---- .../python/GumTreeJavaMethodSplitterTest.kt | 121 +++++++++--------- 6 files changed, 136 insertions(+), 123 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 71436212..8018da5d 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -176,3 +176,11 @@ jmh { benchmarkMode = listOf("AverageTime") resultsFile = file("build/reports/benchmarks.csv") } +val compileKotlin: KotlinCompile by tasks +compileKotlin.kotlinOptions { + jvmTarget = "1.8" +} +val compileTestKotlin: KotlinCompile by tasks +compileTestKotlin.kotlinOptions { + jvmTarget = "1.8" +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/common/model/FunctionInfo.kt b/src/main/kotlin/astminer/common/model/FunctionInfo.kt index 80238fee..c393febf 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfo.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfo.kt @@ -23,6 +23,8 @@ interface FunctionInfo { get() = notImplemented("parameters") val returnType: String? get() = notImplemented("returnType") + val returnTypeNode: T? 
+ get() = notImplemented("returnTypeNode") // is null because can be only from a small set like {variableDeclaration, classDeclaration..} // for instance it cannot be the root of the tree diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt index bb673852..901fe4e8 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt @@ -35,47 +35,47 @@ class PythonMethodSplitterTest { fun testFunctionNotInClass() { val methodClass = methodInfos.find { it.name == "funWithNoClass" } assertNotNull(methodClass) - assertNull(methodClass.enclosingElement.root) + assertNull(methodClass.enclosingElement) } @Test fun testFunctionInClass() { val methodClass = methodInfos.find { it.name == "funInClass1" } assertNotNull(methodClass) - assertEquals( "Class1", methodClass.enclosingElementName()) + assertEquals( "Class1", methodClass.enclosingElementName) } @Test fun testFunctionInNestedClass() { val methodClass = methodInfos.find { it.name == "funInClass2" } assertNotNull(methodClass) - assertEquals( "Class2", methodClass.enclosingElementName()) + assertEquals( "Class2", methodClass.enclosingElementName) } @Test fun testNoParameters() { val methodNoParameters = methodInfos.find { it.name == "functionWithNoParameters" } assertNotNull(methodNoParameters) - assertEquals(0, methodNoParameters.methodParameters.size) + assertEquals(0, methodNoParameters.parameters.size) } @Test fun testOneParameter() { val methodOneParameter = methodInfos.find { it.name == "functionWithOneParameter" } assertNotNull(methodOneParameter) - assertEquals(1, methodOneParameter.methodParameters.size) - val parameter = methodOneParameter.methodParameters[0] - assertEquals("p1", parameter.name()) + assertEquals(1, methodOneParameter.parameters.size) + val parameter = methodOneParameter.parameters[0] + 
assertEquals("p1", parameter.name) } @Test fun testThreeParameters() { val methodThreeParameters = methodInfos.find { it.name == "functionWithThreeParameters" } assertNotNull(methodThreeParameters) - assertEquals(3, methodThreeParameters.methodParameters.size) + assertEquals(3, methodThreeParameters.parameters.size) for (i in 0 until 3) { - val parameter = methodThreeParameters.methodParameters[i] - assertEquals("p${i + 1}", parameter.name()) + val parameter = methodThreeParameters.parameters[i] + assertEquals("p${i + 1}", parameter.name) } } } \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index 7190776f..33c9c580 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -1,5 +1,6 @@ package astminer.parse.cpp +import astminer.common.model.FunctionInfo import astminer.common.model.MethodInfo import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.parse.fuzzy.cpp.FuzzyMethodSplitter @@ -19,7 +20,7 @@ class FuzzyMethodSplitterTest { val parser = FuzzyCppParser() } - var methodInfos: Collection> = listOf() + var methodInfos: Collection> = listOf() @BeforeTest fun parseTree() { @@ -35,79 +36,79 @@ class FuzzyMethodSplitterTest { @Test fun testReturnVoid() { - val methodVoid = methodInfos.find { it.name() == "functionReturningVoid" } + val methodVoid = methodInfos.find { it.name == "functionReturningVoid" } assertNotNull(methodVoid) - assertEquals( "void", methodVoid.returnType()) + assertEquals( "void", methodVoid.returnType) } @Test fun testReturnInt() { - val methodInt = methodInfos.find { it.name() == "functionReturningInt" } + val methodInt = methodInfos.find { it.name == "functionReturningInt" } assertNotNull(methodInt) - assertEquals( "int", methodInt.returnType()) + assertEquals( "int", methodInt.returnType) } @Test fun testReturnString() { - val 
methodString = methodInfos.find { it.name() == "functionReturningString" } + val methodString = methodInfos.find { it.name == "functionReturningString" } assertNotNull(methodString) - assertEquals( "string", methodString.returnType()) + assertEquals( "string", methodString.returnType) } @Test fun testReturnClass() { - val methodClass = methodInfos.find { it.name() == "functionReturningClass" } + val methodClass = methodInfos.find { it.name == "functionReturningClass" } assertNotNull(methodClass) - assertEquals( "Class", methodClass.returnType()) + assertEquals( "Class", methodClass.returnType) } @Test fun testFunctionNotInClass() { - val methodClass = methodInfos.find { it.name() == "functionWithNoClass" } + val methodClass = methodInfos.find { it.name == "functionWithNoClass" } assertNotNull(methodClass) - assertNull(methodClass.enclosingElement.root) + assertNull(methodClass.enclosingElement) } @Test fun testFunctionInClass() { - val methodClass = methodInfos.find { it.name() == "functionInClass1" } + val methodClass = methodInfos.find { it.name == "functionInClass1" } assertNotNull(methodClass) - assertEquals( "Class1", methodClass.enclosingElementName()) + assertEquals( "Class1", methodClass.enclosingElementName) } @Test fun testFunctionInNestedClass() { - val methodClass = methodInfos.find { it.name() == "functionInClass2" } + val methodClass = methodInfos.find { it.name == "functionInClass2" } assertNotNull(methodClass) - assertEquals( "Class2", methodClass.enclosingElementName()) + assertEquals( "Class2", methodClass.enclosingElementName) } @Test fun testNoParameters() { - val methodNoParameters = methodInfos.find { it.name() == "functionWithNoParameters" } + val methodNoParameters = methodInfos.find { it.name == "functionWithNoParameters" } assertNotNull(methodNoParameters) - assertEquals(0, methodNoParameters.methodParameters.size) + assertEquals(0, methodNoParameters.parameters.size) } @Test fun testOneParameter() { - val methodOneParameter = 
methodInfos.find { it.name() == "functionWithOneParameter" } + val methodOneParameter = methodInfos.find { it.name == "functionWithOneParameter" } assertNotNull(methodOneParameter) - assertEquals(1, methodOneParameter.methodParameters.size) - val parameter = methodOneParameter.methodParameters[0] - assertEquals("p1", parameter.name()) - assertEquals("int", parameter.returnType()) + assertEquals(1, methodOneParameter.parameters.size) + val parameter = methodOneParameter.parameters[0] + assertEquals("p1", parameter.name) + assertEquals("int", parameter.type) } @Test fun testThreeParameters() { - val methodThreeParameters = methodInfos.find { it.name() == "functionWithThreeParameters" } + val methodThreeParameters = methodInfos.find { it.name == "functionWithThreeParameters" } assertNotNull(methodThreeParameters) - assertEquals(3, methodThreeParameters.methodParameters.size) + assertEquals(3, methodThreeParameters.parameters.size) for (i in 0 until 3) { - val parameter = methodThreeParameters.methodParameters[i] - assertEquals("p${i + 1}", parameter.name()) - assertEquals("int", parameter.returnType()) + val parameter = methodThreeParameters.parameters[i] + assertEquals("p${i + 1}", parameter.name) + assertEquals("int", parameter.type) } } } \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt index 76f9868f..2ed6cd43 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt @@ -1,5 +1,6 @@ package astminer.parse.gumtree.java +import astminer.common.model.FunctionInfo import astminer.common.model.MethodInfo import astminer.parse.gumtree.GumTreeNode import org.junit.Test @@ -11,7 +12,7 @@ private fun createTree(filename: String): GumTreeNode { return parser.parseInputStream(File(filename).inputStream()) as 
GumTreeNode } -private fun createAndSplitTree(filename: String): Collection> { +private fun createAndSplitTree(filename: String): Collection> { return GumTreeJavaMethodSplitter().splitIntoMethods(createTree(filename)) } @@ -22,11 +23,11 @@ class GumTreeJavaMethodSplitterTest { assertEquals(1, methodInfos.size) with(methodInfos.first()) { - assertEquals("fun", name()) - assertEquals("void", returnType()) - assertEquals("SingleFunction", enclosingElementName()) - assertEquals(listOf("args", "param"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("String[]", "int"), methodParameters.map { it.returnType() }.toList()) + assertEquals("fun", name) + assertEquals("void", returnType) + assertEquals("SingleFunction", enclosingElementName) + assertEquals(listOf("args", "param"), parameters.map { it.name }.toList()) + assertEquals(listOf("String[]", "int"), parameters.map { it.type }.toList()) } } @@ -37,11 +38,11 @@ class GumTreeJavaMethodSplitterTest { assertEquals(1, methodInfos.size) with(methodInfos.first()) { - assertEquals("main", name()) - assertEquals("void", returnType()) - assertEquals("InnerClass", enclosingElementName()) - assertEquals(listOf("args"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("String[]"), methodParameters.map { it.returnType() }.toList()) + assertEquals("main", name) + assertEquals("void", returnType) + assertEquals("InnerClass", enclosingElementName) + assertEquals(listOf("args"), parameters.map { it.name }.toList()) + assertEquals(listOf("String[]"), parameters.map { it.type }.toList()) } } @@ -51,18 +52,18 @@ class GumTreeJavaMethodSplitterTest { assertEquals(2, methodInfos.size) with(methodInfos.first()) { - assertEquals("main", name()) - assertEquals("void", returnType()) - assertEquals("InnerClass", enclosingElementName()) - assertEquals(listOf("args"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("String[]"), methodParameters.map { it.returnType() }.toList()) + 
assertEquals("main", name) + assertEquals("void", returnType) + assertEquals("InnerClass", enclosingElementName) + assertEquals(listOf("args"), parameters.map { it.name }.toList()) + assertEquals(listOf("String[]"), parameters.map { it.type }.toList()) } with(methodInfos.last()) { - assertEquals("fun", name()) - assertEquals("void", returnType()) - assertEquals("SingleMethodInnerClass", enclosingElementName()) - assertEquals(listOf("args", "param"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("String[]", "int"), methodParameters.map { it.returnType() }.toList()) + assertEquals("fun", name) + assertEquals("void", returnType) + assertEquals("SingleMethodInnerClass", enclosingElementName) + assertEquals(listOf("args", "param"), parameters.map { it.name }.toList()) + assertEquals(listOf("String[]", "int"), parameters.map { it.type }.toList()) } } @@ -72,11 +73,11 @@ class GumTreeJavaMethodSplitterTest { assertEquals(1, methodInfos.size) with(methodInfos.first()) { - assertEquals("fun", name()) - assertEquals("int", returnType()) - assertEquals("SingleFunction", enclosingElementName()) - assertEquals(listOf("args", "param"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("int", "SingleFunction"), methodParameters.map { it.returnType() }.toList()) + assertEquals("fun", name) + assertEquals("int", returnType) + assertEquals("SingleFunction", enclosingElementName) + assertEquals(listOf("args", "param"), parameters.map { it.name }.toList()) + assertEquals(listOf("int", "SingleFunction"), parameters.map { it.type }.toList()) } } } \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt index 6e1c6025..7592b5eb 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt @@ 
-1,5 +1,6 @@ package astminer.parse.gumtree.python +import astminer.common.model.FunctionInfo import astminer.common.model.MethodInfo import astminer.parse.gumtree.GumTreeNode import org.junit.Test @@ -11,7 +12,7 @@ class GumTreeJavaMethodSplitterTest { private fun parse(filename: String): GumTreeNode? = GumTreePythonParser().parseInputStream(File(filename).inputStream()) - private fun splitMethods(filename: String): Collection> = parse(filename)?.let { + private fun splitMethods(filename: String): Collection> = parse(filename)?.let { GumTreePythonMethodSplitter().splitIntoMethods(it) } ?: emptyList() @@ -33,138 +34,138 @@ class GumTreeJavaMethodSplitterTest { "func_dif_args_typed_return", "complex_args_full_typed" ) val methodInfos = splitMethods(createPath("1.py")) - val parsedNames = methodInfos.map { it.name() }.toSet() + val parsedNames = methodInfos.map { it.name }.toSet() assertEquals(realNames, parsedNames) } @Test fun methodInfoTest1TypedArgs() { val methodInfos = splitMethods(createPath("1.py")) - val method = methodInfos.firstOrNull { it.name() == "complex_args_full_typed" } + val method = methodInfos.firstOrNull { it.name == "complex_args_full_typed" } assertNotNull(method) with(method) { - assertEquals("complex_args_full_typed", name()) - assertEquals(null, this.method.returnTypeNode) - assertEquals(1, methodParameters.size) - assertEquals(listOf("node"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("JsonNodeType"), methodParameters.map { it.returnType() }.toList()) + assertEquals("complex_args_full_typed", name) + assertEquals(null, returnTypeNode) + assertEquals(1, parameters.size) + assertEquals(listOf("node"), parameters.map { it.name }.toList()) + assertEquals(listOf("JsonNodeType"), parameters.map { it.type }.toList()) } } @Test fun methodInfoTest2ManyArgs() { val methodInfos = splitMethods(createPath("1.py")) - val method = methodInfos.firstOrNull { it.name() == "func_dif_args_typed_return" } + val method = 
methodInfos.firstOrNull { it.name == "func_dif_args_typed_return" } assertNotNull(method) with(method) { - assertEquals("func_dif_args_typed_return", name()) - assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) - assertEquals(6, methodParameters.size) - assertEquals(listOf("a", "b", "c", "d", "e", "f"), methodParameters.map { it.name() }.toList()) - assertEquals(emptyList(), methodParameters.mapNotNull { it.returnType() }.toList()) + assertEquals("func_dif_args_typed_return", name) + assertEquals("Constant-int", returnTypeNode?.getTypeLabel()) + assertEquals(6, parameters.size) + assertEquals(listOf("a", "b", "c", "d", "e", "f"), parameters.map { it.name }.toList()) + assertEquals(emptyList(), parameters.mapNotNull { it.type }.toList()) } } @Test fun methodInfoTest3EnclosingClass() { val methodInfos = splitMethods(createPath("2.py")) - val method = methodInfos.firstOrNull { it.name() == "foo_typed" } + val method = methodInfos.firstOrNull { it.name == "foo_typed" } assertNotNull(method) with(method) { - assertEquals("foo_typed", name()) - assertEquals("A", enclosingElementName()) - assertEquals(null, this.method.returnTypeNode) - assertEquals(3, methodParameters.size) - assertEquals(listOf("self", "x", "y"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf(null, "int", "int"), methodParameters.map { it.returnType() }.toList()) + assertEquals("foo_typed", name) + assertEquals("A", enclosingElementName) + assertEquals(null, returnTypeNode) + assertEquals(3, parameters.size) + assertEquals(listOf("self", "x", "y"), parameters.map { it.name }.toList()) + assertEquals(listOf(null, "int", "int"), parameters.map { it.type }.toList()) } } @Test fun methodInfoTest4EnclosingClass() { val methodInfos = splitMethods(createPath("2.py")) - val method = methodInfos.firstOrNull { it.name() == "bar_typed" } + val method = methodInfos.firstOrNull { it.name == "bar_typed" } assertNotNull(method) with(method) { - assertEquals("bar_typed", 
name()) - assertEquals("C", enclosingElementName()) - assertEquals(null, this.method.returnTypeNode) - assertEquals(2, methodParameters.size) - assertEquals(listOf("self", "x"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf(null, "int"), methodParameters.map { it.returnType() }.toList()) + assertEquals("bar_typed", name) + assertEquals("C", enclosingElementName) + assertEquals(null, returnTypeNode) + assertEquals(2, parameters.size) + assertEquals(listOf("self", "x"), parameters.map { it.name }.toList()) + assertEquals(listOf(null, "int"), parameters.map { it.type }.toList()) } } @Test fun methodInfoTest5AsyncDef() { val methodInfos = splitMethods(createPath("3.py")) - val method = methodInfos.firstOrNull { it.name() == "async_schrecklich_typed" } + val method = methodInfos.firstOrNull { it.name == "async_schrecklich_typed" } assertNotNull(method) with(method) { - assertEquals("async_schrecklich_typed", name()) - assertEquals("AsyncFunctionDef", this.method.root.getTypeLabel()) - assertEquals(null, enclosingElementName()) - assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) - assertEquals(4, methodParameters.size) - assertEquals(listOf("event", "x", "args", "kwargs"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("str", "int", null, null), methodParameters.map { it.returnType() }.toList()) + assertEquals("async_schrecklich_typed", name) + assertEquals("AsyncFunctionDef", root.getTypeLabel()) + assertEquals(null, enclosingElementName) + assertEquals("Constant-int", returnTypeNode?.getTypeLabel()) + assertEquals(4, parameters.size) + assertEquals(listOf("event", "x", "args", "kwargs"), parameters.map { it.name }.toList()) + assertEquals(listOf("str", "int", null, null), parameters.map { it.type }.toList()) } } @Test fun methodInfoTest6Doc() { val methodInfos = splitMethods(createPath("3.py")) - val method = methodInfos.firstOrNull { it.name() == "async_simple_no_typed" } + val method = 
methodInfos.firstOrNull { it.name == "async_simple_no_typed" } assertNotNull(method) with(method) { - assertEquals("async_simple_no_typed", name()) - assertEquals("AsyncFunctionDef", this.method.root.getTypeLabel()) - assertEquals(null, enclosingElementName()) + assertEquals("async_simple_no_typed", name) + assertEquals("AsyncFunctionDef", root.getTypeLabel()) + assertEquals(null, enclosingElementName) assertEquals( "\n async doc\n ", - this.method.root.getChildOfType("body") + root.getChildOfType("body") ?.getChildOfType("Expr") ?.getChildOfType("Constant-str") ?.getToken() ) - assertEquals(4, methodParameters.size) + assertEquals(4, parameters.size) assertEquals( listOf("gh", "original_issue", "branch", "backport_pr_number"), - methodParameters.map { it.name() }.toList() + parameters.map { it.name }.toList() ) - assertEquals(listOf(null, null, null, null), methodParameters.map { it.returnType() }.toList()) + assertEquals(listOf(null, null, null, null), parameters.map { it.type }.toList()) } } @Test fun methodInfoTest7InnerFunc() { val methodInfos = splitMethods(createPath("4.py")) - val method = methodInfos.firstOrNull { it.name() == "foo_2" } + val method = methodInfos.firstOrNull { it.name == "foo_2" } assertNotNull(method) with(method) { - assertEquals("foo_2", name()) - assertEquals("foo_1", method.method.root.parent?.wrappedNode?.parent?.label) - assertEquals(null, enclosingElementName()) - assertEquals("Constant-NoneType", this.method.returnTypeNode?.getTypeLabel()) - assertEquals(1, methodParameters.size) - assertEquals(listOf("c"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf(null), methodParameters.map { it.returnType() }.toList()) + assertEquals("foo_2", name) + assertEquals("foo_1", method.root.parent?.wrappedNode?.parent?.label) + assertEquals(null, enclosingElementName) + assertEquals("Constant-NoneType", this.returnTypeNode?.getTypeLabel()) + assertEquals(1, parameters.size) + assertEquals(listOf("c"), parameters.map { it.name 
}.toList()) + assertEquals(listOf(null), parameters.map { it.type }.toList()) } } @Test fun methodInfoTest8InnerFunc() { val methodInfos = splitMethods(createPath("4.py")) - val method = methodInfos.firstOrNull { it.name() == "bar_2" } + val method = methodInfos.firstOrNull { it.name == "bar_2" } assertNotNull(method) with(method) { - assertEquals("bar_2", name()) - assertEquals("bar_1", method.method.root.parent?.wrappedNode?.parent?.label) - assertEquals(null, enclosingElementName()) - assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) - assertEquals(2, methodParameters.size) - assertEquals(listOf("d", "e"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("int", "int"), methodParameters.map { it.returnType() }.toList()) + assertEquals("bar_2", name) + assertEquals("bar_1", method.root.parent?.wrappedNode?.parent?.label) + assertEquals(null, enclosingElementName) + assertEquals("Constant-int", this.returnTypeNode?.getTypeLabel()) + assertEquals(2, parameters.size) + assertEquals(listOf("d", "e"), parameters.map { it.name }.toList()) + assertEquals(listOf("int", "int"), parameters.map { it.type }.toList()) } } } From 24f86628efbdb437f4e17d3542598095a74b2829 Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 25 Apr 2021 16:35:39 +0300 Subject: [PATCH 073/308] preorder moved into Node --- .../kotlin/astminer/cli/FilterPredicates.kt | 2 +- .../kotlin/astminer/cli/LabelExtractors.kt | 2 +- src/main/kotlin/astminer/cli/ProjectParser.kt | 2 +- src/main/kotlin/astminer/common/TreeUtil.kt | 12 +++---- .../astminer/common/model/ParsingModel.kt | 36 ++++++++++++++++++- .../astminer/featureextraction/TreeFeature.kt | 2 +- .../javascript/JavaScriptMethodSplitter.kt | 2 +- .../gumtree/java/GumTreeJavaMethodSplitter.kt | 2 +- .../python/GumTreePythonMethodSplitter.kt | 2 +- .../astminer/storage/ast/CsvAstStorage.kt | 2 +- .../astminer/storage/ast/DotAstStorage.kt | 2 +- .../astminer/parse/antlr/AntrlUtilTest.kt | 2 +- 12 files changed, 51 
insertions(+), 17 deletions(-) diff --git a/src/main/kotlin/astminer/cli/FilterPredicates.kt b/src/main/kotlin/astminer/cli/FilterPredicates.kt index 0d39094a..b5bf0355 100644 --- a/src/main/kotlin/astminer/cli/FilterPredicates.kt +++ b/src/main/kotlin/astminer/cli/FilterPredicates.kt @@ -2,7 +2,7 @@ package astminer.cli import astminer.common.model.MethodInfo import astminer.common.model.Node -import astminer.common.preOrder +/*import astminer.common.preOrder*/ import astminer.common.splitToSubtokens abstract class MethodFilterPredicate { diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index e26eba0d..a9a52f55 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -3,7 +3,7 @@ package astminer.cli import astminer.common.model.MethodInfo import astminer.common.model.Node import astminer.common.model.ParseResult -import astminer.common.preOrder +/*import astminer.common.preOrder*/ import astminer.common.setTechnicalToken import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.java.JavaMethodSplitter diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt index cccf4c16..cf56ed15 100644 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ b/src/main/kotlin/astminer/cli/ProjectParser.kt @@ -3,7 +3,7 @@ package astminer.cli import astminer.storage.ast.CsvAstStorage import astminer.storage.ast.DotAstStorage import astminer.common.getProjectFilesWithExtension -import astminer.common.preOrder +/*import astminer.common.preOrder*/ import astminer.storage.Storage import astminer.storage.TokenProcessor import com.github.ajalt.clikt.core.CliktCommand diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index dcbaa9e7..ef76d964 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -10,19 
+10,19 @@ fun Node.postOrderIterator(): Iterator { return postOrder().listIterator() } -fun Node.preOrderIterator(): Iterator { +/*fun Node.preOrderIterator(): Iterator { return preOrder().listIterator() -} +}*/ fun doTraversePostOrder(node: Node, resultList: MutableList) { node.children.forEach { doTraversePostOrder(it, resultList) } resultList.add(node) } -fun doTraversePreOrder(node: Node, resultList: MutableList) { +/*fun doTraversePreOrder(node: Node, resultList: MutableList) { resultList.add(node) node.children.forEach { doTraversePreOrder(it, resultList) } -} +}*/ fun Node.postOrder(): List { val result: MutableList = ArrayList() @@ -30,11 +30,11 @@ fun Node.postOrder(): List { return result } -fun Node.preOrder(): List { +/*fun Node.preOrder(): List { val result: MutableList = ArrayList() doTraversePreOrder(this, result) return result -} +}*/ const val DEFAULT_TOKEN = "EMPTY_TOKEN" const val TECHNICAL_TOKEN_KEY = "technical_token" diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 140ee28d..53cc5b4f 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -3,6 +3,8 @@ package astminer.common.model import astminer.cli.LabeledResult import java.io.File import java.io.InputStream +import java.util.* +import kotlin.collections.HashMap abstract class Node{ @@ -29,7 +31,39 @@ abstract class Node{ open fun getChildOfType(typeLabel: String) = getChildrenOfType(typeLabel).firstOrNull() abstract fun removeChildrenOfType(typeLabel: String) - //TODO(move orders here) + + fun preOrderIterator(): Iterator = PreOrderIterator(this) + open fun preOrder(): List = PreOrderIterator(this).asSequence().toList() +} + +class PreOrderIterator(root: Node): Iterator { + private val stack = ArrayDeque() + + init { + stack.push(root) + } + + override fun hasNext(): Boolean { + return stack.isNotEmpty() + } + + override fun next(): Node { 
+ val currentNode = stack.pop() + currentNode.children.asReversed().forEach { stack.push(it) } + return currentNode + } +} + +class PostOrderIterator(root: Node): Iterator { + + override fun hasNext(): Boolean { + TODO("Not yet implemented") + } + + override fun next(): Node { + TODO("Not yet implemented") + } + } interface Parser { diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt index 1c54b1b4..d51229ab 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt +++ b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt @@ -1,7 +1,7 @@ package astminer.featureextraction import astminer.common.model.Node -import astminer.common.preOrder +/*import astminer.common.preOrder*/ /** * Interface that describes tree feature. diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt index b933165f..1369a8a7 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt @@ -1,7 +1,7 @@ package astminer.parse.antlr.javascript import astminer.common.model.* -import astminer.common.preOrder +/*import astminer.common.preOrder*/ import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.decompressTypeLabel diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt index 74e27e8f..a0064222 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt @@ -1,7 +1,7 @@ package astminer.parse.gumtree.java import astminer.common.model.* -import astminer.common.preOrder +/*import astminer.common.preOrder*/ import astminer.parse.gumtree.GumTreeNode private fun 
GumTreeNode.isTypeNode() = typeLabel.endsWith("Type") diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt index 5d472cf8..8babeea5 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt @@ -5,7 +5,7 @@ import astminer.common.model.MethodInfo import astminer.common.model.MethodNode import astminer.common.model.ParameterNode import astminer.common.model.TreeMethodSplitter -import astminer.common.preOrder +/*import astminer.common.preOrder*/ import astminer.parse.gumtree.GumTreeNode class GumTreePythonMethodSplitter : TreeMethodSplitter { diff --git a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt index 099af188..e00bc23d 100644 --- a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -2,7 +2,7 @@ package astminer.storage.ast import astminer.cli.LabeledResult import astminer.common.model.Node -import astminer.common.preOrder +/*import astminer.common.preOrder*/ import astminer.common.storage.* import astminer.storage.Storage import java.io.File diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index c709341f..d9fb002d 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -2,7 +2,7 @@ package astminer.storage.ast import astminer.cli.LabeledResult import astminer.common.model.Node -import astminer.common.preOrder +/*import astminer.common.preOrder*/ import astminer.common.storage.RankedIncrementalIdStorage import astminer.storage.Storage import astminer.storage.TokenProcessor diff --git a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt 
b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt index 6bede722..29ed651e 100644 --- a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt @@ -1,6 +1,6 @@ package astminer.parse.antlr -import astminer.common.preOrder +/*import astminer.common.preOrder*/ import astminer.parse.antlr.java.JavaParser import org.junit.Assert import org.junit.Test From 6afd62097954c2083a636edd78fec3984ceb0d85 Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 25 Apr 2021 16:52:40 +0300 Subject: [PATCH 074/308] test commit --- .../kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt index 61e58b34..b0739a96 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt @@ -43,7 +43,6 @@ class PythonMethodSplitter : TreeMethodSplitter { parametersRoot != null -> getListOfParameters(parametersRoot) else -> emptyList() } - return MethodInfo( MethodNode(methodNode, null, methodName), ElementNode(classRoot, className), From 54a5201cc20cae4c7f9e0019864104564c5bd1bf Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 26 Apr 2021 17:56:43 +0500 Subject: [PATCH 075/308] removed returnTypeNode from FunctionInfo --- .../kotlin/astminer/common/model/FunctionInfo.kt | 2 -- ...est.kt => GumTreePythonMethodSplitterTest.kt} | 16 ++++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) rename src/test/kotlin/astminer/parse/gumtree/python/{GumTreeJavaMethodSplitterTest.kt => GumTreePythonMethodSplitterTest.kt} (93%) diff --git a/src/main/kotlin/astminer/common/model/FunctionInfo.kt b/src/main/kotlin/astminer/common/model/FunctionInfo.kt index c393febf..80238fee 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfo.kt +++ 
b/src/main/kotlin/astminer/common/model/FunctionInfo.kt @@ -23,8 +23,6 @@ interface FunctionInfo { get() = notImplemented("parameters") val returnType: String? get() = notImplemented("returnType") - val returnTypeNode: T? - get() = notImplemented("returnTypeNode") // is null because can be only from a small set like {variableDeclaration, classDeclaration..} // for instance it cannot be the root of the tree diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt similarity index 93% rename from src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt rename to src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt index 7592b5eb..30f9ca8b 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt @@ -8,7 +8,7 @@ import java.io.File import kotlin.test.assertEquals import kotlin.test.assertNotNull -class GumTreeJavaMethodSplitterTest { +class GumTreePythonMethodSplitterTest { private fun parse(filename: String): GumTreeNode? 
= GumTreePythonParser().parseInputStream(File(filename).inputStream()) @@ -45,7 +45,7 @@ class GumTreeJavaMethodSplitterTest { assertNotNull(method) with(method) { assertEquals("complex_args_full_typed", name) - assertEquals(null, returnTypeNode) + assertEquals(null, returnType) assertEquals(1, parameters.size) assertEquals(listOf("node"), parameters.map { it.name }.toList()) assertEquals(listOf("JsonNodeType"), parameters.map { it.type }.toList()) @@ -59,7 +59,7 @@ class GumTreeJavaMethodSplitterTest { assertNotNull(method) with(method) { assertEquals("func_dif_args_typed_return", name) - assertEquals("Constant-int", returnTypeNode?.getTypeLabel()) + assertEquals("int", returnType) assertEquals(6, parameters.size) assertEquals(listOf("a", "b", "c", "d", "e", "f"), parameters.map { it.name }.toList()) assertEquals(emptyList(), parameters.mapNotNull { it.type }.toList()) @@ -74,7 +74,7 @@ class GumTreeJavaMethodSplitterTest { with(method) { assertEquals("foo_typed", name) assertEquals("A", enclosingElementName) - assertEquals(null, returnTypeNode) + assertEquals(null, returnType) assertEquals(3, parameters.size) assertEquals(listOf("self", "x", "y"), parameters.map { it.name }.toList()) assertEquals(listOf(null, "int", "int"), parameters.map { it.type }.toList()) @@ -89,7 +89,7 @@ class GumTreeJavaMethodSplitterTest { with(method) { assertEquals("bar_typed", name) assertEquals("C", enclosingElementName) - assertEquals(null, returnTypeNode) + assertEquals(null, returnType) assertEquals(2, parameters.size) assertEquals(listOf("self", "x"), parameters.map { it.name }.toList()) assertEquals(listOf(null, "int"), parameters.map { it.type }.toList()) @@ -105,7 +105,7 @@ class GumTreeJavaMethodSplitterTest { assertEquals("async_schrecklich_typed", name) assertEquals("AsyncFunctionDef", root.getTypeLabel()) assertEquals(null, enclosingElementName) - assertEquals("Constant-int", returnTypeNode?.getTypeLabel()) + assertEquals("int", returnType) assertEquals(4, parameters.size) 
assertEquals(listOf("event", "x", "args", "kwargs"), parameters.map { it.name }.toList()) assertEquals(listOf("str", "int", null, null), parameters.map { it.type }.toList()) @@ -146,7 +146,7 @@ class GumTreeJavaMethodSplitterTest { assertEquals("foo_2", name) assertEquals("foo_1", method.root.parent?.wrappedNode?.parent?.label) assertEquals(null, enclosingElementName) - assertEquals("Constant-NoneType", this.returnTypeNode?.getTypeLabel()) + assertEquals("None", returnType) assertEquals(1, parameters.size) assertEquals(listOf("c"), parameters.map { it.name }.toList()) assertEquals(listOf(null), parameters.map { it.type }.toList()) @@ -162,7 +162,7 @@ class GumTreeJavaMethodSplitterTest { assertEquals("bar_2", name) assertEquals("bar_1", method.root.parent?.wrappedNode?.parent?.label) assertEquals(null, enclosingElementName) - assertEquals("Constant-int", this.returnTypeNode?.getTypeLabel()) + assertEquals("int", returnType) assertEquals(2, parameters.size) assertEquals(listOf("d", "e"), parameters.map { it.name }.toList()) assertEquals(listOf("int", "int"), parameters.map { it.type }.toList()) From ddb55492a259bf63a72405990016d5938d8aeaf3 Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 26 Apr 2021 18:54:36 +0500 Subject: [PATCH 076/308] grouped FunctionInfo properties into a new EnclosingElement dataclass --- .../astminer/common/model/FunctionInfo.kt | 21 ++++++++++--------- .../kotlin/astminer/examples/AllJavaFiles.kt | 2 +- .../astminer/examples/AllJavaMethods.kt | 2 +- .../astminer/examples/AllPythonMethods.kt | 2 +- .../antlr/java/JavaMethodSplitterTest.kt | 4 ++-- .../JavaScriptMethodSplitterTest.kt | 19 +++++++++-------- .../antlr/python/PythonMethodSplitterTest.kt | 4 ++-- .../parse/cpp/FuzzyMethodSplitterTest.kt | 4 ++-- .../java/GumTreeJavaMethodSplitterTest.kt | 10 ++++----- .../python/GumTreePythonMethodSplitterTest.kt | 12 +++++------ 10 files changed, 41 insertions(+), 39 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/FunctionInfo.kt 
b/src/main/kotlin/astminer/common/model/FunctionInfo.kt index 80238fee..7db831a6 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfo.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfo.kt @@ -23,22 +23,23 @@ interface FunctionInfo { get() = notImplemented("parameters") val returnType: String? get() = notImplemented("returnType") - - // is null because can be only from a small set like {variableDeclaration, classDeclaration..} - // for instance it cannot be the root of the tree - // thats why it is probably called "element" and not "node" - val enclosingElement: T? - get() = notImplemented("enclosingNode") - val enclosingElementName: String? - get() = notImplemented("enclosingElementName") - val className: String? - get() = notImplemented("className") + val enclosingElement: EnclosingElement? + get() = notImplemented("enclosingElement") val isConstructor: Boolean get() = notImplemented("isConstructor") } data class MethodInfoParameter(val name: String, val type: String?) 
+data class EnclosingElement(val type: EnclosingElementType, val name: String?, val root: T) + +enum class EnclosingElementType { + Class, + Function, + Method, + VariableDeclaration, +} + // TODO: should be removed class DummyFunctionInfo : FunctionInfo diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index 69f059e1..be342c49 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -20,7 +20,7 @@ fun allJavaFiles() { JavaMethodSplitter().splitIntoMethods(node).forEach { println(it.name) println(it.returnType) - println(it.className) + println(it.enclosingElement?.name) it.parameters.forEach { parameter -> println("${parameter.name} ${parameter.type}") } diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 05f26779..28029dbf 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -12,7 +12,7 @@ import java.io.File private fun getCsvFriendlyMethodId(functionInfo: FunctionInfo): String { - val className = functionInfo.className ?: "" + val className = functionInfo.enclosingElement?.name ?: "" val methodName = functionInfo.name val parameterTypes = functionInfo.parameters.joinToString("|") { it.name } return "$className.$methodName($parameterTypes)" diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index fa755ed0..cfc55ef4 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -10,7 +10,7 @@ import astminer.storage.path.PathBasedStorageConfig import java.io.File private fun getCsvFriendlyMethodId(functionInfo: FunctionInfo): String { - val className = functionInfo.className ?: "" + val className = functionInfo.enclosingElement?.name ?: "" val methodName = 
functionInfo.name val parameterTypes = functionInfo.parameters.joinToString("|") { it.name } return "$className.$methodName($parameterTypes)" diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt index ed3cf492..8c0f8e6c 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt @@ -61,14 +61,14 @@ class JavaMethodSplitterTest { fun testFunctionInClass() { val methodClass = functionInfos.find { it.name == "functionInClass1" } assertNotNull(methodClass) - assertEquals( "Class1", methodClass.className) + assertEquals( "Class1", methodClass.enclosingElement?.name) } @Test fun testFunctionInNestedClass() { val methodClass = functionInfos.find { it.name == "functionInClass2" } assertNotNull(methodClass) - assertEquals( "Class2", methodClass.className) + assertEquals( "Class2", methodClass.enclosingElement?.name) } @Test diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt index fc9ef6b3..c49177bb 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt @@ -1,5 +1,6 @@ package astminer.parse.antlr.javascript +import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo import astminer.parse.antlr.AntlrNode import org.junit.Test @@ -33,12 +34,12 @@ class JavaScriptMethodSplitterTest { @Test fun testValidMethodInfo() { - fun String.getEnclosingElementType(): String { - return when { - "functionDeclaration" in this -> "fun" - "classDeclaration" in this -> "class" - "methodDefinition" in this -> "method" - "variableDeclaration" in this -> "var" + fun EnclosingElementType.getEnclosingElementType(): String { + return 
when (this) { + EnclosingElementType.Function -> "fun" + EnclosingElementType.Class -> "class" + EnclosingElementType.Method -> "method" + EnclosingElementType.VariableDeclaration -> "var" else -> "" } } @@ -46,9 +47,9 @@ class JavaScriptMethodSplitterTest { fun FunctionInfo.getJsonInfo(): String { return "info : {" + "name : ${name}, " + - "args : ${parameters.map { it.name }.joinToString(", ")}, " + - "enclosing element : ${enclosingElement?.getTypeLabel()?.getEnclosingElementType()}, " + - "enclosing element name : ${className}" + + "args : ${parameters.joinToString(", ") { it.name }}, " + + "enclosing element : ${enclosingElement?.type?.getEnclosingElementType()}, " + + "enclosing element name : ${enclosingElement?.name}" + "}" } diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt index 901fe4e8..e323c7e9 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt @@ -42,14 +42,14 @@ class PythonMethodSplitterTest { fun testFunctionInClass() { val methodClass = methodInfos.find { it.name == "funInClass1" } assertNotNull(methodClass) - assertEquals( "Class1", methodClass.enclosingElementName) + assertEquals( "Class1", methodClass.enclosingElement?.name) } @Test fun testFunctionInNestedClass() { val methodClass = methodInfos.find { it.name == "funInClass2" } assertNotNull(methodClass) - assertEquals( "Class2", methodClass.enclosingElementName) + assertEquals( "Class2", methodClass.enclosingElement?.name) } @Test diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index 33c9c580..f47376e4 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -73,14 +73,14 @@ class 
FuzzyMethodSplitterTest { fun testFunctionInClass() { val methodClass = methodInfos.find { it.name == "functionInClass1" } assertNotNull(methodClass) - assertEquals( "Class1", methodClass.enclosingElementName) + assertEquals( "Class1", methodClass.enclosingElement?.name) } @Test fun testFunctionInNestedClass() { val methodClass = methodInfos.find { it.name == "functionInClass2" } assertNotNull(methodClass) - assertEquals( "Class2", methodClass.enclosingElementName) + assertEquals( "Class2", methodClass.enclosingElement?.name) } @Test diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt index 2ed6cd43..1c966691 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt @@ -25,7 +25,7 @@ class GumTreeJavaMethodSplitterTest { with(methodInfos.first()) { assertEquals("fun", name) assertEquals("void", returnType) - assertEquals("SingleFunction", enclosingElementName) + assertEquals("SingleFunction", enclosingElement?.name) assertEquals(listOf("args", "param"), parameters.map { it.name }.toList()) assertEquals(listOf("String[]", "int"), parameters.map { it.type }.toList()) } @@ -40,7 +40,7 @@ class GumTreeJavaMethodSplitterTest { with(methodInfos.first()) { assertEquals("main", name) assertEquals("void", returnType) - assertEquals("InnerClass", enclosingElementName) + assertEquals("InnerClass", enclosingElement?.name) assertEquals(listOf("args"), parameters.map { it.name }.toList()) assertEquals(listOf("String[]"), parameters.map { it.type }.toList()) } @@ -54,14 +54,14 @@ class GumTreeJavaMethodSplitterTest { with(methodInfos.first()) { assertEquals("main", name) assertEquals("void", returnType) - assertEquals("InnerClass", enclosingElementName) + assertEquals("InnerClass", enclosingElement?.name) assertEquals(listOf("args"), parameters.map { 
it.name }.toList()) assertEquals(listOf("String[]"), parameters.map { it.type }.toList()) } with(methodInfos.last()) { assertEquals("fun", name) assertEquals("void", returnType) - assertEquals("SingleMethodInnerClass", enclosingElementName) + assertEquals("SingleMethodInnerClass", enclosingElement?.name) assertEquals(listOf("args", "param"), parameters.map { it.name }.toList()) assertEquals(listOf("String[]", "int"), parameters.map { it.type }.toList()) } @@ -75,7 +75,7 @@ class GumTreeJavaMethodSplitterTest { with(methodInfos.first()) { assertEquals("fun", name) assertEquals("int", returnType) - assertEquals("SingleFunction", enclosingElementName) + assertEquals("SingleFunction", enclosingElement?.name) assertEquals(listOf("args", "param"), parameters.map { it.name }.toList()) assertEquals(listOf("int", "SingleFunction"), parameters.map { it.type }.toList()) } diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt index 30f9ca8b..37ec3f38 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt @@ -73,7 +73,7 @@ class GumTreePythonMethodSplitterTest { assertNotNull(method) with(method) { assertEquals("foo_typed", name) - assertEquals("A", enclosingElementName) + assertEquals("A", enclosingElement?.name) assertEquals(null, returnType) assertEquals(3, parameters.size) assertEquals(listOf("self", "x", "y"), parameters.map { it.name }.toList()) @@ -88,7 +88,7 @@ class GumTreePythonMethodSplitterTest { assertNotNull(method) with(method) { assertEquals("bar_typed", name) - assertEquals("C", enclosingElementName) + assertEquals("C", enclosingElement?.name) assertEquals(null, returnType) assertEquals(2, parameters.size) assertEquals(listOf("self", "x"), parameters.map { it.name }.toList()) @@ -104,7 +104,7 @@ class 
GumTreePythonMethodSplitterTest { with(method) { assertEquals("async_schrecklich_typed", name) assertEquals("AsyncFunctionDef", root.getTypeLabel()) - assertEquals(null, enclosingElementName) + assertEquals(null, enclosingElement?.name) assertEquals("int", returnType) assertEquals(4, parameters.size) assertEquals(listOf("event", "x", "args", "kwargs"), parameters.map { it.name }.toList()) @@ -120,7 +120,7 @@ class GumTreePythonMethodSplitterTest { with(method) { assertEquals("async_simple_no_typed", name) assertEquals("AsyncFunctionDef", root.getTypeLabel()) - assertEquals(null, enclosingElementName) + assertEquals(null, enclosingElement?.name) assertEquals( "\n async doc\n ", root.getChildOfType("body") @@ -145,7 +145,7 @@ class GumTreePythonMethodSplitterTest { with(method) { assertEquals("foo_2", name) assertEquals("foo_1", method.root.parent?.wrappedNode?.parent?.label) - assertEquals(null, enclosingElementName) + assertEquals(null, enclosingElement?.name) assertEquals("None", returnType) assertEquals(1, parameters.size) assertEquals(listOf("c"), parameters.map { it.name }.toList()) @@ -161,7 +161,7 @@ class GumTreePythonMethodSplitterTest { with(method) { assertEquals("bar_2", name) assertEquals("bar_1", method.root.parent?.wrappedNode?.parent?.label) - assertEquals(null, enclosingElementName) + assertEquals(null, enclosingElement?.name) assertEquals("int", returnType) assertEquals(2, parameters.size) assertEquals(listOf("d", "e"), parameters.map { it.name }.toList()) From 2f9ae03874a10c259d0815eaf1a219c3df84703e Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 26 Apr 2021 22:52:38 +0500 Subject: [PATCH 077/308] refactored filters --- .../kotlin/astminer/cli/LabelExtractors.kt | 1 + src/main/kotlin/astminer/cli/utils.kt | 1 + .../astminer/featureextraction/TreeFeature.kt | 2 ++ .../kotlin/astminer/filters/CommonFilters.kt | 21 +++++++++++++++++++ .../kotlin/astminer/filters/FileFilters.kt | 8 +++++++ .../MethodFilters.kt} | 12 +---------- 6 files changed, 34 
insertions(+), 11 deletions(-) create mode 100644 src/main/kotlin/astminer/filters/CommonFilters.kt create mode 100644 src/main/kotlin/astminer/filters/FileFilters.kt rename src/main/kotlin/astminer/{cli/FilterPredicates.kt => filters/MethodFilters.kt} (83%) diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index df2b2a92..789d8196 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -5,6 +5,7 @@ import astminer.common.model.ParseResult import astminer.common.model.FunctionInfo import astminer.common.preOrder import astminer.common.setTechnicalToken +import astminer.filters.MethodFilter import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.javascript.JavaScriptMethodSplitter diff --git a/src/main/kotlin/astminer/cli/utils.kt b/src/main/kotlin/astminer/cli/utils.kt index 44265aff..d8071535 100644 --- a/src/main/kotlin/astminer/cli/utils.kt +++ b/src/main/kotlin/astminer/cli/utils.kt @@ -6,6 +6,7 @@ import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.common.model.Node import astminer.common.model.Parser +import astminer.filters.* import astminer.parse.antlr.javascript.JavaScriptParser fun getParser( diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt index 7a7401f0..708d9461 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt +++ b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt @@ -52,6 +52,8 @@ object NumberOfNodes : TreeFeature { } } +fun Node.treeSize() = NumberOfNodes.compute(this) + /** * Tree feature for computing list of all node tokens from a given tree. 
*/ diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt new file mode 100644 index 00000000..547ad711 --- /dev/null +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -0,0 +1,21 @@ +package astminer.filters + +import astminer.common.model.FunctionInfo +import astminer.common.model.Node +import astminer.common.model.ParseResult +import astminer.featureextraction.treeSize + +class TreeSizeFilterPredicate(private val maxSize: Int) : MethodFilter, FileFilter { + private fun isTreeFiltered(root: Node): Boolean { + return if (maxSize == -1) { + true + } else { + root.treeSize() <= maxSize + } + } + + override fun isFiltered(parseResult: ParseResult) = + if (parseResult.root != null) isTreeFiltered(parseResult.root) else false + + override fun isFiltered(functionInfo: FunctionInfo) = isTreeFiltered(functionInfo.root) +} diff --git a/src/main/kotlin/astminer/filters/FileFilters.kt b/src/main/kotlin/astminer/filters/FileFilters.kt new file mode 100644 index 00000000..ac1dc126 --- /dev/null +++ b/src/main/kotlin/astminer/filters/FileFilters.kt @@ -0,0 +1,8 @@ +package astminer.filters + +import astminer.common.model.Node +import astminer.common.model.ParseResult + +interface FileFilter { + fun isFiltered(parseResult: ParseResult): Boolean +} diff --git a/src/main/kotlin/astminer/cli/FilterPredicates.kt b/src/main/kotlin/astminer/filters/MethodFilters.kt similarity index 83% rename from src/main/kotlin/astminer/cli/FilterPredicates.kt rename to src/main/kotlin/astminer/filters/MethodFilters.kt index 754dee06..c3df2e64 100644 --- a/src/main/kotlin/astminer/cli/FilterPredicates.kt +++ b/src/main/kotlin/astminer/filters/MethodFilters.kt @@ -1,4 +1,4 @@ -package astminer.cli +package astminer.filters import astminer.common.model.FunctionInfo import astminer.common.model.Node @@ -43,13 +43,3 @@ class MethodAnyNodeWordsNumberFilter(private val maxWordsNumber: Int) : MethodFi } } } - -class 
TreeSizeFilterPredicate(private val maxSize: Int) : MethodFilter { - override fun isFiltered(functionInfo: FunctionInfo): Boolean { - return if (maxSize == -1) { - true - } else { - functionInfo.root.preOrder().size <= maxSize - } - } -} From ecb0198021a613dbae420afce057b15c3ab6d28a Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 27 Apr 2021 14:38:15 +0300 Subject: [PATCH 078/308] prototype of antlr java function info added --- .../kotlin/astminer/parse/antlr/AntlrUtil.kt | 16 ++++ .../parse/antlr/java/AntlrJavaFunctionInfo.kt | 84 ++++++++++++++++++ .../parse/antlr/java/JavaMethodSplitter.kt | 88 +------------------ 3 files changed, 104 insertions(+), 84 deletions(-) create mode 100644 src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 394feac9..52d6852a 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -72,3 +72,19 @@ fun compressTree(root: AntlrNode): AntlrNode { fun decompressTypeLabel(typeLabel: String) = typeLabel.split("|") + +fun Node.hasLastLabel(label: String): Boolean { + return decompressTypeLabel(this.getTypeLabel()).last() == label +} + +fun Node.hasLastLabel(labels: List): Boolean { + return labels.contains(decompressTypeLabel(this.getTypeLabel()).last()) +} + +fun Node.hasFirstLabel(label: String): Boolean { + return decompressTypeLabel(this.getTypeLabel()).first() == label +} + +fun Node.firstLabelIn(labels: List): Boolean { + return labels.contains(decompressTypeLabel(this.getTypeLabel()).first()) +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt new file mode 100644 index 00000000..56d517d3 --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -0,0 +1,84 @@ +package 
astminer.parse.antlr.java + +import astminer.common.model.* +import astminer.parse.antlr.AntlrNode +import astminer.parse.antlr.firstLabelIn +import astminer.parse.antlr.hasLastLabel + +data class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo { + override val nameNode: AntlrNode? = collectNameNode() + override val parameters: List = collectParameters() + override val returnType: String? = collectReturnType() + override val enclosingElement: EnclosingElement? = collectEnclosingClass(root) + + companion object { + private const val METHOD_RETURN_TYPE_NODE = "typeTypeOrVoid" + private const val METHOD_NAME_NODE = "IDENTIFIER" + + private const val CLASS_DECLARATION_NODE = "classDeclaration" + private const val CLASS_NAME_NODE = "IDENTIFIER" + + private const val METHOD_PARAMETER_NODE = "formalParameters" + private const val METHOD_PARAMETER_INNER_NODE = "formalParameterList" + private val METHOD_SINGLE_PARAMETER_NODES = listOf("formalParameter", "lastFormalParameter") + private const val PARAMETER_RETURN_TYPE_NODE = "typeType" + private const val PARAMETER_NAME_NODE = "variableDeclaratorId" + } + + private fun collectNameNode(): AntlrNode? { + return root.getChildOfType(METHOD_NAME_NODE) + } + + private fun collectReturnType(): String? { + val returnTypeNode = root.getChildOfType(METHOD_RETURN_TYPE_NODE) + return returnTypeNode?.let { collectParameterToken(it) } + //TODO(check postprocessing) + } + + private fun collectEnclosingClass(node: AntlrNode?): EnclosingElement? 
{ + return when { + node == null -> null + node.hasLastLabel(CLASS_DECLARATION_NODE) -> EnclosingElement( + type = EnclosingElementType.Class, + name = node.getChildOfType(CLASS_NAME_NODE)?.getToken(), + root = node + ) + else -> collectEnclosingClass(node.getParent() as AntlrNode) + } + } + + private fun collectParameters(): List { + val parametersRoot = root.getChildOfType(METHOD_PARAMETER_NODE) + val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) ?: return emptyList() + + if (innerParametersRoot.hasLastLabel(METHOD_SINGLE_PARAMETER_NODES)) { + return listOf(getParameterInfo(innerParametersRoot)) + } + + return innerParametersRoot.getChildren().filter { + it.firstLabelIn(METHOD_SINGLE_PARAMETER_NODES) + }.map { getParameterInfo(it) } + } + + private fun getParameterInfo(parameterNode: AntlrNode): MethodInfoParameter { + val returnTypeNode = parameterNode.getChildOfType(PARAMETER_RETURN_TYPE_NODE) + val returnTypeToken = returnTypeNode?.let { collectParameterToken(it) } + + val parameterName = parameterNode.getChildOfType(PARAMETER_NAME_NODE)?.getToken() + ?: throw IllegalStateException("Parameter name wasn't found") + + return MethodInfoParameter(parameterName, returnTypeToken) + + } + + //TODO(rename) + private fun collectParameterToken(parameterNode: AntlrNode): String { + if (parameterNode.isLeaf()) { + return parameterNode.getToken() + } + return parameterNode.getChildren().joinToString(separator = "") { child -> + collectParameterToken(child) + } + } +} + diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt index e48a3af9..e4853f06 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt @@ -3,95 +3,15 @@ package astminer.parse.antlr.java import astminer.common.* import astminer.common.model.* import astminer.parse.antlr.AntlrNode -import 
astminer.parse.antlr.decompressTypeLabel +import astminer.parse.antlr.hasLastLabel class JavaMethodSplitter : TreeMethodSplitter { - companion object { - private const val METHOD_NODE = "methodDeclaration" - private const val METHOD_RETURN_TYPE_NODE = "typeTypeOrVoid" - private const val METHOD_NAME_NODE = "IDENTIFIER" - - private const val CLASS_DECLARATION_NODE = "classDeclaration" - private const val CLASS_NAME_NODE = "IDENTIFIER" - - private const val METHOD_PARAMETER_NODE = "formalParameters" - private const val METHOD_PARAMETER_INNER_NODE = "formalParameterList" - private val METHOD_SINGLE_PARAMETER_NODE = listOf("formalParameter", "lastFormalParameter") - private const val PARAMETER_RETURN_TYPE_NODE = "typeType" - private const val PARAMETER_NAME_NODE = "variableDeclaratorId" - } + private val methodNodeType = "methodDeclaration" override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { - decompressTypeLabel(it.getTypeLabel()).last() == METHOD_NODE - } - return dummyMethodInfos() -// return methodRoots.map { collectMethodInfo(it as AntlrNode) } - } - - private fun collectMethodInfo(methodNode: AntlrNode): MethodInfo { - val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) - val methodReturnTypeNode = methodNode.getChildOfType(METHOD_RETURN_TYPE_NODE) - methodReturnTypeNode?.setToken(collectParameterToken(methodReturnTypeNode)) - - val classRoot = getEnclosingClass(methodNode) - val className = classRoot?.getChildOfType(CLASS_NAME_NODE) - - val parametersRoot = methodNode.getChildOfType(METHOD_PARAMETER_NODE) - val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) - - val parametersList = when { - innerParametersRoot != null -> getListOfParameters(innerParametersRoot) - parametersRoot != null -> getListOfParameters(parametersRoot) - else -> emptyList() - } - - return MethodInfo( - MethodNode(methodNode, methodReturnTypeNode, methodName), - ElementNode(classRoot, className), 
- parametersList - ) - } - - private fun getEnclosingClass(node: AntlrNode): AntlrNode? { - if (decompressTypeLabel(node.getTypeLabel()).last() == CLASS_DECLARATION_NODE) { - return node - } - val parentNode = node.getParent() as? AntlrNode - if (parentNode != null) { - return getEnclosingClass(parentNode) - } - return null - } - - private fun getListOfParameters(parametersRoot: AntlrNode): List> { - if (METHOD_SINGLE_PARAMETER_NODE.contains(decompressTypeLabel(parametersRoot.getTypeLabel()).last())) { - return listOf(getParameterInfoFromNode(parametersRoot)) - } - return parametersRoot.getChildren().filter { - val firstType = decompressTypeLabel(it.getTypeLabel()).first() - METHOD_SINGLE_PARAMETER_NODE.contains(firstType) - }.map { - getParameterInfoFromNode(it) - } - } - - private fun getParameterInfoFromNode(parameterRoot: AntlrNode): ParameterNode { - val returnTypeNode = parameterRoot.getChildOfType(PARAMETER_RETURN_TYPE_NODE) - returnTypeNode?.setToken(collectParameterToken(returnTypeNode)) - return ParameterNode( - parameterRoot, - returnTypeNode, - parameterRoot.getChildOfType(PARAMETER_NAME_NODE) - ) - } - - private fun collectParameterToken(parameterRoot: AntlrNode): String { - if (parameterRoot.isLeaf()) { - return parameterRoot.getToken() - } - return parameterRoot.getChildren().joinToString(separator = "") { child -> - collectParameterToken(child) + root.hasLastLabel(methodNodeType) } + return methodRoots.map { AntlrJavaFunctionInfo(it as AntlrNode) } } } \ No newline at end of file From 37515694cb4e7b9b76854113b7b2e361fb7e7ca5 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 27 Apr 2021 15:37:52 +0300 Subject: [PATCH 079/308] antlr node now overrides preorder --- src/main/kotlin/astminer/parse/antlr/AntlrNode.kt | 5 +++-- .../kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt | 2 +- .../parse/antlr/javascript/JavaScriptMethodSplitter.kt | 6 +++--- .../astminer/parse/antlr/python/PythonMethodSplitter.kt | 2 +- 4 files changed, 8 insertions(+), 7 
deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt index 9bed6095..238377aa 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt @@ -2,7 +2,7 @@ package astminer.parse.antlr import astminer.common.model.Node -class AntlrNode(override val typeLabel: String,override var parent: AntlrNode?, token: String?) : Node() { +class AntlrNode(override val typeLabel: String, override var parent: AntlrNode?, token: String?) : Node() { override val children: MutableList = mutableListOf() @@ -22,7 +22,8 @@ class AntlrNode(override val typeLabel: String,override var parent: AntlrNode?, getChildrenOfType(typeLabel).firstOrNull() override fun removeChildrenOfType(typeLabel: String) { - children.removeIf { it.typeLabel == typeLabel } + children.removeIf { it.typeLabel == typeLabel } } + override fun preOrder(): List = super.preOrder().map { it as AntlrNode } } diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt index 6d76de4f..0b983998 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt @@ -25,7 +25,7 @@ class JavaMethodSplitter : TreeMethodSplitter { val methodRoots = root.preOrder().filter { decompressTypeLabel(it.typeLabel).last() == METHOD_NODE } - return methodRoots.map { collectMethodInfo(it as AntlrNode) } + return methodRoots.map { collectMethodInfo(it) } } private fun collectMethodInfo(methodNode: AntlrNode): MethodInfo { diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt index 1369a8a7..8b0d1c55 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ 
b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt @@ -19,9 +19,9 @@ class JavaScriptMethodSplitter : TreeMethodSplitter { override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots: List = root.preOrder().map { node -> when { - node.isArrowElement() -> ArrowElement(node as AntlrNode) - node.isFunctionElement() -> FunctionElement(node as AntlrNode) - node.isMethodElement() -> MethodElement(node as AntlrNode) + node.isArrowElement() -> ArrowElement(node) + node.isFunctionElement() -> FunctionElement(node) + node.isMethodElement() -> MethodElement(node) else -> null } }.filterNotNull() diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt index a6baacca..87eaf6ce 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt @@ -25,7 +25,7 @@ class PythonMethodSplitter : TreeMethodSplitter { val methodRoots = root.preOrder().filter { decompressTypeLabel(it.typeLabel).last() == METHOD_NODE } - return methodRoots.map { collectMethodInfo(it as AntlrNode) } + return methodRoots.map { collectMethodInfo(it) } } private fun collectMethodInfo(methodNode: AntlrNode): MethodInfo { From ed4a4f85f61617bdad9ae2d0cf9dca0e0d2e72e6 Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 27 Apr 2021 17:43:57 +0500 Subject: [PATCH 080/308] made ParseResult.root not nullable --- .../kotlin/astminer/cli/LabelExtractors.kt | 11 ++--------- .../astminer/common/model/ParsingModel.kt | 18 +++++++++++++----- .../kotlin/astminer/parse/ParsingException.kt | 3 +++ .../astminer/parse/antlr/java/JavaParser.kt | 5 +++-- .../parse/antlr/javascript/JavaScriptParser.kt | 5 +++-- .../parse/antlr/python/PythonParser.kt | 5 +++-- .../astminer/parse/fuzzy/cpp/FuzzyCppParser.kt | 11 ++++++----- .../parse/gumtree/java/GumTreeJavaParser.kt | 2 +- 
.../gumtree/python/GumTreePythonParser.kt | 5 +++-- .../kotlin/astminer/cli/LabelExtractorTest.kt | 17 ----------------- .../gumtree/python/GumTreePythonParserTest.kt | 7 +++---- 11 files changed, 40 insertions(+), 49 deletions(-) create mode 100644 src/main/kotlin/astminer/parse/ParsingException.kt diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 0a521109..7ce3f55a 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -36,12 +36,8 @@ abstract class FileLabelExtractor : LabelExtractor { parseResult: ParseResult ): List> { val (root, filePath) = parseResult - return if (root == null) { - emptyList() - } else { - val label = extractLabel(root, filePath) ?: return emptyList() - listOf(LabeledResult(root, label, parseResult.filePath)) - } + val label = extractLabel(root, filePath) ?: return emptyList() + return listOf(LabeledResult(root, label, parseResult.filePath)) } abstract fun extractLabel(root: Node, filePath: String): String? @@ -57,9 +53,6 @@ abstract class MethodLabelExtractor( parseResult: ParseResult ): List> { val (root, filePath) = parseResult - if (root == null) { - return emptyList() - } val fileExtension = File(filePath).extension val methodInfos = when (fileExtension) { "c", "cpp" -> { diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index cd86b451..52c8d0b2 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -1,6 +1,7 @@ package astminer.common.model import astminer.cli.LabeledResult +import astminer.parse.ParsingException import java.io.File import java.io.InputStream @@ -37,7 +38,7 @@ interface Parser { * @param content input stream to parse * @return root of the AST */ - fun parseInputStream(content: InputStream): T? 
+ fun parseInputStream(content: InputStream): T /** * Parse file into an AST. @@ -52,12 +53,19 @@ interface Parser { * @param handleResult handler to invoke on each file parse result */ fun parseFiles(files: List, handleResult: (ParseResult) -> Any?) { - files.forEach { handleResult(parseFile(it)) } + for (file in files) { + try { + handleResult(parseFile(file)) + } catch (parsingException: ParsingException) { + // TODO: all error reporting should be on the surface, in my opinion + println("Failed to parse file ${file.path}: ${parsingException.message}") + } + } } } -data class ParseResult(val root: T?, val filePath: String) { - fun labeledWith(label: String): LabeledResult? = root?.let { LabeledResult(it, label, filePath) } +data class ParseResult(val root: T, val filePath: String) { + fun labeledWith(label: String): LabeledResult = LabeledResult(root, label, filePath) - fun labeledWithFilePath(): LabeledResult? = labeledWith(filePath) + fun labeledWithFilePath(): LabeledResult = labeledWith(filePath) } diff --git a/src/main/kotlin/astminer/parse/ParsingException.kt b/src/main/kotlin/astminer/parse/ParsingException.kt new file mode 100644 index 00000000..b3c721d1 --- /dev/null +++ b/src/main/kotlin/astminer/parse/ParsingException.kt @@ -0,0 +1,3 @@ +package astminer.parse + +class ParsingException(message: String) : IllegalStateException(message) diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt index c2a56e4f..74442807 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt @@ -1,6 +1,7 @@ package astminer.parse.antlr.java import astminer.common.model.Parser +import astminer.parse.ParsingException import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.convertAntlrTree import org.antlr.v4.runtime.CommonTokenStream @@ -11,7 +12,7 @@ import java.io.InputStream import java.lang.Exception class JavaParser : 
Parser { - override fun parseInputStream(content: InputStream): AntlrNode? { + override fun parseInputStream(content: InputStream): AntlrNode { return try { val lexer = Java8Lexer(CharStreams.fromStream(content)) lexer.removeErrorListeners() @@ -21,7 +22,7 @@ class JavaParser : Parser { val context = parser.compilationUnit() convertAntlrTree(context, Java8Parser.ruleNames, Java8Parser.VOCABULARY) } catch (e: Exception) { - null + throw ParsingException("Failed to parse Java code: ${e.message}") } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt index d832b98b..dc821545 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt @@ -1,6 +1,7 @@ package astminer.parse.antlr.javascript import astminer.common.model.Parser +import astminer.parse.ParsingException import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.convertAntlrTree import me.vovak.antlr.parser.JavaScriptLexer @@ -11,7 +12,7 @@ import java.io.InputStream import java.lang.Exception class JavaScriptParser : Parser { - override fun parseInputStream(content: InputStream): AntlrNode? 
{ + override fun parseInputStream(content: InputStream): AntlrNode { return try { val lexer = JavaScriptLexer(CharStreams.fromStream(content)) lexer.removeErrorListeners() @@ -21,7 +22,7 @@ class JavaScriptParser : Parser { val context = parser.program() convertAntlrTree(context, JavaScriptParser.ruleNames, JavaScriptParser.VOCABULARY) } catch (e: Exception) { - null + throw ParsingException("Failed to parse JavaScript code ${e.message}") } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt index 19156e5c..714710ba 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt @@ -3,6 +3,7 @@ package astminer.parse.antlr.python import me.vovak.antlr.parser.Python3Lexer import me.vovak.antlr.parser.Python3Parser import astminer.common.model.Parser +import astminer.parse.ParsingException import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.convertAntlrTree import org.antlr.v4.runtime.CharStreams @@ -11,7 +12,7 @@ import java.io.InputStream import java.lang.Exception class PythonParser : Parser { - override fun parseInputStream(content: InputStream): AntlrNode? 
{ + override fun parseInputStream(content: InputStream): AntlrNode { return try { val lexer = Python3Lexer(CharStreams.fromStream(content)) lexer.removeErrorListeners() @@ -21,7 +22,7 @@ class PythonParser : Parser { val context = parser.file_input() convertAntlrTree(context, Python3Parser.ruleNames, Python3Parser.VOCABULARY) } catch (e: Exception) { - return null + throw ParsingException("Failed to parse Python code: ${e.message}") } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt index 67633eea..d9735f72 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt @@ -2,6 +2,7 @@ package astminer.parse.fuzzy.cpp import astminer.common.model.ParseResult import astminer.common.model.Parser +import astminer.parse.ParsingException import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.codepropertygraph.generated.EdgeTypes import io.shiftleft.codepropertygraph.generated.NodeKeys @@ -68,14 +69,13 @@ class FuzzyCppParser : Parser { * @param content to parse * @return root of AST if content was parsed, null otherwise */ - override fun parseInputStream(content: InputStream): FuzzyNode? 
{ + override fun parseInputStream(content: InputStream): FuzzyNode { val file = File.createTempFile("fuzzy", ".cpp") file.deleteOnExit() file.outputStream().use { content.copyTo(it) } - val nodes = parseFile(file) - return nodes.root + return parseFile(file).root } /** @@ -121,10 +121,11 @@ class FuzzyCppParser : Parser { if (File(actualFilePath).absolutePath != File(filePath).absolutePath) { println("While parsing $filePath, actually parsed $actualFilePath") } - return ParseResult(vertexToNode[it], actualFilePath) + val node = vertexToNode[it] ?: throw ParsingException("Unknown fuzzy cpp parser error.") + return ParseResult(node, actualFilePath) } } - return ParseResult(null, filePath) + throw ParsingException("Unknown fuzzy cpp parser error.") } /** diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt index d1b97a45..26007811 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt @@ -13,7 +13,7 @@ class GumTreeJavaParser : Parser { Run.initGenerators() } - override fun parseInputStream(content: InputStream): GumTreeNode? 
{ + override fun parseInputStream(content: InputStream): GumTreeNode { val treeContext = JdtTreeGenerator().generate(InputStreamReader(content)) return wrapGumTreeNode(treeContext) } diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt index 0f5ad777..365c7d04 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt @@ -1,6 +1,7 @@ package astminer.parse.gumtree.python import astminer.common.model.Parser +import astminer.parse.ParsingException import astminer.parse.gumtree.GumTreeNode import com.github.gumtreediff.client.Run import com.github.gumtreediff.gen.python.PythonTreeGenerator @@ -13,11 +14,11 @@ class GumTreePythonParser : Parser { Run.initGenerators() } - override fun parseInputStream(content: InputStream): GumTreeNode? = try { + override fun parseInputStream(content: InputStream): GumTreeNode = try { val context = PythonTreeGenerator().generate(InputStreamReader(content)) wrapGumTreeNode(context) } catch (e: Exception) { - null + throw ParsingException("Failed to parse Python code: ${e.message}") } } diff --git a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt index e431dec2..652e0511 100644 --- a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt +++ b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt @@ -9,7 +9,6 @@ import astminer.parse.antlr.AntlrNode import org.junit.Test import kotlin.test.assertEquals import kotlin.test.assertNull -import kotlin.test.assertTrue internal class LabelExtractorTest { @@ -21,14 +20,6 @@ internal class LabelExtractorTest { private val DUMMY_ROOT = AntlrNode("", null, null) } - @Test - fun testEmptyFilePathExtractor() { - val labelExtractor = FilePathExtractor() - val emptyParseResult = ParseResult(null, PATH_STRING) - val labeledParseResults = 
labelExtractor.toLabeledData(emptyParseResult) - assertTrue { labeledParseResults.isEmpty() } - } - @Test fun testNonEmptyFilePathExtractor() { val labelExtractor = FilePathExtractor() @@ -40,14 +31,6 @@ internal class LabelExtractorTest { assertEquals(PATH_STRING, label) } - @Test - fun testEmptyFolderExtractor() { - val labelExtractor = FolderExtractor() - val emptyParseResult = ParseResult(null, PATH_STRING) - val labeledParseResults = labelExtractor.toLabeledData(emptyParseResult) - assertTrue { labeledParseResults.isEmpty() } - } - @Test fun testNonEmptyFolderExtractor() { val labelExtractor = FolderExtractor() diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt index 48be6f7d..7248d8e6 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt @@ -1,12 +1,12 @@ package astminer.parse.gumtree.python +import astminer.parse.ParsingException import org.junit.After import org.junit.Before import org.junit.Test import java.io.File import kotlin.test.assertFalse import kotlin.test.assertNotNull -import kotlin.test.assertNull import kotlin.test.assertTrue class GumTreePythonParserTest { @@ -32,11 +32,10 @@ class GumTreePythonParserTest { assertTrue(node.wrappedNode.children.isEmpty()) } - @Test(expected = Test.None::class) + @Test(expected = ParsingException::class) fun invalidCode() { testFile.writeText("INVALID PYTHON CODE") - val node = parser.parseInputStream(testFile.inputStream()) - assertNull(node) + parser.parseInputStream(testFile.inputStream()) } @Test(expected = Test.None::class) From 3dd18e0c7ce5ffa92d5f0c83e2941957710443ba Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 27 Apr 2021 15:50:09 +0300 Subject: [PATCH 081/308] fuzzy and gumtree nodes now overrides preorder --- src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt | 2 +- 
src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt | 2 ++ src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt | 2 ++ .../astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt | 2 +- .../parse/gumtree/python/GumTreePythonMethodSplitter.kt | 2 +- 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt index 80afc833..b83028ce 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt @@ -21,7 +21,7 @@ class FuzzyMethodSplitter : TreeMethodSplitter { override fun splitIntoMethods(root: FuzzyNode): Collection> { val methodRoots = root.preOrder().filter { it.typeLabel == METHOD_NODE } - return methodRoots.map { collectMethodInfo(it as FuzzyNode) } + return methodRoots.map { collectMethodInfo(it) } } private fun collectMethodInfo(methodNode: FuzzyNode): MethodInfo { diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt index 6aaae4cb..44474d7f 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt @@ -30,4 +30,6 @@ class FuzzyNode(override val typeLabel: String,token: String?, order: Int?) 
: No override fun removeChildrenOfType(typeLabel: String) { childrenMultiset.removeIf { it.typeLabel == typeLabel } } + + override fun preOrder(): List = super.preOrder().map { it as FuzzyNode } } diff --git a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt index 7753a891..211c06cf 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt @@ -26,4 +26,6 @@ class GumTreeNode(val wrappedNode: ITree, val context: TreeContext,override var return children.filterIsInstance() .apply { if (size != children.size) throw TypeCastException("Node have children of different types") } } + + override fun preOrder(): List = super.preOrder().map { it as GumTreeNode } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt index a0064222..85ed05ba 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt @@ -19,7 +19,7 @@ class GumTreeJavaMethodSplitter : TreeMethodSplitter { override fun splitIntoMethods(root: GumTreeNode): Collection> { val methodRoots = root.preOrder().filter { it.typeLabel == TypeLabels.methodDeclaration } - return methodRoots.map { collectMethodInfo(it as GumTreeNode) } + return methodRoots.map { collectMethodInfo(it) } } private fun collectMethodInfo(methodNode: GumTreeNode): MethodInfo { diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt index 8babeea5..009c2e15 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt @@ -35,7 +35,7 @@ class 
GumTreePythonMethodSplitter : TreeMethodSplitter { override fun splitIntoMethods(root: GumTreeNode): Collection> { val methodRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.typeLabel) } - return methodRoots.map { collectMethodInfo(it as GumTreeNode) } + return methodRoots.map { collectMethodInfo(it) } } private fun collectMethodInfo(methodNode: GumTreeNode): MethodInfo { From 8c445c61559d7331637be339effb2f851d6fb4a3 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 27 Apr 2021 16:12:03 +0300 Subject: [PATCH 082/308] redundant imports removed --- src/main/kotlin/astminer/cli/FilterPredicates.kt | 1 - src/main/kotlin/astminer/cli/LabelExtractors.kt | 1 - src/main/kotlin/astminer/cli/ProjectParser.kt | 1 - src/main/kotlin/astminer/featureextraction/TreeFeature.kt | 1 - .../astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt | 1 - .../astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt | 1 - .../astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt | 1 - src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt | 1 - src/main/kotlin/astminer/storage/ast/DotAstStorage.kt | 1 - src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt | 1 - 10 files changed, 10 deletions(-) diff --git a/src/main/kotlin/astminer/cli/FilterPredicates.kt b/src/main/kotlin/astminer/cli/FilterPredicates.kt index b5bf0355..9258f6e5 100644 --- a/src/main/kotlin/astminer/cli/FilterPredicates.kt +++ b/src/main/kotlin/astminer/cli/FilterPredicates.kt @@ -2,7 +2,6 @@ package astminer.cli import astminer.common.model.MethodInfo import astminer.common.model.Node -/*import astminer.common.preOrder*/ import astminer.common.splitToSubtokens abstract class MethodFilterPredicate { diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index a9a52f55..2e107fdd 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -3,7 +3,6 @@ package astminer.cli 
import astminer.common.model.MethodInfo import astminer.common.model.Node import astminer.common.model.ParseResult -/*import astminer.common.preOrder*/ import astminer.common.setTechnicalToken import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.java.JavaMethodSplitter diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt index cf56ed15..0fcb4ad6 100644 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ b/src/main/kotlin/astminer/cli/ProjectParser.kt @@ -3,7 +3,6 @@ package astminer.cli import astminer.storage.ast.CsvAstStorage import astminer.storage.ast.DotAstStorage import astminer.common.getProjectFilesWithExtension -/*import astminer.common.preOrder*/ import astminer.storage.Storage import astminer.storage.TokenProcessor import com.github.ajalt.clikt.core.CliktCommand diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt index d51229ab..0825519f 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt +++ b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt @@ -1,7 +1,6 @@ package astminer.featureextraction import astminer.common.model.Node -/*import astminer.common.preOrder*/ /** * Interface that describes tree feature. 
diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt index 8b0d1c55..246384a2 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt @@ -1,7 +1,6 @@ package astminer.parse.antlr.javascript import astminer.common.model.* -/*import astminer.common.preOrder*/ import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.decompressTypeLabel diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt index 85ed05ba..04051ae1 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt @@ -1,7 +1,6 @@ package astminer.parse.gumtree.java import astminer.common.model.* -/*import astminer.common.preOrder*/ import astminer.parse.gumtree.GumTreeNode private fun GumTreeNode.isTypeNode() = typeLabel.endsWith("Type") diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt index 009c2e15..686ba54a 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt @@ -5,7 +5,6 @@ import astminer.common.model.MethodInfo import astminer.common.model.MethodNode import astminer.common.model.ParameterNode import astminer.common.model.TreeMethodSplitter -/*import astminer.common.preOrder*/ import astminer.parse.gumtree.GumTreeNode class GumTreePythonMethodSplitter : TreeMethodSplitter { diff --git a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt index e00bc23d..f466bf76 100644 
--- a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -2,7 +2,6 @@ package astminer.storage.ast import astminer.cli.LabeledResult import astminer.common.model.Node -/*import astminer.common.preOrder*/ import astminer.common.storage.* import astminer.storage.Storage import java.io.File diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index d9fb002d..aa838ddd 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -2,7 +2,6 @@ package astminer.storage.ast import astminer.cli.LabeledResult import astminer.common.model.Node -/*import astminer.common.preOrder*/ import astminer.common.storage.RankedIncrementalIdStorage import astminer.storage.Storage import astminer.storage.TokenProcessor diff --git a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt index 29ed651e..06fbc454 100644 --- a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt @@ -1,6 +1,5 @@ package astminer.parse.antlr -/*import astminer.common.preOrder*/ import astminer.parse.antlr.java.JavaParser import org.junit.Assert import org.junit.Test From d4dbdb33d784f4836f0f8266b35749b31cbb538a Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 27 Apr 2021 18:36:35 +0500 Subject: [PATCH 083/308] added parseResult to FunctionInfo --- src/main/kotlin/astminer/common/model/FunctionInfo.kt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/common/model/FunctionInfo.kt b/src/main/kotlin/astminer/common/model/FunctionInfo.kt index 7db831a6..c880bfd3 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfo.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfo.kt @@ -13,8 +13,12 @@ interface FunctionInfo { get() = 
notImplemented("nameNode") val name: String? get() = nameNode?.getToken() + val parseResult: ParseResult + get() = notImplemented("parseResult") val root: T - get() = notImplemented("root") + get() = parseResult.root + val filePath: String + get() = parseResult.filePath val annotations: List get() = notImplemented("annotations") val modifiers: List From 36090efc839b92f4cf722e35968e8908b497b2bc Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 28 Apr 2021 12:13:02 +0300 Subject: [PATCH 084/308] small refactoring --- .../kotlin/astminer/parse/antlr/AntlrUtil.kt | 2 +- .../parse/antlr/java/AntlrJavaFunctionInfo.kt | 46 ++++++++++--------- .../parse/antlr/java/JavaMethodSplitter.kt | 2 +- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 52d6852a..bb7959d4 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -77,7 +77,7 @@ fun Node.hasLastLabel(label: String): Boolean { return decompressTypeLabel(this.getTypeLabel()).last() == label } -fun Node.hasLastLabel(labels: List): Boolean { +fun Node.lastLabelIn(labels: List): Boolean { return labels.contains(decompressTypeLabel(this.getTypeLabel()).last()) } diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt index 56d517d3..96ea18ca 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -4,12 +4,13 @@ import astminer.common.model.* import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.firstLabelIn import astminer.parse.antlr.hasLastLabel +import astminer.parse.antlr.lastLabelIn data class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo { override val nameNode: AntlrNode? 
= collectNameNode() override val parameters: List = collectParameters() override val returnType: String? = collectReturnType() - override val enclosingElement: EnclosingElement? = collectEnclosingClass(root) + override val enclosingElement: EnclosingElement? = collectEnclosingClass() companion object { private const val METHOD_RETURN_TYPE_NODE = "typeTypeOrVoid" @@ -31,53 +32,54 @@ data class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo? { - return when { - node == null -> null - node.hasLastLabel(CLASS_DECLARATION_NODE) -> EnclosingElement( - type = EnclosingElementType.Class, - name = node.getChildOfType(CLASS_NAME_NODE)?.getToken(), - root = node - ) - else -> collectEnclosingClass(node.getParent() as AntlrNode) + private fun collectEnclosingClass(): EnclosingElement? { + val enclosingClassNode = findEnclosingClassNode(root) ?: return null + return EnclosingElement( + type = EnclosingElementType.Class, + name = enclosingClassNode.getChildOfType(CLASS_NAME_NODE)?.getToken(), + root = enclosingClassNode + ) + } + + private fun findEnclosingClassNode(node: AntlrNode?): AntlrNode? 
{ + if (node == null || node.hasLastLabel(CLASS_DECLARATION_NODE)) { + return node } + return findEnclosingClassNode(node.getParent() as AntlrNode) } private fun collectParameters(): List { val parametersRoot = root.getChildOfType(METHOD_PARAMETER_NODE) val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) ?: return emptyList() - if (innerParametersRoot.hasLastLabel(METHOD_SINGLE_PARAMETER_NODES)) { + if (innerParametersRoot.lastLabelIn(METHOD_SINGLE_PARAMETER_NODES)) { return listOf(getParameterInfo(innerParametersRoot)) } return innerParametersRoot.getChildren().filter { it.firstLabelIn(METHOD_SINGLE_PARAMETER_NODES) - }.map { getParameterInfo(it) } + }.map {singleParameter -> getParameterInfo(singleParameter) } } private fun getParameterInfo(parameterNode: AntlrNode): MethodInfoParameter { val returnTypeNode = parameterNode.getChildOfType(PARAMETER_RETURN_TYPE_NODE) - val returnTypeToken = returnTypeNode?.let { collectParameterToken(it) } + val returnTypeToken = returnTypeNode?.let { getTokensFromSubtree(it) } val parameterName = parameterNode.getChildOfType(PARAMETER_NAME_NODE)?.getToken() ?: throw IllegalStateException("Parameter name wasn't found") return MethodInfoParameter(parameterName, returnTypeToken) - } - //TODO(rename) - private fun collectParameterToken(parameterNode: AntlrNode): String { - if (parameterNode.isLeaf()) { - return parameterNode.getToken() + private fun getTokensFromSubtree(node: AntlrNode): String { + if (node.isLeaf()) { + return node.getToken() } - return parameterNode.getChildren().joinToString(separator = "") { child -> - collectParameterToken(child) + return node.getChildren().joinToString(separator = "") { child -> + getTokensFromSubtree(child) } } } diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt index e4853f06..6c5194c0 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ 
b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt @@ -10,7 +10,7 @@ class JavaMethodSplitter : TreeMethodSplitter { override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { - root.hasLastLabel(methodNodeType) + it.hasLastLabel(methodNodeType) } return methodRoots.map { AntlrJavaFunctionInfo(it as AntlrNode) } } From 7846135e53c062c398044a09659387ff6a43b845 Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 28 Apr 2021 12:40:23 +0300 Subject: [PATCH 085/308] data modifier removed --- .../kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt index 96ea18ca..ab237d35 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -6,7 +6,7 @@ import astminer.parse.antlr.firstLabelIn import astminer.parse.antlr.hasLastLabel import astminer.parse.antlr.lastLabelIn -data class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo { +class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo { override val nameNode: AntlrNode? = collectNameNode() override val parameters: List = collectParameters() override val returnType: String? 
= collectReturnType() From be19d2a7f47bc57c38b2d8fbed7eb7cc7fa86efa Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 28 Apr 2021 20:52:29 +0300 Subject: [PATCH 086/308] raw AntlrPythonFunctionInfo.kt added (no tests yet) --- .../antlr/python/AntlrPythonFunctionInfo.kt | 112 ++++++++++++++++++ .../antlr/python/PythonMethodSplitter.kt | 64 +--------- .../antlr/python/PythonMethodSplitterTest.kt | 2 +- .../methodSplitting/testMethodSplitting.py | 10 ++ 4 files changed, 126 insertions(+), 62 deletions(-) create mode 100644 src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt new file mode 100644 index 00000000..65605f40 --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -0,0 +1,112 @@ +package astminer.parse.antlr.python + +import astminer.common.model.* +import astminer.parse.antlr.AntlrNode +import astminer.parse.antlr.decompressTypeLabel +import astminer.parse.antlr.hasLastLabel +import astminer.parse.antlr.lastLabelIn + +class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo { + override val nameNode: AntlrNode? = collectNameNode() + override val parameters: List = collectParameters() + override val enclosingElement: EnclosingElement? 
= collectEnclosingElement() + + companion object { + private const val METHOD_NODE = "funcdef" + private const val METHOD_NAME_NODE = "NAME" + + private const val CLASS_DECLARATION_NODE = "classdef" + private const val CLASS_NAME_NODE = "NAME" + + private const val METHOD_PARAMETER_NODE = "parameters" + private const val METHOD_PARAMETER_INNER_NODE = "typedargslist" + private const val METHOD_SINGLE_PARAMETER_NODE = "tfpdef" + private const val PARAMETER_NAME_NODE = "NAME" + private const val PARAMETER_TYPE_NODE = "test" + //It's seems strange but it works because actual type label will be + //test|or_test|and_test|not_test|comparison|expr|xor_expr... + // ..|and_expr|shift_expr|arith_expr|term|factor|power|atom_expr|atom|NAME + + private val POSSIBLE_ENCLOSING_ELEMENTS = listOf(CLASS_DECLARATION_NODE, METHOD_NODE) + private const val BODY = "suite" + } + + private fun collectNameNode(): AntlrNode? { + return root.getChildOfType(METHOD_NAME_NODE) + } + + private fun collectParameters(): List { + val parametersRoot = root.getChildOfType(METHOD_PARAMETER_NODE) + val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) ?: return emptyList() + + val methodHaveOnlyOneParameter = + innerParametersRoot.lastLabelIn(listOf(METHOD_SINGLE_PARAMETER_NODE, PARAMETER_NAME_NODE)) + if (methodHaveOnlyOneParameter) { + return listOf(assembleMethodInfoParameter(innerParametersRoot)) + } + + return innerParametersRoot.getChildrenOfType(METHOD_SINGLE_PARAMETER_NODE).map { node -> + assembleMethodInfoParameter(node) + } + } + + private fun assembleMethodInfoParameter(parameterNode: AntlrNode): MethodInfoParameter { + val parameterHaveNoDefaultOrType = parameterNode.hasLastLabel(PARAMETER_NAME_NODE) + val parameterName = if (parameterHaveNoDefaultOrType) { + parameterNode.getToken() + } else { + parameterNode.getChildOfType(PARAMETER_NAME_NODE)?.getToken() + } + require(parameterName != null) { "Method name was not found" } + + val parameterType = 
parameterNode.getChildOfType(PARAMETER_TYPE_NODE)?.getToken() + + return MethodInfoParameter( + name = parameterName, + type = parameterType + ) + } + + //TODO: refactor + private fun collectEnclosingElement(): EnclosingElement? { + val enclosingNode = findEnclosingNode(root.getParent() as AntlrNode?) ?: return null + val type = when { + enclosingNode.hasLastLabel(CLASS_DECLARATION_NODE) -> EnclosingElementType.Class + enclosingNode.hasLastLabel(METHOD_NODE) -> { + when { + enclosingNode.isMethod() -> EnclosingElementType.Method + else -> EnclosingElementType.Function + } + } + else -> throw IllegalStateException("Enclosing node can only be function or class") + } + val name = when (type) { + EnclosingElementType.Class -> enclosingNode.getChildOfType(CLASS_NAME_NODE) + EnclosingElementType.Method, EnclosingElementType.Function -> enclosingNode.getChildOfType(METHOD_NAME_NODE) + else -> throw IllegalStateException("Enclosing node can only be function or class") + }?.getToken() + return EnclosingElement( + type = type, + name = name, + root = enclosingNode + ) + } + + private fun findEnclosingNode(node: AntlrNode?): AntlrNode? { + if (node == null || node.lastLabelIn(POSSIBLE_ENCLOSING_ELEMENTS)) { + return node + } + return findEnclosingNode(node.getParent() as AntlrNode?) 
+ } + + private fun Node.isMethod(): Boolean { + val outerBody = getParent() + if (outerBody?.getTypeLabel() != BODY) return false + + val enclosingNode = outerBody.getParent() + require(enclosingNode != null) { "Found body without enclosing element" } + + val lastLabel = decompressTypeLabel(enclosingNode.getTypeLabel()).last() + return lastLabel == CLASS_DECLARATION_NODE + } +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt index b0739a96..cf66b82b 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt @@ -7,70 +7,12 @@ import astminer.parse.antlr.decompressTypeLabel class PythonMethodSplitter : TreeMethodSplitter { - - companion object { - private const val METHOD_NODE = "funcdef" - private const val METHOD_NAME_NODE = "NAME" - - private const val CLASS_DECLARATION_NODE = "classdef" - private const val CLASS_NAME_NODE = "NAME" - - private const val METHOD_PARAMETER_NODE = "parameters" - private const val METHOD_PARAMETER_INNER_NODE = "typedargslist" - private const val METHOD_SINGLE_PARAMETER_NODE = "tfpdef" - private const val PARAMETER_NAME_NODE = "NAME" - } + private val methodNode = "funcdef" override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { - decompressTypeLabel(it.getTypeLabel()).last() == METHOD_NODE - } - return dummyMethodInfos() -// return methodRoots.map { collectMethodInfo(it as AntlrNode) } - } - - private fun collectMethodInfo(methodNode: AntlrNode): MethodInfo { - val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) - - val classRoot = getEnclosingClass(methodNode) - val className = classRoot?.getChildOfType(CLASS_NAME_NODE) - - val parametersRoot = methodNode.getChildOfType(METHOD_PARAMETER_NODE) - val innerParametersRoot = 
parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) - - val parametersList = when { - innerParametersRoot != null -> getListOfParameters(innerParametersRoot) - parametersRoot != null -> getListOfParameters(parametersRoot) - else -> emptyList() - } - return MethodInfo( - MethodNode(methodNode, null, methodName), - ElementNode(classRoot, className), - parametersList - ) - } - - private fun getEnclosingClass(node: AntlrNode): AntlrNode? { - if (decompressTypeLabel(node.getTypeLabel()).last() == CLASS_DECLARATION_NODE) { - return node - } - val parentNode = node.getParent() as? AntlrNode - if (parentNode != null) { - return getEnclosingClass(parentNode) - } - return null - } - - private fun getListOfParameters(parameterRoot: AntlrNode): List> { - if (decompressTypeLabel(parameterRoot.getTypeLabel()).last() == PARAMETER_NAME_NODE) { - return listOf(ParameterNode(parameterRoot, null, parameterRoot)) - } - return parameterRoot.getChildrenOfType(METHOD_SINGLE_PARAMETER_NODE).map { - if (decompressTypeLabel(it.getTypeLabel()).last() == PARAMETER_NAME_NODE) { - ParameterNode(it, null, it) - } else { - ParameterNode(it, null, it.getChildOfType(PARAMETER_NAME_NODE) as AntlrNode) - } + decompressTypeLabel(it.getTypeLabel()).last() == methodNode } + return methodRoots.map { AntlrPythonFunctionInfo(it as AntlrNode) } } } diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt index e323c7e9..8a18125b 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt @@ -12,7 +12,7 @@ import kotlin.test.assertNull class PythonMethodSplitterTest { companion object { - const val N_FUNCTIONS = 6 + const val N_FUNCTIONS = 9 val methodSplitter = PythonMethodSplitter() val parser = PythonParser() } diff --git a/src/test/resources/methodSplitting/testMethodSplitting.py 
b/src/test/resources/methodSplitting/testMethodSplitting.py index befebc43..9ad20984 100644 --- a/src/test/resources/methodSplitting/testMethodSplitting.py +++ b/src/test/resources/methodSplitting/testMethodSplitting.py @@ -17,5 +17,15 @@ def functionWithNoParameters(): def functionWithOneParameter(p1): pass +def functionWithOneTypedParameter(p1: int): + pass + def functionWithThreeParameters(p1, p2 = 4, p3: int = 3): pass + +def functionContainingFunction(): + def someFunction(): + pass + pass + +hello = lambda x, y: x + y \ No newline at end of file From 6ba2cb1e7c62393b5ef6622f70723326b095c20e Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 28 Apr 2021 21:57:13 +0300 Subject: [PATCH 087/308] more tests added --- .../antlr/python/PythonMethodSplitterTest.kt | 90 ++++++++++++++++--- .../methodSplitting/testMethodSplitting.py | 34 ++++--- 2 files changed, 103 insertions(+), 21 deletions(-) diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt index 8a18125b..261c6a00 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt @@ -1,5 +1,6 @@ package astminer.parse.antlr.python +import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo import astminer.common.model.MethodInfo import astminer.parse.antlr.AntlrNode @@ -12,7 +13,7 @@ import kotlin.test.assertNull class PythonMethodSplitterTest { companion object { - const val N_FUNCTIONS = 9 + const val N_FUNCTIONS = 16 val methodSplitter = PythonMethodSplitter() val parser = PythonParser() } @@ -33,49 +34,116 @@ class PythonMethodSplitterTest { @Test fun testFunctionNotInClass() { - val methodClass = methodInfos.find { it.name == "funWithNoClass" } + val methodClass = methodInfos.find { it.name == "fun_with_no_class" } assertNotNull(methodClass) 
assertNull(methodClass.enclosingElement) } @Test fun testFunctionInClass() { - val methodClass = methodInfos.find { it.name == "funInClass1" } + val methodClass = methodInfos.find { it.name == "fun_in_class1" } assertNotNull(methodClass) + assertEquals(EnclosingElementType.Class, methodClass.enclosingElement?.type) assertEquals( "Class1", methodClass.enclosingElement?.name) } @Test fun testFunctionInNestedClass() { - val methodClass = methodInfos.find { it.name == "funInClass2" } + val methodClass = methodInfos.find { it.name == "fun_in_class2" } assertNotNull(methodClass) + assertEquals(EnclosingElementType.Class, methodClass.enclosingElement?.type) assertEquals( "Class2", methodClass.enclosingElement?.name) } @Test fun testNoParameters() { - val methodNoParameters = methodInfos.find { it.name == "functionWithNoParameters" } + val methodNoParameters = methodInfos.find { it.name == "function_with_no_parameters" } assertNotNull(methodNoParameters) assertEquals(0, methodNoParameters.parameters.size) } @Test fun testOneParameter() { - val methodOneParameter = methodInfos.find { it.name == "functionWithOneParameter" } + val methodOneParameter = methodInfos.find { it.name == "function_with_one_parameter" } assertNotNull(methodOneParameter) assertEquals(1, methodOneParameter.parameters.size) val parameter = methodOneParameter.parameters[0] assertEquals("p1", parameter.name) } + @Test + fun testOneTypedParameter() { + val methodOneTypedParameter = methodInfos.find { it.name == "function_with_one_typed_parameter" } + assertNotNull(methodOneTypedParameter) + assertEquals(1, methodOneTypedParameter.parameters.size) + val parameter = methodOneTypedParameter.parameters[0] + assertEquals("p1", parameter.name) + assertEquals("int", parameter.type) + } + @Test fun testThreeParameters() { - val methodThreeParameters = methodInfos.find { it.name == "functionWithThreeParameters" } + val methodThreeParameters = methodInfos.find { it.name == "function_with_three_parameters" } 
assertNotNull(methodThreeParameters) assertEquals(3, methodThreeParameters.parameters.size) - for (i in 0 until 3) { - val parameter = methodThreeParameters.parameters[i] - assertEquals("p${i + 1}", parameter.name) - } + val parameters = methodThreeParameters.parameters + assertEquals("p1", parameters[0].name) + + assertEquals("p2", parameters[1].name) + + assertEquals("p3", parameters[2].name) + assertEquals("int", parameters[2].type) + } + + @Test + fun testParameterInClass() { + val methodOneParameter = methodInfos.find { it.name == "fun_with_parameter_in_class" } + assertNotNull(methodOneParameter) + assertEquals(2, methodOneParameter.parameters.size) + val parameter = methodOneParameter.parameters[1] + assertEquals("p1", parameter.name) + } + + @Test + fun testTypedParameterInClass() { + val methodOneTypedParameter = methodInfos.find { it.name == "fun_with_typed_parameter_in_class" } + assertNotNull(methodOneTypedParameter) + assertEquals(2, methodOneTypedParameter.parameters.size) + val parameter = methodOneTypedParameter.parameters[1] + assertEquals("p1", parameter.name) + assertEquals("int", parameter.type) + } + + @Test + fun testEnclosingFunction() { + val functionInsideFunction = methodInfos.find { it.name == "function_inside_function" } + assertNotNull(functionInsideFunction) + val enclosingElement = functionInsideFunction.enclosingElement + + assertNotNull(enclosingElement) + assertEquals("function_containing_function", enclosingElement.name) + assertEquals(EnclosingElementType.Function, enclosingElement.type) + } + + @Test + fun testEnclosingMethod() { + val functionInsideMethod = methodInfos.find { it.name == "function_inside_method" } + assertNotNull(functionInsideMethod) + val enclosingElement = functionInsideMethod.enclosingElement + + assertNotNull(enclosingElement) + assertEquals("some_method", enclosingElement.name) + assertEquals(EnclosingElementType.Method, enclosingElement.type) + } + + @Test + fun testEnclosingFunctionInsideMethod() { + val 
funInsideFunInsideMethod = methodInfos.find { it.name == "fun_inside_fun_inside_method" } + assertNotNull(funInsideFunInsideMethod) + val enclosingElement = funInsideFunInsideMethod.enclosingElement + + assertNotNull(enclosingElement) + assertEquals("second_function_inside_method", enclosingElement.name) + assertEquals(EnclosingElementType.Function, enclosingElement.type) } } \ No newline at end of file diff --git a/src/test/resources/methodSplitting/testMethodSplitting.py b/src/test/resources/methodSplitting/testMethodSplitting.py index 9ad20984..1eb20c86 100644 --- a/src/test/resources/methodSplitting/testMethodSplitting.py +++ b/src/test/resources/methodSplitting/testMethodSplitting.py @@ -1,31 +1,45 @@ -def funWithNoClass(): +def fun_with_no_class(): pass class Class1: - def funInClass1(): + def fun_in_class1(self): pass class Class2: - def funInClass2(): + def fun_in_class2(self): pass -def functionWithNoParameters(): +def function_with_no_parameters(): pass -def functionWithOneParameter(p1): +def function_with_one_parameter(p1): pass -def functionWithOneTypedParameter(p1: int): +def function_with_one_typed_parameter(p1: int): pass -def functionWithThreeParameters(p1, p2 = 4, p3: int = 3): +def function_with_three_parameters(p1, p2 = 4, p3: int = 3): pass -def functionContainingFunction(): - def someFunction(): +class Class3: + def fun_with_parameter_in_class(self, p1): + pass + + def fun_with_typed_parameter_in_class(self, p1: int): + pass + +def function_containing_function(): + def function_inside_function(): pass pass -hello = lambda x, y: x + y \ No newline at end of file +class Class4: + def some_method(self): + def function_inside_method(): + pass + def second_method(self): + def second_function_inside_method(): + def fun_inside_fun_inside_method(): + pass \ No newline at end of file From 434b1488d6a32e1095ecb2c7e975609c73e05ca4 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 29 Apr 2021 16:47:06 +0300 Subject: [PATCH 088/308] Update gradle, add 
publishing task --- .gitignore | 6 + build.gradle.kts | 136 +++++++---------------- gradle.properties | 5 +- gradle/wrapper/gradle-wrapper.properties | 2 +- 4 files changed, 51 insertions(+), 98 deletions(-) diff --git a/.gitignore b/.gitignore index 3e0eeee6..f1c745c3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,10 @@ *.iml +*.csv + .idea/ .gradle/ examples/out/ +src/main/generated/ +build/ + +.DS_Store diff --git a/build.gradle.kts b/build.gradle.kts index 152dadb8..721d2124 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,80 +1,49 @@ -import tanvd.kosogor.proxy.publishJar -import tanvd.kosogor.proxy.shadowJar -import org.jetbrains.kotlin.gradle.tasks.KotlinCompile - -group = "io.github.vovak.astminer" - -val branchName: String by project -val ciVersion: String by project - -version = if (project.hasProperty("ciVersion")) { - ciVersion -} else { - "0.6" -} - -println(version) - plugins { id("java") - kotlin("jvm") version "1.3.61" apply true + kotlin("jvm") version "1.4.32" apply true id("antlr") id("idea") id("application") - id("tanvd.kosogor") version "1.0.6" id("org.jetbrains.dokka") version "0.9.18" id("me.champeau.gradle.jmh") version "0.5.0" -} - - -application { - mainClassName = "astminer.MainKt" + id("maven-publish") } defaultTasks("run") repositories { - mavenLocal() mavenCentral() - jcenter() } - -val generatedSourcesPath = "src/main/generated" - dependencies { + // ===== Parsers ===== antlr("org.antlr:antlr4:4.7.1") - implementation(kotlin("stdlib")) - // https://mvnrepository.com/artifact/com.github.gumtreediff api("com.github.gumtreediff", "core", "2.1.0") api("com.github.gumtreediff", "client", "2.1.0") api("com.github.gumtreediff", "gen.jdt", "2.1.0") - // https://mvnrepository.com/artifact/io.shiftleft/fuzzyc2cpg api("io.shiftleft", "fuzzyc2cpg_2.13", "1.2.9") + + // ===== Main ===== + implementation(kotlin("stdlib")) + implementation("com.github.ajalt", "clikt", "2.1.0") + + // ===== Test ===== // 
https://mvnrepository.com/artifact/org.slf4j/slf4j-simple testImplementation("org.slf4j", "slf4j-simple", "1.7.30") - testImplementation("junit:junit:4.11") testImplementation(kotlin("test-junit")) - implementation("com.github.ajalt", "clikt", "2.1.0") - - jmhImplementation("org.jetbrains.kotlin:kotlin-reflect:1.3.61") + // ===== JMH ===== + jmhImplementation("org.jetbrains.kotlin:kotlin-reflect:1.4.32") jmhImplementation("org.openjdk.jmh:jmh-core:1.21") jmhImplementation("org.openjdk.jmh:jmh-generator-annprocess:1.21") } -val shadowJar = shadowJar { - jar { - archiveName = "lib-$version.jar" - mainClass = "astminer.MainKt" - } -}.apply { - task.archiveClassifier.set("") -} - +val generatedSourcesPath = "src/main/generated" +sourceSets["main"].java.srcDir(file(generatedSourcesPath)) +idea.module.generatedSourceDirs.add(file(generatedSourcesPath)) tasks.generateGrammarSource { maxHeapSize = "64m" @@ -100,53 +69,12 @@ tasks.clean { tasks.compileKotlin { dependsOn(tasks.generateGrammarSource) + kotlinOptions.jvmTarget = "1.8" } tasks.compileJava { dependsOn(tasks.generateGrammarSource) -} - -configure { - sourceCompatibility = JavaVersion.VERSION_1_8 -} -tasks.withType { - kotlinOptions.jvmTarget = "1.8" -} - -sourceSets["main"].java.srcDir(file(generatedSourcesPath)) - - -idea { - module { - generatedSourceDirs.add(file(generatedSourcesPath)) - } -} - -publishJar { - publication { - artifactId = if (project.hasProperty("branchName")) { - when(branchName) { - "master" -> "astminer" - "master-dev" -> "astminer-dev" - else -> "" - } - } else { - "astminer" - } - } - - bintray { - - // If username and secretKey not set, will be taken from System environment param `bintray_user`, 'bintray_key' - repository = "astminer" - - info { - githubRepo = "JetBrains-Research/astminer" - vcsUrl = "https://github.com/JetBrains-Research/astminer" - labels.addAll(listOf("mining", "ast", "ml4se", "code2vec", "path-based representations")) - license = "MIT" - description = "Extract AST, 
AST-related metrics, and path-based representations from source code" - } - } + targetCompatibility = "1.8" + sourceCompatibility = "1.8" } tasks.dokka { @@ -154,13 +82,6 @@ tasks.dokka { outputDirectory = "$buildDir/javadoc" } -configure { - sourceCompatibility = JavaVersion.VERSION_1_8 -} -tasks.withType { - kotlinOptions.jvmTarget = "1.8" -} - jmh { duplicateClassesStrategy = DuplicatesStrategy.WARN profilers = listOf("gc") @@ -175,3 +96,26 @@ jmh { benchmarkMode = listOf("AverageTime") resultsFile = file("build/reports/benchmarks.csv") } + +val groupId: String by project +val artifactId: String by project +val version: String by project +publishing { + publications { + create("maven") { + groupId = groupId + artifactId = artifactId + version = version + } + } + repositories { + maven { + name = artifactId + url = uri("https://oss.sonatype.org/service/local/staging/deploy/maven2/") + credentials { + username = System.getenv("MAVEN_USERNAME") + password = System.getenv("MAVEN_PASSWORD") + } + } + } +} diff --git a/gradle.properties b/gradle.properties index a725de05..24ccb4ec 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,2 +1,5 @@ kotlin.code.style=official -org.gradle.jvmargs=-Xmx32768M \ No newline at end of file + +groupId=io.github.vovak +artifactId=ASTMiner +version=0.6.dev1 diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 33682bbb..442d9132 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-6.6.1-all.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-6.8.3-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists From c52041bdb06b6676a1f80e8918b2a28865f1aa9b Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 29 Apr 2021 18:05:39 +0300 Subject: [PATCH 089/308] Use 
space packages --- build.gradle.kts | 19 +++++++++---------- gradle.properties | 4 ---- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 721d2124..7aa662dc 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,3 +1,5 @@ +val version = "0.6.dev1" + plugins { id("java") kotlin("jvm") version "1.4.32" apply true @@ -97,25 +99,22 @@ jmh { resultsFile = file("build/reports/benchmarks.csv") } -val groupId: String by project -val artifactId: String by project -val version: String by project publishing { publications { create("maven") { - groupId = groupId - artifactId = artifactId + groupId = "io.github.vovak" + artifactId = "astminer" version = version + from(components["java"]) } } repositories { maven { - name = artifactId - url = uri("https://oss.sonatype.org/service/local/staging/deploy/maven2/") + url = uri("https://packages.jetbrains.team/maven/p/astminer/astminer") credentials { - username = System.getenv("MAVEN_USERNAME") - password = System.getenv("MAVEN_PASSWORD") + username = System.getenv("publish-user") + password = System.getenv("publish-password") } } } -} +} \ No newline at end of file diff --git a/gradle.properties b/gradle.properties index 24ccb4ec..7fc6f1ff 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,5 +1 @@ kotlin.code.style=official - -groupId=io.github.vovak -artifactId=ASTMiner -version=0.6.dev1 From c15ae1ee3831419606988a10d560b2259885b1ab Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 18:05:37 +0300 Subject: [PATCH 090/308] change package url --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0851c456..063609f3 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ Astminer is available in [Bintray repo](https://bintray.com/egor-bogomolov/astmi ``` repositories { maven { - url "https://dl.bintray.com/egor-bogomolov/astminer" + url 
"https://packages.jetbrains.team/maven/p/astminer/astminer" } } @@ -84,7 +84,7 @@ dependencies { If you use `build.gradle.kts`: ``` repositories { - maven(url = "https://dl.bintray.com/egor-bogomolov/astminer/") + maven(url = uri("https://packages.jetbrains.team/maven/p/astminer/astminer")) } dependencies { From e9a28eb2a0f7e9559c3a7700abb1f52905f23659 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 18:07:26 +0300 Subject: [PATCH 091/308] add release job for Space automation --- .space.kts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .space.kts diff --git a/.space.kts b/.space.kts new file mode 100644 index 00000000..8ee7aa21 --- /dev/null +++ b/.space.kts @@ -0,0 +1,15 @@ +job("Release") { + startOn { + gitPush { + enabled = false + } + } + + container("openjdk:11") { + shellScript { + content = """ + ./gradlew publish + """ + } + } +} \ No newline at end of file From 4f77a6b15a59883beaabbe93a43635dc95c29377 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 18:21:28 +0300 Subject: [PATCH 092/308] add secrets to build env --- .space.kts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.space.kts b/.space.kts index 8ee7aa21..f4c8792c 100644 --- a/.space.kts +++ b/.space.kts @@ -6,6 +6,9 @@ job("Release") { } container("openjdk:11") { + env["publish-user"] = Secrets("publish-user") + env["publish-password"] = Secrets("publish-password") + shellScript { content = """ ./gradlew publish From c6014e0606d954298d8c0d80a9f64e28b98168e3 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 18:30:27 +0300 Subject: [PATCH 093/308] use `code` for `astminer` --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 063609f3..4fc8e3b4 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ dependencies { #### Examples -If you want to use astminer as a library in your Java/Kotlin based data mining tool, check the 
following examples: +If you want to use `astminer` as a library in your Java/Kotlin based data mining tool, check the following examples: * A few [simple usage examples](src/main/kotlin/astminer/examples) can be run with `./gradlew run`. @@ -104,7 +104,7 @@ Please consider trying Kotlin for your data mining pipelines: from our experienc ### Output format -For path-based representations, astminer supports two output formats. In both of them, we store 4 `.csv` files: +For path-based representations, `astminer` supports two output formats. In both of them, we store 4 `.csv` files: 1. `node_types.csv` contains numeric ids and corresponding node types with directions (up/down, as described in [paper](https://arxiv.org/pdf/1803.09544.pdf)); 2. `tokens.csv` contains numeric ids and corresponding tokens; 3. `paths.csv` contains numeric ids and AST paths in form of space-separated sequences of node type ids; @@ -132,14 +132,14 @@ If the language has a parsing tool that is available as Java library: See [FuzzyCppParser](src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt) for an example of a wrapper. ## Contribution -We believe that astminer could find use beyond our own mining tasks. +We believe that `astminer` could find use beyond our own mining tasks. -Please help make astminer easier to use by sharing your use cases. Pull requests are welcome as well. +Please help make `astminer` easier to use by sharing your use cases. Pull requests are welcome as well. Support for other languages and documentation are the key areas of improvement. ## Citing astminer -A [paper](https://zenodo.org/record/2595271) dedicated to astminer (more precisely, to its older version [PathMiner](https://github.com/vovak/astminer/tree/pathminer)) was presented at [MSR'19](https://2019.msrconf.org/). -If you use astminer in your academic work, please cite it. 
+A [paper](https://zenodo.org/record/2595271) dedicated to `astminer` (more precisely, to its older version [PathMiner](https://github.com/vovak/astminer/tree/pathminer)) was presented at [MSR'19](https://2019.msrconf.org/). +If you use `astminer` in your academic work, please cite it. ``` @inproceedings{kovalenko2019pathminer, title={PathMiner: a library for mining of path-based representations of code}, From 53f5d1fee398847a3d47dd3dc8cdf174196f1cd1 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 18:33:35 +0300 Subject: [PATCH 094/308] remove bintray button --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 4fc8e3b4..e98798f9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ [![JetBrains Research](https://jb.gg/badges/research.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub) [![CircleCI](https://circleci.com/gh/JetBrains-Research/astminer.svg?style=svg)](https://circleci.com/gh/JetBrains-Research/astminer) -[ ![Download](https://api.bintray.com/packages/egor-bogomolov/astminer/astminer/images/download.svg) ](https://bintray.com/egor-bogomolov/astminer/astminer/_latestVersion) # astminer A library for mining of [path-based representations of code](https://arxiv.org/pdf/1803.09544.pdf) and more, supported by the [Machine Learning Methods for Software Engineering](https://research.jetbrains.org/groups/ml_methods) group at [JetBrains Research](https://research.jetbrains.org). 
From 42b7a40e719f3e561902dfdcb61cce278c5f1a35 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 18:36:18 +0300 Subject: [PATCH 095/308] add test secret output --- .space.kts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.space.kts b/.space.kts index f4c8792c..eed56c96 100644 --- a/.space.kts +++ b/.space.kts @@ -8,9 +8,10 @@ job("Release") { container("openjdk:11") { env["publish-user"] = Secrets("publish-user") env["publish-password"] = Secrets("publish-password") - + shellScript { content = """ + echo ${'$'}test-secret ./gradlew publish """ } From 78a07e3c25b77eb54ff5c7a2c55609b6de8f102e Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 29 Apr 2021 20:37:20 +0500 Subject: [PATCH 096/308] replaced parseResult with root + filePath in FunctionInfo --- src/main/kotlin/astminer/common/model/FunctionInfo.kt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/FunctionInfo.kt b/src/main/kotlin/astminer/common/model/FunctionInfo.kt index c880bfd3..4d2cbffa 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfo.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfo.kt @@ -13,12 +13,10 @@ interface FunctionInfo { get() = notImplemented("nameNode") val name: String? 
get() = nameNode?.getToken() - val parseResult: ParseResult - get() = notImplemented("parseResult") val root: T - get() = parseResult.root + get() = notImplemented("root") val filePath: String - get() = parseResult.filePath + get() = notImplemented("filePath") val annotations: List get() = notImplemented("annotations") val modifiers: List From 787f4aabf44bf2b3da87def7c32a0ab8a5d50b0b Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 29 Apr 2021 18:44:28 +0300 Subject: [PATCH 097/308] Rename env variables --- .space.kts | 3 --- build.gradle.kts | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.space.kts b/.space.kts index f4c8792c..8ee7aa21 100644 --- a/.space.kts +++ b/.space.kts @@ -6,9 +6,6 @@ job("Release") { } container("openjdk:11") { - env["publish-user"] = Secrets("publish-user") - env["publish-password"] = Secrets("publish-password") - shellScript { content = """ ./gradlew publish diff --git a/build.gradle.kts b/build.gradle.kts index 7aa662dc..24fb255f 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -112,8 +112,8 @@ publishing { maven { url = uri("https://packages.jetbrains.team/maven/p/astminer/astminer") credentials { - username = System.getenv("publish-user") - password = System.getenv("publish-password") + username = System.getenv("PUBLISH_USER") + password = System.getenv("PUBLISH_PASSWORD") } } } From 5978ad918a1cc2505740a18ca7d69b3bb44cbe21 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 18:51:53 +0300 Subject: [PATCH 098/308] add secrets to env --- .space.kts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.space.kts b/.space.kts index 8ee7aa21..72d1f0ed 100644 --- a/.space.kts +++ b/.space.kts @@ -6,6 +6,9 @@ job("Release") { } container("openjdk:11") { + env["PUBLISH_USER"] = Secrets("PUBLISH_USER") + env["PUBLISH_PASSWORD"] = Secrets("PUBLISH_PASSWORD") + shellScript { content = """ ./gradlew publish From d9d930de1db452561c4bb5bfaa8ed62f64b11019 Mon Sep 17 00:00:00 2001 From: 
Vladimir Kovalenko Date: Thu, 29 Apr 2021 19:00:01 +0300 Subject: [PATCH 099/308] try lowercase for secret IDs --- .space.kts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.space.kts b/.space.kts index 72d1f0ed..0b13747b 100644 --- a/.space.kts +++ b/.space.kts @@ -6,8 +6,8 @@ job("Release") { } container("openjdk:11") { - env["PUBLISH_USER"] = Secrets("PUBLISH_USER") - env["PUBLISH_PASSWORD"] = Secrets("PUBLISH_PASSWORD") + env["PUBLISH_USER"] = Secrets("publish_user") + env["PUBLISH_PASSWORD"] = Secrets("publish_password") shellScript { content = """ From b842cfbaeb8d032de97e68b10a8eb124b984c0b4 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 29 Apr 2021 19:05:02 +0300 Subject: [PATCH 100/308] Release without credentials --- .space.kts | 3 --- build.gradle.kts | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.space.kts b/.space.kts index 0b13747b..8ee7aa21 100644 --- a/.space.kts +++ b/.space.kts @@ -6,9 +6,6 @@ job("Release") { } container("openjdk:11") { - env["PUBLISH_USER"] = Secrets("publish_user") - env["PUBLISH_PASSWORD"] = Secrets("publish_password") - shellScript { content = """ ./gradlew publish diff --git a/build.gradle.kts b/build.gradle.kts index 24fb255f..f067ce84 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -112,8 +112,8 @@ publishing { maven { url = uri("https://packages.jetbrains.team/maven/p/astminer/astminer") credentials { - username = System.getenv("PUBLISH_USER") - password = System.getenv("PUBLISH_PASSWORD") + username = System.getenv("PUBLISH_USER")?.takeIf { it.isNotBlank() } ?: "" + password = System.getenv("PUBLISH_PASSWORD")?.takeIf { it.isNotBlank() } ?: "" } } } From fe23282b687c3b207141745c35aa6667c6f13464 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 29 Apr 2021 19:13:13 +0300 Subject: [PATCH 101/308] Add env variables from space and publish to maven local --- .space.kts | 3 +++ build.gradle.kts | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git 
a/.space.kts b/.space.kts index 8ee7aa21..72252d41 100644 --- a/.space.kts +++ b/.space.kts @@ -6,6 +6,9 @@ job("Release") { } container("openjdk:11") { + env["PUBLISH_USER"] = Secrets("publish_user") + env["PUBLISH_PASSWORD"] = Secrets("publish_password") + shellScript { content = """ ./gradlew publish diff --git a/build.gradle.kts b/build.gradle.kts index f067ce84..518c7573 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,4 +1,4 @@ -val version = "0.6.dev1" +val version = "0.6.0" plugins { id("java") @@ -9,6 +9,7 @@ plugins { id("org.jetbrains.dokka") version "0.9.18" id("me.champeau.gradle.jmh") version "0.5.0" id("maven-publish") + id("tanvd.kosogor") version "1.0.10" apply true } defaultTasks("run") From a54c31afbd2505dba6d74319f634a5b9131ff4a4 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 19:19:06 +0300 Subject: [PATCH 102/308] build before publishing --- .space.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.space.kts b/.space.kts index 72252d41..b50a2c46 100644 --- a/.space.kts +++ b/.space.kts @@ -11,7 +11,7 @@ job("Release") { shellScript { content = """ - ./gradlew publish + ./gradlew build publish """ } } From d91a68e8e57c3a57c02bcffb4f10775bc68c8865 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 19:20:16 +0300 Subject: [PATCH 103/308] run tests on push --- .space.kts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.space.kts b/.space.kts index b50a2c46..34d1b2fd 100644 --- a/.space.kts +++ b/.space.kts @@ -1,3 +1,13 @@ +job("Test") { + container("openjdk:11") { + shellScript { + content = """ + ./gradlew test + """ + } + } +} + job("Release") { startOn { gitPush { From 03df480a016ce533dfe564957fe5967ce4a91e6c Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 29 Apr 2021 19:21:52 +0300 Subject: [PATCH 104/308] Return shadow jar --- build.gradle.kts | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/build.gradle.kts 
b/build.gradle.kts index 518c7573..160e49d7 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,3 +1,5 @@ +import tanvd.kosogor.proxy.shadowJar + val version = "0.6.0" plugins { @@ -118,4 +120,14 @@ publishing { } } } -} \ No newline at end of file +} + +application.mainClassName = "astminer.MainKt" +shadowJar { + jar { + archiveName = "astminer-$version.jar" + mainClass = "astminer.MainKt" + } +}.apply { + task.archiveClassifier.set("") +} From 6d502d6543082a4906ca0921fca63d66f82fd95d Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 19:28:27 +0300 Subject: [PATCH 105/308] use ubuntu container --- .space.kts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.space.kts b/.space.kts index 34d1b2fd..5232ea86 100644 --- a/.space.kts +++ b/.space.kts @@ -1,5 +1,5 @@ job("Test") { - container("openjdk:11") { + container("ubuntu") { shellScript { content = """ ./gradlew test @@ -15,7 +15,7 @@ job("Release") { } } - container("openjdk:11") { + container("ubuntu") { env["PUBLISH_USER"] = Secrets("publish_user") env["PUBLISH_PASSWORD"] = Secrets("publish_password") From 1a3db1cd31d2157745dce9fbdb62782bbc8dffa0 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 19:35:59 +0300 Subject: [PATCH 106/308] install openjdk before the build --- .space.kts | 1 + 1 file changed, 1 insertion(+) diff --git a/.space.kts b/.space.kts index 5232ea86..7f8de2a8 100644 --- a/.space.kts +++ b/.space.kts @@ -21,6 +21,7 @@ job("Release") { shellScript { content = """ + apt-get update && apt-get install -y openjdk-8-jdk ./gradlew build publish """ } From f22a0645a9fd64c574bb1aac00b9268b419595b9 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 19:45:29 +0300 Subject: [PATCH 107/308] install gcc before the build --- .space.kts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.space.kts b/.space.kts index 7f8de2a8..63d0f69b 100644 --- a/.space.kts +++ b/.space.kts @@ -2,6 +2,7 @@ 
job("Test") { container("ubuntu") { shellScript { content = """ + apt-get update && apt-get install -y openjdk-8-jdk gcc ./gradlew test """ } @@ -21,7 +22,7 @@ job("Release") { shellScript { content = """ - apt-get update && apt-get install -y openjdk-8-jdk + apt-get update && apt-get install -y openjdk-8-jdk gcc ./gradlew build publish """ } From 285e5a589b3153d3b75715ee4bb3cd2362670e81 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 19:53:16 +0300 Subject: [PATCH 108/308] install g++, not gcc, before the build --- .space.kts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.space.kts b/.space.kts index 63d0f69b..88c22abd 100644 --- a/.space.kts +++ b/.space.kts @@ -2,7 +2,7 @@ job("Test") { container("ubuntu") { shellScript { content = """ - apt-get update && apt-get install -y openjdk-8-jdk gcc + apt-get update && apt-get install -y openjdk-8-jdk g++ ./gradlew test """ } @@ -22,7 +22,7 @@ job("Release") { shellScript { content = """ - apt-get update && apt-get install -y openjdk-8-jdk gcc + apt-get update && apt-get install -y openjdk-8-jdk g++ ./gradlew build publish """ } From 71f8239912d4e4ae6fb01c6f2862b183786d0ca6 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 19:53:48 +0300 Subject: [PATCH 109/308] fix readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e98798f9..a9cc945d 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ Parse all files written in specified language into ASTs, split into methods, and #### Import -Astminer is available in [Bintray repo](https://bintray.com/egor-bogomolov/astminer/astminer). You can add the dependency in your `build.gradle` file: +Astminer is available in the JetBrains Space package repository. 
You can add the dependency in your `build.gradle` file: ``` repositories { maven { From b218627eebad392a0704e50237ebd99b73219c09 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 29 Apr 2021 20:00:24 +0300 Subject: [PATCH 110/308] Install build-essential --- .space.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.space.kts b/.space.kts index 88c22abd..0c326f62 100644 --- a/.space.kts +++ b/.space.kts @@ -22,7 +22,7 @@ job("Release") { shellScript { content = """ - apt-get update && apt-get install -y openjdk-8-jdk g++ + apt-get update && apt-get install -y openjdk-8-jdk build-essential ./gradlew build publish """ } From 7fd6c47965877e21d6081d199d894c17b41b509d Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 29 Apr 2021 20:08:48 +0300 Subject: [PATCH 111/308] Return installing g++ --- .space.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.space.kts b/.space.kts index 0c326f62..88c22abd 100644 --- a/.space.kts +++ b/.space.kts @@ -22,7 +22,7 @@ job("Release") { shellScript { content = """ - apt-get update && apt-get install -y openjdk-8-jdk build-essential + apt-get update && apt-get install -y openjdk-8-jdk g++ ./gradlew build publish """ } From 9ea602c67097972e179b0f1905130d25d529ac41 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 29 Apr 2021 20:20:12 +0300 Subject: [PATCH 112/308] Specify version in gradle --- build.gradle.kts | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 160e49d7..35855440 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,6 +1,7 @@ import tanvd.kosogor.proxy.shadowJar -val version = "0.6.0" +group = "io.github.vovak.astminer" +version = "0.6.0" plugins { id("java") @@ -103,14 +104,6 @@ jmh { } publishing { - publications { - create("maven") { - groupId = "io.github.vovak" - artifactId = "astminer" - version = version - from(components["java"]) - } - } repositories { maven { url = 
uri("https://packages.jetbrains.team/maven/p/astminer/astminer") From e22d3754688e1fc7d2609012bc9498ed32118826 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 20:36:38 +0300 Subject: [PATCH 113/308] specify version explicitly --- build.gradle.kts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/build.gradle.kts b/build.gradle.kts index 35855440..7aaee176 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -104,6 +104,14 @@ jmh { } publishing { + publications { + create("maven") { + groupId = "io.github.vovak" + artifactId = "astminer" + version = "0.6.0" + from(components["java"]) + } + } repositories { maven { url = uri("https://packages.jetbrains.team/maven/p/astminer/astminer") @@ -118,7 +126,7 @@ publishing { application.mainClassName = "astminer.MainKt" shadowJar { jar { - archiveName = "astminer-$version.jar" + archiveName = "astminer-0.6.0.jar" mainClass = "astminer.MainKt" } }.apply { From 799ed4e07b922d383336ee9a30a8dc6b3b75e333 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 29 Apr 2021 20:44:24 +0300 Subject: [PATCH 114/308] update version in README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a9cc945d..5715387b 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ repositories { } dependencies { - compile 'io.github.vovak.astminer:astminer:0.6' + compile 'io.github.vovak.astminer:astminer:0.6.0' } ``` @@ -87,7 +87,7 @@ repositories { } dependencies { - compile("io.github.vovak.astminer", "astminer", "0.6") + compile("io.github.vovak.astminer", "astminer", "0.6.0") } ``` From e9206ea9093691851bc59afab872659b9ae4dad5 Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 30 Apr 2021 08:57:51 +0500 Subject: [PATCH 115/308] saved refactored label extractors --- .../java/astminer/examples/AllJavaFiles.java | 5 +--- .../kotlin/astminer/cli/LabelExtractors.kt | 10 +------- .../astminer/common/model/ParsingModel.kt | 2 +- 
.../kotlin/astminer/examples/AllJavaFiles.kt | 4 ++-- .../astminer/examples/AllJavaMethods.kt | 4 ++-- .../astminer/examples/AllPythonMethods.kt | 4 ++-- .../astminer/examples/Code2VecJavaMethods.kt | 4 ++-- .../astminer/problem/FileLevelProblems.kt | 20 ++++++++++++++++ .../astminer/problem/FunctionLevelProblems.kt | 23 +++++++++++++++++++ .../kotlin/astminer/problem/LabeledResult.kt | 13 +++++++++++ src/main/kotlin/astminer/storage/Storage.kt | 2 +- .../astminer/storage/ast/CsvAstStorage.kt | 2 +- .../astminer/storage/ast/DotAstStorage.kt | 2 +- .../astminer/storage/path/PathBasedStorage.kt | 2 +- src/test/kotlin/astminer/common/TestUtils.kt | 2 +- 15 files changed, 72 insertions(+), 27 deletions(-) create mode 100644 src/main/kotlin/astminer/problem/FileLevelProblems.kt create mode 100644 src/main/kotlin/astminer/problem/FunctionLevelProblems.kt create mode 100644 src/main/kotlin/astminer/problem/LabeledResult.kt diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 5f705206..7dfa2d08 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -1,6 +1,6 @@ package astminer.examples; -import astminer.cli.LabeledResult; +import astminer.problem.LabeledResult; import astminer.common.model.*; import astminer.parse.gumtree.java.GumTreeJavaParser; import astminer.storage.*; @@ -30,9 +30,6 @@ public static void runExample() { @Override public FileVisitResult visitFile(@NotNull Path file, BasicFileAttributes attributes) throws IOException { Node fileTree = new GumTreeJavaParser().parseInputStream(new FileInputStream(file.toFile())); - if (fileTree == null) { - return FileVisitResult.CONTINUE; - } String filePath = file.toAbsolutePath().toString(); pathStorage.store(new LabeledResult<>(fileTree, filePath, filePath)); diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 
09b03441..1e164560 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -5,6 +5,7 @@ import astminer.common.model.ParseResult import astminer.common.model.FunctionInfo import astminer.common.preOrder import astminer.common.setTechnicalToken +import astminer.problem.LabeledResult import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.javascript.JavaScriptMethodSplitter @@ -17,15 +18,6 @@ import astminer.parse.gumtree.python.GumTreePythonMethodSplitter import java.io.File -/** - * An AST subtree with a label and the path of the source file. - * @property root The root of the AST subtree. - * @property label Any label for this subtree. - * @property filePath The path to the source file where the AST is from. - */ -data class LabeledResult(val root: T, val label: String, val filePath: String) - - interface LabelExtractor { fun toLabeledData(parseResult: ParseResult): List> } diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 52c8d0b2..e99cc0af 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -1,6 +1,6 @@ package astminer.common.model -import astminer.cli.LabeledResult +import astminer.problem.LabeledResult import astminer.parse.ParsingException import java.io.File import java.io.InputStream diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index be342c49..69f7781d 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -1,6 +1,6 @@ package astminer.examples -import astminer.cli.LabeledResult +import astminer.problem.LabeledResult import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser import 
astminer.storage.path.Code2VecPathStorage @@ -15,7 +15,7 @@ fun allJavaFiles() { val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) File(inputDir).forFilesWithSuffix("11.java") { file -> - val node = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix + val node = JavaParser().parseInputStream(file.inputStream()) node.prettyPrint() JavaMethodSplitter().splitIntoMethods(node).forEach { println(it.name) diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 28029dbf..61000d60 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -1,6 +1,6 @@ package astminer.examples -import astminer.cli.LabeledResult +import astminer.problem.LabeledResult import astminer.common.model.FunctionInfo import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.java.GumTreeJavaParser @@ -29,7 +29,7 @@ fun allJavaMethods() { File(inputDir).forFilesWithSuffix(".java") { file -> //parse file - val fileNode = GumTreeJavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix + val fileNode = GumTreeJavaParser().parseInputStream(file.inputStream()) //extract method nodes val methodNodes = GumTreeJavaMethodSplitter().splitIntoMethods(fileNode) diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index cfc55ef4..d36d99a1 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -1,6 +1,6 @@ package astminer.examples -import astminer.cli.LabeledResult +import astminer.problem.LabeledResult import astminer.common.model.FunctionInfo import astminer.parse.gumtree.python.GumTreePythonMethodSplitter import astminer.parse.gumtree.GumTreeNode @@ -24,7 +24,7 @@ fun allPythonMethods() { File(inputDir).forFilesWithSuffix(".py") { file -> // parse 
file - val fileNode = GumTreePythonParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix + val fileNode = GumTreePythonParser().parseInputStream(file.inputStream()) // extract method nodes val methodNodes = GumTreePythonMethodSplitter().splitIntoMethods(fileNode) diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 60a3d7af..a914b65f 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -1,6 +1,6 @@ package astminer.examples -import astminer.cli.LabeledResult +import astminer.problem.LabeledResult import astminer.cli.MethodNameExtractor import astminer.common.* import astminer.parse.antlr.java.JavaMethodSplitter @@ -21,7 +21,7 @@ fun code2vecJavaMethods() { File(folder).forFilesWithSuffix(".java") { file -> //parse file - val fileNode = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix + val fileNode = JavaParser().parseInputStream(file.inputStream()) //extract method nodes val methods = JavaMethodSplitter().splitIntoMethods(fileNode) diff --git a/src/main/kotlin/astminer/problem/FileLevelProblems.kt b/src/main/kotlin/astminer/problem/FileLevelProblems.kt new file mode 100644 index 00000000..b537e09d --- /dev/null +++ b/src/main/kotlin/astminer/problem/FileLevelProblems.kt @@ -0,0 +1,20 @@ +package astminer.problem + +import astminer.common.model.Node +import astminer.common.model.ParseResult +import java.io.File + +interface FileLevelProblem { + fun process(parseResult: ParseResult): LabeledResult? +} + +object FilePathExtractor : FileLevelProblem { + override fun process(parseResult: ParseResult): LabeledResult = parseResult.labeledWithFilePath() +} + +class FolderExtractor : FileLevelProblem { + override fun process(parseResult: ParseResult): LabeledResult? 
{ + val folderName = File(parseResult.filePath).parentFile.name ?: return null + return parseResult.labeledWith(folderName) + } +} diff --git a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt new file mode 100644 index 00000000..728f6ebb --- /dev/null +++ b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt @@ -0,0 +1,23 @@ +package astminer.problem + +import astminer.common.model.FunctionInfo +import astminer.common.model.Node +import astminer.common.preOrder +import astminer.common.setTechnicalToken + +interface FunctionLevelProblem { + fun process(functionInfo: FunctionInfo): LabeledResult? +} + +object MethodNameExtractor : FunctionLevelProblem { + override fun process(functionInfo: FunctionInfo): LabeledResult? { + val name = functionInfo.name ?: return null + functionInfo.root.preOrder().forEach { node -> + if (node.getToken() == name) { + node.setTechnicalToken("SELF") + } + } + functionInfo.nameNode?.setTechnicalToken("METHOD_NAME") + return LabeledResult(functionInfo.root, name, functionInfo.filePath) + } +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/problem/LabeledResult.kt b/src/main/kotlin/astminer/problem/LabeledResult.kt new file mode 100644 index 00000000..3fc49fe9 --- /dev/null +++ b/src/main/kotlin/astminer/problem/LabeledResult.kt @@ -0,0 +1,13 @@ +package astminer.problem + +import astminer.common.model.Node + +/** + * An AST subtree with a label and the path of the source file. + * @property root The root of the AST subtree. + * @property label Any label for this subtree. + * @property filePath The path to the source file where the AST is from. 
+ */ +data class LabeledResult(val root: T, val label: String, val filePath: String) + + diff --git a/src/main/kotlin/astminer/storage/Storage.kt b/src/main/kotlin/astminer/storage/Storage.kt index c7c26032..117bcddc 100644 --- a/src/main/kotlin/astminer/storage/Storage.kt +++ b/src/main/kotlin/astminer/storage/Storage.kt @@ -1,6 +1,6 @@ package astminer.storage -import astminer.cli.LabeledResult +import astminer.problem.LabeledResult import astminer.common.model.Node import java.io.Closeable diff --git a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt index 2e6b8cab..27c3d34b 100644 --- a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -1,6 +1,6 @@ package astminer.storage.ast -import astminer.cli.LabeledResult +import astminer.problem.LabeledResult import astminer.common.model.Node import astminer.common.preOrder import astminer.common.storage.* diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index 087b7149..310b87a2 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -1,6 +1,6 @@ package astminer.storage.ast -import astminer.cli.LabeledResult +import astminer.problem.LabeledResult import astminer.common.model.Node import astminer.common.preOrder import astminer.common.storage.RankedIncrementalIdStorage diff --git a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index e998543c..ec8ec0e5 100644 --- a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -1,6 +1,6 @@ package astminer.storage.path -import astminer.cli.LabeledResult +import astminer.problem.LabeledResult import astminer.common.model.* import astminer.common.storage.* import 
astminer.paths.PathMiner diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index b28e7f02..4751bb99 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -1,6 +1,6 @@ package astminer.common -import astminer.cli.LabeledResult +import astminer.problem.LabeledResult import astminer.common.model.Node From c3dd34e980c7360155bb9b41705bccc7beaeb03c Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 30 Apr 2021 17:40:22 +0500 Subject: [PATCH 116/308] pipeline sketch --- .../kotlin/astminer/cli/LabelExtractors.kt | 14 +++--- .../astminer/common/model/HandlerModel.kt | 5 +- .../kotlin/astminer/filters/CommonFilters.kt | 18 ++++++-- .../kotlin/astminer/filters/FileFilters.kt | 4 +- src/main/kotlin/astminer/filters/Filter.kt | 5 ++ .../astminer/filters/FunctionFilters.kt | 45 ++++++++++++++++++ .../kotlin/astminer/filters/MethodFilters.kt | 45 ------------------ src/main/kotlin/astminer/pipeline/Pipeline.kt | 41 +++++++++++++++++ .../astminer/pipeline/PipelineFrontend.kt | 46 +++++++++++++++++++ .../astminer/problem/FileLevelProblems.kt | 12 ++--- .../astminer/problem/FunctionLevelProblems.kt | 16 +++---- src/main/kotlin/astminer/problem/Problem.kt | 7 +++ 12 files changed, 183 insertions(+), 75 deletions(-) create mode 100644 src/main/kotlin/astminer/filters/Filter.kt create mode 100644 src/main/kotlin/astminer/filters/FunctionFilters.kt delete mode 100644 src/main/kotlin/astminer/filters/MethodFilters.kt create mode 100644 src/main/kotlin/astminer/pipeline/Pipeline.kt create mode 100644 src/main/kotlin/astminer/pipeline/PipelineFrontend.kt create mode 100644 src/main/kotlin/astminer/problem/Problem.kt diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 8330d828..0a7206ad 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -6,7 +6,7 @@ 
import astminer.common.model.FunctionInfo import astminer.common.preOrder import astminer.common.setTechnicalToken import astminer.problem.LabeledResult -import astminer.filters.MethodFilter +import astminer.filters.FunctionFilter import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.javascript.JavaScriptMethodSplitter @@ -37,9 +37,9 @@ abstract class FileLabelExtractor : LabelExtractor { } abstract class MethodLabelExtractor( - open val filterPredicates: Collection = emptyList(), - open val javaParser: String = "gumtree", - open val pythonParser: String = "antlr" + open val filterPredicates: Collection = emptyList(), + open val javaParser: String = "gumtree", + open val pythonParser: String = "antlr" ) : LabelExtractor { override fun toLabeledData( @@ -114,9 +114,9 @@ class FolderExtractor : FileLabelExtractor() { } class MethodNameExtractor( - override val filterPredicates: Collection = emptyList(), - override val javaParser: String = "gumtree", - override val pythonParser: String = "antlr" + override val filterPredicates: Collection = emptyList(), + override val javaParser: String = "gumtree", + override val pythonParser: String = "antlr" ) : MethodLabelExtractor(filterPredicates, javaParser, pythonParser) { override fun extractLabel(functionInfo: FunctionInfo, filePath: String): String? 
{ diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index 6f156ba3..54e03355 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -6,12 +6,11 @@ interface HandlerFactory { fun createHandler(file: File): LanguageHandler } -abstract class LanguageHandler { +abstract class LanguageHandler { abstract val parseResult: ParseResult protected abstract val splitter: TreeMethodSplitter fun splitIntoMethods(): Collection> { - val root = parseResult.root ?: return emptyList() - return splitter.splitIntoMethods(root) + return splitter.splitIntoMethods(parseResult.root) } } diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt index 547ad711..2a2a7007 100644 --- a/src/main/kotlin/astminer/filters/CommonFilters.kt +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -5,7 +5,7 @@ import astminer.common.model.Node import astminer.common.model.ParseResult import astminer.featureextraction.treeSize -class TreeSizeFilterPredicate(private val maxSize: Int) : MethodFilter, FileFilter { +abstract class TreeSizeFilterPredicate(private val maxSize: Int) : Filter { private fun isTreeFiltered(root: Node): Boolean { return if (maxSize == -1) { true @@ -14,8 +14,18 @@ class TreeSizeFilterPredicate(private val maxSize: Int) : MethodFilter, FileFilt } } - override fun isFiltered(parseResult: ParseResult) = - if (parseResult.root != null) isTreeFiltered(parseResult.root) else false + protected abstract val T.tree: Node - override fun isFiltered(functionInfo: FunctionInfo) = isTreeFiltered(functionInfo.root) + override fun isFiltered(entity: T) = isTreeFiltered(entity.tree) +} + +class FileTreeSizeFilterPredicate(maxSize: Int) : TreeSizeFilterPredicate>(maxSize), FileFilter { + override val ParseResult.tree: Node + get() = root +} + +class FunctionTreeSizeFilterPredicate(maxSize: Int) 
: TreeSizeFilterPredicate>(maxSize), + FunctionFilter { + override val FunctionInfo.tree: Node + get() = root } diff --git a/src/main/kotlin/astminer/filters/FileFilters.kt b/src/main/kotlin/astminer/filters/FileFilters.kt index ac1dc126..bab9cf3c 100644 --- a/src/main/kotlin/astminer/filters/FileFilters.kt +++ b/src/main/kotlin/astminer/filters/FileFilters.kt @@ -3,6 +3,6 @@ package astminer.filters import astminer.common.model.Node import astminer.common.model.ParseResult -interface FileFilter { - fun isFiltered(parseResult: ParseResult): Boolean +interface FileFilter: Filter> { + override fun isFiltered(entity: ParseResult): Boolean } diff --git a/src/main/kotlin/astminer/filters/Filter.kt b/src/main/kotlin/astminer/filters/Filter.kt new file mode 100644 index 00000000..b50e30b6 --- /dev/null +++ b/src/main/kotlin/astminer/filters/Filter.kt @@ -0,0 +1,5 @@ +package astminer.filters + +interface Filter { + fun isFiltered(entity: T): Boolean +} diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt new file mode 100644 index 00000000..1592f35a --- /dev/null +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -0,0 +1,45 @@ +package astminer.filters + +import astminer.common.model.FunctionInfo +import astminer.common.model.Node +import astminer.common.preOrder +import astminer.common.splitToSubtokens + +interface FunctionFilter : Filter> { + override fun isFiltered(entity: FunctionInfo): Boolean +} + +class ModifierFilterPredicate(private val excludeModifiers: List) : FunctionFilter { + override fun isFiltered(entity: FunctionInfo): Boolean = + !excludeModifiers.any { modifier -> modifier in entity.modifiers } +} + +class AnnotationFilterPredicate(private val excludeAnnotations: List) : FunctionFilter { + override fun isFiltered(entity: FunctionInfo): Boolean = + !excludeAnnotations.any { annotation -> annotation in entity.annotations } +} + +object ConstructorFilterPredicate : FunctionFilter { 
+ override fun isFiltered(entity: FunctionInfo) = !entity.isConstructor +} + +class MethodNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { + override fun isFiltered(entity: FunctionInfo): Boolean { + return if (maxWordsNumber == -1) { + true + } else { + val name = entity.name + name != null && splitToSubtokens(name).size <= maxWordsNumber + } + } +} + +class MethodAnyNodeWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { + override fun isFiltered(entity: FunctionInfo): Boolean { + return if (maxWordsNumber == -1) { + true + } else { + !entity.root.preOrder().any { node -> splitToSubtokens(node.getToken()).size > maxWordsNumber } + } + } +} diff --git a/src/main/kotlin/astminer/filters/MethodFilters.kt b/src/main/kotlin/astminer/filters/MethodFilters.kt deleted file mode 100644 index c3df2e64..00000000 --- a/src/main/kotlin/astminer/filters/MethodFilters.kt +++ /dev/null @@ -1,45 +0,0 @@ -package astminer.filters - -import astminer.common.model.FunctionInfo -import astminer.common.model.Node -import astminer.common.preOrder -import astminer.common.splitToSubtokens - -interface MethodFilter { - fun isFiltered(functionInfo: FunctionInfo): Boolean -} - -class ModifierFilterPredicate(private val excludeModifiers: List) : MethodFilter { - override fun isFiltered(functionInfo: FunctionInfo): Boolean = - !excludeModifiers.any { modifier -> modifier in functionInfo.modifiers } -} - -class AnnotationFilterPredicate(private val excludeAnnotations: List) : MethodFilter { - override fun isFiltered(functionInfo: FunctionInfo): Boolean = - !excludeAnnotations.any { annotation -> annotation in functionInfo.annotations } -} - -object ConstructorFilterPredicate : MethodFilter { - override fun isFiltered(functionInfo: FunctionInfo) = !functionInfo.isConstructor -} - -class MethodNameWordsNumberFilter(private val maxWordsNumber: Int) : MethodFilter { - override fun isFiltered(functionInfo: FunctionInfo): Boolean { - return if 
(maxWordsNumber == -1) { - true - } else { - val name = functionInfo.name - name != null && splitToSubtokens(name).size <= maxWordsNumber - } - } -} - -class MethodAnyNodeWordsNumberFilter(private val maxWordsNumber: Int) : MethodFilter { - override fun isFiltered(functionInfo: FunctionInfo): Boolean { - return if (maxWordsNumber == -1) { - true - } else { - !functionInfo.root.preOrder().any { node -> splitToSubtokens(node.getToken()).size > maxWordsNumber } - } - } -} diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt new file mode 100644 index 00000000..1ee6eb7d --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -0,0 +1,41 @@ +package astminer.pipeline + +import astminer.common.model.FunctionInfo +import astminer.common.model.Node +import astminer.common.preOrder +import astminer.filters.Filter +import astminer.problem.LabeledResult +import astminer.problem.Problem +import astminer.storage.Storage +import java.io.File + +class Pipeline( + private val frontend: PipelineFrontend, + private val filters: List>, + private val problem: Problem, + private val excludedNodeTypes: List, + private val storage: Storage +) { + + private fun T.passesThroughFilters() = filters.all { filter -> filter.isFiltered(this) } + + private fun LabeledResult.excludeNodes() { + root.preOrder().forEach { node -> + excludedNodeTypes.forEach { node.removeChildrenOfType(it) } + } + } + + fun run(files: List) { + val entities = frontend.parseEntities(files) + + val labeledResults = entities + .filter { functionInfo -> functionInfo.passesThroughFilters() } + .mapNotNull { problem.process(it) } + + for (labeledResult in labeledResults) { + labeledResult.excludeNodes() + } + + storage.store(labeledResults.asIterable()) + } +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt new file mode 100644 index 00000000..e7e0879f --- 
/dev/null +++ b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt @@ -0,0 +1,46 @@ +package astminer.pipeline + +import astminer.common.model.* +import astminer.parse.getHandlerFactory +import java.io.File + +interface PipelineFrontend { + fun parseEntities(files: List): Sequence +} + +abstract class CompositePipelineFrontend(private val parserType: String, extensions: List) : + PipelineFrontend { + + private val handlerFactories = extensions.associateWith { getHandlerFactory(it, parserType) } + + private val File.handler: LanguageHandler? + get() = handlerFactories[extension]?.createHandler(this) + + protected abstract fun LanguageHandler.getEntities(): Sequence + + override fun parseEntities(files: List): Sequence { + return files.asSequence().flatMap { file -> + val handler = file.handler + if (handler != null) { + handler.getEntities() + } else { + println("Failed") + emptySequence() + } + } + } +} + +class FilePipelineFrontend(parserType: String, extensions: List) : + CompositePipelineFrontend>( + parserType, extensions + ) { + override fun LanguageHandler.getEntities(): Sequence> = sequenceOf(parseResult) +} + +class FunctionPipelineFrontend(parserType: String, extensions: List) : + CompositePipelineFrontend>(parserType, extensions) { + + override fun LanguageHandler.getEntities(): Sequence> = + splitIntoMethods().asSequence() +} diff --git a/src/main/kotlin/astminer/problem/FileLevelProblems.kt b/src/main/kotlin/astminer/problem/FileLevelProblems.kt index b537e09d..ffc772f1 100644 --- a/src/main/kotlin/astminer/problem/FileLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FileLevelProblems.kt @@ -4,17 +4,17 @@ import astminer.common.model.Node import astminer.common.model.ParseResult import java.io.File -interface FileLevelProblem { - fun process(parseResult: ParseResult): LabeledResult? +interface FileLevelProblem : Problem> { + override fun process(entity: ParseResult): LabeledResult? 
} object FilePathExtractor : FileLevelProblem { - override fun process(parseResult: ParseResult): LabeledResult = parseResult.labeledWithFilePath() + override fun process(entity: ParseResult): LabeledResult = entity.labeledWithFilePath() } class FolderExtractor : FileLevelProblem { - override fun process(parseResult: ParseResult): LabeledResult? { - val folderName = File(parseResult.filePath).parentFile.name ?: return null - return parseResult.labeledWith(folderName) + override fun process(entity: ParseResult): LabeledResult? { + val folderName = File(entity.filePath).parentFile.name ?: return null + return entity.labeledWith(folderName) } } diff --git a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt index 728f6ebb..00bfba91 100644 --- a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt @@ -5,19 +5,19 @@ import astminer.common.model.Node import astminer.common.preOrder import astminer.common.setTechnicalToken -interface FunctionLevelProblem { - fun process(functionInfo: FunctionInfo): LabeledResult? +interface FunctionLevelProblem : Problem> { + override fun process(entity: FunctionInfo): LabeledResult? } object MethodNameExtractor : FunctionLevelProblem { - override fun process(functionInfo: FunctionInfo): LabeledResult? { - val name = functionInfo.name ?: return null - functionInfo.root.preOrder().forEach { node -> + override fun process(entity: FunctionInfo): LabeledResult? 
{ + val name = entity.name ?: return null + entity.root.preOrder().forEach { node -> if (node.getToken() == name) { node.setTechnicalToken("SELF") } } - functionInfo.nameNode?.setTechnicalToken("METHOD_NAME") - return LabeledResult(functionInfo.root, name, functionInfo.filePath) + entity.nameNode?.setTechnicalToken("METHOD_NAME") + return LabeledResult(entity.root, name, entity.filePath) } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/problem/Problem.kt b/src/main/kotlin/astminer/problem/Problem.kt new file mode 100644 index 00000000..d6576eab --- /dev/null +++ b/src/main/kotlin/astminer/problem/Problem.kt @@ -0,0 +1,7 @@ +package astminer.problem + +import astminer.common.model.Node + +interface Problem { + fun process(entity: T): LabeledResult? +} From 51277f57427f33301bbd0b09bc028d12e655b754 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Fri, 30 Apr 2021 16:58:54 +0300 Subject: [PATCH 117/308] Fix kotlin warnings --- .../astminer/featureextraction/TreeFeature.kt | 2 +- src/main/kotlin/astminer/paths/PathWorker.kt | 2 ++ .../kotlin/astminer/cli/Code2VecExtractorTest.kt | 7 ++++--- .../astminer/cli/PathContextsExtractorTest.kt | 7 ++++--- .../featureextraction/TreeFeatureTest.kt | 16 ++++++++-------- .../featureextraction/TreeFeatureTestUtil.kt | 6 +++--- .../featureextraction/TreeFeatureTestUtilTest.kt | 2 +- .../kotlin/astminer/parse/antlr/AntrlUtilTest.kt | 4 ++-- 8 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt index 7a7401f0..c34c8d10 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt +++ b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt @@ -21,7 +21,7 @@ interface TreeFeature { */ object Depth : TreeFeature { override fun compute(tree: Node): Int { - val max = tree.getChildren().map { compute(it) }.max() ?: 0 + val max = tree.getChildren().map { compute(it) }.maxOrNull() 
?: 0 return max + 1 } } diff --git a/src/main/kotlin/astminer/paths/PathWorker.kt b/src/main/kotlin/astminer/paths/PathWorker.kt index ad1bceff..a4d83a1e 100644 --- a/src/main/kotlin/astminer/paths/PathWorker.kt +++ b/src/main/kotlin/astminer/paths/PathWorker.kt @@ -15,6 +15,8 @@ class PathWorker { this.setMetadata(PATH_PIECES_KEY, pathPieces) } + // In runtime all generics upcast to upper bound, therefore it's impossible to check type inside List + @Suppress("UNCHECKED_CAST") private fun Node.getPathPieces(): List? = this.getMetadata(PATH_PIECES_KEY) as List? } diff --git a/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt b/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt index eb69e393..ab46a4b7 100644 --- a/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt +++ b/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt @@ -5,6 +5,7 @@ import astminer.cli.util.languagesToString import astminer.cli.util.verifyPathContextExtraction import org.junit.Test import java.io.File +import java.nio.file.Files.createTempDirectory internal class Code2VecExtractorTest { private val testDataDir = File("src/test/resources") @@ -12,14 +13,14 @@ internal class Code2VecExtractorTest { @Test fun testDefaultExtraction() { - val extractedDataDir = createTempDir("extractedData") + val extractedDataDir = createTempDirectory("extractedData") val languages = listOf("java", "py") - val cliArgs = CliArgs.Builder(testDataDir, extractedDataDir) + val cliArgs = CliArgs.Builder(testDataDir, extractedDataDir.toFile()) .extensions(languagesToString(languages)) .build() code2VecExtractor.main(cliArgs.args) - verifyPathContextExtraction(extractedDataDir, languages, false) + verifyPathContextExtraction(extractedDataDir.toFile(), languages, false) } } diff --git a/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt b/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt index 39adb422..f98bbe24 100644 --- a/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt +++ 
b/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt @@ -5,6 +5,7 @@ import astminer.cli.util.languagesToString import astminer.cli.util.verifyPathContextExtraction import org.junit.Test import java.io.File +import java.nio.file.Files.createTempDirectory internal class PathContextsExtractorTest { private val testDataDir = File("src/test/resources") @@ -12,13 +13,13 @@ internal class PathContextsExtractorTest { @Test fun testDefaultExtraction() { - val extractedDataDir = createTempDir("extractedData") + val extractedDataDir = createTempDirectory("extractedData") val languages = listOf("java", "py") - val cliArgs = CliArgs.Builder(testDataDir, extractedDataDir) + val cliArgs = CliArgs.Builder(testDataDir, extractedDataDir.toFile()) .extensions(languagesToString(languages)) .build() pathContextsExtractor.main(cliArgs.args) - verifyPathContextExtraction(extractedDataDir, languages, false) + verifyPathContextExtraction(extractedDataDir.toFile(), languages, false) } } \ No newline at end of file diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTest.kt b/src/test/kotlin/astminer/featureextraction/TreeFeatureTest.kt index 97167ba4..2183c686 100644 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTest.kt +++ b/src/test/kotlin/astminer/featureextraction/TreeFeatureTest.kt @@ -9,42 +9,42 @@ class TreeFeatureTest { @Test fun testDepthFeature() { val printedTree = File("src/test/resources/featureextraction/prettyTree.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! + val tree: PrettyNode = restoreFromPrettyPrint(printedTree) Assert.assertEquals(4, Depth.compute(tree)) } @Test fun testNumberOfNodes() { val printedTree = File("src/test/resources/featureextraction/prettyTree.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! 
+ val tree: PrettyNode = restoreFromPrettyPrint(printedTree) Assert.assertEquals(12, NumberOfNodes.compute(tree)) } @Test fun testBranchingFactorOfLeaf() { val printedTree = File("src/test/resources/featureextraction/prettyLeaf.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! + val tree: PrettyNode = restoreFromPrettyPrint(printedTree) Assert.assertTrue(BranchingFactor.compute(tree) == 0.0) } @Test fun testBranchingFactor() { val printedTree = File("src/test/resources/featureextraction/prettyTree_bf.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! + val tree: PrettyNode = restoreFromPrettyPrint(printedTree) Assert.assertTrue(BranchingFactor.compute(tree) == 2.0) } @Test fun testCompressiblePathLengthsInLeaf() { val printedTree = File("src/test/resources/featureextraction/prettyLeaf.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! + val tree: PrettyNode = restoreFromPrettyPrint(printedTree) Assert.assertEquals(listOf(), CompressiblePathLengths.compute(tree)) } @Test fun testCompressiblePathLengths() { val printedTree = File("src/test/resources/featureextraction/prettyTree_paths.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! + val tree: PrettyNode = restoreFromPrettyPrint(printedTree) val expected = listOf(4, 1, 2, 2, 4, 4, 5).sorted() Assert.assertEquals(expected, CompressiblePathLengths.compute(tree).sorted()) } @@ -52,7 +52,7 @@ class TreeFeatureTest { @Test fun testNodeTypes() { val printedTree = File("src/test/resources/featureextraction/prettyTree.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! 
+ val tree: PrettyNode = restoreFromPrettyPrint(printedTree) val expected = listOf("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11").sorted() Assert.assertEquals(expected, NodeTypes.compute(tree).sorted()) } @@ -60,7 +60,7 @@ class TreeFeatureTest { @Test fun testTokens() { val printedTree = File("src/test/resources/featureextraction/prettyTree.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! + val tree: PrettyNode = restoreFromPrettyPrint(printedTree) val expected = listOf("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l").sorted() Assert.assertEquals(expected, Tokens.compute(tree).sorted()) } diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt index c0558bd6..caf5d915 100644 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt +++ b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt @@ -22,9 +22,9 @@ class PrettyNode(private val type: String, private val token: String) : Node { repeat(indent) { append(indentSymbol) } append(getTypeLabel()) if (getToken().isNotEmpty()) { - appendln(" : ${getToken()}") + appendLine(" : ${getToken()}") } else { - appendln() + appendLine() } getChildren().forEach { append(it.toPrettyString(indent + 1, indentSymbol)) } toString() @@ -46,7 +46,7 @@ class PrettyNode(private val type: String, private val token: String) : Node { } -fun restoreFromPrettyPrint(prettyPrintedTree: String, indentSymbol: String = "--") : PrettyNode? 
{ +fun restoreFromPrettyPrint(prettyPrintedTree: String, indentSymbol: String = "--") : PrettyNode { val lastNodeByIndent = HashMap() val tree = prettyPrintedTree.lines().map { s -> val (node, indent) = restorePrintedNode(s, indentSymbol) diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt index 2ca6fce2..25e451f9 100644 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt +++ b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt @@ -25,7 +25,7 @@ class TreeFeatureTestUtilTest { node7.setParent(node3) val prettyTree = node0.toPrettyString() - val restoredTree = restoreFromPrettyPrint(prettyTree)!! + val restoredTree = restoreFromPrettyPrint(prettyTree) val prettyRestoredTree = restoredTree.toPrettyString() Assert.assertEquals(prettyTree, prettyRestoredTree) diff --git a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt index 189158ed..61355a17 100644 --- a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt @@ -15,8 +15,8 @@ class AntrlUtilTest { val node = parser.parseInputStream(FileInputStream(file)) var adoptedNodesSize = 0 - node?.preOrder()?.forEach { node -> - adoptedNodesSize += node.getChildren().filter { it.getParent() != node }.size + node?.preOrder()?.forEach { curNode -> + adoptedNodesSize += curNode.getChildren().filter { it.getParent() != curNode }.size } Assert.assertEquals("There should be no children with different parent", 0, adoptedNodesSize) } From f83ac9e6c0b5e5c281270bf4bec6cd52215d5f11 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Fri, 30 Apr 2021 17:07:18 +0300 Subject: [PATCH 118/308] Use default properties for group and version --- build.gradle.kts | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 
7aaee176..870baa6e 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,7 +1,7 @@ import tanvd.kosogor.proxy.shadowJar -group = "io.github.vovak.astminer" -version = "0.6.0" +group = "io.github.vovak" +version = "0.6.1" plugins { id("java") @@ -52,7 +52,7 @@ sourceSets["main"].java.srcDir(file(generatedSourcesPath)) idea.module.generatedSourceDirs.add(file(generatedSourcesPath)) tasks.generateGrammarSource { - maxHeapSize = "64m" +// maxHeapSize = "64m" arguments = arguments + listOf("-package", "me.vovak.antlr.parser") // Keep a copy of generated sources doLast { @@ -104,14 +104,6 @@ jmh { } publishing { - publications { - create("maven") { - groupId = "io.github.vovak" - artifactId = "astminer" - version = "0.6.0" - from(components["java"]) - } - } repositories { maven { url = uri("https://packages.jetbrains.team/maven/p/astminer/astminer") @@ -126,8 +118,7 @@ publishing { application.mainClassName = "astminer.MainKt" shadowJar { jar { - archiveName = "astminer-0.6.0.jar" - mainClass = "astminer.MainKt" + archiveName = "astminer-$version.jar" } }.apply { task.archiveClassifier.set("") From ccc34d27af8145693aef32e08d9611bd0db4dc2d Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 1 May 2021 11:24:34 +0300 Subject: [PATCH 119/308] bug fixed and test added --- .../kotlin/astminer/parse/antlr/AntlrUtil.kt | 9 +++++++++ .../parse/antlr/java/AntlrJavaFunctionInfo.kt | 18 +++--------------- .../antlr/python/AntlrPythonFunctionInfo.kt | 7 ++----- .../antlr/python/PythonMethodSplitterTest.kt | 12 +++++++++++- .../methodSplitting/testMethodSplitting.py | 3 +++ 5 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index bb7959d4..07a297b4 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -87,4 +87,13 @@ fun Node.hasFirstLabel(label: String): Boolean { fun Node.firstLabelIn(labels: List): 
Boolean { return labels.contains(decompressTypeLabel(this.getTypeLabel()).first()) +} + +fun Node.getTokensFromSubtree(): String { + if (isLeaf()) { + return getToken() + } + return getChildren().joinToString(separator = "") { child -> + child.getTokensFromSubtree() + } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt index ab237d35..87eabd8a 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -1,10 +1,7 @@ package astminer.parse.antlr.java import astminer.common.model.* -import astminer.parse.antlr.AntlrNode -import astminer.parse.antlr.firstLabelIn -import astminer.parse.antlr.hasLastLabel -import astminer.parse.antlr.lastLabelIn +import astminer.parse.antlr.* class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo { override val nameNode: AntlrNode? = collectNameNode() @@ -32,7 +29,7 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo? 
{ @@ -66,21 +63,12 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo - getTokensFromSubtree(child) - } - } } diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt index 65605f40..3446b751 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -1,10 +1,7 @@ package astminer.parse.antlr.python import astminer.common.model.* -import astminer.parse.antlr.AntlrNode -import astminer.parse.antlr.decompressTypeLabel -import astminer.parse.antlr.hasLastLabel -import astminer.parse.antlr.lastLabelIn +import astminer.parse.antlr.* class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo { override val nameNode: AntlrNode? = collectNameNode() @@ -59,7 +56,7 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo Date: Sat, 1 May 2021 11:29:27 +0300 Subject: [PATCH 120/308] little renaming --- .../parse/antlr/python/AntlrPythonFunctionInfo.kt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt index 3446b751..2a347413 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -9,8 +9,8 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo? 
= collectEnclosingElement() companion object { - private const val METHOD_NODE = "funcdef" - private const val METHOD_NAME_NODE = "NAME" + private const val FUNCTION_NODE = "funcdef" + private const val FUNCTION_NAME_NODE = "NAME" private const val CLASS_DECLARATION_NODE = "classdef" private const val CLASS_NAME_NODE = "NAME" @@ -24,12 +24,12 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo { @@ -69,7 +69,7 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo EnclosingElementType.Class - enclosingNode.hasLastLabel(METHOD_NODE) -> { + enclosingNode.hasLastLabel(FUNCTION_NODE) -> { when { enclosingNode.isMethod() -> EnclosingElementType.Method else -> EnclosingElementType.Function @@ -79,7 +79,7 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo enclosingNode.getChildOfType(CLASS_NAME_NODE) - EnclosingElementType.Method, EnclosingElementType.Function -> enclosingNode.getChildOfType(METHOD_NAME_NODE) + EnclosingElementType.Method, EnclosingElementType.Function -> enclosingNode.getChildOfType(FUNCTION_NAME_NODE) else -> throw IllegalStateException("Enclosing node can only be function or class") }?.getToken() return EnclosingElement( From d82c0933b0578bb8aa245b773c6a5754ee2f5259 Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 2 May 2021 19:51:32 +0300 Subject: [PATCH 121/308] AntlrJavaScriptElementInfo added --- .../kotlin/astminer/parse/antlr/AntlrUtil.kt | 19 +++ .../javascript/AntlrJavaScriptElementInfo.kt | 124 ++++++++++++++ .../javascript/JavaScriptMethodSplitter.kt | 161 +----------------- 3 files changed, 148 insertions(+), 156 deletions(-) create mode 100644 src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 07a297b4..6daf38e6 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ 
b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -96,4 +96,23 @@ fun Node.getTokensFromSubtree(): String { return getChildren().joinToString(separator = "") { child -> child.getTokensFromSubtree() } +} + +fun AntlrNode.getItOrChildrenOfType(typeLabel: String) : List { + return if (hasLastLabel(typeLabel)) { + listOf(this) + } else { + this.getChildrenOfType(typeLabel).mapNotNull { it as? AntlrNode } + } +} + +fun AntlrNode.findEnclosingElementBy(condition: (AntlrNode) -> Boolean): AntlrNode? { + return findRecursively(this.getParent() as AntlrNode?, condition) +} + +private fun findRecursively(node: AntlrNode?, condition: (AntlrNode) -> Boolean) : AntlrNode? { + if (node == null || condition(node)) { + return node + } + return findRecursively(node.getParent() as AntlrNode?, condition) } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt new file mode 100644 index 00000000..b82c2271 --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt @@ -0,0 +1,124 @@ +package astminer.parse.antlr.javascript + +import astminer.common.model.* +import astminer.parse.antlr.* + +/** +Base class for describing JavaScript methods, functions or arrow functions. + */ +abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode) : FunctionInfo { + companion object { + private val ENCLOSING_ELEMENT_NODES = + listOf("functionDeclaration", "variableDeclaration", "classDeclaration", "methodDefinition") + private const val ENCLOSING_ELEMENT_NAME_NODE = "Identifier" + + private const val SINGLE_PARAMETER_NODE = "formalParameterArg" + private const val PARAMETER_NAME_NODE = "Identifier" + } + + protected fun collectEnclosingElement(): EnclosingElement? 
{ + val enclosingElement = root.findEnclosingElementBy { + it.containsLabelIn(ENCLOSING_ELEMENT_NODES) + } ?: return null + return EnclosingElement( + type = getEnclosingElementType(enclosingElement), + name = getEnclosingElementName(enclosingElement), + root = enclosingElement + ) + } + + private fun AntlrNode.containsLabelIn(labels: List): Boolean { + return decompressTypeLabel(getTypeLabel()).intersect(labels).isNotEmpty() + } + + private fun getEnclosingElementName(enclosingRoot: AntlrNode?): String? { + return enclosingRoot?.getChildren()?.firstOrNull { + it.hasLastLabel(ENCLOSING_ELEMENT_NAME_NODE) + }?.getToken() + } + + private fun getEnclosingElementType(enclosingRoot: AntlrNode): EnclosingElementType { + return when (decompressTypeLabel(enclosingRoot.getTypeLabel()).last()) { + "functionDeclaration" -> EnclosingElementType.Function + "classDeclaration" -> EnclosingElementType.Class + "methodDefinition" -> EnclosingElementType.Method + "variableDeclaration" -> EnclosingElementType.VariableDeclaration + else -> throw IllegalStateException("Couldn't derive enclosing element type") + } + } + + protected fun collectParameters(): List { + val parametersRoot = getParametersRoot() + return when { + //No parameters found + parametersRoot == null -> emptyList() + + //Have only one parameter, which is indicated only by its name + parametersRoot.hasLastLabel(PARAMETER_NAME_NODE) -> listOf( + MethodInfoParameter(name = parametersRoot.getToken(), type = null) + ) + + //Have many parameters or one indicated not only by it's name + else -> parametersRoot.getItOrChildrenOfType(SINGLE_PARAMETER_NODE).map { + val nameNode = it.getChildOfType(PARAMETER_NAME_NODE) ?: it + MethodInfoParameter(name = nameNode.getToken(), type = null) + } + } + } + + abstract fun getParametersRoot(): AntlrNode? 
+} + +class JavaScriptArrowInfo(override val root: AntlrNode) : AntlrJavaScriptElementInfo(root) { + companion object { + private const val ARROW_NAME_NODE = "Identifier" + private const val ARROW_PARAMETER_NODE = "arrowFunctionParameters" + private const val ARROW_PARAMETER_INNER_NODE = "formalParameterList" + } + + override val enclosingElement: EnclosingElement? = collectEnclosingElement() + override val parameters: List = collectParameters() + override val nameNode: AntlrNode? = root.getChildOfType(ARROW_NAME_NODE) + + override fun getParametersRoot(): AntlrNode? { + val parameterRoot = root.getChildOfType(ARROW_PARAMETER_NODE) + return parameterRoot?.getChildOfType(ARROW_PARAMETER_INNER_NODE) ?: parameterRoot + } +} + +class JavaScriptMethodInfo(override val root: AntlrNode) : AntlrJavaScriptElementInfo(root) { + companion object { + private val METHOD_GETTERS_SETTERS = listOf("getter", "setter") + private const val METHOD_NAME_NODE = "identifierName" + private const val METHOD_PARAMETER_NODE = "formalParameterList" + } + + override val enclosingElement: EnclosingElement? = collectEnclosingElement() + override val parameters: List = collectParameters() + override val nameNode: AntlrNode? = collectNameNode() + + private fun collectNameNode(): AntlrNode? { + val methodNameParent = root.getChildren().firstOrNull { + METHOD_GETTERS_SETTERS.contains(it.getTypeLabel()) + } ?: root + + return methodNameParent.getChildren().firstOrNull { + decompressTypeLabel(it.getTypeLabel()).contains(METHOD_NAME_NODE) + } + } + + override fun getParametersRoot(): AntlrNode? = root.getChildOfType(METHOD_PARAMETER_NODE) +} + +class JavaScriptFunctionInfo(override val root: AntlrNode) : AntlrJavaScriptElementInfo(root) { + companion object { + private const val FUNCTION_NAME_NODE = "Identifier" + private const val FUNCTION_PARAMETER_NODE = "formalParameterList" + } + + override val enclosingElement: EnclosingElement? 
= collectEnclosingElement() + override val parameters: List = collectParameters() + override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION_NAME_NODE) + + override fun getParametersRoot(): AntlrNode? = root.getChildOfType(FUNCTION_PARAMETER_NODE) +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt index 22e3b592..1f2d1a00 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt @@ -17,168 +17,17 @@ class JavaScriptMethodSplitter : TreeMethodSplitter { } override fun splitIntoMethods(root: AntlrNode): Collection> { - val methodRoots: List = root.preOrder().map { node -> + return root.preOrder().mapNotNull { node -> when { - node.isArrowElement() -> ArrowElement(node as AntlrNode) - node.isFunctionElement() -> FunctionElement(node as AntlrNode) - node.isMethodElement() -> MethodElement(node as AntlrNode) + node.isArrowElement() -> JavaScriptArrowInfo(node as AntlrNode) + node.isFunctionElement() -> JavaScriptFunctionInfo(node as AntlrNode) + node.isMethodElement() -> JavaScriptMethodInfo(node as AntlrNode) else -> null } - }.filterNotNull() - return dummyMethodInfos() -// return methodRoots.map { it.getElementInfo() } + } } private fun Node.isArrowElement() = this.getChildOfType(ARROW_NODE) != null private fun Node.isFunctionElement() = this.getChildOfType(FUNCTION_NODE) != null private fun Node.isMethodElement() = decompressTypeLabel(this.getTypeLabel()).last() == METHOD_NODE -} - -/** - Base class for describing JavaScript methods, functions or arrow functions. 
- */ -abstract class JavaScriptElement(private val element: AntlrNode) { - companion object { - private val ENCLOSING_ELEMENT_NODES = listOf("functionDeclaration", "variableDeclaration", "classDeclaration", "methodDefinition") - private const val ENCLOSING_ELEMENT_NAME_NODE = "Identifier" - - private const val SINGLE_PARAMETER_NODE = "formalParameterArg" - private const val PARAMETER_NAME_NODE = "Identifier" - } - - /** - * Gets [element]'s information about its root, name, enclosing elements and list of parameters. - * @return element info - */ - fun getElementInfo() : MethodInfo { - val enclosingRoot = getEnclosingElementRoot(element.getParent() as AntlrNode) - return MethodInfo( - MethodNode(element, null, getElementName()), - ElementNode(enclosingRoot, getEnclosingElementName(enclosingRoot)), - getElementParametersList(getElementParametersRoot()) - ) - } - - /** - * Gets root of [element]'s enclosing element as first one with typeLabel from [ENCLOSING_ELEMENT_NAME_NODE]. - * @param node for checking if it is root of enclosing element - * @return root of enclosing element - */ - open fun getEnclosingElementRoot(node: AntlrNode?): AntlrNode? { - if (node == null || decompressTypeLabel(node.getTypeLabel()).intersect(ENCLOSING_ELEMENT_NODES).isNotEmpty()) { - return node - } - return getEnclosingElementRoot(node.getParent() as? AntlrNode) - } - - /** - * Gets name node of [element]'s enclosing element. - * @param enclosingRoot - root of enclosing element - * @return name node of enclosing element - */ - open fun getEnclosingElementName(enclosingRoot: AntlrNode?) : AntlrNode? { - return enclosingRoot?.getChildren()?.firstOrNull { - decompressTypeLabel(it.getTypeLabel()).last() == ENCLOSING_ELEMENT_NAME_NODE - } - } - - /** - * Gets list of [element]'s parameters by looking for them among [parameterRoot]'s children. 
- * @param parameterRoot - parent node of all parameter's nodes - * @return list of [element]'s parameters - */ - open fun getElementParametersList(parameterRoot: AntlrNode?): List> { - return when { - parameterRoot == null -> emptyList() - parameterRoot.hasLastLabel(PARAMETER_NAME_NODE) -> listOf(ParameterNode(parameterRoot, null, parameterRoot)) - else -> parameterRoot.getItOrChildrenOfType(SINGLE_PARAMETER_NODE).map { - ParameterNode(it, null, it.getItOrChildrenOfType(PARAMETER_NAME_NODE).firstOrNull()) - } - } - } - - private fun Node.hasLastLabel(typeLabel: String): Boolean { - return decompressTypeLabel(getTypeLabel()).last() == typeLabel - } - - private fun AntlrNode.getItOrChildrenOfType(typeLabel: String) : List { - return if (hasLastLabel(typeLabel)) { - listOf(this) - } else { - this.getChildrenOfType(typeLabel).mapNotNull { it as? AntlrNode } - } - } - - /** - * Gets name of [element]. - * @return [element]'s name node - */ - abstract fun getElementName(): AntlrNode? - - /** - * Gets parent node of all [element]'s parameter nodes. - * @return parameters' parent node - */ - abstract fun getElementParametersRoot(): AntlrNode? -} - - -class ArrowElement(private val element: AntlrNode) : JavaScriptElement(element) { - companion object { - private const val ARROW_NAME_NODE = "Identifier" - private const val ARROW_PARAMETER_NODE = "arrowFunctionParameters" - private const val ARROW_PARAMETER_INNER_NODE = "formalParameterList" - } - - override fun getElementName(): AntlrNode? { - return element.getChildren().firstOrNull { - it.getTypeLabel() == ARROW_NAME_NODE - } - } - - override fun getElementParametersRoot(): AntlrNode? 
{ - val parameterRoot = element.getChildOfType(ARROW_PARAMETER_NODE) - return parameterRoot?.getChildOfType(ARROW_PARAMETER_INNER_NODE) ?: parameterRoot - } -} - - -class FunctionElement(private val element: AntlrNode) : JavaScriptElement(element) { - companion object { - private const val FUNCTION_NAME_NODE = "Identifier" - private const val FUNCTION_PARAMETER_NODE = "formalParameterList" - } - - override fun getElementName(): AntlrNode? { - return element.getChildren().firstOrNull { - it.getTypeLabel() == FUNCTION_NAME_NODE - } - } - - override fun getElementParametersRoot(): AntlrNode? { - return element.getChildOfType(FUNCTION_PARAMETER_NODE) - } -} - - -class MethodElement(private val element: AntlrNode) : JavaScriptElement(element) { - companion object { - private val METHOD_GETTERS_SETTERS = listOf("getter", "setter") - private const val METHOD_NAME_NODE = "identifierName" - private const val METHOD_PARAMETER_NODE = "formalParameterList" - } - - override fun getElementName(): AntlrNode? { - val methodNameParent = element.getChildren().firstOrNull { - METHOD_GETTERS_SETTERS.contains(it.getTypeLabel()) - } ?: element - - return methodNameParent.getChildren().firstOrNull { - decompressTypeLabel(it.getTypeLabel()).contains(METHOD_NAME_NODE) - } - } - - override fun getElementParametersRoot(): AntlrNode? 
{ - return element.getChildOfType(METHOD_PARAMETER_NODE) - } } \ No newline at end of file From 948a0c9924f55cb6c4fbd4e3de7f7842ad07c008 Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 2 May 2021 20:24:14 +0300 Subject: [PATCH 122/308] utils refactoring --- .../kotlin/astminer/parse/antlr/AntlrUtil.kt | 20 ++++++++----------- .../parse/antlr/java/JavaMethodSplitter.kt | 2 +- .../antlr/python/PythonMethodSplitter.kt | 3 ++- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 6daf38e6..387dce6e 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -73,21 +73,17 @@ fun compressTree(root: AntlrNode): AntlrNode { fun decompressTypeLabel(typeLabel: String) = typeLabel.split("|") -fun Node.hasLastLabel(label: String): Boolean { - return decompressTypeLabel(this.getTypeLabel()).last() == label -} +fun AntlrNode.lastLabel() = decompressTypeLabel(getTypeLabel()).last() -fun Node.lastLabelIn(labels: List): Boolean { - return labels.contains(decompressTypeLabel(this.getTypeLabel()).last()) -} +fun AntlrNode.firstLabel() = decompressTypeLabel(getTypeLabel()).first() -fun Node.hasFirstLabel(label: String): Boolean { - return decompressTypeLabel(this.getTypeLabel()).first() == label -} +fun AntlrNode.hasLastLabel(label: String): Boolean = lastLabel() == label -fun Node.firstLabelIn(labels: List): Boolean { - return labels.contains(decompressTypeLabel(this.getTypeLabel()).first()) -} +fun AntlrNode.lastLabelIn(labels: List): Boolean = labels.contains(lastLabel()) + +fun AntlrNode.hasFirstLabel(label: String): Boolean = firstLabel() == label + +fun AntlrNode.firstLabelIn(labels: List): Boolean = labels.contains(firstLabel()) fun Node.getTokensFromSubtree(): String { if (isLeaf()) { diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt 
b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt index 6c5194c0..967b3c76 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt @@ -10,7 +10,7 @@ class JavaMethodSplitter : TreeMethodSplitter { override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { - it.hasLastLabel(methodNodeType) + (it as AntlrNode).hasLastLabel(methodNodeType) } return methodRoots.map { AntlrJavaFunctionInfo(it as AntlrNode) } } diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt index cf66b82b..d57d04b2 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt @@ -4,6 +4,7 @@ import astminer.common.* import astminer.common.model.* import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.decompressTypeLabel +import astminer.parse.antlr.hasLastLabel class PythonMethodSplitter : TreeMethodSplitter { @@ -11,7 +12,7 @@ class PythonMethodSplitter : TreeMethodSplitter { override fun splitIntoMethods(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { - decompressTypeLabel(it.getTypeLabel()).last() == methodNode + (it as AntlrNode).hasLastLabel(methodNode) } return methodRoots.map { AntlrPythonFunctionInfo(it as AntlrNode) } } From e4a0ab2bf13ffa383699aa4d126ae5e9b77c8c29 Mon Sep 17 00:00:00 2001 From: illided Date: Mon, 3 May 2021 13:07:03 +0300 Subject: [PATCH 123/308] GumTreeJavaFunctionInfo added --- .../gumtree/java/GumTreeJavaFunctionInfo.kt | 61 +++++++++++++++++++ .../gumtree/java/GumTreeJavaMethodSplitter.kt | 57 +---------------- .../java/GumTreeJavaMethodSplitterTest.kt | 2 + 3 files changed, 64 insertions(+), 56 deletions(-) create mode 100644 
src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt new file mode 100644 index 00000000..9eda2c8e --- /dev/null +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -0,0 +1,61 @@ +package astminer.parse.gumtree.java + +import astminer.common.model.EnclosingElement +import astminer.common.model.EnclosingElementType +import astminer.common.model.FunctionInfo +import astminer.common.model.MethodInfoParameter +import astminer.parse.gumtree.GumTreeNode + +class GumTreeJavaFunctionInfo(override val root: GumTreeNode) : FunctionInfo { + companion object { + private object TypeLabels { + const val simpleName = "SimpleName" + const val typeDeclaration = "TypeDeclaration" + const val singleVariableDeclaration = "SingleVariableDeclaration" + } + } + + override val nameNode: GumTreeNode? = root.getChildOfType(TypeLabels.simpleName) + override val parameters: List = collectParameters() + override val returnType: String? = root.getElementType() + override val enclosingElement: EnclosingElement? = collectEnclosingClass() + + private fun collectEnclosingClass(): EnclosingElement? { + val enclosingClassNode = getEnclosingClassNode(root.getParent() as GumTreeNode) ?: return null + val enclosingClassName = enclosingClassNode.getChildOfType(TypeLabels.simpleName)?.getToken() + return EnclosingElement( + root = enclosingClassNode, + type = EnclosingElementType.Class, + name = enclosingClassName + ) + } + + private fun getEnclosingClassNode(node: GumTreeNode): GumTreeNode? { + if (node.getTypeLabel() == TypeLabels.typeDeclaration) { + return node + } + val parentNode = node.getParent() as? 
GumTreeNode + return parentNode?.let { getEnclosingClassNode(it) } + } + + private fun collectParameters(): List { + val params = root.getChildrenOfType(TypeLabels.singleVariableDeclaration) + return params.map { node -> + MethodInfoParameter( + name = node.getElementName(), + type = node.getElementType() + ) + } + } + + private fun GumTreeNode.getElementName(): String { + return getChildOfType(TypeLabels.simpleName)?.getToken() + ?: throw IllegalStateException("No name found for element") + } + + private fun GumTreeNode.getElementType(): String? { + return getChildren().firstOrNull { it.isTypeNode() }?.getToken() + } + + private fun GumTreeNode.isTypeNode() = getTypeLabel().endsWith("Type") +} diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt index 2f212f4e..0ca3026a 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt @@ -4,71 +4,16 @@ import astminer.common.model.* import astminer.common.preOrder import astminer.parse.gumtree.GumTreeNode -private fun GumTreeNode.isTypeNode() = getTypeLabel().endsWith("Type") - class GumTreeJavaMethodSplitter : TreeMethodSplitter { companion object { private object TypeLabels { const val methodDeclaration = "MethodDeclaration" - const val simpleName = "SimpleName" - const val typeDeclaration = "TypeDeclaration" - const val singleVariableDeclaration = "SingleVariableDeclaration" } } override fun splitIntoMethods(root: GumTreeNode): Collection> { val methodRoots = root.preOrder().filter { it.getTypeLabel() == TypeLabels.methodDeclaration } - return dummyMethodInfos() -// return methodRoots.map { collectMethodInfo(it as GumTreeNode) } - } - - private fun collectMethodInfo(methodNode: GumTreeNode): MethodInfo { - val methodReturnType = getElementType(methodNode) - val methodName = getElementName(methodNode) - - val 
classRoot = getEnclosingClass(methodNode) - val className = classRoot?.let { getElementName(it) } - - val parameters = getParameters(methodNode) - - return MethodInfo( - MethodNode(methodNode, methodReturnType, methodName), - ElementNode(classRoot, className), - parameters - ) - } - - private fun getElementName(node: GumTreeNode) = node.getChildren().map { - it - }.firstOrNull { - it.getTypeLabel() == TypeLabels.simpleName - } - - private fun getElementType(node: GumTreeNode) = node.getChildren().map { - it - }.firstOrNull { - it.isTypeNode() - } - - private fun getEnclosingClass(node: GumTreeNode): GumTreeNode? { - if (node.getTypeLabel() == TypeLabels.typeDeclaration) { - return node - } - val parentNode = node.getParent() as? GumTreeNode - return parentNode?.let { getEnclosingClass(it) } - } - - private fun getParameters(methodNode: GumTreeNode): List> { - val params = methodNode.getChildren().filter { - it.getTypeLabel() == TypeLabels.singleVariableDeclaration - } - return params.map { node -> - ParameterNode( - node, - getElementType(node), - getElementName(node) - ) - }.toList() + return methodRoots.map { GumTreeJavaFunctionInfo(it as GumTreeNode)} } } \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt index 1c966691..de577940 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt @@ -80,4 +80,6 @@ class GumTreeJavaMethodSplitterTest { assertEquals(listOf("int", "SingleFunction"), parameters.map { it.type }.toList()) } } + + //TODO: add more tests } \ No newline at end of file From 658cc70c2ac2a20dd60b7966f1133a455a3a5ea5 Mon Sep 17 00:00:00 2001 From: illided Date: Mon, 3 May 2021 13:08:07 +0300 Subject: [PATCH 124/308] small refactoring --- .../parse/gumtree/java/GumTreeJavaMethodSplitter.kt | 11 
+++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt index 0ca3026a..3f9aa958 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt @@ -5,15 +5,10 @@ import astminer.common.preOrder import astminer.parse.gumtree.GumTreeNode class GumTreeJavaMethodSplitter : TreeMethodSplitter { - - companion object { - private object TypeLabels { - const val methodDeclaration = "MethodDeclaration" - } - } + private val methodDeclaration = "MethodDeclaration" override fun splitIntoMethods(root: GumTreeNode): Collection> { - val methodRoots = root.preOrder().filter { it.getTypeLabel() == TypeLabels.methodDeclaration } - return methodRoots.map { GumTreeJavaFunctionInfo(it as GumTreeNode)} + val methodRoots = root.preOrder().filter { it.getTypeLabel() == methodDeclaration } + return methodRoots.map { GumTreeJavaFunctionInfo(it as GumTreeNode) } } } \ No newline at end of file From b501e8a245283cb103956d623946c2645ac94580 Mon Sep 17 00:00:00 2001 From: illided Date: Mon, 3 May 2021 13:21:21 +0300 Subject: [PATCH 125/308] cast fixed --- .../parse/gumtree/java/GumTreeJavaFunctionInfo.kt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt index 9eda2c8e..de968cea 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -21,7 +21,7 @@ class GumTreeJavaFunctionInfo(override val root: GumTreeNode) : FunctionInfo? = collectEnclosingClass() private fun collectEnclosingClass(): EnclosingElement? 
{ - val enclosingClassNode = getEnclosingClassNode(root.getParent() as GumTreeNode) ?: return null + val enclosingClassNode = getEnclosingClassNode(root.getParent() as GumTreeNode?) ?: return null val enclosingClassName = enclosingClassNode.getChildOfType(TypeLabels.simpleName)?.getToken() return EnclosingElement( root = enclosingClassNode, @@ -30,12 +30,11 @@ class GumTreeJavaFunctionInfo(override val root: GumTreeNode) : FunctionInfo { From bf5aefacd1990ec4ae48e1894b03fc8849f02208 Mon Sep 17 00:00:00 2001 From: illided Date: Mon, 3 May 2021 13:35:45 +0300 Subject: [PATCH 126/308] toList statements removed --- .../java/GumTreeJavaMethodSplitterTest.kt | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt index de577940..9cce78ba 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt @@ -26,8 +26,8 @@ class GumTreeJavaMethodSplitterTest { assertEquals("fun", name) assertEquals("void", returnType) assertEquals("SingleFunction", enclosingElement?.name) - assertEquals(listOf("args", "param"), parameters.map { it.name }.toList()) - assertEquals(listOf("String[]", "int"), parameters.map { it.type }.toList()) + assertEquals(listOf("args", "param"), parameters.map { it.name }) + assertEquals(listOf("String[]", "int"), parameters.map { it.type }) } } @@ -41,8 +41,8 @@ class GumTreeJavaMethodSplitterTest { assertEquals("main", name) assertEquals("void", returnType) assertEquals("InnerClass", enclosingElement?.name) - assertEquals(listOf("args"), parameters.map { it.name }.toList()) - assertEquals(listOf("String[]"), parameters.map { it.type }.toList()) + assertEquals(listOf("args"), parameters.map { it.name }) + assertEquals(listOf("String[]"), parameters.map { it.type }) 
} } @@ -55,15 +55,15 @@ class GumTreeJavaMethodSplitterTest { assertEquals("main", name) assertEquals("void", returnType) assertEquals("InnerClass", enclosingElement?.name) - assertEquals(listOf("args"), parameters.map { it.name }.toList()) - assertEquals(listOf("String[]"), parameters.map { it.type }.toList()) + assertEquals(listOf("args"), parameters.map { it.name }) + assertEquals(listOf("String[]"), parameters.map { it.type }) } with(methodInfos.last()) { assertEquals("fun", name) assertEquals("void", returnType) assertEquals("SingleMethodInnerClass", enclosingElement?.name) - assertEquals(listOf("args", "param"), parameters.map { it.name }.toList()) - assertEquals(listOf("String[]", "int"), parameters.map { it.type }.toList()) + assertEquals(listOf("args", "param"), parameters.map { it.name }) + assertEquals(listOf("String[]", "int"), parameters.map { it.type }) } } @@ -76,8 +76,8 @@ class GumTreeJavaMethodSplitterTest { assertEquals("fun", name) assertEquals("int", returnType) assertEquals("SingleFunction", enclosingElement?.name) - assertEquals(listOf("args", "param"), parameters.map { it.name }.toList()) - assertEquals(listOf("int", "SingleFunction"), parameters.map { it.type }.toList()) + assertEquals(listOf("args", "param"), parameters.map { it.name }) + assertEquals(listOf("int", "SingleFunction"), parameters.map { it.type }) } } From 259f482961f1d5b9a064d237926a302640fddaec Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 3 May 2021 16:31:08 +0500 Subject: [PATCH 127/308] pipeline + project imported + pipeline frontend + storage config --- src/main/kotlin/astminer/cli/utils.kt | 8 +- .../kotlin/astminer/config/StorageConfig.kt | 41 ++++++ .../kotlin/astminer/filters/CommonFilters.kt | 6 +- .../astminer/filters/FunctionFilters.kt | 12 +- src/main/kotlin/astminer/pipeline/Pipeline.kt | 33 +++-- .../astminer/pipeline/PipelineFrontend.kt | 40 ++++-- .../astminer/pipeline/ProjectImporter.kt | 39 ++++++ .../astminer/problem/FileLevelProblems.kt | 6 +- 
.../astminer/problem/FunctionLevelProblems.kt | 6 +- .../kotlin/astminer/cli/LabelExtractorTest.kt | 82 ------------ .../astminer/filters/FileFiltersTest.kt | 6 + .../astminer/filters/FunctionFiltersTest.kt | 125 ++++++++++++++++++ .../astminer/problem/LabelExtractorTest.kt | 73 ++++++++++ 13 files changed, 354 insertions(+), 123 deletions(-) create mode 100644 src/main/kotlin/astminer/config/StorageConfig.kt create mode 100644 src/main/kotlin/astminer/pipeline/ProjectImporter.kt delete mode 100644 src/test/kotlin/astminer/cli/LabelExtractorTest.kt create mode 100644 src/test/kotlin/astminer/filters/FileFiltersTest.kt create mode 100644 src/test/kotlin/astminer/filters/FunctionFiltersTest.kt create mode 100644 src/test/kotlin/astminer/problem/LabelExtractorTest.kt diff --git a/src/main/kotlin/astminer/cli/utils.kt b/src/main/kotlin/astminer/cli/utils.kt index d8071535..791d8bdb 100644 --- a/src/main/kotlin/astminer/cli/utils.kt +++ b/src/main/kotlin/astminer/cli/utils.kt @@ -56,12 +56,12 @@ fun getLabelExtractor( } "method" -> { val filterPredicates = mutableListOf( - ModifierFilterPredicate(excludeModifiers), AnnotationFilterPredicate(excludeAnnotations), - MethodNameWordsNumberFilter(maxMethodNameLength), MethodAnyNodeWordsNumberFilter(maxTokenLength), - TreeSizeFilterPredicate(maxTreeSize) + ModifierFilter(excludeModifiers), AnnotationFilter(excludeAnnotations), + FunctionNameWordsNumberFilter(maxMethodNameLength), FunctionAnyNodeWordsNumberFilter(maxTokenLength), + TreeSizeFilter(maxTreeSize) ) if (filterConstructors) { - filterPredicates.add(ConstructorFilterPredicate) + filterPredicates.add(ConstructorFilter) } return MethodNameExtractor(filterPredicates, javaParser) } diff --git a/src/main/kotlin/astminer/config/StorageConfig.kt b/src/main/kotlin/astminer/config/StorageConfig.kt new file mode 100644 index 00000000..6ad1aa92 --- /dev/null +++ b/src/main/kotlin/astminer/config/StorageConfig.kt @@ -0,0 +1,41 @@ +package astminer.config + +import 
astminer.storage.Storage +import astminer.storage.TokenProcessor +import astminer.storage.ast.CsvAstStorage +import astminer.storage.ast.DotAstStorage +import astminer.storage.path.Code2VecPathStorage +import astminer.storage.path.PathBasedStorageConfig + +sealed class StorageConfig { + abstract fun getStorage(outputDirectoryPath: String): Storage +} + +object CsvAstStorageConfig : StorageConfig() { + override fun getStorage(outputDirectoryPath: String) = CsvAstStorage(outputDirectoryPath) +} + +data class DotAstStorageConfig(val tokenProcessor: TokenProcessor) : StorageConfig() { + override fun getStorage(outputDirectoryPath: String) = DotAstStorage(outputDirectoryPath, tokenProcessor) +} + +data class Code2VecPathStorageConfig( + val maxPathLength: Int, + val maxPathWidth: Int, + val maxTokens: Long? = null, + val maxPaths: Long? = null, + val maxPathContextsPerEntity: Int? = null, + val tokenProcessor: TokenProcessor +) : StorageConfig() { + + private val storageConfig = PathBasedStorageConfig( + maxPathLength, + maxPathWidth, + maxTokens, + maxPaths, + maxPathContextsPerEntity + ) + + override fun getStorage(outputDirectoryPath: String) = + Code2VecPathStorage(outputDirectoryPath, storageConfig, tokenProcessor) +} diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt index 2a2a7007..5263bc24 100644 --- a/src/main/kotlin/astminer/filters/CommonFilters.kt +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -5,7 +5,7 @@ import astminer.common.model.Node import astminer.common.model.ParseResult import astminer.featureextraction.treeSize -abstract class TreeSizeFilterPredicate(private val maxSize: Int) : Filter { +abstract class TreeSizeFilter(private val maxSize: Int) : Filter { private fun isTreeFiltered(root: Node): Boolean { return if (maxSize == -1) { true @@ -19,12 +19,12 @@ abstract class TreeSizeFilterPredicate(private val maxSize: Int) : Filter override fun isFiltered(entity: T) = 
isTreeFiltered(entity.tree) } -class FileTreeSizeFilterPredicate(maxSize: Int) : TreeSizeFilterPredicate>(maxSize), FileFilter { +class FileTreeSizeFilter(maxSize: Int) : TreeSizeFilter>(maxSize), FileFilter { override val ParseResult.tree: Node get() = root } -class FunctionTreeSizeFilterPredicate(maxSize: Int) : TreeSizeFilterPredicate>(maxSize), +class FunctionTreeSizeFilter(maxSize: Int) : TreeSizeFilter>(maxSize), FunctionFilter { override val FunctionInfo.tree: Node get() = root diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index 1592f35a..5dc250dc 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -9,22 +9,23 @@ interface FunctionFilter : Filter> { override fun isFiltered(entity: FunctionInfo): Boolean } -class ModifierFilterPredicate(private val excludeModifiers: List) : FunctionFilter { +class ModifierFilter(private val excludeModifiers: List) : FunctionFilter { override fun isFiltered(entity: FunctionInfo): Boolean = !excludeModifiers.any { modifier -> modifier in entity.modifiers } } -class AnnotationFilterPredicate(private val excludeAnnotations: List) : FunctionFilter { +class AnnotationFilter(private val excludeAnnotations: List) : FunctionFilter { override fun isFiltered(entity: FunctionInfo): Boolean = !excludeAnnotations.any { annotation -> annotation in entity.annotations } } -object ConstructorFilterPredicate : FunctionFilter { +object ConstructorFilter : FunctionFilter { override fun isFiltered(entity: FunctionInfo) = !entity.isConstructor } -class MethodNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { +class FunctionNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { override fun isFiltered(entity: FunctionInfo): Boolean { + // TODO: this is not needed return if (maxWordsNumber == -1) { true } else { @@ -34,8 +35,9 @@ class 
MethodNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFil } } -class MethodAnyNodeWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { +class FunctionAnyNodeWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { override fun isFiltered(entity: FunctionInfo): Boolean { + // TODO: this is not needed return if (maxWordsNumber == -1) { true } else { diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 1ee6eb7d..12b59b79 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -1,8 +1,8 @@ package astminer.pipeline -import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.preOrder +import astminer.config.StorageConfig import astminer.filters.Filter import astminer.problem.LabeledResult import astminer.problem.Problem @@ -14,9 +14,9 @@ class Pipeline( private val filters: List>, private val problem: Problem, private val excludedNodeTypes: List, - private val storage: Storage + private val storageConfig: StorageConfig, + private val outputDirectory: File ) { - private fun T.passesThroughFilters() = filters.all { filter -> filter.isFiltered(this) } private fun LabeledResult.excludeNodes() { @@ -25,17 +25,26 @@ class Pipeline( } } - fun run(files: List) { - val entities = frontend.parseEntities(files) + private fun getStorage(extension: String): Storage { + val directoryForExtension = outputDirectory.resolve(extension) + directoryForExtension.mkdir() - val labeledResults = entities - .filter { functionInfo -> functionInfo.passesThroughFilters() } - .mapNotNull { problem.process(it) } + return storageConfig.getStorage(directoryForExtension.path) + } - for (labeledResult in labeledResults) { - labeledResult.excludeNodes() - } + fun run() { + for ((extension, entities) in frontend.getEntities()) { + getStorage(extension).use { storage -> + val labeledResults = entities + 
.filter { functionInfo -> functionInfo.passesThroughFilters() } + .mapNotNull { problem.process(it) } - storage.store(labeledResults.asIterable()) + for (labeledResult in labeledResults) { + labeledResult.excludeNodes() + } + + storage.store(labeledResults.asIterable()) + } + } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt index e7e0879f..c9a49854 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt @@ -2,14 +2,24 @@ package astminer.pipeline import astminer.common.model.* import astminer.parse.getHandlerFactory +import java.io.Closeable import java.io.File +data class EntitiesFromFiles(val fileExtension: String, val entities: Sequence) + interface PipelineFrontend { - fun parseEntities(files: List): Sequence + val inputDirectory: File + fun getEntities(): Sequence> } -abstract class CompositePipelineFrontend(private val parserType: String, extensions: List) : - PipelineFrontend { +abstract class CompositePipelineFrontend( + private val projectImporter: ProjectImporter, + private val parserType: String, + private val extensions: List +) : + PipelineFrontend, Closeable { + + override val inputDirectory: File = projectImporter.projectDirectory private val handlerFactories = extensions.associateWith { getHandlerFactory(it, parserType) } @@ -18,8 +28,8 @@ abstract class CompositePipelineFrontend(private val parserType: String, exte protected abstract fun LanguageHandler.getEntities(): Sequence - override fun parseEntities(files: List): Sequence { - return files.asSequence().flatMap { file -> + private fun getEntities(files: Sequence): Sequence { + return files.flatMap { file -> val handler = file.handler if (handler != null) { handler.getEntities() @@ -29,17 +39,29 @@ abstract class CompositePipelineFrontend(private val parserType: String, exte } } } + + override fun getEntities(): 
Sequence> = sequence { + for (extension in extensions) { + val files = projectImporter.getFiles(extension) + val entities = getEntities(files) + yield(EntitiesFromFiles(extension, entities)) + } + } + + override fun close() { + projectImporter.close() + } } -class FilePipelineFrontend(parserType: String, extensions: List) : +class FilePipelineFrontend(projectImporter: ProjectImporter, parserType: String, extensions: List) : CompositePipelineFrontend>( - parserType, extensions + projectImporter, parserType, extensions ) { override fun LanguageHandler.getEntities(): Sequence> = sequenceOf(parseResult) } -class FunctionPipelineFrontend(parserType: String, extensions: List) : - CompositePipelineFrontend>(parserType, extensions) { +class FunctionPipelineFrontend(projectImporter: ProjectImporter, parserType: String, extensions: List) : + CompositePipelineFrontend>(projectImporter, parserType, extensions) { override fun LanguageHandler.getEntities(): Sequence> = splitIntoMethods().asSequence() diff --git a/src/main/kotlin/astminer/pipeline/ProjectImporter.kt b/src/main/kotlin/astminer/pipeline/ProjectImporter.kt new file mode 100644 index 00000000..d39a9e1c --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/ProjectImporter.kt @@ -0,0 +1,39 @@ +package astminer.pipeline + +import astminer.common.getProjectFilesWithExtension +import astminer.parse.fuzzy.cpp.FuzzyCppParser +import java.io.Closeable +import java.io.File + +class ProjectImporter(val projectDirectory: File, private val withPreprocessing: Boolean) : Closeable { + companion object { + private val preprocessedExtensions = listOf("c", "cpp") + private val folderForPreprocessedFiles = File("temp") + } + + private var preprocessingComplete = false + + private fun preprocess() { + val parser = FuzzyCppParser() + parser.preprocessProject(projectDirectory, folderForPreprocessedFiles) + preprocessingComplete = true + } + + private fun getFolder(extension: String): File = + if (withPreprocessing && extension in 
preprocessedExtensions) { + if (!preprocessingComplete) { + preprocess() + } + folderForPreprocessedFiles + } else { + projectDirectory + } + + + fun getFiles(extension: String): Sequence = + getProjectFilesWithExtension(getFolder(extension), extension).asSequence() + + override fun close() { + folderForPreprocessedFiles.delete() + } +} diff --git a/src/main/kotlin/astminer/problem/FileLevelProblems.kt b/src/main/kotlin/astminer/problem/FileLevelProblems.kt index ffc772f1..55caa652 100644 --- a/src/main/kotlin/astminer/problem/FileLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FileLevelProblems.kt @@ -4,15 +4,13 @@ import astminer.common.model.Node import astminer.common.model.ParseResult import java.io.File -interface FileLevelProblem : Problem> { - override fun process(entity: ParseResult): LabeledResult? -} +interface FileLevelProblem : Problem> object FilePathExtractor : FileLevelProblem { override fun process(entity: ParseResult): LabeledResult = entity.labeledWithFilePath() } -class FolderExtractor : FileLevelProblem { +object FolderExtractor : FileLevelProblem { override fun process(entity: ParseResult): LabeledResult? { val folderName = File(entity.filePath).parentFile.name ?: return null return entity.labeledWith(folderName) diff --git a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt index 00bfba91..030f8dd9 100644 --- a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt @@ -5,11 +5,9 @@ import astminer.common.model.Node import astminer.common.preOrder import astminer.common.setTechnicalToken -interface FunctionLevelProblem : Problem> { - override fun process(entity: FunctionInfo): LabeledResult? -} +interface FunctionLevelProblem : Problem> -object MethodNameExtractor : FunctionLevelProblem { +object FunctionNameProblem : FunctionLevelProblem { override fun process(entity: FunctionInfo): LabeledResult? 
{ val name = entity.name ?: return null entity.root.preOrder().forEach { node -> diff --git a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt deleted file mode 100644 index 620d111b..00000000 --- a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt +++ /dev/null @@ -1,82 +0,0 @@ -package astminer.cli - -import astminer.common.getTechnicalToken -import astminer.common.model.* -import astminer.parse.antlr.AntlrNode -import org.junit.Before -import org.junit.Test -import kotlin.test.assertEquals -import kotlin.test.assertNull -import kotlin.test.assertTrue - -internal class LabelExtractorTest { - - companion object { - private const val PATH_STRING = "random/folder/file.txt" - private const val FOLDER = "folder" - private const val FILENAME = "file.txt" - private const val METHOD_NAME = "method" - } - - private var dummyRoot = AntlrNode("", null, null) - - private fun makeMethodInfo(nameNode: AntlrNode) = object : FunctionInfo { - override val root: AntlrNode = dummyRoot - override val nameNode: AntlrNode = nameNode - } - - @Before - fun setUp() { - dummyRoot = AntlrNode("", null, null) - } - - @Test - fun testNonEmptyFilePathExtractor() { - val labelExtractor = FilePathExtractor() - val nonEmptyParseResult = ParseResult(dummyRoot, PATH_STRING) - val labeledParseResults = labelExtractor.toLabeledData(nonEmptyParseResult) - assertEquals(1, labeledParseResults.size) - val (root, label) = labeledParseResults[0] - assertEquals(dummyRoot, root) - assertEquals(PATH_STRING, label) - } - - @Test - fun testNonEmptyFolderExtractor() { - val labelExtractor = FolderExtractor() - val nonEmptyParseResult = ParseResult(dummyRoot, PATH_STRING) - val labeledParseResults = labelExtractor.toLabeledData(nonEmptyParseResult) - assertEquals(1, labeledParseResults.size) - val (root, label) = labeledParseResults[0] - assertEquals(dummyRoot, root) - assertEquals(FOLDER, label) - } - - @Test - fun `test method name extractor extracts 
correct method name`() { - val nameNode = AntlrNode("", dummyRoot, METHOD_NAME) -// val methodInfo = MethodInfo( -// MethodNode(dummyRoot, null, nameNode), -// ElementNode(null, null), -// emptyList() -// ) - val method = makeMethodInfo(nameNode) - val methodNameExtractor = MethodNameExtractor() - val label = methodNameExtractor.extractLabel(method, PATH_STRING) - assertEquals(METHOD_NAME, label) - } - - @Test - fun `test method name extractor hides method name with technical token`() { - val nameNode = AntlrNode("", dummyRoot, METHOD_NAME) -// val methodInfo = MethodInfo( -// MethodNode(dummyRoot, null, nameNode), -// ElementNode(null, null), -// emptyList() -// ) - val methodInfo = makeMethodInfo(nameNode) - val methodNameExtractor = MethodNameExtractor() - methodNameExtractor.extractLabel(methodInfo, PATH_STRING) - assertEquals("METHOD_NAME", nameNode.getTechnicalToken()) - } -} diff --git a/src/test/kotlin/astminer/filters/FileFiltersTest.kt b/src/test/kotlin/astminer/filters/FileFiltersTest.kt new file mode 100644 index 00000000..3915a2d7 --- /dev/null +++ b/src/test/kotlin/astminer/filters/FileFiltersTest.kt @@ -0,0 +1,6 @@ +package astminer.filters + +internal class FileFiltersTest { + + +} \ No newline at end of file diff --git a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt new file mode 100644 index 00000000..a818b0e8 --- /dev/null +++ b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt @@ -0,0 +1,125 @@ +package astminer.filters + +import astminer.common.model.FunctionInfo +import astminer.common.model.Node +import astminer.parse.antlr.AntlrNode +import org.junit.Test +import kotlin.test.assertFalse +import kotlin.test.assertTrue + +class FunctionFiltersTest { + + @Test + fun `test modifiers filter should exclude function if it has the excluded modifier`() { + val excludedModifiers = listOf("a", "b") + val functionInfo = object : FunctionInfo { + override val modifiers: List = 
listOf("b", "c") + } + assertFalse { ModifierFilter(excludedModifiers).isFiltered(functionInfo) } + } + + @Test + fun `test modifiers filter should not exclude function if it does not have the excluded modifier`() { + val excludedModifiers = listOf("a", "b") + val functionInfo = object : FunctionInfo { + override val modifiers: List = listOf("c", "d") + } + assertTrue { ModifierFilter(excludedModifiers).isFiltered(functionInfo) } + } + + @Test + fun `test annotations filter should exclude function if it has the excluded modifier`() { + val excludedModifiers = listOf("a", "b") + val functionInfo = object : FunctionInfo { + override val modifiers: List = listOf("a", "c") + } + assertFalse { AnnotationFilter(excludedModifiers).isFiltered(functionInfo) } + } + + @Test + fun `test annotations filter should not exclude function if it does not have the excluded modifier`() { + val excludedModifiers = listOf("a", "b") + val functionInfo = object : FunctionInfo { + override val modifiers: List = listOf("y", "x") + } + assertTrue { AnnotationFilter(excludedModifiers).isFiltered(functionInfo) } + } + + @Test + fun `test constructor filter should exclude constructor functions`() { + val functionInfo = object : FunctionInfo { + override val isConstructor = true + } + assertFalse { ConstructorFilter.isFiltered(functionInfo) } + } + + @Test + fun `test constructor filter should not exclude non-constructor functions`() { + val functionInfo = object : FunctionInfo { + override val isConstructor = false + } + assertTrue { ConstructorFilter.isFiltered(functionInfo) } + } + + @Test + fun `test function name words number filter should not exclude function if maxWordsNumber is -1`() { + val functionInfo = object : FunctionInfo { + override val name = "Word".repeat(100) + } + assertTrue { FunctionNameWordsNumberFilter(-1).isFiltered(functionInfo) } + } + + @Test + fun `test function name words number filter for 50 should exclude function with name of 100 words`() { + val functionInfo = 
object : FunctionInfo { + override val name = "Word".repeat(100) + } + assertFalse { FunctionNameWordsNumberFilter(50).isFiltered(functionInfo) } + } + + @Test + fun `test function name words number filter for 101 should not exclude function with name of 100 words`() { + val functionInfo = object : FunctionInfo { + override val name = "Word".repeat(100) + } + assertFalse { FunctionNameWordsNumberFilter(101).isFiltered(functionInfo) } + } + + @Test + fun `test function any node words number filter should not exclude function if maxWordsNumber is -1`() { + val functionInfo = object : FunctionInfo { + override val name = "Word".repeat(100) + } + assertTrue { FunctionAnyNodeWordsNumberFilter(-1).isFiltered(functionInfo) } + } + + @Test + fun `test function any node words number filter for 50 should exclude function with name of 100 words`() { + val functionInfo = object : FunctionInfo { + override val name = "Word".repeat(100) + } + assertFalse { FunctionAnyNodeWordsNumberFilter(50).isFiltered(functionInfo) } + } + + @Test + fun `test function any node words number filter for 101 should not exclude function with name of 100 words`() { + val functionInfo = object : FunctionInfo { + override val name = "Word".repeat(100) + } + assertFalse { FunctionAnyNodeWordsNumberFilter(101).isFiltered(functionInfo) } + } + + @Test + fun `test function any node words number filter for 2 should exlude function that has a child of 3 words`() { + val root = AntlrNode("", null, "word") + val child = AntlrNode("", root, "wordWordWord") + root.setChildren(listOf(root)) + + val functionInfo = object : FunctionInfo { + override val root = root + } + assertFalse { FunctionAnyNodeWordsNumberFilter(2).isFiltered(functionInfo) } + } + + +} \ No newline at end of file diff --git a/src/test/kotlin/astminer/problem/LabelExtractorTest.kt b/src/test/kotlin/astminer/problem/LabelExtractorTest.kt new file mode 100644 index 00000000..597da431 --- /dev/null +++ 
b/src/test/kotlin/astminer/problem/LabelExtractorTest.kt @@ -0,0 +1,73 @@ +package astminer.problem + +import astminer.common.getTechnicalToken +import astminer.common.model.* +import astminer.parse.antlr.AntlrNode +import org.junit.Before +import org.junit.Test +import kotlin.test.assertEquals +import kotlin.test.assertNull + +internal class ProblemTest { + + companion object { + private const val PATH_STRING = "random/folder/file.txt" + private const val FOLDER = "folder" + private const val FILENAME = "file.txt" + private const val METHOD_NAME = "method" + } + + private var dummyRoot = AntlrNode("", null, null) + + @Before + fun setUp() { + dummyRoot = AntlrNode("", null, null) + } + + @Test + fun `test file path extractor returns the same root and file path`() { + val nonEmptyParseResult = ParseResult(dummyRoot, PATH_STRING) + val labeledParseResult = FilePathExtractor.process(nonEmptyParseResult) + + assertEquals(LabeledResult(dummyRoot, PATH_STRING, PATH_STRING), labeledParseResult) + } + + @Test + fun `test folder extractor returns null when folder is empty`() { + val nonEmptyParseResult = ParseResult(dummyRoot, "") + val labeledParseResult = FolderExtractor.process(nonEmptyParseResult) + + assertNull(labeledParseResult) + } + + @Test + fun `test folder extractor extracts folder when it is not empty`() { + val nonEmptyParseResult = ParseResult(dummyRoot, PATH_STRING) + val labeledParseResult = FolderExtractor.process(nonEmptyParseResult) + + assertEquals(LabeledResult(dummyRoot, FOLDER, PATH_STRING), labeledParseResult) + } + + @Test + fun `test method name extractor extracts correct method name`() { + val functionInfo = object : FunctionInfo { + override val nameNode = AntlrNode("", dummyRoot, METHOD_NAME) + override val filePath = PATH_STRING + override val root = dummyRoot + } + val labeledResult = FunctionNameProblem.process(functionInfo) + + assertEquals(LabeledResult(dummyRoot, METHOD_NAME, PATH_STRING), labeledResult) + } + + @Test + fun `test method 
name extractor hides method name with technical token`() { + val functionInfo = object : FunctionInfo { + override val nameNode = AntlrNode("", dummyRoot, METHOD_NAME) + override val filePath = PATH_STRING + override val root = dummyRoot + } + FunctionNameProblem.process(functionInfo) + assertEquals("METHOD_NAME", functionInfo.nameNode.getTechnicalToken()) + } +} From a666f2348d9ab5620df5abe1452c5f477d39f3ea Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 3 May 2021 19:39:37 +0500 Subject: [PATCH 128/308] saved unfinished work --- .../kotlin/astminer/config/PipelineConfig.kt | 42 +++++++++++++++++++ .../kotlin/astminer/config/StorageConfig.kt | 1 + .../kotlin/astminer/filters/FileFilters.kt | 4 +- .../astminer/filters/FunctionFilters.kt | 4 +- 4 files changed, 45 insertions(+), 6 deletions(-) create mode 100644 src/main/kotlin/astminer/config/PipelineConfig.kt diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt new file mode 100644 index 00000000..fce4a6b5 --- /dev/null +++ b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -0,0 +1,42 @@ +package astminer.config + +import astminer.filters.FileFilter +import astminer.filters.Filter +import astminer.filters.FunctionFilter +import astminer.problem.FileLevelProblem +import astminer.problem.FunctionLevelProblem +import astminer.problem.Problem + +sealed class PipelineConfig { + abstract val inputDir: String + abstract val outputDir: String + abstract val parser: ParserConfig + abstract val problem: Problem<*> + abstract val filters: List> + abstract val storage: StorageConfig +} + +data class FilePipelineConfig( + override val inputDir: String, + override val outputDir: String, + override val parser: ParserConfig, + override val problem: FileLevelProblem, + override val filters: List, + override val storage: StorageConfig +) : PipelineConfig() + +data class FunctionPipelineConfig( + val inputDir: String, + val outputDir: String, + val parser: 
ParserConfig, + val problem: FunctionLevelProblem, + val filters: List, + val storage: StorageConfig +) + +data class ParserConfig( + val type: String, + val extensions: List, + val preprocess: Boolean +) + diff --git a/src/main/kotlin/astminer/config/StorageConfig.kt b/src/main/kotlin/astminer/config/StorageConfig.kt index 6ad1aa92..31602b0a 100644 --- a/src/main/kotlin/astminer/config/StorageConfig.kt +++ b/src/main/kotlin/astminer/config/StorageConfig.kt @@ -8,6 +8,7 @@ import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig sealed class StorageConfig { + // TODO: bad code. This function has a lot of side-effects abstract fun getStorage(outputDirectoryPath: String): Storage } diff --git a/src/main/kotlin/astminer/filters/FileFilters.kt b/src/main/kotlin/astminer/filters/FileFilters.kt index bab9cf3c..63f6f862 100644 --- a/src/main/kotlin/astminer/filters/FileFilters.kt +++ b/src/main/kotlin/astminer/filters/FileFilters.kt @@ -3,6 +3,4 @@ package astminer.filters import astminer.common.model.Node import astminer.common.model.ParseResult -interface FileFilter: Filter> { - override fun isFiltered(entity: ParseResult): Boolean -} +interface FileFilter: Filter> diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index 5dc250dc..8d2ff6e4 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -5,9 +5,7 @@ import astminer.common.model.Node import astminer.common.preOrder import astminer.common.splitToSubtokens -interface FunctionFilter : Filter> { - override fun isFiltered(entity: FunctionInfo): Boolean -} +interface FunctionFilter : Filter> class ModifierFilter(private val excludeModifiers: List) : FunctionFilter { override fun isFiltered(entity: FunctionInfo): Boolean = From 0d8f8dc6065b5a62f3e6e40e4dc1ab9491780a48 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 3 May 2021 17:44:26 
+0300 Subject: [PATCH 129/308] Create docker image for CI and easy local run --- .space.kts | 8 +++----- Dockerfile | 18 ++++++++++++++++++ build.gradle.kts | 12 ++++++++++-- 3 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 Dockerfile diff --git a/.space.kts b/.space.kts index 88c22abd..8b404a58 100644 --- a/.space.kts +++ b/.space.kts @@ -1,8 +1,7 @@ job("Test") { - container("ubuntu") { + container(image="voudy/astminer") { shellScript { content = """ - apt-get update && apt-get install -y openjdk-8-jdk g++ ./gradlew test """ } @@ -16,14 +15,13 @@ job("Release") { } } - container("ubuntu") { + container(image="voudy/astminer") { env["PUBLISH_USER"] = Secrets("publish_user") env["PUBLISH_PASSWORD"] = Secrets("publish_password") shellScript { content = """ - apt-get update && apt-get install -y openjdk-8-jdk g++ - ./gradlew build publish + ./gradlew test publish """ } } diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..73b3a1e0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM alpine:3.13.5 + +LABEL desc="Docker container to run ASTMiner with all preinstalled requirements" + +# Install java +RUN apk add openjdk8 + +# Install G++ (required for Fuzzy parser) +RUN apk add g++ + +# Copy astminer sources +WORKDIR astminer +COPY . . 
+ +# Prepare shadow jar +RUN ./gradlew shadowJar + +ENTRYPOINT ["java", "-jar", "build/shadow/astminer.jar"] diff --git a/build.gradle.kts b/build.gradle.kts index 870baa6e..f3ca7aa9 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -52,7 +52,7 @@ sourceSets["main"].java.srcDir(file(generatedSourcesPath)) idea.module.generatedSourceDirs.add(file(generatedSourcesPath)) tasks.generateGrammarSource { -// maxHeapSize = "64m" + // maxHeapSize = "64m" arguments = arguments + listOf("-package", "me.vovak.antlr.parser") // Keep a copy of generated sources doLast { @@ -104,6 +104,14 @@ jmh { } publishing { + publications { + create("maven") { + groupId = project.group.toString() + artifactId = project.name + version = project.version.toString() + from(components["java"]) + } + } repositories { maven { url = uri("https://packages.jetbrains.team/maven/p/astminer/astminer") @@ -118,7 +126,7 @@ publishing { application.mainClassName = "astminer.MainKt" shadowJar { jar { - archiveName = "astminer-$version.jar" + archiveName = "astminer.jar" } }.apply { task.archiveClassifier.set("") From afb62974279d4c8f837888a22201f20fb2508001 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 3 May 2021 18:37:57 +0300 Subject: [PATCH 130/308] Add github CI workflow --- .circleci/config.yml | 66 ------------------------------------- .github/workflows/build.yml | 15 +++++++++ build.gradle.kts | 17 ++++++++++ 3 files changed, 32 insertions(+), 66 deletions(-) delete mode 100644 .circleci/config.yml create mode 100644 .github/workflows/build.yml diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 29c651f9..00000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,66 +0,0 @@ -version: 2.1 -jobs: - # build with machine executor - build: - machine: - enabled: true - working_directory: ~/astminer - steps: - - checkout: - path: ~/astminer - - run: ./gradlew build --stacktrace - - # release - release: - machine: - enabled: true - working_directory: ~/astminer - 
environment: - GIT_BRANCH: << pipeline.git.branch >> - steps: - - checkout: - path: ~/astminer - - run: ./gradlew bintrayUpload "-PbranchName=$GIT_BRANCH" - - release-dev: - machine: - enabled: true - working_directory: ~/astminer - environment: - CI_VERSION: << pipeline.number >> - GIT_BRANCH: << pipeline.git.branch >> - steps: - - checkout: - path: ~/astminer - - run: ./gradlew bintrayUpload "-PciVersion=1.$CI_VERSION" "-PbranchName=$GIT_BRANCH" - - -workflows: - version: 2 - - # release with manual approval in CircleCI app - deploy-library: - jobs: - - build - - approve-release: - type: approval - requires: - - build - filters: - branches: - only: - - master - - release: - requires: - - approve-release - filters: - branches: - only: - - master - - release-dev: - requires: - - build - filters: - branches: - only: - - master-dev diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..d7da813a --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,15 @@ +name: Gradle Build + +on: [push, pull_request] + +jobs: + build: + + runs-on: ubuntu-latest + container: voudy/astminer + + steps: + - uses: actions/checkout@v2 + + - name: Build with Gradle # Building with gradle already include running tests + run: ./gradlew build --console=plain diff --git a/build.gradle.kts b/build.gradle.kts index f3ca7aa9..c2fd66ec 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -131,3 +131,20 @@ shadowJar { }.apply { task.archiveClassifier.set("") } + +tasks.withType { + // Kotlin DSL workaround from https://github.com/gradle/kotlin-dsl-samples/issues/836#issuecomment-384206237 + addTestListener(object : TestListener { + override fun beforeSuite(suite: TestDescriptor) {} + override fun beforeTest(testDescriptor: TestDescriptor) {} + override fun afterTest(testDescriptor: TestDescriptor, result: TestResult) {} + override fun afterSuite(suite: TestDescriptor, result: TestResult) { + if (suite.parent == null) { + println( + 
"${result.resultType} (${result.testCount} tests, ${result.successfulTestCount} successes, " + + "${result.failedTestCount} failures, ${result.skippedTestCount} skipped)" + ) + } + } + }) +} \ No newline at end of file From 731976aba85a893488b9c6a34dd691f985ef6cf9 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 3 May 2021 19:07:37 +0300 Subject: [PATCH 131/308] cli.sh check if docker image exist --- cli.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cli.sh b/cli.sh index 985a253a..b6e872c3 100755 --- a/cli.sh +++ b/cli.sh @@ -1,3 +1,14 @@ #!/bin/bash -java -jar build/shadow/lib-*.jar "$@" +IMAGE_NAME="voudy/astminer" +SHADOW_JAR_PATH="build/shadow/astminer.jar" + +if [[ "$(docker images -q $IMAGE_NAME 2> /dev/null)" == "" ]]; then + echo "Can't find docker image, will compile from sources" + ./gradlew shadowJar + java -jar $SHADOW_JAR_PATH "$@" +else + echo "Run ASTMiner inside docker" + docker run --rm voudy/astminer "$@" +fi + From c12af502054d8e9779a408c16ccc3a46c6b7c37d Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 3 May 2021 19:07:59 +0300 Subject: [PATCH 132/308] Add information about docker and maven local into readme --- README.md | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5715387b..b16260b8 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ See [changelog](changelog.md) ## About Astminer was first implemented as a part of pipeline in the [code style extraction project](https://arxiv.org/abs/2002.03997) and later converted into a reusable tool. -Currently it supports extraction of: +Currently, it supports extraction of: * Path-based representations of files * Path-based representations of methods * Raw ASTs @@ -29,10 +29,29 @@ For the output format, see the section below. ## Usage +We provide docker image with preinstalled requirements for all supported parsers. 
+You can use image with last release by pulling image from Docker hub: +```shell +docker pull voudy/astminer +``` +If you want to run version from specific branch then you can rebuild image using following command: +```shell +docker build -t voudy/astminer . +``` + +If you don't want to use docker, you can always use Gradle tasks. +It works almost for all parsers and languages as expected. + +There are two different ways to use ASTMiner. + ### Use as CLI -1. Run `./gradlew shadowJar` in project directory -2. Now you can use shell script to run cli `./cli.sh optionName parameters`, where `optionName` is one of the following options: +You can run ASTMiner in CLI mode to preprocess your data with already implemented logic. +Use special script for it: +```shell +./cli.sh optionName parameters +``` +Where `optionName` is one of the following options: #### Preprocess @@ -67,7 +86,7 @@ Parse all files written in specified language into ASTs, split into methods, and #### Import -Astminer is available in the JetBrains Space package repository. You can add the dependency in your `build.gradle` file: +ASTMiner is available in the JetBrains Space package repository. You can add the dependency in your `build.gradle` file: ``` repositories { maven { @@ -91,6 +110,14 @@ dependencies { } ``` +#### Local development + +In order to use specific version of library navigate to required branch and build local version of ASTMiner: +```shell +./gradlew publishToMavenLocal +``` +After that add `mavenLocal()` into `repositories` field inside your gradle configuration. 
+ #### Examples If you want to use `astminer` as a library in your Java/Kotlin based data mining tool, check the following examples: From 64350668a9cd4c4e071a367972aacf503dd687a9 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 3 May 2021 19:08:30 +0300 Subject: [PATCH 133/308] Update version --- build.gradle.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle.kts b/build.gradle.kts index c2fd66ec..9fa50f2c 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,7 +1,7 @@ import tanvd.kosogor.proxy.shadowJar group = "io.github.vovak" -version = "0.6.1" +version = "0.6.2" plugins { id("java") From f937c0df58d6a159233d1ee33a7f25cece11e724 Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 4 May 2021 12:58:10 +0500 Subject: [PATCH 134/308] Pipeline + config + GetPipeline --- .../kotlin/astminer/config/PipelineConfig.kt | 14 +++--- .../kotlin/astminer/config/StorageConfig.kt | 30 ++---------- .../kotlin/astminer/pipeline/GetPipeline.kt | 32 +++++++++++++ src/main/kotlin/astminer/pipeline/Pipeline.kt | 17 ++----- .../astminer/pipeline/PipelineFrontend.kt | 48 +++++-------------- .../astminer/pipeline/ProjectImporter.kt | 39 --------------- .../astminer/pipeline/StorageCreator.kt | 34 +++++++++++++ 7 files changed, 93 insertions(+), 121 deletions(-) create mode 100644 src/main/kotlin/astminer/pipeline/GetPipeline.kt delete mode 100644 src/main/kotlin/astminer/pipeline/ProjectImporter.kt create mode 100644 src/main/kotlin/astminer/pipeline/StorageCreator.kt diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt index fce4a6b5..14472ee5 100644 --- a/src/main/kotlin/astminer/config/PipelineConfig.kt +++ b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -26,13 +26,13 @@ data class FilePipelineConfig( ) : PipelineConfig() data class FunctionPipelineConfig( - val inputDir: String, - val outputDir: String, - val parser: ParserConfig, - val problem: FunctionLevelProblem, - val 
filters: List, - val storage: StorageConfig -) + override val inputDir: String, + override val outputDir: String, + override val parser: ParserConfig, + override val problem: FunctionLevelProblem, + override val filters: List, + override val storage: StorageConfig +) : PipelineConfig() data class ParserConfig( val type: String, diff --git a/src/main/kotlin/astminer/config/StorageConfig.kt b/src/main/kotlin/astminer/config/StorageConfig.kt index 31602b0a..d100030c 100644 --- a/src/main/kotlin/astminer/config/StorageConfig.kt +++ b/src/main/kotlin/astminer/config/StorageConfig.kt @@ -1,24 +1,13 @@ package astminer.config -import astminer.storage.Storage import astminer.storage.TokenProcessor -import astminer.storage.ast.CsvAstStorage -import astminer.storage.ast.DotAstStorage -import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig -sealed class StorageConfig { - // TODO: bad code. This function has a lot of side-effects - abstract fun getStorage(outputDirectoryPath: String): Storage -} +sealed class StorageConfig -object CsvAstStorageConfig : StorageConfig() { - override fun getStorage(outputDirectoryPath: String) = CsvAstStorage(outputDirectoryPath) -} +object CsvAstStorageConfig : StorageConfig() -data class DotAstStorageConfig(val tokenProcessor: TokenProcessor) : StorageConfig() { - override fun getStorage(outputDirectoryPath: String) = DotAstStorage(outputDirectoryPath, tokenProcessor) -} +data class DotAstStorageConfig(val tokenProcessor: TokenProcessor) : StorageConfig() data class Code2VecPathStorageConfig( val maxPathLength: Int, @@ -28,15 +17,6 @@ data class Code2VecPathStorageConfig( val maxPathContextsPerEntity: Int? 
= null, val tokenProcessor: TokenProcessor ) : StorageConfig() { - - private val storageConfig = PathBasedStorageConfig( - maxPathLength, - maxPathWidth, - maxTokens, - maxPaths, - maxPathContextsPerEntity - ) - - override fun getStorage(outputDirectoryPath: String) = - Code2VecPathStorage(outputDirectoryPath, storageConfig, tokenProcessor) + fun toPathBasedConfig() = + PathBasedStorageConfig(maxPathLength, maxPathWidth, maxTokens, maxPaths, maxPathContextsPerEntity) } diff --git a/src/main/kotlin/astminer/pipeline/GetPipeline.kt b/src/main/kotlin/astminer/pipeline/GetPipeline.kt new file mode 100644 index 00000000..af8b1817 --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/GetPipeline.kt @@ -0,0 +1,32 @@ +package astminer.pipeline + +import astminer.common.model.FunctionInfo +import astminer.common.model.Node +import astminer.common.model.ParseResult +import astminer.config.FilePipelineConfig +import astminer.config.FunctionPipelineConfig +import astminer.config.PipelineConfig + +/** + * This function must have no side effects + */ +fun getPipeline(pipelineConfig: PipelineConfig): Pipeline<*> { + return when (pipelineConfig) { + is FilePipelineConfig -> getFilePipeline(pipelineConfig) + is FunctionPipelineConfig -> getFunctionPipeline(pipelineConfig) + } +} + +private fun getFilePipeline(filePipelineConfig: FilePipelineConfig): Pipeline> = + with(filePipelineConfig) { + val frontend = FilePipelineFrontend(inputDir, parser.type, parser.extensions) + val storageCreator = StorageCreator(storage, outputDir) + Pipeline(frontend, filters, problem, emptyList(), storageCreator) + } + +private fun getFunctionPipeline(functionPipelineConfig: FunctionPipelineConfig): Pipeline> = + with(functionPipelineConfig) { + val frontend = FunctionPipelineFrontend(inputDir, parser.type, parser.extensions) + val storageCreator = StorageCreator(storage, outputDir) + Pipeline(frontend, filters, problem, emptyList(), storageCreator) + } \ No newline at end of file diff --git 
a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 12b59b79..cdcb2132 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -2,20 +2,16 @@ package astminer.pipeline import astminer.common.model.Node import astminer.common.preOrder -import astminer.config.StorageConfig import astminer.filters.Filter import astminer.problem.LabeledResult import astminer.problem.Problem -import astminer.storage.Storage -import java.io.File class Pipeline( private val frontend: PipelineFrontend, private val filters: List>, private val problem: Problem, private val excludedNodeTypes: List, - private val storageConfig: StorageConfig, - private val outputDirectory: File + private val storageCreator: StorageCreator ) { private fun T.passesThroughFilters() = filters.all { filter -> filter.isFiltered(this) } @@ -25,16 +21,9 @@ class Pipeline( } } - private fun getStorage(extension: String): Storage { - val directoryForExtension = outputDirectory.resolve(extension) - directoryForExtension.mkdir() - - return storageConfig.getStorage(directoryForExtension.path) - } - fun run() { for ((extension, entities) in frontend.getEntities()) { - getStorage(extension).use { storage -> + storageCreator.createStorage(extension).use { storage -> val labeledResults = entities .filter { functionInfo -> functionInfo.passesThroughFilters() } .mapNotNull { problem.process(it) } @@ -47,4 +36,4 @@ class Pipeline( } } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt index c9a49854..bef3930e 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt @@ -1,67 +1,43 @@ package astminer.pipeline +import astminer.common.getProjectFilesWithExtension import astminer.common.model.* import astminer.parse.getHandlerFactory -import 
java.io.Closeable import java.io.File data class EntitiesFromFiles(val fileExtension: String, val entities: Sequence) interface PipelineFrontend { - val inputDirectory: File fun getEntities(): Sequence> } abstract class CompositePipelineFrontend( - private val projectImporter: ProjectImporter, + private val inputDirectoryPath: String, private val parserType: String, private val extensions: List -) : - PipelineFrontend, Closeable { - - override val inputDirectory: File = projectImporter.projectDirectory - - private val handlerFactories = extensions.associateWith { getHandlerFactory(it, parserType) } - - private val File.handler: LanguageHandler? - get() = handlerFactories[extension]?.createHandler(this) +) : PipelineFrontend { protected abstract fun LanguageHandler.getEntities(): Sequence - private fun getEntities(files: Sequence): Sequence { - return files.flatMap { file -> - val handler = file.handler - if (handler != null) { - handler.getEntities() - } else { - println("Failed") - emptySequence() - } - } - } - override fun getEntities(): Sequence> = sequence { + val inputDirectory = File(inputDirectoryPath) + for (extension in extensions) { - val files = projectImporter.getFiles(extension) - val entities = getEntities(files) + val handlerFactory = getHandlerFactory(extension, parserType) + val files = getProjectFilesWithExtension(inputDirectory, extension).asSequence() + val entities = files.flatMap { file -> handlerFactory.createHandler(file).getEntities() } yield(EntitiesFromFiles(extension, entities)) } } - - override fun close() { - projectImporter.close() - } } -class FilePipelineFrontend(projectImporter: ProjectImporter, parserType: String, extensions: List) : - CompositePipelineFrontend>( - projectImporter, parserType, extensions - ) { +class FilePipelineFrontend(inputDirectoryPath: String, parserType: String, extensions: List) : + CompositePipelineFrontend>(inputDirectoryPath, parserType, extensions) { override fun LanguageHandler.getEntities(): Sequence> 
= sequenceOf(parseResult) } -class FunctionPipelineFrontend(projectImporter: ProjectImporter, parserType: String, extensions: List) : - CompositePipelineFrontend>(projectImporter, parserType, extensions) { +class FunctionPipelineFrontend(inputDirectoryPath: String, parserType: String, extensions: List) : + CompositePipelineFrontend>(inputDirectoryPath, parserType, extensions) { override fun LanguageHandler.getEntities(): Sequence> = splitIntoMethods().asSequence() diff --git a/src/main/kotlin/astminer/pipeline/ProjectImporter.kt b/src/main/kotlin/astminer/pipeline/ProjectImporter.kt deleted file mode 100644 index d39a9e1c..00000000 --- a/src/main/kotlin/astminer/pipeline/ProjectImporter.kt +++ /dev/null @@ -1,39 +0,0 @@ -package astminer.pipeline - -import astminer.common.getProjectFilesWithExtension -import astminer.parse.fuzzy.cpp.FuzzyCppParser -import java.io.Closeable -import java.io.File - -class ProjectImporter(val projectDirectory: File, private val withPreprocessing: Boolean) : Closeable { - companion object { - private val preprocessedExtensions = listOf("c", "cpp") - private val folderForPreprocessedFiles = File("temp") - } - - private var preprocessingComplete = false - - private fun preprocess() { - val parser = FuzzyCppParser() - parser.preprocessProject(projectDirectory, folderForPreprocessedFiles) - preprocessingComplete = true - } - - private fun getFolder(extension: String): File = - if (withPreprocessing && extension in preprocessedExtensions) { - if (!preprocessingComplete) { - preprocess() - } - folderForPreprocessedFiles - } else { - projectDirectory - } - - - fun getFiles(extension: String): Sequence = - getProjectFilesWithExtension(getFolder(extension), extension).asSequence() - - override fun close() { - folderForPreprocessedFiles.delete() - } -} diff --git a/src/main/kotlin/astminer/pipeline/StorageCreator.kt b/src/main/kotlin/astminer/pipeline/StorageCreator.kt new file mode 100644 index 00000000..5e92f883 --- /dev/null +++ 
b/src/main/kotlin/astminer/pipeline/StorageCreator.kt @@ -0,0 +1,34 @@ +package astminer.pipeline + +import astminer.config.Code2VecPathStorageConfig +import astminer.config.CsvAstStorageConfig +import astminer.config.DotAstStorageConfig +import astminer.config.StorageConfig +import astminer.storage.Storage +import astminer.storage.ast.CsvAstStorage +import astminer.storage.ast.DotAstStorage +import astminer.storage.path.Code2VecPathStorage +import java.io.File + +class StorageCreator(private val config: StorageConfig, outputDirectoryPath: String) { + private val outputDirectory = File(outputDirectoryPath) + + private fun getOutputPath(extension: String): String { + val outputDirectoryForExtension = outputDirectory.resolve(extension) + outputDirectoryForExtension.mkdir() + return outputDirectoryForExtension.path + } + + fun createStorage(extension: String): Storage { + val outputPath = getOutputPath(extension) + return when (config) { + is CsvAstStorageConfig -> CsvAstStorage(outputPath) + is DotAstStorageConfig -> DotAstStorage(outputPath, config.tokenProcessor) + is Code2VecPathStorageConfig -> Code2VecPathStorage( + outputPath, + config.toPathBasedConfig(), + config.tokenProcessor + ) + } + } +} From 2b2120ecfeeb1fa3e7244b171c659baaaff93ad6 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 4 May 2021 12:32:43 +0300 Subject: [PATCH 135/308] find enclosing moved higher --- src/main/kotlin/astminer/parse/FindingUtils.kt | 11 +++++++++++ src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt | 12 +----------- .../parse/antlr/java/AntlrJavaFunctionInfo.kt | 12 +++--------- .../antlr/javascript/AntlrJavaScriptElementInfo.kt | 1 + .../parse/antlr/python/AntlrPythonFunctionInfo.kt | 10 ++-------- 5 files changed, 18 insertions(+), 28 deletions(-) create mode 100644 src/main/kotlin/astminer/parse/FindingUtils.kt diff --git a/src/main/kotlin/astminer/parse/FindingUtils.kt b/src/main/kotlin/astminer/parse/FindingUtils.kt new file mode 100644 index 00000000..aa8bc1e5 --- 
/dev/null +++ b/src/main/kotlin/astminer/parse/FindingUtils.kt @@ -0,0 +1,11 @@ +package astminer.parse + +import astminer.common.model.Node + +inline fun T.findEnclosingElementBy(condition: (T) -> Boolean): T? { + var curNode = this.getParent() + while (!(curNode == null || condition(curNode as T))) { + curNode = curNode.getParent() + } + return curNode as T? +} diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 387dce6e..d6a0df9a 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -5,6 +5,7 @@ import org.antlr.v4.runtime.ParserRuleContext import org.antlr.v4.runtime.Vocabulary import org.antlr.v4.runtime.tree.ErrorNode import org.antlr.v4.runtime.tree.TerminalNode +import java.util.concurrent.locks.Condition fun convertAntlrTree(tree: ParserRuleContext, ruleNames: Array, vocabulary: Vocabulary): AntlrNode { return compressTree(convertRuleContext(tree, ruleNames, null, vocabulary)) @@ -101,14 +102,3 @@ fun AntlrNode.getItOrChildrenOfType(typeLabel: String) : List { this.getChildrenOfType(typeLabel).mapNotNull { it as? AntlrNode } } } - -fun AntlrNode.findEnclosingElementBy(condition: (AntlrNode) -> Boolean): AntlrNode? { - return findRecursively(this.getParent() as AntlrNode?, condition) -} - -private fun findRecursively(node: AntlrNode?, condition: (AntlrNode) -> Boolean) : AntlrNode? 
{ - if (node == null || condition(node)) { - return node - } - return findRecursively(node.getParent() as AntlrNode?, condition) -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt index 87eabd8a..648c4952 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -2,6 +2,7 @@ package astminer.parse.antlr.java import astminer.common.model.* import astminer.parse.antlr.* +import astminer.parse.findEnclosingElementBy class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo { override val nameNode: AntlrNode? = collectNameNode() @@ -33,7 +34,7 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo? { - val enclosingClassNode = findEnclosingClassNode(root) ?: return null + val enclosingClassNode = root.findEnclosingElementBy { it.hasLastLabel(CLASS_DECLARATION_NODE) } ?: return null return EnclosingElement( type = EnclosingElementType.Class, name = enclosingClassNode.getChildOfType(CLASS_NAME_NODE)?.getToken(), @@ -41,13 +42,6 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo { val parametersRoot = root.getChildOfType(METHOD_PARAMETER_NODE) val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) ?: return emptyList() @@ -58,7 +52,7 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo getParameterInfo(singleParameter) } + }.map { singleParameter -> getParameterInfo(singleParameter) } } private fun getParameterInfo(parameterNode: AntlrNode): MethodInfoParameter { diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt index b82c2271..dcba1e72 100644 --- 
a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt @@ -2,6 +2,7 @@ package astminer.parse.antlr.javascript import astminer.common.model.* import astminer.parse.antlr.* +import astminer.parse.findEnclosingElementBy /** Base class for describing JavaScript methods, functions or arrow functions. diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt index 2a347413..0571ae43 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -2,6 +2,7 @@ package astminer.parse.antlr.python import astminer.common.model.* import astminer.parse.antlr.* +import astminer.parse.findEnclosingElementBy class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo { override val nameNode: AntlrNode? = collectNameNode() @@ -66,7 +67,7 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo? { - val enclosingNode = findEnclosingNode(root.getParent() as AntlrNode?) 
?: return null + val enclosingNode = root.findEnclosingElementBy { it.lastLabelIn(POSSIBLE_ENCLOSING_ELEMENTS) } ?: return null val type = when { enclosingNode.hasLastLabel(CLASS_DECLARATION_NODE) -> EnclosingElementType.Class enclosingNode.hasLastLabel(FUNCTION_NODE) -> { @@ -89,13 +90,6 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo Date: Tue, 4 May 2021 12:51:01 +0300 Subject: [PATCH 136/308] fuzzy function info added --- .../parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt | 62 +++++++++++++++++++ .../parse/fuzzy/cpp/FuzzyMethodSplitter.kt | 56 +---------------- 2 files changed, 65 insertions(+), 53 deletions(-) create mode 100644 src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt new file mode 100644 index 00000000..b4643d35 --- /dev/null +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt @@ -0,0 +1,62 @@ +package astminer.parse.fuzzy.cpp + +import astminer.common.model.EnclosingElement +import astminer.common.model.EnclosingElementType +import astminer.common.model.FunctionInfo +import astminer.common.model.MethodInfoParameter +import astminer.parse.findEnclosingElementBy + +class FuzzyCppFunctionInfo(override val root: FuzzyNode): FunctionInfo { + companion object { + private const val METHOD_NAME_NODE = "NAME" + private const val METHOD_RETURN_NODE = "METHOD_RETURN" + private const val METHOD_RETURN_TYPE_NODE = "TYPE_FULL_NAME" + + private const val CLASS_DECLARATION_NODE = "TYPE_DECL" + private const val CLASS_NAME_NODE = "NAME" + + private const val METHOD_PARAMETER_NODE = "METHOD_PARAMETER_IN" + private const val PARAMETER_NAME_NODE = "NAME" + private const val PARAMETER_TYPE_NODE = "TYPE_FULL_NAME" + } + + override val returnType: String? = collectReturnType() + override val enclosingElement: EnclosingElement? 
= collectEnclosingClass() + override val parameters: List = collectParameters() + override val nameNode: FuzzyNode? = collectNameNode() + + private fun collectNameNode(): FuzzyNode? { + return root.getChildOfType(METHOD_NAME_NODE) as FuzzyNode? + } + + private fun collectReturnType(): String? { + return root.getChildOfType(METHOD_RETURN_NODE)?.getChildOfType(METHOD_RETURN_TYPE_NODE)?.getToken() + } + + private fun collectEnclosingClass(): EnclosingElement? { + val enclosingClass = findEnclosingClass() ?: return null + val enclosingClassName = findEnclosingClassName(enclosingClass) ?: return null + return EnclosingElement( + root = enclosingClass, + type = EnclosingElementType.Class, + name = enclosingClassName + ) + } + + private fun findEnclosingClass(): FuzzyNode? { + return root.findEnclosingElementBy { it.getTypeLabel() == CLASS_DECLARATION_NODE } + } + + private fun findEnclosingClassName(enclosingClass: FuzzyNode): String? { + return enclosingClass.getChildOfType(CLASS_NAME_NODE)?.getToken() + } + + private fun collectParameters(): List { + val parameters = root.getChildrenOfType(METHOD_PARAMETER_NODE) + return parameters.map { param -> + val type = param.getChildOfType(PARAMETER_TYPE_NODE)?.getToken() + val name = param.getChildOfType(PARAMETER_NAME_NODE)?.getToken() ?: "" + MethodInfoParameter(name, type) + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt index 59cc769d..5d01ceb6 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt @@ -4,60 +4,10 @@ import astminer.common.* import astminer.common.model.* class FuzzyMethodSplitter : TreeMethodSplitter { - - companion object { - private const val METHOD_NODE = "METHOD" - private const val METHOD_NAME_NODE = "NAME" - private const val METHOD_RETURN_NODE = "METHOD_RETURN" - private const 
val METHOD_RETURN_TYPE_NODE = "TYPE_FULL_NAME" - - private const val CLASS_DECLARATION_NODE = "TYPE_DECL" - private const val CLASS_NAME_NODE = "NAME" - - private const val METHOD_PARAMETER_NODE = "METHOD_PARAMETER_IN" - private const val PARAMETER_NAME_NODE = "NAME" - private const val PARAMETER_TYPE_NODE = "TYPE_FULL_NAME" - } + private val methodNode = "METHOD" override fun splitIntoMethods(root: FuzzyNode): Collection> { - val methodRoots = root.preOrder().filter { it.getTypeLabel() == METHOD_NODE } - return dummyMethodInfos() -// return methodRoots.map { collectMethodInfo(it as FuzzyNode) } - } - - private fun collectMethodInfo(methodNode: FuzzyNode): MethodInfo { - val methodReturnType = - methodNode.getChildOfType(METHOD_RETURN_NODE)?.getChildOfType(METHOD_RETURN_TYPE_NODE) as? FuzzyNode - val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) as? FuzzyNode - - val classRoot = getEnclosingClass(methodNode) - val className = classRoot?.getChildOfType(CLASS_NAME_NODE) as? FuzzyNode - - val parameters = methodNode.getChildrenOfType(METHOD_PARAMETER_NODE) - val parameterNodes = parameters.map { node -> - val fuzzyNode = node as FuzzyNode - ParameterNode( - fuzzyNode, - fuzzyNode.getChildOfType(PARAMETER_TYPE_NODE) as? FuzzyNode, - fuzzyNode.getChildOfType(PARAMETER_NAME_NODE) as? FuzzyNode - ) - }.toList() - - return MethodInfo( - MethodNode(methodNode, methodReturnType, methodName), - ElementNode(classRoot, className), - parameterNodes - ) - } - - private fun getEnclosingClass(node: FuzzyNode): FuzzyNode? { - if (node.getTypeLabel() == CLASS_DECLARATION_NODE) { - return node - } - val parentNode = node.getParent() as? 
FuzzyNode - if (parentNode != null) { - return getEnclosingClass(parentNode) - } - return null + val methodRoots = root.preOrder().filter { it.getTypeLabel() == methodNode } + return methodRoots.map { FuzzyCppFunctionInfo(it as FuzzyNode) } } } \ No newline at end of file From 45d2403879b4534e47b8af9586dd490ba058f593 Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 4 May 2021 16:16:11 +0500 Subject: [PATCH 137/308] Improved ParsingException.kt and logging --- build.gradle.kts | 3 ++- log/.gitkeep | 0 src/main/kotlin/astminer/common/model/ParsingModel.kt | 6 ++++-- src/main/kotlin/astminer/parse/ParsingException.kt | 3 ++- src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt | 2 +- .../astminer/parse/antlr/javascript/JavaScriptParser.kt | 2 +- src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt | 2 +- src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt | 4 ++-- .../astminer/parse/gumtree/python/GumTreePythonParser.kt | 2 +- src/main/resources/simplelogger.properties | 1 + 10 files changed, 15 insertions(+), 10 deletions(-) create mode 100644 log/.gitkeep create mode 100644 src/main/resources/simplelogger.properties diff --git a/build.gradle.kts b/build.gradle.kts index 71436212..bae21978 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -55,7 +55,8 @@ dependencies { // https://mvnrepository.com/artifact/io.shiftleft/fuzzyc2cpg api("io.shiftleft", "fuzzyc2cpg_2.13", "1.2.9") // https://mvnrepository.com/artifact/org.slf4j/slf4j-simple - testImplementation("org.slf4j", "slf4j-simple", "1.7.30") + implementation("org.slf4j", "slf4j-simple", "1.7.30") + implementation("io.github.microutils:kotlin-logging:1.5.9") testImplementation("junit:junit:4.11") testImplementation(kotlin("test-junit")) diff --git a/log/.gitkeep b/log/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 52c8d0b2..58611a8e 100644 --- 
a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -2,9 +2,12 @@ package astminer.common.model import astminer.cli.LabeledResult import astminer.parse.ParsingException +import mu.KotlinLogging import java.io.File import java.io.InputStream +// TODO: later move this logger to Pipeline +private val logger = KotlinLogging.logger("ParsingModel") interface Node { fun getTypeLabel(): String @@ -57,8 +60,7 @@ interface Parser { try { handleResult(parseFile(file)) } catch (parsingException: ParsingException) { - // TODO: all error reporting should be on the surface, in my opinion - println("Failed to parse file ${file.path}: ${parsingException.message}") + logger.error(parsingException) { "Failed to parse file ${file.path}" } } } } diff --git a/src/main/kotlin/astminer/parse/ParsingException.kt b/src/main/kotlin/astminer/parse/ParsingException.kt index b3c721d1..27d3f510 100644 --- a/src/main/kotlin/astminer/parse/ParsingException.kt +++ b/src/main/kotlin/astminer/parse/ParsingException.kt @@ -1,3 +1,4 @@ package astminer.parse -class ParsingException(message: String) : IllegalStateException(message) +class ParsingException(parserType: String, language: String, message: String? 
= null) : + IllegalStateException("Parser $parserType had problems parsing $language: ${message ?: "Unknown error."}") diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt index 74442807..277b1aca 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt @@ -22,7 +22,7 @@ class JavaParser : Parser { val context = parser.compilationUnit() convertAntlrTree(context, Java8Parser.ruleNames, Java8Parser.VOCABULARY) } catch (e: Exception) { - throw ParsingException("Failed to parse Java code: ${e.message}") + throw ParsingException("ANTLR", "Java", e.message) } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt index dc821545..947af3b2 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt @@ -22,7 +22,7 @@ class JavaScriptParser : Parser { val context = parser.program() convertAntlrTree(context, JavaScriptParser.ruleNames, JavaScriptParser.VOCABULARY) } catch (e: Exception) { - throw ParsingException("Failed to parse JavaScript code ${e.message}") + throw ParsingException("ANTLR", "JavaScript", e.message) } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt index 714710ba..69554417 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt @@ -22,7 +22,7 @@ class PythonParser : Parser { val context = parser.file_input() convertAntlrTree(context, Python3Parser.ruleNames, Python3Parser.VOCABULARY) } catch (e: Exception) { - throw ParsingException("Failed to parse Python code: ${e.message}") + throw 
ParsingException("ANTLR", "Python", e.message) } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt index d9735f72..24b97c46 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt @@ -121,11 +121,11 @@ class FuzzyCppParser : Parser { if (File(actualFilePath).absolutePath != File(filePath).absolutePath) { println("While parsing $filePath, actually parsed $actualFilePath") } - val node = vertexToNode[it] ?: throw ParsingException("Unknown fuzzy cpp parser error.") + val node = vertexToNode[it] ?: throw ParsingException("Fuzzy", "C++") return ParseResult(node, actualFilePath) } } - throw ParsingException("Unknown fuzzy cpp parser error.") + throw ParsingException("Fuzzy", "C++") } /** diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt index 365c7d04..e3ab61db 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt @@ -18,7 +18,7 @@ class GumTreePythonParser : Parser { val context = PythonTreeGenerator().generate(InputStreamReader(content)) wrapGumTreeNode(context) } catch (e: Exception) { - throw ParsingException("Failed to parse Python code: ${e.message}") + throw ParsingException("GumTree", "Python", e.message) } } diff --git a/src/main/resources/simplelogger.properties b/src/main/resources/simplelogger.properties new file mode 100644 index 00000000..5fd256f3 --- /dev/null +++ b/src/main/resources/simplelogger.properties @@ -0,0 +1 @@ +org.slf4j.simpleLogger.logFile = log/log.txt From d9a18533ef610b781167603befcb3f5248490452 Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 4 May 2021 17:34:27 +0500 Subject: [PATCH 138/308] added documentation --- 
.../kotlin/astminer/filters/CommonFilters.kt | 7 +++++ .../astminer/filters/FunctionFilters.kt | 15 ++++++++++ .../kotlin/astminer/pipeline/GetPipeline.kt | 3 +- src/main/kotlin/astminer/pipeline/Pipeline.kt | 2 +- .../astminer/pipeline/PipelineFrontend.kt | 30 +++++++++++++++++++ .../astminer/pipeline/StorageCreator.kt | 15 ++++++++-- .../astminer/problem/FileLevelProblems.kt | 6 ++++ .../astminer/problem/FunctionLevelProblems.kt | 4 +++ src/main/kotlin/astminer/problem/Problem.kt | 8 +++++ 9 files changed, 85 insertions(+), 5 deletions(-) diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt index 5263bc24..d7c6e9dd 100644 --- a/src/main/kotlin/astminer/filters/CommonFilters.kt +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -7,6 +7,7 @@ import astminer.featureextraction.treeSize abstract class TreeSizeFilter(private val maxSize: Int) : Filter { private fun isTreeFiltered(root: Node): Boolean { + // TODO: this is not needed return if (maxSize == -1) { true } else { @@ -19,11 +20,17 @@ abstract class TreeSizeFilter(private val maxSize: Int) : Filter { override fun isFiltered(entity: T) = isTreeFiltered(entity.tree) } +/** + * Filter that excludes files that have ASTs bigger than [maxSize] + */ class FileTreeSizeFilter(maxSize: Int) : TreeSizeFilter>(maxSize), FileFilter { override val ParseResult.tree: Node get() = root } +/** + * Filter that excludes functions that have ASTs bigger than [maxSize] + */ class FunctionTreeSizeFilter(maxSize: Int) : TreeSizeFilter>(maxSize), FunctionFilter { override val FunctionInfo.tree: Node diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index 8d2ff6e4..904acafe 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -7,20 +7,32 @@ import astminer.common.splitToSubtokens interface FunctionFilter : Filter> +/** + * Filter 
that excludes functions that have at least one of the modifiers from the [excludeModifiers] list. + */ class ModifierFilter(private val excludeModifiers: List) : FunctionFilter { override fun isFiltered(entity: FunctionInfo): Boolean = !excludeModifiers.any { modifier -> modifier in entity.modifiers } } +/** + * Filter that excludes functions that have at least one annotation from the [excludeAnnotations] list. + */ class AnnotationFilter(private val excludeAnnotations: List) : FunctionFilter { override fun isFiltered(entity: FunctionInfo): Boolean = !excludeAnnotations.any { annotation -> annotation in entity.annotations } } +/** + * Filter that excludes constructors + */ object ConstructorFilter : FunctionFilter { override fun isFiltered(entity: FunctionInfo) = !entity.isConstructor } +/** + * Filter that excludes functions that have more than [maxWordsNumber] words in their names. + */ class FunctionNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { override fun isFiltered(entity: FunctionInfo): Boolean { // TODO: this is not needed @@ -33,6 +45,9 @@ class FunctionNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionF } } +/** + * Filter that excludes functions that have more words than [maxWordsNumber] in any token of their subtree. + */ class FunctionAnyNodeWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { override fun isFiltered(entity: FunctionInfo): Boolean { // TODO: this is not needed diff --git a/src/main/kotlin/astminer/pipeline/GetPipeline.kt b/src/main/kotlin/astminer/pipeline/GetPipeline.kt index af8b1817..767143a0 100644 --- a/src/main/kotlin/astminer/pipeline/GetPipeline.kt +++ b/src/main/kotlin/astminer/pipeline/GetPipeline.kt @@ -8,7 +8,8 @@ import astminer.config.FunctionPipelineConfig import astminer.config.PipelineConfig /** - * This function must have no side effects + * Initializes the Pipeline given the [pipelineConfig]. + * This function must have no side effects!
*/ fun getPipeline(pipelineConfig: PipelineConfig): Pipeline<*> { return when (pipelineConfig) { diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index cdcb2132..4b52efef 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -23,7 +23,7 @@ class Pipeline( fun run() { for ((extension, entities) in frontend.getEntities()) { - storageCreator.createStorage(extension).use { storage -> + storageCreator.createStorageAndOutputFolder(extension).use { storage -> val labeledResults = entities .filter { functionInfo -> functionInfo.passesThroughFilters() } .mapNotNull { problem.process(it) } diff --git a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt index bef3930e..189e1361 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt @@ -5,12 +5,32 @@ import astminer.common.model.* import astminer.parse.getHandlerFactory import java.io.File +/** + * A group of entities that come from the files with the same file extension. + * @param fileExtension The file extension that all entities share. + * @param entities The entities that are extracted from the files + * @see PipelineFrontend for the definition of "entity" + */ data class EntitiesFromFiles(val fileExtension: String, val entities: Sequence) +/** + * Extracts entities from files and groups them by file extensions. + * Entity -- anything that can be extracted from a file of code. + * @param T The type of entities + */ interface PipelineFrontend { + /** + * Extract entities and group them by file extensions. + * @see PipelineFrontend for the definition of "entity". + */ fun getEntities(): Sequence> } +/** + * Base class for several PipelineFrontend implementations. + * Finds parsers of type [parserType] for all the given languages by [extensions]. 
+ * Looks for files in [inputDirectoryPath]. + */ abstract class CompositePipelineFrontend( private val inputDirectoryPath: String, private val parserType: String, @@ -31,11 +51,21 @@ abstract class CompositePipelineFrontend( } } +/** + * PipelineFrontend that extracts ParseResult from files. + * Basically, it parses the given files and returns the results. + * @see ParseResult + */ class FilePipelineFrontend(inputDirectoryPath: String, parserType: String, extensions: List) : CompositePipelineFrontend>(inputDirectoryPath, parserType, extensions) { override fun LanguageHandler.getEntities(): Sequence> = sequenceOf(parseResult) } +/** + * PipelineFrontend that extracts FunctionInfo from files. + * It parses the files, finds functions in those files and collects information about the functions. + * @see FunctionInfo + */ class FunctionPipelineFrontend(inputDirectoryPath: String, parserType: String, extensions: List) : CompositePipelineFrontend>(inputDirectoryPath, parserType, extensions) { diff --git a/src/main/kotlin/astminer/pipeline/StorageCreator.kt b/src/main/kotlin/astminer/pipeline/StorageCreator.kt index 5e92f883..32a64a3e 100644 --- a/src/main/kotlin/astminer/pipeline/StorageCreator.kt +++ b/src/main/kotlin/astminer/pipeline/StorageCreator.kt @@ -10,17 +10,26 @@ import astminer.storage.ast.DotAstStorage import astminer.storage.path.Code2VecPathStorage import java.io.File +/** + * Creates storage for each extension. 
+ * @param config The config that defines which storage will be used and the params of that storage + * @param outputDirectoryPath Path to the base output directory where folders for each extension will be created + * (e.g. 'py', 'java') + */ class StorageCreator(private val config: StorageConfig, outputDirectoryPath: String) { private val outputDirectory = File(outputDirectoryPath) - private fun getOutputPath(extension: String): String { + private fun createOutputPath(extension: String): String { val outputDirectoryForExtension = outputDirectory.resolve(extension) outputDirectoryForExtension.mkdir() return outputDirectoryForExtension.path } - fun createStorage(extension: String): Storage { - val outputPath = getOutputPath(extension) + /** + * Creates folder [outputDirectoryPath]/[extension] and initializes the storage in that folder. + */ + fun createStorageAndOutputFolder(extension: String): Storage { + val outputPath = createOutputPath(extension) return when (config) { is CsvAstStorageConfig -> CsvAstStorage(outputPath) is DotAstStorageConfig -> DotAstStorage(outputPath, config.tokenProcessor) diff --git a/src/main/kotlin/astminer/problem/FileLevelProblems.kt b/src/main/kotlin/astminer/problem/FileLevelProblems.kt index 55caa652..1c658984 100644 --- a/src/main/kotlin/astminer/problem/FileLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FileLevelProblems.kt @@ -6,10 +6,16 @@ import java.io.File interface FileLevelProblem : Problem> +/** + * Labels files with their file paths + */ object FilePathExtractor : FileLevelProblem { override fun process(entity: ParseResult): LabeledResult = entity.labeledWithFilePath() } +/** + * Labels files with folder names + */ object FolderExtractor : FileLevelProblem { override fun process(entity: ParseResult): LabeledResult?
{ val folderName = File(entity.filePath).parentFile.name ?: return null diff --git a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt index 030f8dd9..994e3538 100644 --- a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt @@ -7,6 +7,10 @@ import astminer.common.setTechnicalToken interface FunctionLevelProblem : Problem> +/** + * Labels functions with their names. + * Hides the name of the function in the subtree and also all in the recursive calls. + */ object FunctionNameProblem : FunctionLevelProblem { override fun process(entity: FunctionInfo): LabeledResult? { val name = entity.name ?: return null diff --git a/src/main/kotlin/astminer/problem/Problem.kt b/src/main/kotlin/astminer/problem/Problem.kt index d6576eab..4f76fe52 100644 --- a/src/main/kotlin/astminer/problem/Problem.kt +++ b/src/main/kotlin/astminer/problem/Problem.kt @@ -2,6 +2,14 @@ package astminer.problem import astminer.common.model.Node +/** + * A structural element of the pipeline. + * Extracts labels from entities and also may mutate them. + */ interface Problem { + /** + * Extracts label from entity. + * If returns null then this entity will not be used further. + */ fun process(entity: T): LabeledResult? 
} From d433900cba8fc0c94d496f6295e2f323c909d889 Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 4 May 2021 18:05:45 +0500 Subject: [PATCH 139/308] rearranged rows in PipelineConfig.kt --- src/main/kotlin/astminer/config/PipelineConfig.kt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt index 14472ee5..17245891 100644 --- a/src/main/kotlin/astminer/config/PipelineConfig.kt +++ b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -11,8 +11,8 @@ sealed class PipelineConfig { abstract val inputDir: String abstract val outputDir: String abstract val parser: ParserConfig - abstract val problem: Problem<*> abstract val filters: List> + abstract val problem: Problem<*> abstract val storage: StorageConfig } @@ -20,8 +20,8 @@ data class FilePipelineConfig( override val inputDir: String, override val outputDir: String, override val parser: ParserConfig, - override val problem: FileLevelProblem, override val filters: List, + override val problem: FileLevelProblem, override val storage: StorageConfig ) : PipelineConfig() @@ -29,14 +29,13 @@ data class FunctionPipelineConfig( override val inputDir: String, override val outputDir: String, override val parser: ParserConfig, - override val problem: FunctionLevelProblem, override val filters: List, + override val problem: FunctionLevelProblem, override val storage: StorageConfig ) : PipelineConfig() data class ParserConfig( val type: String, - val extensions: List, - val preprocess: Boolean + val extensions: List ) From 717c558407b07e9f8231615a8c03fceaabb280ee Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 4 May 2021 18:06:21 +0500 Subject: [PATCH 140/308] removed the cli -_- --- .../kotlin/astminer/cli/Code2VecExtractor.kt | 180 ------------------ .../kotlin/astminer/cli/LabelExtractors.kt | 133 ------------- .../astminer/cli/PathContextsExtractor.kt | 133 ------------- 
src/main/kotlin/astminer/cli/ProjectParser.kt | 161 ---------------- .../astminer/cli/ProjectPreprocessor.kt | 33 ---- src/main/kotlin/astminer/cli/utils.kt | 70 ------- 6 files changed, 710 deletions(-) delete mode 100644 src/main/kotlin/astminer/cli/Code2VecExtractor.kt delete mode 100644 src/main/kotlin/astminer/cli/LabelExtractors.kt delete mode 100644 src/main/kotlin/astminer/cli/PathContextsExtractor.kt delete mode 100644 src/main/kotlin/astminer/cli/ProjectParser.kt delete mode 100644 src/main/kotlin/astminer/cli/ProjectPreprocessor.kt delete mode 100644 src/main/kotlin/astminer/cli/utils.kt diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt deleted file mode 100644 index 56c290c8..00000000 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ /dev/null @@ -1,180 +0,0 @@ -package astminer.cli - -import astminer.common.getProjectFilesWithExtension -import astminer.common.model.Node -import astminer.common.model.ParseResult -import astminer.storage.* -import astminer.storage.path.Code2VecPathStorage -import astminer.storage.path.PathBasedStorageConfig -import com.github.ajalt.clikt.core.CliktCommand -import com.github.ajalt.clikt.parameters.options.* -import com.github.ajalt.clikt.parameters.types.int -import com.github.ajalt.clikt.parameters.types.long -import java.io.File - -class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null) : CliktCommand() { - - private val supportedLanguages = listOf("java", "c", "cpp", "py", "js") - - val extensions: List by option( - "--lang", - help = "Comma-separated list of file extensions that will be parsed.\n" + - "Supports 'c', 'cpp', 'java', 'py', 'js', defaults to all these extensions." 
- ).split(",").default(supportedLanguages) - - val projectRoot: String by option( - "--project", - help = "Path to the project that will be parsed" - ).required() - - val outputDirName: String by option( - "--output", - help = "Path to directory where the output will be stored" - ).required() - - val maxPathLength: Int by option( - "--maxL", - help = "Maximum length of path for code2vec" - ).int().default(8) - - val maxPathWidth: Int by option( - "--maxW", - help = "Maximum width of path. " + - "Note, that here width is the difference between token indices in contrast to the original code2vec." - ).int().default(3) - - val maxPathContexts: Int by option( - "--maxContexts", - help = "Number of path contexts to keep from each method." - ).int().default(500) - - val maxTokens: Long by option( - "--maxTokens", - help = "Keep only contexts with maxTokens most popular tokens." - ).long().default(Long.MAX_VALUE) - - val maxPaths: Long by option( - "--maxPaths", - help = "Keep only contexts with maxTokens most popular paths." - ).long().default(Long.MAX_VALUE) - - val granularityLevel: String by option( - "--granularity", - help = "Choose level of granularity ('file' or 'method', defaults to 'file')" - ).default("file") - - val folderLabel: Boolean by option( - "--folder-label", - help = "if passed with file-level granularity, the folder name is used to label paths" - ).flag(default = false) - - val isMethodNameHide: Boolean by option( - "--hide-method-name", - help = "if passed with method level granularity, the names of all methods are replaced with placeholder token" - ).flag(default = false) - - val isTokenSplitted: Boolean by option( - "--split-tokens", - help = "if passed, split tokens into sequence of tokens" - ).flag(default = false) - - val excludeModifiers: List by option( - "--filter-modifiers", - help = "Comma-separated list of function's modifiers, which should be filtered." + - "Works only for method-level granulation." 
- ).split(",").default(emptyList()) - - val excludeAnnotations: List by option( - "--filter-annotations", - help = "Comma-separated list of function's annotations, which should be filtered." + - "Works only for method-level granulation." - ).split(",").default(emptyList()) - - val filterConstructors: Boolean by option( - "--remove-constructors", - help = "Remove constructor methods, works for method-level granulation" - ).flag(default = false) - - val javaParser: String by option( - "--java-parser", - help = "Choose a parser for .java files." + - "'gumtree' for GumTree parser, 'antlr' for antlr parser." - ).default("gumtree") - - val maxMethodNameLength: Int by option( - "--max-method-name-length", - help = "Filtering methods with a large sequence of subtokens in their names" - ).int().default(-1) - - val maxTokenLength: Int by option( - "--max-token-length", - help = "Filter methods containing a long sequence of subtokens in the ast node" - ).int().default(-1) - - val maxTreeSize: Int by option( - "--max-tree-size", - help = "Filter methods by their ast size" - ).int().default(-1) - - private fun extractFromTree( - parseResult: ParseResult, - storage: Code2VecPathStorage, - labelExtractor: LabelExtractor - ) { - val labeledParseResults = labelExtractor.toLabeledData(parseResult) - - // Retrieve paths from every node individually and store them - storage.store(labeledParseResults) - } - - private fun extract(labelExtractor: LabelExtractor) { - val outputDir = File(outputDirName) - val storageConfig = PathBasedStorageConfig( - maxPathLength, - maxPathWidth, - maxTokens, - maxPaths, - maxPathContexts - ) - for (extension in extensions) { - val outputDirForLanguage = outputDir.resolve(extension) - outputDirForLanguage.mkdir() - // Choose how to process tokens - val tokenProcessor = if (isTokenSplitted) { - TokenProcessor.Split - } else { - TokenProcessor.Normalize - } - // Choose type of storage - val storage = Code2VecPathStorage(outputDirForLanguage.path, 
storageConfig, tokenProcessor) - // Choose type of parser - val parser = getParser( - extension, - javaParser - ) - // Parse project one file at a time - parser.parseFiles(getProjectFilesWithExtension(File(projectRoot), extension)) { - // Retrieve labeled data - extractFromTree(it, storage, labelExtractor) - } - // Save stored data on disk - storage.close() - } - } - - override fun run() { - val labelExtractor = customLabelExtractor ?: getLabelExtractor( - granularityLevel, - javaParser, - isMethodNameHide, - excludeModifiers, - excludeAnnotations, - filterConstructors, - maxMethodNameLength, - maxTokenLength, - maxTreeSize, - folderLabel - ) - extract(labelExtractor) - } -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt deleted file mode 100644 index 0a7206ad..00000000 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ /dev/null @@ -1,133 +0,0 @@ -package astminer.cli - -import astminer.common.model.Node -import astminer.common.model.ParseResult -import astminer.common.model.FunctionInfo -import astminer.common.preOrder -import astminer.common.setTechnicalToken -import astminer.problem.LabeledResult -import astminer.filters.FunctionFilter -import astminer.parse.antlr.AntlrNode -import astminer.parse.antlr.java.JavaMethodSplitter -import astminer.parse.antlr.javascript.JavaScriptMethodSplitter -import astminer.parse.antlr.python.PythonMethodSplitter -import astminer.parse.fuzzy.cpp.FuzzyMethodSplitter -import astminer.parse.fuzzy.cpp.FuzzyNode -import astminer.parse.gumtree.GumTreeNode -import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter -import astminer.parse.gumtree.python.GumTreePythonMethodSplitter -import java.io.File - - -interface LabelExtractor { - fun toLabeledData(parseResult: ParseResult): List> -} - -abstract class FileLabelExtractor : LabelExtractor { - - override fun toLabeledData( - parseResult: ParseResult - ): List> { - val (root, filePath) = 
parseResult - val label = extractLabel(root, filePath) ?: return emptyList() - return listOf(LabeledResult(root, label, parseResult.filePath)) - } - - abstract fun extractLabel(root: Node, filePath: String): String? -} - -abstract class MethodLabelExtractor( - open val filterPredicates: Collection = emptyList(), - open val javaParser: String = "gumtree", - open val pythonParser: String = "antlr" -) : LabelExtractor { - - override fun toLabeledData( - parseResult: ParseResult - ): List> { - val (root, filePath) = parseResult - val fileExtension = File(filePath).extension - val methodInfos = when (fileExtension) { - "c", "cpp" -> { - val methodSplitter = FuzzyMethodSplitter() - methodSplitter.splitIntoMethods(root as FuzzyNode) - } - "java" -> { - when (javaParser) { - "gumtree" -> { - val methodSplitter = GumTreeJavaMethodSplitter() - methodSplitter.splitIntoMethods(root as GumTreeNode) - } - "antlr" -> { - val methodSplitter = JavaMethodSplitter() - methodSplitter.splitIntoMethods(root as AntlrNode) - } - else -> { - throw UnsupportedOperationException("Unsupported parser $javaParser") - } - } - } - "py" -> { - when (pythonParser) { - "gumtree" -> { - val methodSplitter = GumTreePythonMethodSplitter() - methodSplitter.splitIntoMethods(root as GumTreeNode) - } - "antlr" -> { - val methodSplitter = PythonMethodSplitter() - methodSplitter.splitIntoMethods(root as AntlrNode) - } - else -> { - throw UnsupportedOperationException("Unsupported parser $pythonParser") - } - } - } - "js" -> { - val methodSplitter = JavaScriptMethodSplitter() - methodSplitter.splitIntoMethods(root as AntlrNode) - } - else -> throw UnsupportedOperationException("Unsupported extension $fileExtension") - }.filter { methodInfo -> - filterPredicates.all { predicate -> - predicate.isFiltered(methodInfo) - } - } - return methodInfos.mapNotNull { - val label = extractLabel(it, filePath) ?: return@mapNotNull null - LabeledResult(it.root, label, filePath) - } - } - - abstract fun 
extractLabel(functionInfo: FunctionInfo, filePath: String): String? -} - -class FilePathExtractor : FileLabelExtractor() { - override fun extractLabel(root: Node, filePath: String): String { - return filePath - } -} - -class FolderExtractor : FileLabelExtractor() { - override fun extractLabel(root: Node, filePath: String): String? { - return File(filePath).parentFile.name - } -} - -class MethodNameExtractor( - override val filterPredicates: Collection = emptyList(), - override val javaParser: String = "gumtree", - override val pythonParser: String = "antlr" -) : MethodLabelExtractor(filterPredicates, javaParser, pythonParser) { - - override fun extractLabel(functionInfo: FunctionInfo, filePath: String): String? { - val name = functionInfo.name ?: return null - functionInfo.root.preOrder().forEach { node -> - if (node.getToken() == name) { - node.setTechnicalToken("SELF") - } - } - functionInfo.nameNode?.setTechnicalToken("METHOD_NAME") - // TODO: for some reason it is not normalized, check if something is wrong. 
Maybe storages normalize the label - return name - } -} diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt deleted file mode 100644 index 2e74aae7..00000000 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ /dev/null @@ -1,133 +0,0 @@ -package astminer.cli - -import astminer.common.getProjectFilesWithExtension -import astminer.common.model.* -import astminer.parse.antlr.java.JavaParser -import astminer.parse.antlr.javascript.JavaScriptParser -import astminer.parse.antlr.python.PythonParser -import astminer.storage.* -import astminer.storage.path.Code2VecPathStorage -import astminer.storage.path.PathBasedStorageConfig -import astminer.parse.fuzzy.cpp.FuzzyCppParser -import astminer.parse.gumtree.java.GumTreeJavaParser -import com.github.ajalt.clikt.core.CliktCommand -import com.github.ajalt.clikt.parameters.options.* -import com.github.ajalt.clikt.parameters.types.int -import com.github.ajalt.clikt.parameters.types.long -import java.io.File -import java.lang.IllegalArgumentException - -class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? = null) : CliktCommand() { - - /** - * @param parser class that implements parsing - * @param extension file extension to choose files for parsing - */ - private data class SupportedLanguage(val parser: Parser, val extension: String) - - /** - * List of supported language extensions and corresponding parsers. 
- */ - private val supportedLanguages = listOf( - SupportedLanguage(GumTreeJavaParser(), "java"), - SupportedLanguage(FuzzyCppParser(), "c"), - SupportedLanguage(FuzzyCppParser(), "cpp"), - SupportedLanguage(PythonParser(), "py"), - SupportedLanguage(JavaScriptParser(), "js") - ) - - val extensions: List by option( - "--lang", - help = "File extensions that will be parsed" - ).split(",").default(supportedLanguages.map { it.extension }) - - val projectRoot: String by option( - "--project", - help = "Path to the project that will be parsed" - ).required() - - val outputDirName: String by option( - "--output", - help = "Path to directory where the output will be stored" - ).required() - - val maxPathLength: Int by option( - "--maxL", - help = "Maximum length of path for code2vec" - ).int().default(8) - - val maxPathWidth: Int by option( - "--maxW", - help = "Maximum width of path. " + - "Note, that here width is the difference between token indices in contrast to the original code2vec." - ).int().default(3) - - val maxPathContexts: Int by option( - "--maxContexts", - help = "Number of path contexts to keep from each method." - ).int().default(500) - - val maxTokens: Long by option( - "--maxTokens", - help = "Keep only contexts with maxTokens most popular tokens." - ).long().default(Long.MAX_VALUE) - - val maxPaths: Long by option( - "--maxPaths", - help = "Keep only contexts with maxTokens most popular paths." - ).long().default(Long.MAX_VALUE) - - val javaParser: String by option( - "--java-parser", - help = "Choose a parser for .java files." + - "'gumtree' for GumTree parser, 'antlr' for antlr parser." 
- ).default("gumtree") - - private fun getParser(extension: String): Parser { - if (extension == "java") { - return when (javaParser) { - "gumtree" -> GumTreeJavaParser() - "antlr" -> JavaParser() - else -> throw IllegalArgumentException("javaParser should be `antlr` or `gumtree`, not $javaParser") - } - } - for (language in supportedLanguages) { - if (extension == language.extension) { - return language.parser - } - } - throw UnsupportedOperationException("Unsupported extension $extension") - } - - private fun extractPathContexts(labelExtractor: LabelExtractor) { - val outputDir = File(outputDirName) - val storageConfig = PathBasedStorageConfig( - maxPathLength, - maxPathWidth, - maxTokens, - maxPaths, - maxPathContexts - ) - for (extension in extensions) { - val parser = getParser(extension) - - val outputDirForLanguage = outputDir.resolve(extension) - outputDirForLanguage.mkdir() - val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig, TokenProcessor.Split) - - val files = getProjectFilesWithExtension(File(projectRoot), extension) - parser.parseFiles(files) { parseResult -> - val labeledParseResults = labelExtractor.toLabeledData(parseResult) - storage.store(labeledParseResults) - } - - // Save stored data on disk - storage.close() - } - } - - override fun run() { - val labelExtractor = customLabelExtractor ?: FilePathExtractor() - extractPathContexts(labelExtractor) - } -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt deleted file mode 100644 index cccf4c16..00000000 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ /dev/null @@ -1,161 +0,0 @@ -package astminer.cli - -import astminer.storage.ast.CsvAstStorage -import astminer.storage.ast.DotAstStorage -import astminer.common.getProjectFilesWithExtension -import astminer.common.preOrder -import astminer.storage.Storage -import astminer.storage.TokenProcessor -import 
com.github.ajalt.clikt.core.CliktCommand -import com.github.ajalt.clikt.parameters.options.* -import com.github.ajalt.clikt.parameters.types.int -import java.io.File - -class ProjectParser(private val customLabelExtractor: LabelExtractor? = null) : CliktCommand() { - - private val supportedLanguages = listOf("java", "c", "cpp", "py", "js") - - val extensions: List by option( - "--lang", - help = "Comma-separated list of file extensions that will be parsed.\n" + - "Supports 'c', 'cpp', 'java', 'py', 'js', defaults to all these extensions." - ).split(",").default(supportedLanguages) - - val projectRoot: String by option( - "--project", - help = "Path to the project that will be parsed" - ).required() - - val outputDirName: String by option( - "--output", - help = "Path to directory where the output will be stored" - ).required() - - val astStorageType: String by option( - "--storage", - help = "AST storage type ('dot' or 'csv', defaults to 'csv')" - ).default("csv") - - val granularityLevel: String by option( - "--granularity", - help = "Choose level of granularity ('file' or 'method', defaults to 'file')" - ).default("file") - - val isMethodNameHide: Boolean by option( - "--hide-method-name", - help = "if passed with method level granularity, the names of all methods are replaced with placeholder token" - ).flag(default = false) - - val isTokenSplitted: Boolean by option( - "--split-tokens", - help = "if passed, split tokens into sequence of tokens" - ).flag(default = false) - - val excludeModifiers: List by option( - "--filter-modifiers", - help = "Comma-separated list of function's modifiers, which should be filtered." + - "Works only for method-level granulation." - ).split(",").default(emptyList()) - - val excludeAnnotations: List by option( - "--filter-annotations", - help = "Comma-separated list of function's annotations, which should be filtered." + - "Works only for method-level granulation." 
- ).split(",").default(emptyList()) - - val filterConstructors: Boolean by option( - "--remove-constructors", - help = "Remove constructor methods, works for method-level granulation" - ).flag(default = false) - - val excludeNodes: List by option( - "--remove-nodes", - help = "Comma-separated list of node types, which must be removed from asts." - ).split(",").default(emptyList()) - - val javaParser: String by option( - "--java-parser", - help = "Choose a parser for .java files." + - "'gumtree' for GumTree parser, 'antlr' for antlr parser." - ).default("gumtree") - - val maxMethodNameLength: Int by option( - "--max-method-name-length", - help = "Filtering methods with a large sequence of subtokens in their names" - ).int().default(-1) - - val maxTokenLength: Int by option( - "--max-token-length", - help = "Filter methods containing a long sequence of subtokens in the ast node" - ).int().default(-1) - - val maxTreeSize: Int by option( - "--max-tree-size", - help = "Filter methods by their ast size" - ).int().default(-1) - - val folderLabel: Boolean by option( - "--folder-label", - help = "if passed with file-level granularity, the folder name is used to label paths" - ).flag(default = false) - - - private fun getStorage(storageType: String, directoryPath: String): Storage { - return when (storageType) { - "csv" -> CsvAstStorage(directoryPath) - "dot" -> DotAstStorage( - directoryPath, - if (isTokenSplitted) TokenProcessor.Split else TokenProcessor.Normalize - ) - else -> { - throw UnsupportedOperationException("Unsupported AST storage $storageType") - } - } - } - - private fun parsing(labelExtractor: LabelExtractor) { - val outputDir = File(outputDirName) - for (extension in extensions) { - // Create directory for current extension - val outputDirForLanguage = outputDir.resolve(extension) - // Choose type of storage - val storage = getStorage(astStorageType, outputDirForLanguage.path) - // Choose type of parser - val parser = getParser( - extension, - javaParser - ) 
- // Parse project - val filesToParse = getProjectFilesWithExtension(File(projectRoot), extension) - parser.parseFiles(filesToParse) { parseResult -> - val labeledParseResults = labelExtractor.toLabeledData(parseResult) - labeledParseResults.forEach { labeledParseResult -> - labeledParseResult.root.preOrder().forEach { node -> - excludeNodes.forEach { node.removeChildrenOfType(it) } - } - // Save AST as it is or process it to extract features / path-based representations - storage.store(labeledParseResult) - } - } - // Save stored data on disk - storage.close() - } - - } - - override fun run() { - val labelExtractor = customLabelExtractor ?: getLabelExtractor( - granularityLevel, - javaParser, - isMethodNameHide, - excludeModifiers, - excludeAnnotations, - filterConstructors, - maxMethodNameLength, - maxTokenLength, - maxTreeSize, - folderLabel - ) - parsing(labelExtractor) - } -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/cli/ProjectPreprocessor.kt b/src/main/kotlin/astminer/cli/ProjectPreprocessor.kt deleted file mode 100644 index 1dfa5ca7..00000000 --- a/src/main/kotlin/astminer/cli/ProjectPreprocessor.kt +++ /dev/null @@ -1,33 +0,0 @@ -package astminer.cli - -import astminer.parse.fuzzy.cpp.FuzzyCppParser -import com.github.ajalt.clikt.core.CliktCommand -import com.github.ajalt.clikt.parameters.options.option -import com.github.ajalt.clikt.parameters.options.required -import java.io.File - -/** - * Preprocess C/C++ project located in [projectRoot] and save the preprocessed files in [preprocessDir], replicating - * structure of the original project. 
- */ -class ProjectPreprocessor : CliktCommand() { - - val projectRoot: String by option( - "--project", - help = "Path to the project that will be parsed" - ).required() - - val preprocessDir: String by option( - "--output", - help = "Path to directory where the preprocessed data will be stored" - ).required() - - private fun preprocessing() { - val parser = FuzzyCppParser() - parser.preprocessProject(File(projectRoot), File(preprocessDir)) - } - - override fun run() { - preprocessing() - } -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/cli/utils.kt b/src/main/kotlin/astminer/cli/utils.kt deleted file mode 100644 index 791d8bdb..00000000 --- a/src/main/kotlin/astminer/cli/utils.kt +++ /dev/null @@ -1,70 +0,0 @@ -package astminer.cli - -import astminer.parse.antlr.java.JavaParser -import astminer.parse.antlr.python.PythonParser -import astminer.parse.fuzzy.cpp.FuzzyCppParser -import astminer.parse.gumtree.java.GumTreeJavaParser -import astminer.common.model.Node -import astminer.common.model.Parser -import astminer.filters.* -import astminer.parse.antlr.javascript.JavaScriptParser - -fun getParser( - extension: String, - javaParser: String -): Parser { - return when (extension) { - "java" -> { - when (javaParser) { - "gumtree" -> GumTreeJavaParser() - "antlr" -> JavaParser() - else -> { - throw UnsupportedOperationException("Unsupported parser for java extension $javaParser") - } - } - } - "c" -> FuzzyCppParser() - "cpp" -> FuzzyCppParser() - "py" -> PythonParser() - "js" -> JavaScriptParser() - else -> { - throw UnsupportedOperationException("Unsupported extension $extension") - } - } -} - - -fun getLabelExtractor( - granularityLevel: String, - javaParser: String, - hideMethodNames: Boolean, - excludeModifiers: List, - excludeAnnotations: List, - filterConstructors: Boolean, - maxMethodNameLength: Int, - maxTokenLength: Int, - maxTreeSize: Int, - useFolderName: Boolean -): LabelExtractor { - when (granularityLevel) { - "file" -> { - return if 
(useFolderName) { - FolderExtractor() - } else { - FilePathExtractor() - } - } - "method" -> { - val filterPredicates = mutableListOf( - ModifierFilter(excludeModifiers), AnnotationFilter(excludeAnnotations), - FunctionNameWordsNumberFilter(maxMethodNameLength), FunctionAnyNodeWordsNumberFilter(maxTokenLength), - TreeSizeFilter(maxTreeSize) - ) - if (filterConstructors) { - filterPredicates.add(ConstructorFilter) - } - return MethodNameExtractor(filterPredicates, javaParser) - } - } - throw UnsupportedOperationException("Unsupported granularity level $granularityLevel") -} From 902ce183fe4e1c6fb697e0ab6fafe2f7f8953a66 Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 5 May 2021 16:30:38 +0300 Subject: [PATCH 141/308] gumtreefunctioninfo added --- .../gumtree/python/GumTreeFunctionInfo.kt | 83 +++++++++++++++++ .../python/GumTreePythonMethodSplitter.kt | 88 +------------------ 2 files changed, 84 insertions(+), 87 deletions(-) create mode 100644 src/main/kotlin/astminer/parse/gumtree/python/GumTreeFunctionInfo.kt diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreeFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreeFunctionInfo.kt new file mode 100644 index 00000000..9eaff1cc --- /dev/null +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreeFunctionInfo.kt @@ -0,0 +1,83 @@ +package astminer.parse.gumtree.python + +import astminer.common.model.EnclosingElement +import astminer.common.model.EnclosingElementType +import astminer.common.model.FunctionInfo +import astminer.common.model.MethodInfoParameter +import astminer.parse.findEnclosingElementBy +import astminer.parse.gumtree.GumTreeNode + +class GumTreeFunctionInfo(override val root: GumTreeNode) : FunctionInfo { + companion object { + private object TypeLabels { + const val classDefinition = "ClassDef" + const val functionDefinition = "FunctionDef" + const val asyncFunctionDefinition = "AsyncFunctionDef" + const val nameLoad = "Name_Load" + const val posOnlyArgs = 
"posonlyargs" + const val kwOnlyArgs = "kwonlyargs" + const val arguments = "arguments" + const val vararg = "vararg" + const val kwarg = "kwarg" + const val args = "args" + const val arg = "arg" + + const val body = "body" + const val returnTypeLabel = "Return" + const val passTypeLabel = "Pass" + const val constantType = "Constant-" + + val methodDefinitions = listOf(functionDefinition, asyncFunctionDefinition) + val funcArgsTypesNodes = listOf(args, posOnlyArgs, kwOnlyArgs) + } + } + + override val nameNode: GumTreeNode = root + override val parameters: List = collectParameters() + override val enclosingElement: EnclosingElement? = collectEnclosingClass() + + private fun getElementType(node: GumTreeNode): GumTreeNode? { + if (node.getTypeLabel() == TypeLabels.arg) { + return node.getChildOfType(TypeLabels.nameLoad) + } + if (TypeLabels.methodDefinitions.contains(node.getTypeLabel())) { + return node.getChildOfType(TypeLabels.body)?.getChildOfType(TypeLabels.returnTypeLabel)?.let { + it.getChildren().firstOrNull { child -> + child.getTypeLabel().startsWith(TypeLabels.constantType) + } + } + } + return null + } + + private fun collectEnclosingClass(): EnclosingElement? { + val enclosing = findEnclosingClass() ?: return null + return EnclosingElement( + type = EnclosingElementType.Class, + name = enclosing.getToken(), + root = enclosing + ) + } + + private fun findEnclosingClass(): GumTreeNode? 
{ + return root.findEnclosingElementBy { it.getTypeLabel() == TypeLabels.classDefinition } + } + + private fun collectParameters(): List { + val arguments = root.getChildrenOfType(TypeLabels.arguments).flatMap { it.getChildren() } + val params = arguments.flatMap { node -> + when (node.getTypeLabel()) { + in TypeLabels.funcArgsTypesNodes -> node.getChildren().flatMap { it.getChildren() } + .filter { it.getTypeLabel() == TypeLabels.arg } + TypeLabels.vararg, TypeLabels.kwarg -> listOf(node) + else -> emptyList() + } + } + return params.map { node-> + MethodInfoParameter( + name = node.getToken(), + type = getElementType(node)?.getToken() + ) + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt index 6c1a0cb9..4f27c65a 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt @@ -7,100 +7,14 @@ import astminer.parse.gumtree.GumTreeNode class GumTreePythonMethodSplitter : TreeMethodSplitter { companion object { private object TypeLabels { - const val classDefinition = "ClassDef" const val functionDefinition = "FunctionDef" const val asyncFunctionDefinition = "AsyncFunctionDef" - const val nameLoad = "Name_Load" - const val posOnlyArgs = "posonlyargs" - const val kwOnlyArgs = "kwonlyargs" - const val arguments = "arguments" - const val vararg = "vararg" - const val kwarg = "kwarg" - const val args = "args" - const val arg = "arg" - - const val body = "body" - const val returnTypeLabel = "Return" - const val passTypeLabel = "Pass" - const val constantType = "Constant-" - val methodDefinitions = listOf(functionDefinition, asyncFunctionDefinition) - val funcArgsTypesNodes = listOf(args, posOnlyArgs, kwOnlyArgs) } } override fun splitIntoMethods(root: GumTreeNode): Collection> { val methodRoots = 
root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.getTypeLabel()) } - return dummyMethodInfos() -// return methodRoots.map { collectMethodInfo(it as GumTreeNode) } - } - - private fun collectMethodInfo(methodNode: GumTreeNode): MethodInfo { - val methodReturnType = getElementType(methodNode) // no methods return types for current parser - val methodName = getElementName(methodNode) - - val classRoot = getEnclosingClass(methodNode) - val className = classRoot?.let { getElementName(it) } - - val parameters = getParameters(methodNode) - - return MethodInfo( - MethodNode(methodNode, methodReturnType, methodName), - ElementNode(classRoot, className), - parameters - ) - } - - private fun getElementName(node: GumTreeNode) = node - - private fun getElementType(node: GumTreeNode): GumTreeNode? { - if (node.getTypeLabel() == TypeLabels.arg) { - return node.getChildOfType(TypeLabels.nameLoad) - } - // if return statement has "Constant-`Type`" return value => function type is `Type` - if (TypeLabels.methodDefinitions.contains(node.getTypeLabel())) { - return node.getChildOfType(TypeLabels.body)?.getChildOfType(TypeLabels.returnTypeLabel)?.let { - it.getChildren().firstOrNull { child -> - child.getTypeLabel().startsWith(TypeLabels.constantType) - } - } - } - return null - } - - private fun getEnclosingClass(node: GumTreeNode): GumTreeNode? { - if (node.getTypeLabel() == TypeLabels.classDefinition) { - return node - } - val parentNode = node.getParent() as? 
GumTreeNode - return parentNode?.let { getEnclosingClass(it) } - } - - private fun getParameters(methodNode: GumTreeNode): List> { - val params = methodNode.getChildrenOfType(TypeLabels.arguments).flatMap { - it.getChildren() - }.filter { - TypeLabels.funcArgsTypesNodes.contains(it.getTypeLabel()) - }.flatMap { - it.getChildren() - }.filter { - it.getTypeLabel() == TypeLabels.arg - } as MutableList - - methodNode.getChildrenOfType(TypeLabels.arguments).flatMap { - it.getChildren() - }.filter { - it.getTypeLabel() == TypeLabels.vararg || it.getTypeLabel() == TypeLabels.kwarg - }.forEach { - params.add(it) - } - - return params.map {node -> - ParameterNode( - node, - getElementType(node), - getElementName(node) - ) - }.toList() + return methodRoots.map { GumTreeFunctionInfo(it as GumTreeNode) } } } From 27b68ca809d16875513a8d6e09ffc8f6ba4ca8b9 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 6 May 2021 07:39:43 +0300 Subject: [PATCH 142/308] remove CircleCI badge --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index b16260b8..05d285cc 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ [![JetBrains Research](https://jb.gg/badges/research.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub) -[![CircleCI](https://circleci.com/gh/JetBrains-Research/astminer.svg?style=svg)](https://circleci.com/gh/JetBrains-Research/astminer) # astminer A library for mining of [path-based representations of code](https://arxiv.org/pdf/1803.09544.pdf) and more, supported by the [Machine Learning Methods for Software Engineering](https://research.jetbrains.org/groups/ml_methods) group at [JetBrains Research](https://research.jetbrains.org). 
From e49a9dc6b6b1b4e2720bbac9c94983bc50ae115c Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 6 May 2021 07:53:26 +0300 Subject: [PATCH 143/308] fix spelling of `astminer` --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 05d285cc..3c8ebc71 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Supported languages of the input: See [changelog](changelog.md) ## About -Astminer was first implemented as a part of pipeline in the [code style extraction project](https://arxiv.org/abs/2002.03997) and later converted into a reusable tool. +`astminer` was first implemented as a part of pipeline in the [code style extraction project](https://arxiv.org/abs/2002.03997) and later converted into a reusable tool. Currently, it supports extraction of: * Path-based representations of files @@ -41,11 +41,11 @@ docker build -t voudy/astminer . If you don't want to use docker, you can always use Gradle tasks. It works almost for all parsers and languages as expected. -There are two different ways to use ASTMiner. +There are two different ways to use `astminer`. ### Use as CLI -You can run ASTMiner in CLI mode to preprocess your data with already implemented logic. +You can run `astminer` in CLI mode to preprocess your data with already implemented logic. Use special script for it: ```shell ./cli.sh optionName parameters @@ -85,7 +85,7 @@ Parse all files written in specified language into ASTs, split into methods, and #### Import -ASTMiner is available in the JetBrains Space package repository. You can add the dependency in your `build.gradle` file: +`astminer` is available in the JetBrains Space package repository. 
You can add the dependency in your `build.gradle` file: ``` repositories { maven { @@ -111,7 +111,7 @@ dependencies { #### Local development -In order to use specific version of library navigate to required branch and build local version of ASTMiner: +In order to use specific version of library navigate to required branch and build local version of `astminer`: ```shell ./gradlew publishToMavenLocal ``` From b2e87a66949abb8c82152cf60f0784d1e1ebc009 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 6 May 2021 08:43:42 +0300 Subject: [PATCH 144/308] fix cli script: do not run gradle every time, change wording --- cli.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cli.sh b/cli.sh index b6e872c3..526e0ebf 100755 --- a/cli.sh +++ b/cli.sh @@ -4,11 +4,14 @@ IMAGE_NAME="voudy/astminer" SHADOW_JAR_PATH="build/shadow/astminer.jar" if [[ "$(docker images -q $IMAGE_NAME 2> /dev/null)" == "" ]]; then - echo "Can't find docker image, will compile from sources" - ./gradlew shadowJar + echo "Docker image not found, will use $SHADOW_JAR_PATH"; + if ! 
[[ -f "$SHADOW_JAR_PATH" ]]; then + echo "$SHADOW_JAR_PATH not found, building" + ./gradlew shadowJar + fi java -jar $SHADOW_JAR_PATH "$@" else - echo "Run ASTMiner inside docker" + echo "Running astminer in docker" docker run --rm voudy/astminer "$@" fi From a6bbc549adb0051ac06d30d107b19aeb69ce1603 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 6 May 2021 09:06:22 +0300 Subject: [PATCH 145/308] fix readme: adjust order, fix wording, add links to sections --- README.md | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 3c8ebc71..f39899db 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![JetBrains Research](https://jb.gg/badges/research.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub) -# astminer +# `astminer` A library for mining of [path-based representations of code](https://arxiv.org/pdf/1803.09544.pdf) and more, supported by the [Machine Learning Methods for Software Engineering](https://research.jetbrains.org/groups/ml_methods) group at [JetBrains Research](https://research.jetbrains.org). Supported languages of the input: @@ -24,29 +24,35 @@ Currently, it supports extraction of: Supported languages are Java, Python, C/C++, but it is designed to be very easily extensible. -For the output format, see the section below. +For the output format, see [the section below](running-astminer-cli). ## Usage +There are two ways to use `astminer`. -We provide docker image with preinstalled requirements for all supported parsers. -You can use image with last release by pulling image from Docker hub: +- [As a standalone CLI tool](using-astminer-cli) with pre-implemented logic for common processing and mining tasks +- [Integrated](using-astminer-as-a-dependency) into your Kotlin/Java mining pipelines as a Gradle dependency. 
+ +### Using `astminer` CLI +#### Building or installing `astminer` CLI +`astminer` CLI can be either built from sources or installed in a pre-built Docker image. + +##### Building locally +`./cli.sh` will do the job for you by triggering a Gradle build on the first run. + +##### Installing the Docker image +The C++ parser in `astminer` relies on `g++`. To avoid misconfiguration with this and likely other future external dependencies, you can use it from a Docker container. + +Install the image with the last release by pulling it from Docker Hub: ```shell docker pull voudy/astminer ``` -If you want to run version from specific branch then you can rebuild image using following command: +To rebuild the image locally, run ```shell docker build -t voudy/astminer . ``` -If you don't want to use docker, you can always use Gradle tasks. -It works almost for all parsers and languages as expected. - -There are two different ways to use `astminer`. - -### Use as CLI - -You can run `astminer` in CLI mode to preprocess your data with already implemented logic. -Use special script for it: +#### Running `astminer` CLI +Run ```shell ./cli.sh optionName parameters ``` @@ -81,7 +87,7 @@ Parse all files written in specified language into ASTs, split into methods, and ./cli.sh code2vec --lang py,java,c,cpp,js --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P --split-tokens --granularity method ``` -### Integrate in your mining pipeline +### Using `astminer` as a dependency #### Import @@ -111,11 +117,11 @@ dependencies { #### Local development -In order to use specific version of library navigate to required branch and build local version of `astminer`: +To use a specific version of the library, navigate to the required branch and build local version of `astminer`: ```shell ./gradlew publishToMavenLocal ``` -After that add `mavenLocal()` into `repositories` field inside your gradle configuration. 
+After that add `mavenLocal()` into the `repositories` section in your gradle configuration. #### Examples From 21ac7fc2ead959be17da8d8ad344c1da80a081b9 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Thu, 6 May 2021 09:08:32 +0300 Subject: [PATCH 146/308] fix links in readme --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f39899db..4e8f7333 100644 --- a/README.md +++ b/README.md @@ -24,13 +24,13 @@ Currently, it supports extraction of: Supported languages are Java, Python, C/C++, but it is designed to be very easily extensible. -For the output format, see [the section below](running-astminer-cli). +For the output format, see [the section below](#output-format). ## Usage There are two ways to use `astminer`. -- [As a standalone CLI tool](using-astminer-cli) with pre-implemented logic for common processing and mining tasks -- [Integrated](using-astminer-as-a-dependency) into your Kotlin/Java mining pipelines as a Gradle dependency. +- [As a standalone CLI tool](#using-astminer-cli) with pre-implemented logic for common processing and mining tasks +- [Integrated](#using-astminer-as-a-dependency) into your Kotlin/Java mining pipelines as a Gradle dependency. ### Using `astminer` CLI #### Building or installing `astminer` CLI @@ -121,7 +121,7 @@ To use a specific version of the library, navigate to the required branch and bu ```shell ./gradlew publishToMavenLocal ``` -After that add `mavenLocal()` into the `repositories` section in your gradle configuration. +After that, add `mavenLocal()` into the `repositories` section in your gradle configuration. 
#### Examples From 5474afc663697b1d1b1708e9193269f6966dd716 Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 6 May 2021 14:39:01 +0500 Subject: [PATCH 147/308] resolved all compilation errors --- src/main/kotlin/astminer/Main.kt | 34 ++++++------ .../kotlin/astminer/config/StorageConfig.kt | 2 +- .../kotlin/astminer/examples/AllCppFiles.kt | 4 +- .../kotlin/astminer/examples/AllJavaAst.kt | 4 +- .../astminer/examples/AllJavaFilesGumTree.kt | 4 +- .../astminer/examples/AllJavaScriptFiles.kt | 4 +- .../astminer/examples/AllPythonFiles.kt | 4 +- .../astminer/examples/Code2VecJavaMethods.kt | 53 ++++++++----------- .../kotlin/astminer/pipeline/GetPipeline.kt | 4 +- .../astminer/cli/Code2VecExtractorTest.kt | 26 --------- .../astminer/cli/PathContextsExtractorTest.kt | 24 --------- .../astminer/filters/FunctionFiltersTest.kt | 45 ++++++++++------ .../Code2VecExtractionPipelineTest.kt | 40 ++++++++++++++ 13 files changed, 118 insertions(+), 130 deletions(-) delete mode 100644 src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt delete mode 100644 src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt create mode 100644 src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index dba480d5..c49dceaf 100644 --- a/src/main/kotlin/astminer/Main.kt +++ b/src/main/kotlin/astminer/Main.kt @@ -1,20 +1,20 @@ package astminer -import astminer.cli.* +//import astminer.cli.* -fun main(args: Array) { - if (args.isEmpty()) { - println(""" - You should specify the task as the first argument ("preprocess", "parse", "pathContexts", or "code2vec"). 
- For more information run `./cli.sh taskName --help` - """.trimIndent()) - } else { - return when (args[0]) { - "preprocess" -> ProjectPreprocessor().main(args.sliceArray(1 until args.size)) - "parse" -> ProjectParser().main(args.sliceArray(1 until args.size)) - "pathContexts" -> PathContextsExtractor().main(args.sliceArray(1 until args.size)) - "code2vec" -> Code2VecExtractor().main(args.sliceArray(1 until args.size)) - else -> throw Exception("The first argument should be task's name: either 'preprocess', 'parse', 'pathContexts', or 'code2vec'") - } - } -} \ No newline at end of file +//fun main(args: Array) { +// if (args.isEmpty()) { +// println(""" +// You should specify the task as the first argument ("preprocess", "parse", "pathContexts", or "code2vec"). +// For more information run `./cli.sh taskName --help` +// """.trimIndent()) +// } else { +// return when (args[0]) { +// "preprocess" -> ProjectPreprocessor().main(args.sliceArray(1 until args.size)) +// "parse" -> ProjectParser().main(args.sliceArray(1 until args.size)) +// "pathContexts" -> PathContextsExtractor().main(args.sliceArray(1 until args.size)) +// "code2vec" -> Code2VecExtractor().main(args.sliceArray(1 until args.size)) +// else -> throw Exception("The first argument should be task's name: either 'preprocess', 'parse', 'pathContexts', or 'code2vec'") +// } +// } +//} \ No newline at end of file diff --git a/src/main/kotlin/astminer/config/StorageConfig.kt b/src/main/kotlin/astminer/config/StorageConfig.kt index d100030c..e5b65ab2 100644 --- a/src/main/kotlin/astminer/config/StorageConfig.kt +++ b/src/main/kotlin/astminer/config/StorageConfig.kt @@ -15,7 +15,7 @@ data class Code2VecPathStorageConfig( val maxTokens: Long? = null, val maxPaths: Long? = null, val maxPathContextsPerEntity: Int? 
= null, - val tokenProcessor: TokenProcessor + val tokenProcessor: TokenProcessor = TokenProcessor.Normalize ) : StorageConfig() { fun toPathBasedConfig() = PathBasedStorageConfig(maxPathLength, maxPathWidth, maxTokens, maxPaths, maxPathContextsPerEntity) diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index 55faff40..1a45b003 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -22,9 +22,7 @@ fun allCppFiles() { val files = getProjectFilesWithExtension(preprocOutputFolder, "cpp") parser.parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { labeledResult -> - storage.store(labeledResult) - } + storage.store(parseResult.labeledWithFilePath()) } storage.close() diff --git a/src/main/kotlin/astminer/examples/AllJavaAst.kt b/src/main/kotlin/astminer/examples/AllJavaAst.kt index 675bc62f..25a88b48 100644 --- a/src/main/kotlin/astminer/examples/AllJavaAst.kt +++ b/src/main/kotlin/astminer/examples/AllJavaAst.kt @@ -13,9 +13,7 @@ fun allJavaAsts() { val files = getProjectFilesWithExtension(File(folder), "java") JavaParser().parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { labeledResult -> - storage.store(labeledResult) - } + storage.store(parseResult.labeledWithFilePath()) } storage.close() diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index 937fb8a6..c8236abf 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -15,9 +15,7 @@ fun allJavaFilesGumTree() { val files = getProjectFilesWithExtension(File(inputDir), "java") GumTreeJavaParser().parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { labeledResult -> - storage.store(labeledResult) - } + storage.store(parseResult.labeledWithFilePath()) } 
storage.close() diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index 2e7db26c..f3a835e5 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -15,9 +15,7 @@ fun allJavaScriptFiles() { val files = getProjectFilesWithExtension(File(folder), "js") JavaScriptParser().parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { labeledResult -> - storage.store(labeledResult) - } + storage.store(parseResult.labeledWithFilePath()) } storage.close() diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index ff8a82b0..91f6fe88 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -16,9 +16,7 @@ fun allPythonFiles() { val files = getProjectFilesWithExtension(File(inputDir), "py") PythonParser().parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { labeledResult -> - storage.store(labeledResult) - } + storage.store(parseResult.labeledWithFilePath()) } storage.close() diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index a914b65f..0455120f 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -1,13 +1,10 @@ package astminer.examples -import astminer.problem.LabeledResult -import astminer.cli.MethodNameExtractor -import astminer.common.* -import astminer.parse.antlr.java.JavaMethodSplitter -import astminer.parse.antlr.java.JavaParser -import astminer.storage.path.Code2VecPathStorage -import astminer.storage.path.PathBasedStorageConfig -import java.io.File +import astminer.config.Code2VecPathStorageConfig +import astminer.config.FunctionPipelineConfig +import 
astminer.config.ParserConfig +import astminer.pipeline.getFunctionPipeline +import astminer.problem.FunctionNameProblem //Retrieve paths from all Java files, using a GumTree parser. @@ -16,26 +13,22 @@ fun code2vecJavaMethods() { val folder = "src/test/resources/code2vecPathMining" val outputDir = "out_examples/code2vecPathMining" - - val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) - - File(folder).forFilesWithSuffix(".java") { file -> - //parse file - val fileNode = JavaParser().parseInputStream(file.inputStream()) - - //extract method nodes - val methods = JavaMethodSplitter().splitIntoMethods(fileNode) - - val labelExtractor = MethodNameExtractor() - - methods.forEach { methodInfo -> - val label = labelExtractor.extractLabel(methodInfo, file.absolutePath) ?: return@forEach - // TODO: this is ugly maybe label should be normalized by default - val normalizedLabel = splitToSubtokens(label).joinToString("|") - // Retrieve paths from every node individually and store them - storage.store(LabeledResult(methodInfo.root, normalizedLabel, file.absolutePath)) - } - } - - storage.close() + val pipelineConfig = FunctionPipelineConfig( + folder, + outputDir, + ParserConfig( + "antlr", + listOf("java") + ), + emptyList(), + FunctionNameProblem, + Code2VecPathStorageConfig( + maxPathLength = 5, + maxPathWidth = 5 + ) + ) + + val pipeline = getFunctionPipeline(pipelineConfig) + + pipeline.run() } diff --git a/src/main/kotlin/astminer/pipeline/GetPipeline.kt b/src/main/kotlin/astminer/pipeline/GetPipeline.kt index 767143a0..5a547219 100644 --- a/src/main/kotlin/astminer/pipeline/GetPipeline.kt +++ b/src/main/kotlin/astminer/pipeline/GetPipeline.kt @@ -18,14 +18,14 @@ fun getPipeline(pipelineConfig: PipelineConfig): Pipeline<*> { } } -private fun getFilePipeline(filePipelineConfig: FilePipelineConfig): Pipeline> = +fun getFilePipeline(filePipelineConfig: FilePipelineConfig): Pipeline> = with(filePipelineConfig) { val frontend = 
FilePipelineFrontend(inputDir, parser.type, parser.extensions) val storageCreator = StorageCreator(storage, outputDir) Pipeline(frontend, filters, problem, emptyList(), storageCreator) } -private fun getFunctionPipeline(functionPipelineConfig: FunctionPipelineConfig): Pipeline> = +fun getFunctionPipeline(functionPipelineConfig: FunctionPipelineConfig): Pipeline> = with(functionPipelineConfig) { val frontend = FunctionPipelineFrontend(inputDir, parser.type, parser.extensions) val storageCreator = StorageCreator(storage, outputDir) diff --git a/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt b/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt deleted file mode 100644 index eb69e393..00000000 --- a/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt +++ /dev/null @@ -1,26 +0,0 @@ -package astminer.cli - -import astminer.cli.util.CliArgs -import astminer.cli.util.languagesToString -import astminer.cli.util.verifyPathContextExtraction -import org.junit.Test -import java.io.File - -internal class Code2VecExtractorTest { - private val testDataDir = File("src/test/resources") - private val code2VecExtractor = Code2VecExtractor() - - @Test - fun testDefaultExtraction() { - val extractedDataDir = createTempDir("extractedData") - val languages = listOf("java", "py") - val cliArgs = CliArgs.Builder(testDataDir, extractedDataDir) - .extensions(languagesToString(languages)) - .build() - - code2VecExtractor.main(cliArgs.args) - verifyPathContextExtraction(extractedDataDir, languages, false) - } -} - - diff --git a/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt b/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt deleted file mode 100644 index 39adb422..00000000 --- a/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt +++ /dev/null @@ -1,24 +0,0 @@ -package astminer.cli - -import astminer.cli.util.CliArgs -import astminer.cli.util.languagesToString -import astminer.cli.util.verifyPathContextExtraction -import org.junit.Test -import java.io.File 
- -internal class PathContextsExtractorTest { - private val testDataDir = File("src/test/resources") - private val pathContextsExtractor = PathContextsExtractor() - - @Test - fun testDefaultExtraction() { - val extractedDataDir = createTempDir("extractedData") - val languages = listOf("java", "py") - val cliArgs = CliArgs.Builder(testDataDir, extractedDataDir) - .extensions(languagesToString(languages)) - .build() - - pathContextsExtractor.main(cliArgs.args) - verifyPathContextExtraction(extractedDataDir, languages, false) - } -} \ No newline at end of file diff --git a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt index a818b0e8..97a1e43d 100644 --- a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt +++ b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt @@ -1,5 +1,6 @@ package astminer.filters +import astminer.common.createBamboo import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.parse.antlr.AntlrNode @@ -8,9 +9,9 @@ import kotlin.test.assertFalse import kotlin.test.assertTrue class FunctionFiltersTest { - + val a = ConstructorFilter @Test - fun `test modifiers filter should exclude function if it has the excluded modifier`() { + fun `test ModifierFilter should exclude function if it has the excluded modifier`() { val excludedModifiers = listOf("a", "b") val functionInfo = object : FunctionInfo { override val modifiers: List = listOf("b", "c") @@ -19,7 +20,7 @@ class FunctionFiltersTest { } @Test - fun `test modifiers filter should not exclude function if it does not have the excluded modifier`() { + fun `test ModifierFilter should not exclude function if it does not have the excluded modifier`() { val excludedModifiers = listOf("a", "b") val functionInfo = object : FunctionInfo { override val modifiers: List = listOf("c", "d") @@ -28,7 +29,7 @@ class FunctionFiltersTest { } @Test - fun `test annotations filter should exclude function if it 
has the excluded modifier`() { + fun `test AnnotationFilter should exclude function if it has the excluded modifier`() { val excludedModifiers = listOf("a", "b") val functionInfo = object : FunctionInfo { override val modifiers: List = listOf("a", "c") @@ -37,7 +38,7 @@ class FunctionFiltersTest { } @Test - fun `test annotations filter should not exclude function if it does not have the excluded modifier`() { + fun `test AnnotationFilter should not exclude function if it does not have the excluded modifier`() { val excludedModifiers = listOf("a", "b") val functionInfo = object : FunctionInfo { override val modifiers: List = listOf("y", "x") @@ -46,7 +47,7 @@ class FunctionFiltersTest { } @Test - fun `test constructor filter should exclude constructor functions`() { + fun `test ConstructorFilter should exclude constructor functions`() { val functionInfo = object : FunctionInfo { override val isConstructor = true } @@ -54,7 +55,7 @@ class FunctionFiltersTest { } @Test - fun `test constructor filter should not exclude non-constructor functions`() { + fun `test ConstructorFilter should not exclude non-constructor functions`() { val functionInfo = object : FunctionInfo { override val isConstructor = false } @@ -62,7 +63,7 @@ class FunctionFiltersTest { } @Test - fun `test function name words number filter should not exclude function if maxWordsNumber is -1`() { + fun `test FunctionNameWordsNumberFilter should not exclude function if maxWordsNumber is -1`() { val functionInfo = object : FunctionInfo { override val name = "Word".repeat(100) } @@ -70,7 +71,7 @@ class FunctionFiltersTest { } @Test - fun `test function name words number filter for 50 should exclude function with name of 100 words`() { + fun `test FunctionNameWordsNumberFilter for 50 should exclude function with name of 100 words`() { val functionInfo = object : FunctionInfo { override val name = "Word".repeat(100) } @@ -78,7 +79,7 @@ class FunctionFiltersTest { } @Test - fun `test function name words number 
filter for 101 should not exclude function with name of 100 words`() { + fun `test FunctionNameWordsNumberFilter for 101 should not exclude function with name of 100 words`() { val functionInfo = object : FunctionInfo { override val name = "Word".repeat(100) } @@ -86,7 +87,7 @@ class FunctionFiltersTest { } @Test - fun `test function any node words number filter should not exclude function if maxWordsNumber is -1`() { + fun `test FunctionAnyNodeWordsNumberFilter should not exclude function if maxWordsNumber is -1`() { val functionInfo = object : FunctionInfo { override val name = "Word".repeat(100) } @@ -94,7 +95,7 @@ class FunctionFiltersTest { } @Test - fun `test function any node words number filter for 50 should exclude function with name of 100 words`() { + fun `test FunctionAnyNodeWordsNumberFilter for 50 should exclude function with name of 100 words`() { val functionInfo = object : FunctionInfo { override val name = "Word".repeat(100) } @@ -102,7 +103,7 @@ class FunctionFiltersTest { } @Test - fun `test function any node words number filter for 101 should not exclude function with name of 100 words`() { + fun `test FunctionAnyNodeWordsNumberFilter for 101 should not exclude function with name of 100 words`() { val functionInfo = object : FunctionInfo { override val name = "Word".repeat(100) } @@ -110,10 +111,10 @@ class FunctionFiltersTest { } @Test - fun `test function any node words number filter for 2 should exlude function that has a child of 3 words`() { + fun `test FunctionAnyNodeWordsNumberFilter for 2 should exclude function that has a child of 3 words`() { val root = AntlrNode("", null, "word") val child = AntlrNode("", root, "wordWordWord") - root.setChildren(listOf(root)) + root.setChildren(listOf(child)) val functionInfo = object : FunctionInfo { override val root = root @@ -121,5 +122,19 @@ class FunctionFiltersTest { assertFalse { FunctionAnyNodeWordsNumberFilter(2).isFiltered(functionInfo) } } + @Test + fun `test FunctionTreeSizeFilter for 
100 should exclude bamboo of length 101`() { + val functionInfo = object : FunctionInfo { + override val root = createBamboo(101) + } + assertFalse { FunctionTreeSizeFilter(101).isFiltered(functionInfo) } + } + @Test + fun `test FunctionTreeSizeFilter for 10 should not exclude bamboo of length 5`() { + val functionInfo = object : FunctionInfo { + override val root = createBamboo(5) + } + assertTrue { FunctionTreeSizeFilter(10).isFiltered(functionInfo) } + } } \ No newline at end of file diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt new file mode 100644 index 00000000..d33a9b42 --- /dev/null +++ b/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt @@ -0,0 +1,40 @@ +package astminer.pipeline + +import astminer.cli.util.verifyPathContextExtraction +import astminer.config.Code2VecPathStorageConfig +import astminer.config.FilePipelineConfig +import astminer.config.ParserConfig +import astminer.problem.FilePathExtractor +import org.junit.Test +import java.io.File + +internal class Code2VecExtractionPipelineTest { + private val testDataDir = File("src/test/resources") + + @Test + fun testDefaultExtraction() { + val extractedDataDir = createTempDir("extractedData") + val languages = listOf("java", "py") + + val config = FilePipelineConfig( + testDataDir.path, + extractedDataDir.path, + ParserConfig( + "gumtree", + languages + ), + emptyList(), + FilePathExtractor, + Code2VecPathStorageConfig( + maxPathLength = 8, + maxPathWidth = 3 + ) + ) + + val pipeline = getFilePipeline(config) + + pipeline.run() + + verifyPathContextExtraction(extractedDataDir, languages, false) + } +} \ No newline at end of file From 16edca9fec3c43e3479a50a81d575e7171c54983 Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 6 May 2021 14:46:48 +0500 Subject: [PATCH 148/308] removed maxWordsNumber == -1 checks --- .../astminer/filters/FunctionFilters.kt | 20 +++++-------------- 1 
file changed, 5 insertions(+), 15 deletions(-) diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index 904acafe..a2c30e80 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -35,13 +35,8 @@ object ConstructorFilter : FunctionFilter { */ class FunctionNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { override fun isFiltered(entity: FunctionInfo): Boolean { - // TODO: this is not needed - return if (maxWordsNumber == -1) { - true - } else { - val name = entity.name - name != null && splitToSubtokens(name).size <= maxWordsNumber - } + val name = entity.name + return name != null && splitToSubtokens(name).size <= maxWordsNumber } } @@ -49,12 +44,7 @@ class FunctionNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionF * Filter that excludes functions that have more words than [maxWordsNumber] in any token of their subtree. 
*/ class FunctionAnyNodeWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { - override fun isFiltered(entity: FunctionInfo): Boolean { - // TODO: this is not needed - return if (maxWordsNumber == -1) { - true - } else { - !entity.root.preOrder().any { node -> splitToSubtokens(node.getToken()).size > maxWordsNumber } - } - } + override fun isFiltered(entity: FunctionInfo): Boolean = + !entity.root.preOrder().any { node -> splitToSubtokens(node.getToken()).size > maxWordsNumber } + } From 8d4d32cdb0a8fc0b36a5443d65d3cf5ab30d6f4e Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 6 May 2021 18:33:52 +0500 Subject: [PATCH 149/308] added tests --- .../kotlin/astminer/config/StorageConfig.kt | 2 +- src/main/kotlin/astminer/parse/factory.kt | 6 +- .../kotlin/astminer/pipeline/GetPipeline.kt | 4 +- src/main/kotlin/astminer/pipeline/Pipeline.kt | 4 +- .../astminer/pipeline/PipelineFrontend.kt | 8 +- .../astminer/pipeline/StorageCreator.kt | 8 +- .../astminer/problem/FileLevelProblems.kt | 2 +- .../astminer/problem/FunctionLevelProblems.kt | 7 +- src/test/kotlin/astminer/common/TestUtils.kt | 6 +- .../astminer/filters/FileFiltersTest.kt | 19 ++- .../astminer/filters/FunctionFiltersTest.kt | 30 ++--- .../Code2VecExtractionPipelineTest.kt | 2 +- .../pipeline/CompositePipelineFrontendTest.kt | 23 ++++ src/test/kotlin/astminer/pipeline/Mocks.kt | 69 +++++++++++ .../kotlin/astminer/pipeline/PipelineTest.kt | 115 ++++++++++++++++++ .../pipeline/StorageCreatorImplTest.kt | 54 ++++++++ src/test/kotlin/astminer/pipeline/Utils.kt | 20 +++ .../astminer/problem/FilePathExtractorTest.kt | 21 ++++ .../problem/FolderNameExtractorTest.kt | 31 +++++ .../problem/FunctionNameProblemTest.kt | 57 +++++++++ .../astminer/problem/LabelExtractorTest.kt | 73 ----------- 21 files changed, 446 insertions(+), 115 deletions(-) create mode 100644 src/test/kotlin/astminer/pipeline/CompositePipelineFrontendTest.kt create mode 100644 src/test/kotlin/astminer/pipeline/Mocks.kt create mode 
100644 src/test/kotlin/astminer/pipeline/PipelineTest.kt create mode 100644 src/test/kotlin/astminer/pipeline/StorageCreatorImplTest.kt create mode 100644 src/test/kotlin/astminer/pipeline/Utils.kt create mode 100644 src/test/kotlin/astminer/problem/FilePathExtractorTest.kt create mode 100644 src/test/kotlin/astminer/problem/FolderNameExtractorTest.kt create mode 100644 src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt delete mode 100644 src/test/kotlin/astminer/problem/LabelExtractorTest.kt diff --git a/src/main/kotlin/astminer/config/StorageConfig.kt b/src/main/kotlin/astminer/config/StorageConfig.kt index e5b65ab2..97b8e418 100644 --- a/src/main/kotlin/astminer/config/StorageConfig.kt +++ b/src/main/kotlin/astminer/config/StorageConfig.kt @@ -7,7 +7,7 @@ sealed class StorageConfig object CsvAstStorageConfig : StorageConfig() -data class DotAstStorageConfig(val tokenProcessor: TokenProcessor) : StorageConfig() +data class DotAstStorageConfig(val tokenProcessor: TokenProcessor = TokenProcessor.Normalize) : StorageConfig() data class Code2VecPathStorageConfig( val maxPathLength: Int, diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt index 1c9a6613..e54cfd59 100644 --- a/src/main/kotlin/astminer/parse/factory.kt +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -19,7 +19,7 @@ fun getHandlerFactory(extension: String, parserType: String): HandlerFactory { private fun getGumtreeHandlerFactory(extension: String): HandlerFactory { return when (extension) { "java" -> GumtreeJavaHandlerFactory - "python" -> GumtreePythonHandlerFactory + "py" -> GumtreePythonHandlerFactory else -> throw UnsupportedOperationException() } } @@ -27,8 +27,8 @@ private fun getGumtreeHandlerFactory(extension: String): HandlerFactory { private fun getAntlrHandlerFactory(extension: String): HandlerFactory { return when (extension) { "java" -> AntlrJavaHandlerFactory - "javascript" -> AntlrJavascriptHandlerFactory - "python" -> 
AntlrPythonHandlerFactory + "js" -> AntlrJavascriptHandlerFactory + "py" -> AntlrPythonHandlerFactory else -> throw UnsupportedOperationException() } } diff --git a/src/main/kotlin/astminer/pipeline/GetPipeline.kt b/src/main/kotlin/astminer/pipeline/GetPipeline.kt index 5a547219..e5cc8f27 100644 --- a/src/main/kotlin/astminer/pipeline/GetPipeline.kt +++ b/src/main/kotlin/astminer/pipeline/GetPipeline.kt @@ -21,13 +21,13 @@ fun getPipeline(pipelineConfig: PipelineConfig): Pipeline<*> { fun getFilePipeline(filePipelineConfig: FilePipelineConfig): Pipeline> = with(filePipelineConfig) { val frontend = FilePipelineFrontend(inputDir, parser.type, parser.extensions) - val storageCreator = StorageCreator(storage, outputDir) + val storageCreator = StorageCreatorImpl(storage, outputDir) Pipeline(frontend, filters, problem, emptyList(), storageCreator) } fun getFunctionPipeline(functionPipelineConfig: FunctionPipelineConfig): Pipeline> = with(functionPipelineConfig) { val frontend = FunctionPipelineFrontend(inputDir, parser.type, parser.extensions) - val storageCreator = StorageCreator(storage, outputDir) + val storageCreator = StorageCreatorImpl(storage, outputDir) Pipeline(frontend, filters, problem, emptyList(), storageCreator) } \ No newline at end of file diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 4b52efef..f0f5fa5e 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -8,9 +8,9 @@ import astminer.problem.Problem class Pipeline( private val frontend: PipelineFrontend, - private val filters: List>, + private val filters: List> = emptyList(), private val problem: Problem, - private val excludedNodeTypes: List, + private val excludedNodeTypes: List = emptyList(), private val storageCreator: StorageCreator ) { private fun T.passesThroughFilters() = filters.all { filter -> filter.isFiltered(this) } diff --git 
a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt index 189e1361..87553835 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt @@ -43,7 +43,13 @@ abstract class CompositePipelineFrontend( val inputDirectory = File(inputDirectoryPath) for (extension in extensions) { - val handlerFactory = getHandlerFactory(extension, parserType) + val handlerFactory = try { + getHandlerFactory(extension, parserType) + } catch (e: UnsupportedOperationException) { + println("Damn") + yield(EntitiesFromFiles(extension, emptySequence())) + continue + } val files = getProjectFilesWithExtension(inputDirectory, extension).asSequence() val entities = files.flatMap { file -> handlerFactory.createHandler(file).getEntities() } yield(EntitiesFromFiles(extension, entities)) diff --git a/src/main/kotlin/astminer/pipeline/StorageCreator.kt b/src/main/kotlin/astminer/pipeline/StorageCreator.kt index 32a64a3e..171a2411 100644 --- a/src/main/kotlin/astminer/pipeline/StorageCreator.kt +++ b/src/main/kotlin/astminer/pipeline/StorageCreator.kt @@ -10,13 +10,17 @@ import astminer.storage.ast.DotAstStorage import astminer.storage.path.Code2VecPathStorage import java.io.File +interface StorageCreator { + fun createStorageAndOutputFolder(extension: String): Storage +} + /** * Creates storage for each extension. 
* @param config The config that defines that storage will be used and the params of that storage * @param outputDirectoryPath Path to the base output directory where folders for each extension will be created * (e.g 'py', 'java') */ -class StorageCreator(private val config: StorageConfig, outputDirectoryPath: String) { +class StorageCreatorImpl(private val config: StorageConfig, outputDirectoryPath: String) : StorageCreator { private val outputDirectory = File(outputDirectoryPath) private fun createOutputPath(extension: String): String { @@ -28,7 +32,7 @@ class StorageCreator(private val config: StorageConfig, outputDirectoryPath: Str /** * Creates folder [outputDirectoryPath]/[extension] and initializes the storage in that folder. */ - fun createStorageAndOutputFolder(extension: String): Storage { + override fun createStorageAndOutputFolder(extension: String): Storage { val outputPath = createOutputPath(extension) return when (config) { is CsvAstStorageConfig -> CsvAstStorage(outputPath) diff --git a/src/main/kotlin/astminer/problem/FileLevelProblems.kt b/src/main/kotlin/astminer/problem/FileLevelProblems.kt index 1c658984..2141c737 100644 --- a/src/main/kotlin/astminer/problem/FileLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FileLevelProblems.kt @@ -18,7 +18,7 @@ object FilePathExtractor : FileLevelProblem { */ object FolderExtractor : FileLevelProblem { override fun process(entity: ParseResult): LabeledResult? 
{ - val folderName = File(entity.filePath).parentFile.name ?: return null + val folderName = File(entity.filePath).parentFile?.name ?: return null return entity.labeledWith(folderName) } } diff --git a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt index 994e3538..463ee509 100644 --- a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt @@ -12,14 +12,17 @@ interface FunctionLevelProblem : Problem> * Hides the name of the function in the subtree and also all in the recursive calls. */ object FunctionNameProblem : FunctionLevelProblem { + const val TECHNICAL_METHOD_NAME = "METHOD_NAME" + const val TECHNICAL_RECURSIVE_CALL = "SELF" + override fun process(entity: FunctionInfo): LabeledResult? { val name = entity.name ?: return null entity.root.preOrder().forEach { node -> if (node.getToken() == name) { - node.setTechnicalToken("SELF") + node.setTechnicalToken(TECHNICAL_RECURSIVE_CALL) } } - entity.nameNode?.setTechnicalToken("METHOD_NAME") + entity.nameNode?.setTechnicalToken(TECHNICAL_METHOD_NAME) return LabeledResult(entity.root, name, entity.filePath) } } diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index 4751bb99..d861b148 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -2,9 +2,10 @@ package astminer.common import astminer.problem.LabeledResult import astminer.common.model.Node +import astminer.common.model.ParseResult -class DummyNode(val data: String, val childrenList: MutableList) : Node { +class DummyNode(val data: String, val childrenList: MutableList = mutableListOf()) : Node { override val metadata: MutableMap = hashMapOf() override fun isLeaf(): Boolean { @@ -30,7 +31,6 @@ class DummyNode(val data: String, val childrenList: MutableList) : No override fun removeChildrenOfType(typeLabel: String) { 
childrenList.removeIf { it.getTypeLabel() == typeLabel } } - } /** @@ -92,4 +92,6 @@ fun createBamboo(size: Int): DummyNode { return root } +fun T.toParseResult() = ParseResult(this, "") + fun T.labeledWith(label: String) = LabeledResult(this, label, "") diff --git a/src/test/kotlin/astminer/filters/FileFiltersTest.kt b/src/test/kotlin/astminer/filters/FileFiltersTest.kt index 3915a2d7..edb7b267 100644 --- a/src/test/kotlin/astminer/filters/FileFiltersTest.kt +++ b/src/test/kotlin/astminer/filters/FileFiltersTest.kt @@ -1,6 +1,21 @@ package astminer.filters -internal class FileFiltersTest { +import astminer.common.createBamboo +import astminer.common.toParseResult +import org.junit.Test +import kotlin.test.assertFalse +import kotlin.test.assertTrue +internal class FileFiltersTest { + @Test + fun `test FileTreeSizeFilter for 100 should exclude bamboo of length 101`() { + val node = createBamboo(101).toParseResult() + assertFalse { FileTreeSizeFilter(100).isFiltered(node) } + } -} \ No newline at end of file + @Test + fun `test FileTreeSizeFilter for 10 should not exclude bamboo of length 5`() { + val node = createBamboo(5).toParseResult() + assertTrue { FileTreeSizeFilter(10).isFiltered(node) } + } +} diff --git a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt index 97a1e43d..861b0fb6 100644 --- a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt +++ b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt @@ -9,7 +9,6 @@ import kotlin.test.assertFalse import kotlin.test.assertTrue class FunctionFiltersTest { - val a = ConstructorFilter @Test fun `test ModifierFilter should exclude function if it has the excluded modifier`() { val excludedModifiers = listOf("a", "b") @@ -32,7 +31,7 @@ class FunctionFiltersTest { fun `test AnnotationFilter should exclude function if it has the excluded modifier`() { val excludedModifiers = listOf("a", "b") val functionInfo = object : FunctionInfo { - override 
val modifiers: List = listOf("a", "c") + override val annotations: List = listOf("a", "c") } assertFalse { AnnotationFilter(excludedModifiers).isFiltered(functionInfo) } } @@ -41,7 +40,7 @@ class FunctionFiltersTest { fun `test AnnotationFilter should not exclude function if it does not have the excluded modifier`() { val excludedModifiers = listOf("a", "b") val functionInfo = object : FunctionInfo { - override val modifiers: List = listOf("y", "x") + override val annotations: List = listOf("y", "x") } assertTrue { AnnotationFilter(excludedModifiers).isFiltered(functionInfo) } } @@ -62,14 +61,6 @@ class FunctionFiltersTest { assertTrue { ConstructorFilter.isFiltered(functionInfo) } } - @Test - fun `test FunctionNameWordsNumberFilter should not exclude function if maxWordsNumber is -1`() { - val functionInfo = object : FunctionInfo { - override val name = "Word".repeat(100) - } - assertTrue { FunctionNameWordsNumberFilter(-1).isFiltered(functionInfo) } - } - @Test fun `test FunctionNameWordsNumberFilter for 50 should exclude function with name of 100 words`() { val functionInfo = object : FunctionInfo { @@ -83,21 +74,13 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val name = "Word".repeat(100) } - assertFalse { FunctionNameWordsNumberFilter(101).isFiltered(functionInfo) } - } - - @Test - fun `test FunctionAnyNodeWordsNumberFilter should not exclude function if maxWordsNumber is -1`() { - val functionInfo = object : FunctionInfo { - override val name = "Word".repeat(100) - } - assertTrue { FunctionAnyNodeWordsNumberFilter(-1).isFiltered(functionInfo) } + assertTrue { FunctionNameWordsNumberFilter(101).isFiltered(functionInfo) } } @Test fun `test FunctionAnyNodeWordsNumberFilter for 50 should exclude function with name of 100 words`() { val functionInfo = object : FunctionInfo { - override val name = "Word".repeat(100) + override val root = AntlrNode("", null, "Word".repeat(100)) } assertFalse { 
FunctionAnyNodeWordsNumberFilter(50).isFiltered(functionInfo) } } @@ -106,8 +89,9 @@ class FunctionFiltersTest { fun `test FunctionAnyNodeWordsNumberFilter for 101 should not exclude function with name of 100 words`() { val functionInfo = object : FunctionInfo { override val name = "Word".repeat(100) + override val root = createBamboo(1) } - assertFalse { FunctionAnyNodeWordsNumberFilter(101).isFiltered(functionInfo) } + assertTrue { FunctionAnyNodeWordsNumberFilter(101).isFiltered(functionInfo) } } @Test @@ -127,7 +111,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val root = createBamboo(101) } - assertFalse { FunctionTreeSizeFilter(101).isFiltered(functionInfo) } + assertFalse { FunctionTreeSizeFilter(100).isFiltered(functionInfo) } } @Test diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt index d33a9b42..155a1961 100644 --- a/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt +++ b/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt @@ -14,7 +14,7 @@ internal class Code2VecExtractionPipelineTest { @Test fun testDefaultExtraction() { val extractedDataDir = createTempDir("extractedData") - val languages = listOf("java", "py") + val languages = listOf("java", "python") val config = FilePipelineConfig( testDataDir.path, diff --git a/src/test/kotlin/astminer/pipeline/CompositePipelineFrontendTest.kt b/src/test/kotlin/astminer/pipeline/CompositePipelineFrontendTest.kt new file mode 100644 index 00000000..01297f5c --- /dev/null +++ b/src/test/kotlin/astminer/pipeline/CompositePipelineFrontendTest.kt @@ -0,0 +1,23 @@ +package astminer.pipeline + +import astminer.common.model.LanguageHandler +import astminer.common.model.Node +import org.junit.Test +import kotlin.test.assertEquals + +internal class CompositePipelineFrontendTest { + class DummyCompositeFrontend(inputDirectoryPath: String, parserType: 
String, extensions: List) : + CompositePipelineFrontend(inputDirectoryPath, parserType, extensions) { + + override fun LanguageHandler.getEntities(): Sequence = sequenceOf(Unit) + } + + @Test + fun `test should skip language if it is not supported`() { + val tempDir = createTempDirectoryWithEmptyFiles(mapOf("py" to 5)) + val frontend = DummyCompositeFrontend(tempDir.path, "antlr", listOf("py", "unsupported_language")) + val entitiesCounts = getExtractedEntitiesCounts(frontend.getEntities()) + + assertEquals(mapOf("py" to 5, "unsupported_language" to 0), entitiesCounts) + } +} diff --git a/src/test/kotlin/astminer/pipeline/Mocks.kt b/src/test/kotlin/astminer/pipeline/Mocks.kt new file mode 100644 index 00000000..40bb45d1 --- /dev/null +++ b/src/test/kotlin/astminer/pipeline/Mocks.kt @@ -0,0 +1,69 @@ +package astminer.pipeline + +import astminer.common.DummyNode +import astminer.common.labeledWith +import astminer.common.model.Node +import astminer.filters.Filter +import astminer.problem.LabeledResult +import astminer.problem.Problem +import astminer.storage.Storage + +class DummyPipelineFrontend(private val extensionsToNodeNames: Map) : PipelineFrontend { + override fun getEntities(): Sequence> = + extensionsToNodeNames.entries.map { (extension, nodeName) -> EntitiesFromFiles(extension, sequenceOf(DummyNode(nodeName))) } + .asSequence() +} + +class SimplePipelineFrontend(private val nodes: List) : PipelineFrontend { + override fun getEntities(): Sequence> = + sequenceOf(EntitiesFromFiles("", nodes.asSequence())) +} + +class DummyFilter(private val excludeName: String = "") : Filter { + override fun isFiltered(entity: DummyNode): Boolean = entity.getToken() != excludeName +} + + +class DummyLabelExtractor(private val excludeName: String = "") : Problem { + override fun process(entity: DummyNode): LabeledResult? 
= if (entity.data != excludeName) { + entity.labeledWith("label ${entity.data}") + } else { + null + } +} + +class BambooLabelExtractor : Problem { + private fun getLabel(entity: Node): String { + val firstChildLabel = entity.getChildren().firstOrNull()?.let { getLabel(it) } ?: "" + return "${entity.getTypeLabel()}<$firstChildLabel" + } + + override fun process(entity: DummyNode): LabeledResult = entity.labeledWith(getLabel(entity)) +} + +class DummyStorageCreator : StorageCreator { + private val storages = mutableMapOf() + + val results: Map> + get() = storages.mapValues { (_, storage) -> storage.labeledResults } + + override fun createStorageAndOutputFolder(extension: String): Storage { + val storage = DummyStorage() + storages[extension] = storage + return storage + } +} + +class DummyStorage : Storage { + override val outputDirectoryPath: String = "" + + val labeledResults = mutableSetOf() + + override fun store(labeledResult: LabeledResult) { + labeledResults.add(labeledResult.label) + } + + override fun close() { + /* no-op */ + } +} diff --git a/src/test/kotlin/astminer/pipeline/PipelineTest.kt b/src/test/kotlin/astminer/pipeline/PipelineTest.kt new file mode 100644 index 00000000..17173e85 --- /dev/null +++ b/src/test/kotlin/astminer/pipeline/PipelineTest.kt @@ -0,0 +1,115 @@ +package astminer.pipeline + +import astminer.common.DummyNode +import org.junit.Before +import org.junit.Test +import kotlin.test.assertEquals + +internal class PipelineTest { + companion object { + val extensionsToNodeNames = mapOf("a" to "A", "b" to "B") + } + + lateinit var storageCreator: DummyStorageCreator + + @Before + fun init() { + storageCreator = DummyStorageCreator() + } + + @Test + fun `test pipeline saves all entities if none are filtered and no node types are excluded`() { + Pipeline( + frontend = DummyPipelineFrontend(extensionsToNodeNames), + problem = DummyLabelExtractor(), + storageCreator = storageCreator + ).run() + + val expectedResults = mapOf( + "a" to 
setOf("label A"), + "b" to setOf("label B") + ) + assertEquals(expectedResults, storageCreator.results) + } + + @Test + fun `test pipeline saves A if B is filtered out by a DummyFilter`() { + Pipeline( + frontend = DummyPipelineFrontend(extensionsToNodeNames), + filters = listOf(DummyFilter("B")), + problem = DummyLabelExtractor(), + storageCreator = storageCreator + ).run() + + val expectedResults = mapOf( + "a" to setOf("label A"), + "b" to setOf() + ) + assertEquals(expectedResults, storageCreator.results) + } + + @Test + fun `test pipeline saves A if B is filtered out by a DummyLabelExtractor`() { + Pipeline( + frontend = DummyPipelineFrontend(extensionsToNodeNames), + filters = listOf(DummyFilter()), + problem = DummyLabelExtractor("B"), + storageCreator = storageCreator + ).run() + + val expectedResults = mapOf( + "a" to setOf("label A"), + "b" to setOf() + ) + assertEquals(expectedResults, storageCreator.results) + } + + @Test + fun `test pipeline saves nothing if 'label A' is filtered by a filter and 'label B' is filtered by a problem`() { + Pipeline( + frontend = DummyPipelineFrontend(extensionsToNodeNames), + filters = listOf(DummyFilter("A")), + problem = DummyLabelExtractor("B"), + storageCreator = storageCreator + ).run() + + val expectedResults = mapOf( + "a" to setOf(), + "b" to setOf() + ) + assertEquals(expectedResults, storageCreator.results) + } + + @Test + fun `test pipeline should not remove any nodes from the tree by default`() { + val node = DummyNode("Root", mutableListOf(DummyNode("Child"))) + + Pipeline( + frontend = SimplePipelineFrontend(listOf(node)), + problem = BambooLabelExtractor(), + storageCreator = storageCreator + ).run() + + val expectedResults = mapOf( + "" to setOf("Root): File { + val dir = createTempDir() + for ((extension, fileCount) in fileCounts.entries) { + for (i in 1..fileCount) { + dir.resolve("$i.$extension").createNewFile() + } + } + return dir +} + +fun getExtractedEntitiesCounts(entitiesFromFiles: Sequence>): Map 
= + entitiesFromFiles.associate { it.fileExtension to it.entities.toList().size } diff --git a/src/test/kotlin/astminer/problem/FilePathExtractorTest.kt b/src/test/kotlin/astminer/problem/FilePathExtractorTest.kt new file mode 100644 index 00000000..ef560019 --- /dev/null +++ b/src/test/kotlin/astminer/problem/FilePathExtractorTest.kt @@ -0,0 +1,21 @@ +package astminer.problem + +import astminer.common.model.ParseResult +import astminer.parse.antlr.AntlrNode +import org.junit.Test +import kotlin.test.assertEquals + +class FilePathExtractorTest { + companion object { + private const val PATH = "random/folder/file.txt" + private var dummyRoot = AntlrNode("", null, null) + } + + @Test + fun `test file path extractor returns the same root and file path and labels with file path`() { + val nonEmptyParseResult = ParseResult(dummyRoot, PATH) + val labeledParseResult = FilePathExtractor.process(nonEmptyParseResult) + + assertEquals(LabeledResult(dummyRoot, PATH, PATH), labeledParseResult) + } +} \ No newline at end of file diff --git a/src/test/kotlin/astminer/problem/FolderNameExtractorTest.kt b/src/test/kotlin/astminer/problem/FolderNameExtractorTest.kt new file mode 100644 index 00000000..9e5da1b4 --- /dev/null +++ b/src/test/kotlin/astminer/problem/FolderNameExtractorTest.kt @@ -0,0 +1,31 @@ +package astminer.problem + +import astminer.common.model.ParseResult +import astminer.parse.antlr.AntlrNode +import org.junit.Test +import kotlin.test.assertEquals +import kotlin.test.assertNull + +class FolderNameExtractorTest { + companion object { + private const val PATH = "random/folder/file.txt" + private const val FOLDER = "folder" + private var dummyRoot = AntlrNode("", null, null) + } + + @Test + fun `test folder extractor returns null when folder is empty or not found`() { + val nonEmptyParseResult = ParseResult(dummyRoot, "") + val labeledParseResult = FolderExtractor.process(nonEmptyParseResult) + + assertNull(labeledParseResult) + } + + @Test + fun `test folder 
extractor extracts folder when it is not empty`() { + val nonEmptyParseResult = ParseResult(dummyRoot, PATH) + val labeledParseResult = FolderExtractor.process(nonEmptyParseResult) + + assertEquals(LabeledResult(dummyRoot, FOLDER, PATH), labeledParseResult) + } +} diff --git a/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt b/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt new file mode 100644 index 00000000..9cb507f9 --- /dev/null +++ b/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt @@ -0,0 +1,57 @@ +package astminer.problem + +import astminer.common.DummyNode +import astminer.common.getTechnicalToken +import astminer.common.model.FunctionInfo +import astminer.common.model.Node +import org.junit.Before +import org.junit.Test +import kotlin.test.assertEquals + +class FunctionNameProblemTest { + companion object { + private const val PATH = "random/folder/file.txt" + private const val FUNCTION_NAME = "method" + } + + lateinit var functionRoot: Node + + private val functionInfo: FunctionInfo + get() = object : FunctionInfo { + override val nameNode = functionRoot + override val filePath = PATH + override val root = functionRoot + } + + @Before + fun init() { + val leafNodeWithRecursiveCall = DummyNode(FUNCTION_NAME) + val emptyIntermediateNode = DummyNode("", mutableListOf(leafNodeWithRecursiveCall)) + functionRoot = DummyNode(FUNCTION_NAME, mutableListOf(emptyIntermediateNode)) + } + + @Test + fun `test FunctionNameProblem extracts correct method name`() { + val labeledResult = FunctionNameProblem.process(functionInfo) + assertEquals(LabeledResult(functionRoot, FUNCTION_NAME, PATH), labeledResult) + } + + @Test + fun `test FunctionNameProblem hides function name node token with METHOD_NAME`() { + FunctionNameProblem.process(functionInfo) + assertEquals("METHOD_NAME", functionInfo.nameNode?.getTechnicalToken()) + } + + @Test + fun `test FunctionNameProblem hides function root token with METHOD_NAME if it is the name node`() { + 
FunctionNameProblem.process(functionInfo) + assertEquals("METHOD_NAME", functionInfo.root.getTechnicalToken()) + } + + @Test + fun `test function name problem should hide recursive call tokens with SELF`() { + FunctionNameProblem.process(functionInfo) + val recursiveCallNode = functionInfo.root.getChildren().firstOrNull()?.getChildren()?.firstOrNull() + assertEquals("SELF", recursiveCallNode?.getTechnicalToken()) + } +} \ No newline at end of file diff --git a/src/test/kotlin/astminer/problem/LabelExtractorTest.kt b/src/test/kotlin/astminer/problem/LabelExtractorTest.kt deleted file mode 100644 index 597da431..00000000 --- a/src/test/kotlin/astminer/problem/LabelExtractorTest.kt +++ /dev/null @@ -1,73 +0,0 @@ -package astminer.problem - -import astminer.common.getTechnicalToken -import astminer.common.model.* -import astminer.parse.antlr.AntlrNode -import org.junit.Before -import org.junit.Test -import kotlin.test.assertEquals -import kotlin.test.assertNull - -internal class ProblemTest { - - companion object { - private const val PATH_STRING = "random/folder/file.txt" - private const val FOLDER = "folder" - private const val FILENAME = "file.txt" - private const val METHOD_NAME = "method" - } - - private var dummyRoot = AntlrNode("", null, null) - - @Before - fun setUp() { - dummyRoot = AntlrNode("", null, null) - } - - @Test - fun `test file path extractor returns the same root and file path`() { - val nonEmptyParseResult = ParseResult(dummyRoot, PATH_STRING) - val labeledParseResult = FilePathExtractor.process(nonEmptyParseResult) - - assertEquals(LabeledResult(dummyRoot, PATH_STRING, PATH_STRING), labeledParseResult) - } - - @Test - fun `test folder extractor returns null when folder is empty`() { - val nonEmptyParseResult = ParseResult(dummyRoot, "") - val labeledParseResult = FolderExtractor.process(nonEmptyParseResult) - - assertNull(labeledParseResult) - } - - @Test - fun `test folder extractor extracts folder when it is not empty`() { - val 
nonEmptyParseResult = ParseResult(dummyRoot, PATH_STRING) - val labeledParseResult = FolderExtractor.process(nonEmptyParseResult) - - assertEquals(LabeledResult(dummyRoot, FOLDER, PATH_STRING), labeledParseResult) - } - - @Test - fun `test method name extractor extracts correct method name`() { - val functionInfo = object : FunctionInfo { - override val nameNode = AntlrNode("", dummyRoot, METHOD_NAME) - override val filePath = PATH_STRING - override val root = dummyRoot - } - val labeledResult = FunctionNameProblem.process(functionInfo) - - assertEquals(LabeledResult(dummyRoot, METHOD_NAME, PATH_STRING), labeledResult) - } - - @Test - fun `test method name extractor hides method name with technical token`() { - val functionInfo = object : FunctionInfo { - override val nameNode = AntlrNode("", dummyRoot, METHOD_NAME) - override val filePath = PATH_STRING - override val root = dummyRoot - } - FunctionNameProblem.process(functionInfo) - assertEquals("METHOD_NAME", functionInfo.nameNode.getTechnicalToken()) - } -} From ae2865209fe9a80b349b7955c9b879b609595ec7 Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 7 May 2021 13:55:18 +0500 Subject: [PATCH 150/308] added 1 todo --- src/main/kotlin/astminer/pipeline/PipelineFrontend.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt index 87553835..3301064f 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt @@ -46,6 +46,7 @@ abstract class CompositePipelineFrontend( val handlerFactory = try { getHandlerFactory(extension, parserType) } catch (e: UnsupportedOperationException) { + // TODO: log everything println("Damn") yield(EntitiesFromFiles(extension, emptySequence())) continue From 2bddf86f91d5af2d1ca4b965bba4b2218f76025a Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 7 May 2021 14:18:27 +0500 Subject: [PATCH 151/308] cleaned up renaming 
MethodInfo* to FunctionInfo* --- .../kotlin/astminer/cli/LabelExtractors.kt | 16 ++++---- .../{FunctionInfo.kt => FunctionInfoModel.kt} | 19 ++++----- .../astminer/common/model/HandlerModel.kt | 2 +- .../common/model/TreeSplittingModel.kt | 41 ------------------- .../astminer/examples/AllPythonMethods.kt | 4 +- .../astminer/parse/antlr/AntlrHandler.kt | 8 ++-- .../parse/antlr/java/AntlrJavaFunctionInfo.kt | 8 ++-- .../parse/antlr/java/JavaMethodSplitter.kt | 2 +- .../javascript/AntlrJavaScriptElementInfo.kt | 12 +++--- ...itter.kt => JavaScriptFunctionSplitter.kt} | 2 +- .../antlr/python/AntlrPythonFunctionInfo.kt | 8 ++-- ...dSplitter.kt => PythonFunctionSplitter.kt} | 3 +- .../parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt | 8 ++-- ...odSplitter.kt => FuzzyFunctionSplitter.kt} | 2 +- .../astminer/parse/fuzzy/cpp/FuzzyHandler.kt | 4 +- .../astminer/parse/gumtree/GumtreeHandler.kt | 4 +- .../gumtree/java/GumTreeJavaFunctionInfo.kt | 8 ++-- .../gumtree/java/GumTreeJavaMethodSplitter.kt | 2 +- .../gumtree/python/GumTreeFunctionInfo.kt | 8 ++-- ...er.kt => GumTreePythonFunctionSplitter.kt} | 2 +- .../JavaScriptMethodSplitterTest.kt | 2 +- .../antlr/python/PythonMethodSplitterTest.kt | 3 +- .../parse/cpp/FuzzyMethodSplitterTest.kt | 5 +-- .../java/GumTreeJavaMethodSplitterTest.kt | 1 - .../python/GumTreePythonMethodSplitterTest.kt | 3 +- 25 files changed, 65 insertions(+), 112 deletions(-) rename src/main/kotlin/astminer/common/model/{FunctionInfo.kt => FunctionInfoModel.kt} (67%) delete mode 100644 src/main/kotlin/astminer/common/model/TreeSplittingModel.kt rename src/main/kotlin/astminer/parse/antlr/javascript/{JavaScriptMethodSplitter.kt => JavaScriptFunctionSplitter.kt} (94%) rename src/main/kotlin/astminer/parse/antlr/python/{PythonMethodSplitter.kt => PythonFunctionSplitter.kt} (82%) rename src/main/kotlin/astminer/parse/fuzzy/cpp/{FuzzyMethodSplitter.kt => FuzzyFunctionSplitter.kt} (85%) rename 
src/main/kotlin/astminer/parse/gumtree/python/{GumTreePythonMethodSplitter.kt => GumTreePythonFunctionSplitter.kt} (90%) diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index df2b2a92..5c6bb0b1 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -7,13 +7,13 @@ import astminer.common.preOrder import astminer.common.setTechnicalToken import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.java.JavaMethodSplitter -import astminer.parse.antlr.javascript.JavaScriptMethodSplitter -import astminer.parse.antlr.python.PythonMethodSplitter -import astminer.parse.fuzzy.cpp.FuzzyMethodSplitter +import astminer.parse.antlr.javascript.JavaScriptFunctionSplitter +import astminer.parse.antlr.python.PythonFunctionSplitter +import astminer.parse.fuzzy.cpp.FuzzyFunctionSplitter import astminer.parse.fuzzy.cpp.FuzzyNode import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter -import astminer.parse.gumtree.python.GumTreePythonMethodSplitter +import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter import java.io.File @@ -63,7 +63,7 @@ abstract class MethodLabelExtractor( val fileExtension = File(filePath).extension val methodInfos = when (fileExtension) { "c", "cpp" -> { - val methodSplitter = FuzzyMethodSplitter() + val methodSplitter = FuzzyFunctionSplitter() methodSplitter.splitIntoMethods(root as FuzzyNode) } "java" -> { @@ -84,11 +84,11 @@ abstract class MethodLabelExtractor( "py" -> { when (pythonParser) { "gumtree" -> { - val methodSplitter = GumTreePythonMethodSplitter() + val methodSplitter = GumTreePythonFunctionSplitter() methodSplitter.splitIntoMethods(root as GumTreeNode) } "antlr" -> { - val methodSplitter = PythonMethodSplitter() + val methodSplitter = PythonFunctionSplitter() methodSplitter.splitIntoMethods(root as AntlrNode) } else -> { @@ -97,7 +97,7 @@ abstract class 
MethodLabelExtractor( } } "js" -> { - val methodSplitter = JavaScriptMethodSplitter() + val methodSplitter = JavaScriptFunctionSplitter() methodSplitter.splitIntoMethods(root as AntlrNode) } else -> throw UnsupportedOperationException("Unsupported extension $fileExtension") diff --git a/src/main/kotlin/astminer/common/model/FunctionInfo.kt b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt similarity index 67% rename from src/main/kotlin/astminer/common/model/FunctionInfo.kt rename to src/main/kotlin/astminer/common/model/FunctionInfoModel.kt index 7db831a6..8e53a82d 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfo.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt @@ -1,12 +1,16 @@ package astminer.common.model -class MethodInfoPropertyNotImplementedException(propertyName: String) : +interface TreeFunctionSplitter { + fun splitIntoMethods(root: T): Collection> +} + +class FunctionInfoPropertyNotImplementedException(propertyName: String) : UnsupportedOperationException( - "The property $propertyName of MethodInfo for this language and parser type is not implemented yet. " + + "The property $propertyName of FunctionInfo for this language and parser type is not implemented yet. " + "Consider implementing it." ) -private fun notImplemented(propertyName: String): Nothing = throw MethodInfoPropertyNotImplementedException(propertyName) +private fun notImplemented(propertyName: String): Nothing = throw FunctionInfoPropertyNotImplementedException(propertyName) interface FunctionInfo { val nameNode: T? @@ -19,7 +23,7 @@ interface FunctionInfo { get() = notImplemented("annotations") val modifiers: List get() = notImplemented("modifiers") - val parameters: List + val parameters: List get() = notImplemented("parameters") val returnType: String? get() = notImplemented("returnType") @@ -29,7 +33,7 @@ interface FunctionInfo { get() = notImplemented("isConstructor") } -data class MethodInfoParameter(val name: String, val type: String?) 
+data class FunctionInfoParameter(val name: String, val type: String?) data class EnclosingElement(val type: EnclosingElementType, val name: String?, val root: T) @@ -39,8 +43,3 @@ enum class EnclosingElementType { Method, VariableDeclaration, } - -// TODO: should be removed -class DummyFunctionInfo : FunctionInfo - -fun dummyMethodInfos() = listOf(DummyFunctionInfo()) diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index 6f156ba3..ccc4316a 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -8,7 +8,7 @@ interface HandlerFactory { abstract class LanguageHandler { abstract val parseResult: ParseResult - protected abstract val splitter: TreeMethodSplitter + protected abstract val splitter: TreeFunctionSplitter fun splitIntoMethods(): Collection> { val root = parseResult.root ?: return emptyList() diff --git a/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt b/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt deleted file mode 100644 index 568b44bc..00000000 --- a/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt +++ /dev/null @@ -1,41 +0,0 @@ -package astminer.common.model - -interface TreeMethodSplitter { - fun splitIntoMethods(root: T): Collection> -} - -class MethodInfo( - val method: MethodNode, - val enclosingElement: ElementNode, - val methodParameters: List> -) { - fun name() = method.name() - fun returnType() = method.returnType() - - fun enclosingElementName() = enclosingElement.name() -} - -class MethodNode( - val root: T, - val returnTypeNode: T?, - val nameNode: T? // why is it nullable?? -) { - fun name() = nameNode?.getToken() - fun returnType() = returnTypeNode?.getToken() -} - -class ElementNode( - val root: T?, - val nameNode: T? -) { - fun name() = nameNode?.getToken() -} - -data class ParameterNode( - val root: T, - val returnTypeNode: T?, - val nameNode: T? 
-) { - fun name() = nameNode?.getToken() - fun returnType() = returnTypeNode?.getToken() -} diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index cfc55ef4..37660629 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -2,7 +2,7 @@ package astminer.examples import astminer.cli.LabeledResult import astminer.common.model.FunctionInfo -import astminer.parse.gumtree.python.GumTreePythonMethodSplitter +import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.python.GumTreePythonParser import astminer.storage.path.Code2VecPathStorage @@ -27,7 +27,7 @@ fun allPythonMethods() { val fileNode = GumTreePythonParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix // extract method nodes - val methodNodes = GumTreePythonMethodSplitter().splitIntoMethods(fileNode) + val methodNodes = GumTreePythonFunctionSplitter().splitIntoMethods(fileNode) methodNodes.forEach { methodInfo -> // Retrieve a method identifier diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt index ad6f6f0b..866fa1fe 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt @@ -5,9 +5,9 @@ import astminer.common.model.HandlerFactory import astminer.common.model.LanguageHandler import astminer.parse.antlr.java.JavaMethodSplitter import astminer.parse.antlr.java.JavaParser -import astminer.parse.antlr.javascript.JavaScriptMethodSplitter +import astminer.parse.antlr.javascript.JavaScriptFunctionSplitter import astminer.parse.antlr.javascript.JavaScriptParser -import astminer.parse.antlr.python.PythonMethodSplitter +import astminer.parse.antlr.python.PythonFunctionSplitter import astminer.parse.antlr.python.PythonParser import 
java.io.File @@ -25,7 +25,7 @@ object AntlrPythonHandlerFactory : HandlerFactory { class AntlrPythonHandler(file: File) : LanguageHandler() { override val parseResult: ParseResult = PythonParser().parseFile(file) - override val splitter = PythonMethodSplitter() + override val splitter = PythonFunctionSplitter() } } @@ -34,6 +34,6 @@ object AntlrJavascriptHandlerFactory : HandlerFactory { class AntlrJavascriptHandler(file: File) : LanguageHandler() { override val parseResult: ParseResult = JavaScriptParser().parseFile(file) - override val splitter = JavaScriptMethodSplitter() + override val splitter = JavaScriptFunctionSplitter() } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt index 648c4952..de0be107 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -6,7 +6,7 @@ import astminer.parse.findEnclosingElementBy class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo { override val nameNode: AntlrNode? = collectNameNode() - override val parameters: List = collectParameters() + override val parameters: List = collectParameters() override val returnType: String? = collectReturnType() override val enclosingElement: EnclosingElement? 
= collectEnclosingClass() @@ -42,7 +42,7 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo { + private fun collectParameters(): List { val parametersRoot = root.getChildOfType(METHOD_PARAMETER_NODE) val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) ?: return emptyList() @@ -55,14 +55,14 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo getParameterInfo(singleParameter) } } - private fun getParameterInfo(parameterNode: AntlrNode): MethodInfoParameter { + private fun getParameterInfo(parameterNode: AntlrNode): FunctionInfoParameter { val returnTypeNode = parameterNode.getChildOfType(PARAMETER_RETURN_TYPE_NODE) val returnTypeToken = returnTypeNode?.getTokensFromSubtree() val parameterName = parameterNode.getChildOfType(PARAMETER_NAME_NODE)?.getToken() ?: throw IllegalStateException("Parameter name wasn't found") - return MethodInfoParameter(parameterName, returnTypeToken) + return FunctionInfoParameter(parameterName, returnTypeToken) } } diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt index 967b3c76..c943fe4f 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt @@ -5,7 +5,7 @@ import astminer.common.model.* import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.hasLastLabel -class JavaMethodSplitter : TreeMethodSplitter { +class JavaMethodSplitter : TreeFunctionSplitter { private val methodNodeType = "methodDeclaration" override fun splitIntoMethods(root: AntlrNode): Collection> { diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt index dcba1e72..47a94c49 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt +++ 
b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt @@ -48,7 +48,7 @@ abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode) : Functi } } - protected fun collectParameters(): List { + protected fun collectParameters(): List { val parametersRoot = getParametersRoot() return when { //No parameters found @@ -56,13 +56,13 @@ abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode) : Functi //Have only one parameter, which is indicated only by its name parametersRoot.hasLastLabel(PARAMETER_NAME_NODE) -> listOf( - MethodInfoParameter(name = parametersRoot.getToken(), type = null) + FunctionInfoParameter(name = parametersRoot.getToken(), type = null) ) //Have many parameters or one indicated not only by it's name else -> parametersRoot.getItOrChildrenOfType(SINGLE_PARAMETER_NODE).map { val nameNode = it.getChildOfType(PARAMETER_NAME_NODE) ?: it - MethodInfoParameter(name = nameNode.getToken(), type = null) + FunctionInfoParameter(name = nameNode.getToken(), type = null) } } } @@ -78,7 +78,7 @@ class JavaScriptArrowInfo(override val root: AntlrNode) : AntlrJavaScriptElement } override val enclosingElement: EnclosingElement? = collectEnclosingElement() - override val parameters: List = collectParameters() + override val parameters: List = collectParameters() override val nameNode: AntlrNode? = root.getChildOfType(ARROW_NAME_NODE) override fun getParametersRoot(): AntlrNode? { @@ -95,7 +95,7 @@ class JavaScriptMethodInfo(override val root: AntlrNode) : AntlrJavaScriptElemen } override val enclosingElement: EnclosingElement? = collectEnclosingElement() - override val parameters: List = collectParameters() + override val parameters: List = collectParameters() override val nameNode: AntlrNode? = collectNameNode() private fun collectNameNode(): AntlrNode? { @@ -118,7 +118,7 @@ class JavaScriptFunctionInfo(override val root: AntlrNode) : AntlrJavaScriptElem } override val enclosingElement: EnclosingElement? 
= collectEnclosingElement() - override val parameters: List = collectParameters() + override val parameters: List = collectParameters() override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION_NAME_NODE) override fun getParametersRoot(): AntlrNode? = root.getChildOfType(FUNCTION_PARAMETER_NODE) diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt similarity index 94% rename from src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt rename to src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt index 1f2d1a00..5b5a6b3b 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt @@ -9,7 +9,7 @@ import astminer.parse.antlr.decompressTypeLabel * Get all methods (in JavaScript there are divided into functions, arrow functions and methods) and information * about their names, enclosing elements and parameters. */ -class JavaScriptMethodSplitter : TreeMethodSplitter { +class JavaScriptFunctionSplitter : TreeFunctionSplitter { companion object { private const val METHOD_NODE = "methodDefinition" private const val ARROW_NODE = "ARROW" diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt index 0571ae43..389786df 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -6,7 +6,7 @@ import astminer.parse.findEnclosingElementBy class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo { override val nameNode: AntlrNode? 
= collectNameNode() - override val parameters: List = collectParameters() + override val parameters: List = collectParameters() override val enclosingElement: EnclosingElement? = collectEnclosingElement() companion object { @@ -33,7 +33,7 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo { + private fun collectParameters(): List { val parametersRoot = root.getChildOfType(METHOD_PARAMETER_NODE) val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) ?: return emptyList() @@ -48,7 +48,7 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo { +class PythonFunctionSplitter : TreeFunctionSplitter { private val methodNode = "funcdef" override fun splitIntoMethods(root: AntlrNode): Collection> { diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt index b4643d35..b5d6e11c 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt @@ -3,7 +3,7 @@ package astminer.parse.fuzzy.cpp import astminer.common.model.EnclosingElement import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo -import astminer.common.model.MethodInfoParameter +import astminer.common.model.FunctionInfoParameter import astminer.parse.findEnclosingElementBy class FuzzyCppFunctionInfo(override val root: FuzzyNode): FunctionInfo { @@ -22,7 +22,7 @@ class FuzzyCppFunctionInfo(override val root: FuzzyNode): FunctionInfo? = collectEnclosingClass() - override val parameters: List = collectParameters() + override val parameters: List = collectParameters() override val nameNode: FuzzyNode? = collectNameNode() private fun collectNameNode(): FuzzyNode? 
{ @@ -51,12 +51,12 @@ class FuzzyCppFunctionInfo(override val root: FuzzyNode): FunctionInfo { + private fun collectParameters(): List { val parameters = root.getChildrenOfType(METHOD_PARAMETER_NODE) return parameters.map { param -> val type = param.getChildOfType(PARAMETER_TYPE_NODE)?.getToken() val name = param.getChildOfType(PARAMETER_NAME_NODE)?.getToken() ?: "" - MethodInfoParameter(name, type) + FunctionInfoParameter(name, type) } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt similarity index 85% rename from src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt rename to src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt index 5d01ceb6..3a2467c8 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt @@ -3,7 +3,7 @@ package astminer.parse.fuzzy.cpp import astminer.common.* import astminer.common.model.* -class FuzzyMethodSplitter : TreeMethodSplitter { +class FuzzyFunctionSplitter : TreeFunctionSplitter { private val methodNode = "METHOD" override fun splitIntoMethods(root: FuzzyNode): Collection> { diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt index ea6b014f..18ae066a 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt @@ -4,7 +4,7 @@ import astminer.common.model.HandlerFactory import astminer.common.model.LanguageHandler import astminer.common.model.ParseResult import astminer.parse.fuzzy.cpp.FuzzyCppParser -import astminer.parse.fuzzy.cpp.FuzzyMethodSplitter +import astminer.parse.fuzzy.cpp.FuzzyFunctionSplitter import astminer.parse.fuzzy.cpp.FuzzyNode import java.io.File @@ -13,7 +13,7 @@ object FuzzyCppHandler : HandlerFactory { class 
CppFuzzyHandler(file: File) : LanguageHandler() { - override val splitter = FuzzyMethodSplitter() + override val splitter = FuzzyFunctionSplitter() override val parseResult: ParseResult = FuzzyCppParser().parseFile(file) } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt index 6bb95b27..85723a52 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt @@ -5,7 +5,7 @@ import astminer.common.model.HandlerFactory import astminer.common.model.LanguageHandler import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter -import astminer.parse.gumtree.python.GumTreePythonMethodSplitter +import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter import astminer.parse.gumtree.python.GumTreePythonParser import java.io.File @@ -22,7 +22,7 @@ object GumtreePythonHandlerFactory : HandlerFactory { override fun createHandler(file: File): LanguageHandler = PythonGumTreeHandler(file) class PythonGumTreeHandler(file: File) : LanguageHandler() { - override val splitter = GumTreePythonMethodSplitter() + override val splitter = GumTreePythonFunctionSplitter() override val parseResult: ParseResult = GumTreePythonParser().parseFile(file) } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt index de968cea..fbec6aa0 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -3,7 +3,7 @@ package astminer.parse.gumtree.java import astminer.common.model.EnclosingElement import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo -import astminer.common.model.MethodInfoParameter 
+import astminer.common.model.FunctionInfoParameter import astminer.parse.gumtree.GumTreeNode class GumTreeJavaFunctionInfo(override val root: GumTreeNode) : FunctionInfo { @@ -16,7 +16,7 @@ class GumTreeJavaFunctionInfo(override val root: GumTreeNode) : FunctionInfo = collectParameters() + override val parameters: List = collectParameters() override val returnType: String? = root.getElementType() override val enclosingElement: EnclosingElement? = collectEnclosingClass() @@ -37,10 +37,10 @@ class GumTreeJavaFunctionInfo(override val root: GumTreeNode) : FunctionInfo { + private fun collectParameters(): List { val params = root.getChildrenOfType(TypeLabels.singleVariableDeclaration) return params.map { node -> - MethodInfoParameter( + FunctionInfoParameter( name = node.getElementName(), type = node.getElementType() ) diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt index 3f9aa958..1ec56414 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt @@ -4,7 +4,7 @@ import astminer.common.model.* import astminer.common.preOrder import astminer.parse.gumtree.GumTreeNode -class GumTreeJavaMethodSplitter : TreeMethodSplitter { +class GumTreeJavaMethodSplitter : TreeFunctionSplitter { private val methodDeclaration = "MethodDeclaration" override fun splitIntoMethods(root: GumTreeNode): Collection> { diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreeFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreeFunctionInfo.kt index 9eaff1cc..76df1ffd 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreeFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreeFunctionInfo.kt @@ -3,7 +3,7 @@ package astminer.parse.gumtree.python import astminer.common.model.EnclosingElement import 
astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo -import astminer.common.model.MethodInfoParameter +import astminer.common.model.FunctionInfoParameter import astminer.parse.findEnclosingElementBy import astminer.parse.gumtree.GumTreeNode @@ -33,7 +33,7 @@ class GumTreeFunctionInfo(override val root: GumTreeNode) : FunctionInfo = collectParameters() + override val parameters: List = collectParameters() override val enclosingElement: EnclosingElement? = collectEnclosingClass() private fun getElementType(node: GumTreeNode): GumTreeNode? { @@ -63,7 +63,7 @@ class GumTreeFunctionInfo(override val root: GumTreeNode) : FunctionInfo { + private fun collectParameters(): List { val arguments = root.getChildrenOfType(TypeLabels.arguments).flatMap { it.getChildren() } val params = arguments.flatMap { node -> when (node.getTypeLabel()) { @@ -74,7 +74,7 @@ class GumTreeFunctionInfo(override val root: GumTreeNode) : FunctionInfo - MethodInfoParameter( + FunctionInfoParameter( name = node.getToken(), type = getElementType(node)?.getToken() ) diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt similarity index 90% rename from src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt rename to src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt index 4f27c65a..0e168efa 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt @@ -4,7 +4,7 @@ import astminer.common.model.* import astminer.common.preOrder import astminer.parse.gumtree.GumTreeNode -class GumTreePythonMethodSplitter : TreeMethodSplitter { +class GumTreePythonFunctionSplitter : TreeFunctionSplitter { companion object { private object TypeLabels { const val functionDefinition = "FunctionDef" diff --git 
a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt index c49177bb..07051799 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt @@ -14,7 +14,7 @@ class JavaScriptMethodSplitterTest { companion object { const val N_METHODS = 47 const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.js" - val methodSplitter = JavaScriptMethodSplitter() + val methodSplitter = JavaScriptFunctionSplitter() val parser = JavaScriptParser() } diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt index c5ff62dd..bd1c4b6c 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt @@ -2,7 +2,6 @@ package astminer.parse.antlr.python import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo -import astminer.common.model.MethodInfo import astminer.parse.antlr.AntlrNode import org.junit.Test import kotlin.test.assertEquals @@ -14,7 +13,7 @@ import kotlin.test.assertNull class PythonMethodSplitterTest { companion object { const val N_FUNCTIONS = 17 - val methodSplitter = PythonMethodSplitter() + val methodSplitter = PythonFunctionSplitter() val parser = PythonParser() } diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index f47376e4..65217952 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -1,9 +1,8 @@ package astminer.parse.cpp import astminer.common.model.FunctionInfo -import astminer.common.model.MethodInfo 
import astminer.parse.fuzzy.cpp.FuzzyCppParser -import astminer.parse.fuzzy.cpp.FuzzyMethodSplitter +import astminer.parse.fuzzy.cpp.FuzzyFunctionSplitter import astminer.parse.fuzzy.cpp.FuzzyNode import org.junit.Test import kotlin.test.assertEquals @@ -16,7 +15,7 @@ class FuzzyMethodSplitterTest { companion object { const val N_FUNCTIONS = 10 - val methodSplitter = FuzzyMethodSplitter() + val methodSplitter = FuzzyFunctionSplitter() val parser = FuzzyCppParser() } diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt index 9cce78ba..5309dda0 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt @@ -1,7 +1,6 @@ package astminer.parse.gumtree.java import astminer.common.model.FunctionInfo -import astminer.common.model.MethodInfo import astminer.parse.gumtree.GumTreeNode import org.junit.Test import java.io.File diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt index 37ec3f38..e5ef806c 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt @@ -1,7 +1,6 @@ package astminer.parse.gumtree.python import astminer.common.model.FunctionInfo -import astminer.common.model.MethodInfo import astminer.parse.gumtree.GumTreeNode import org.junit.Test import java.io.File @@ -13,7 +12,7 @@ class GumTreePythonMethodSplitterTest { GumTreePythonParser().parseInputStream(File(filename).inputStream()) private fun splitMethods(filename: String): Collection> = parse(filename)?.let { - GumTreePythonMethodSplitter().splitIntoMethods(it) + GumTreePythonFunctionSplitter().splitIntoMethods(it) } ?: emptyList() 
private fun createPath(file: String) = "src/test/resources/gumTreeMethodSplitter/$file" From 24fe0666a04430f045ccb2ea3bdbfd2a657afa16 Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 7 May 2021 14:27:46 +0500 Subject: [PATCH 152/308] removed unused import --- src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index d6a0df9a..0eae4ec7 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -5,7 +5,6 @@ import org.antlr.v4.runtime.ParserRuleContext import org.antlr.v4.runtime.Vocabulary import org.antlr.v4.runtime.tree.ErrorNode import org.antlr.v4.runtime.tree.TerminalNode -import java.util.concurrent.locks.Condition fun convertAntlrTree(tree: ParserRuleContext, ruleNames: Array, vocabulary: Vocabulary): AntlrNode { return compressTree(convertRuleContext(tree, ruleNames, null, vocabulary)) From 51e83d6909f6053cecf860a947f275c9bc3b9f95 Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 7 May 2021 14:32:53 +0500 Subject: [PATCH 153/308] removed redundant null-safety (?. 
and ?:) --- src/main/kotlin/astminer/common/model/HandlerModel.kt | 2 +- src/main/kotlin/astminer/examples/AllCppFiles.kt | 4 +--- src/main/kotlin/astminer/examples/AllJavaAst.kt | 4 +--- src/main/kotlin/astminer/examples/AllJavaFiles.kt | 2 +- src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt | 4 +--- src/main/kotlin/astminer/examples/AllJavaMethods.kt | 2 +- src/main/kotlin/astminer/examples/AllPythonFiles.kt | 4 +--- src/main/kotlin/astminer/examples/AllPythonMethods.kt | 2 +- src/main/kotlin/astminer/examples/FeatureExtraction.kt | 2 +- 9 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index 5b843a1d..be434220 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -11,7 +11,7 @@ abstract class LanguageHandler { protected abstract val splitter: TreeMethodSplitter fun splitIntoMethods(): Collection> { - val root = parseResult.root ?: return emptyList() + val root = parseResult.root return splitter.splitIntoMethods(root) } } diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index 55faff40..1a45b003 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -22,9 +22,7 @@ fun allCppFiles() { val files = getProjectFilesWithExtension(preprocOutputFolder, "cpp") parser.parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { labeledResult -> - storage.store(labeledResult) - } + storage.store(parseResult.labeledWithFilePath()) } storage.close() diff --git a/src/main/kotlin/astminer/examples/AllJavaAst.kt b/src/main/kotlin/astminer/examples/AllJavaAst.kt index 675bc62f..25a88b48 100644 --- a/src/main/kotlin/astminer/examples/AllJavaAst.kt +++ b/src/main/kotlin/astminer/examples/AllJavaAst.kt @@ -13,9 +13,7 @@ fun allJavaAsts() 
{ val files = getProjectFilesWithExtension(File(folder), "java") JavaParser().parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { labeledResult -> - storage.store(labeledResult) - } + storage.store(parseResult.labeledWithFilePath()) } storage.close() diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index d135e439..402a9754 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -15,7 +15,7 @@ fun allJavaFiles() { val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) File(inputDir).forFilesWithSuffix("11.java") { file -> - val node = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix + val node = JavaParser().parseInputStream(file.inputStream()) node.prettyPrint() JavaMethodSplitter().splitIntoMethods(node).forEach { println(it.name()) diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index 937fb8a6..c8236abf 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -15,9 +15,7 @@ fun allJavaFilesGumTree() { val files = getProjectFilesWithExtension(File(inputDir), "java") GumTreeJavaParser().parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { labeledResult -> - storage.store(labeledResult) - } + storage.store(parseResult.labeledWithFilePath()) } storage.close() diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 7a748f37..1941667d 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -29,7 +29,7 @@ fun allJavaMethods() { File(inputDir).forFilesWithSuffix(".java") { file -> //parse file - val fileNode = 
GumTreeJavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix + val fileNode = GumTreeJavaParser().parseInputStream(file.inputStream()) //extract method nodes val methodNodes = GumTreeJavaMethodSplitter().splitIntoMethods(fileNode) diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index ff8a82b0..91f6fe88 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -16,9 +16,7 @@ fun allPythonFiles() { val files = getProjectFilesWithExtension(File(inputDir), "py") PythonParser().parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { labeledResult -> - storage.store(labeledResult) - } + storage.store(parseResult.labeledWithFilePath()) } storage.close() diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 2f55d247..ca11b722 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -24,7 +24,7 @@ fun allPythonMethods() { File(inputDir).forFilesWithSuffix(".py") { file -> // parse file - val fileNode = GumTreePythonParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix + val fileNode = GumTreePythonParser().parseInputStream(file.inputStream()) // extract method nodes val methodNodes = GumTreePythonMethodSplitter().splitIntoMethods(fileNode) diff --git a/src/main/kotlin/astminer/examples/FeatureExtraction.kt b/src/main/kotlin/astminer/examples/FeatureExtraction.kt index 786ff840..28e964ad 100644 --- a/src/main/kotlin/astminer/examples/FeatureExtraction.kt +++ b/src/main/kotlin/astminer/examples/FeatureExtraction.kt @@ -20,7 +20,7 @@ fun parseAndCollectFeatures() { val fileName = fileInput.name val nol = numberOfLines(fileInput) - val tree = ParsedTree(parser.className(), parser.parseInputStream(fileInput.inputStream()) ?: 
return@forFilesWithSuffix, fileName, nol) + val tree = ParsedTree(parser.className(), parser.parseInputStream(fileInput.inputStream()), fileName, nol) storage.storeParsedTree(tree) } From bf5c82267a0bda230bd8a8022e5659354cc6fa3e Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 7 May 2021 14:42:17 +0500 Subject: [PATCH 154/308] removed redundant null-safety (?. and ?:) --- src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt | 4 +--- src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index 2e7db26c..f3a835e5 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -15,9 +15,7 @@ fun allJavaScriptFiles() { val files = getProjectFilesWithExtension(File(folder), "js") JavaScriptParser().parseFiles(files) { parseResult -> - parseResult.labeledWithFilePath()?.let { labeledResult -> - storage.store(labeledResult) - } + storage.store(parseResult.labeledWithFilePath()) } storage.close() diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 86246764..c753e68e 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -20,7 +20,7 @@ fun code2vecJavaMethods() { File(folder).forFilesWithSuffix(".java") { file -> //parse file - val fileNode = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix + val fileNode = JavaParser().parseInputStream(file.inputStream()) //extract method nodes val methods = JavaMethodSplitter().splitIntoMethods(fileNode) From e907e7f464dbdb9cbda44e899e5bbfe2622488ff Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 7 May 2021 19:25:40 +0500 Subject: [PATCH 155/308] added FilterConfigs, ProblemConfigs, 
StorageCreatorConfigs --- build.gradle.kts | 5 +- .../kotlin/astminer/config/FilterConfigs.kt | 51 ++++++++++++++++++ .../kotlin/astminer/config/PipelineConfig.kt | 49 ++++++++--------- .../kotlin/astminer/config/ProblemConfigs.kt | 33 ++++++++++++ .../kotlin/astminer/config/StorageConfig.kt | 22 -------- .../astminer/config/StorageCreatorConfig.kt | 43 +++++++++++++++ .../astminer/examples/Code2VecJavaMethods.kt | 15 +++--- .../kotlin/astminer/filters/CommonFilters.kt | 1 + .../astminer/filters/FunctionFilters.kt | 2 + .../kotlin/astminer/pipeline/GetPipeline.kt | 23 +++++--- .../astminer/pipeline/StorageCreator.kt | 47 ---------------- .../astminer/pipeline/StorageCreators.kt | 54 +++++++++++++++++++ .../astminer/problem/FileLevelProblems.kt | 2 + .../astminer/problem/FunctionLevelProblems.kt | 2 + .../pipeline/AbstractStorageCreatorTest.kt | 33 ++++++++++++ .../Code2VecExtractionPipelineTest.kt | 18 ++++--- .../pipeline/StorageCreatorImplTest.kt | 54 ------------------- 17 files changed, 280 insertions(+), 174 deletions(-) create mode 100644 src/main/kotlin/astminer/config/FilterConfigs.kt create mode 100644 src/main/kotlin/astminer/config/ProblemConfigs.kt delete mode 100644 src/main/kotlin/astminer/config/StorageConfig.kt create mode 100644 src/main/kotlin/astminer/config/StorageCreatorConfig.kt delete mode 100644 src/main/kotlin/astminer/pipeline/StorageCreator.kt create mode 100644 src/main/kotlin/astminer/pipeline/StorageCreators.kt create mode 100644 src/test/kotlin/astminer/pipeline/AbstractStorageCreatorTest.kt delete mode 100644 src/test/kotlin/astminer/pipeline/StorageCreatorImplTest.kt diff --git a/build.gradle.kts b/build.gradle.kts index 1a04d57f..fd30c6e8 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -17,13 +17,14 @@ println(version) plugins { id("java") - kotlin("jvm") version "1.3.61" apply true + kotlin("jvm") version "1.4.32" apply true id("antlr") id("idea") id("application") id("tanvd.kosogor") version "1.0.6" 
id("org.jetbrains.dokka") version "0.9.18" id("me.champeau.gradle.jmh") version "0.5.0" + kotlin("plugin.serialization") version "1.4.32" } @@ -57,6 +58,7 @@ dependencies { // https://mvnrepository.com/artifact/org.slf4j/slf4j-simple implementation("org.slf4j", "slf4j-simple", "1.7.30") implementation("io.github.microutils:kotlin-logging:1.5.9") + implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.2.0") testImplementation("junit:junit:4.11") testImplementation(kotlin("test-junit")) @@ -66,6 +68,7 @@ dependencies { jmhImplementation("org.jetbrains.kotlin:kotlin-reflect:1.3.61") jmhImplementation("org.openjdk.jmh:jmh-core:1.21") jmhImplementation("org.openjdk.jmh:jmh-generator-annprocess:1.21") + } val shadowJar = shadowJar { diff --git a/src/main/kotlin/astminer/config/FilterConfigs.kt b/src/main/kotlin/astminer/config/FilterConfigs.kt new file mode 100644 index 00000000..cfcee385 --- /dev/null +++ b/src/main/kotlin/astminer/config/FilterConfigs.kt @@ -0,0 +1,51 @@ +package astminer.config + +import astminer.filters.* +import kotlinx.serialization.Serializable +import kotlinx.serialization.Transient + +@Serializable +sealed class FileFilterConfig { + abstract val filter: FileFilter +} + +@Serializable +data class FileTreeSizeFilterConfig(val maxTreeSize: Int) : FileFilterConfig() { + @Transient + override val filter = FileTreeSizeFilter(maxTreeSize) +} + +@Serializable +sealed class FunctionFilterConfig { + abstract val filter: FunctionFilter +} + +@Serializable +data class ModifierFilterConfig(val excludeModifiers: List) : FunctionFilterConfig() { + @Transient + override val filter = ModifierFilter(excludeModifiers) +} + +@Serializable +data class AnnotationFilterConfig(val excludeAnnotations: List) : FunctionFilterConfig() { + @Transient + override val filter = AnnotationFilter(excludeAnnotations) +} + +@Serializable +object ConstructorFilterConfig : FunctionFilterConfig() { + @Transient + override val filter = ConstructorFilter +} + +@Serializable 
+data class FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : FunctionFilterConfig() { + @Transient + override val filter = FunctionNameWordsNumberFilter(maxWordsNumber) +} + +@Serializable +data class FunctionAnyNodeWordsNumberFilterConfig(val maxWordsNumber: Int) : FunctionFilterConfig() { + @Transient + override val filter = FunctionAnyNodeWordsNumberFilter(maxWordsNumber) +} diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt index 17245891..9aa579b7 100644 --- a/src/main/kotlin/astminer/config/PipelineConfig.kt +++ b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -1,39 +1,36 @@ package astminer.config -import astminer.filters.FileFilter -import astminer.filters.Filter -import astminer.filters.FunctionFilter -import astminer.problem.FileLevelProblem -import astminer.problem.FunctionLevelProblem -import astminer.problem.Problem +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable -sealed class PipelineConfig { - abstract val inputDir: String - abstract val outputDir: String - abstract val parser: ParserConfig - abstract val filters: List> - abstract val problem: Problem<*> - abstract val storage: StorageConfig -} +@Serializable +sealed class PipelineConfig +@Serializable +@SerialName("file granularity") data class FilePipelineConfig( - override val inputDir: String, - override val outputDir: String, - override val parser: ParserConfig, - override val filters: List, - override val problem: FileLevelProblem, - override val storage: StorageConfig + val inputDir: String, + val outputDir: String, + val parserConfig: ParserConfig, + val filterConfigs: List = emptyList(), + val problemConfig: FileProblemConfig, + val excludedNodeTypes: List = emptyList(), + val storageCreatorConfig: StorageCreatorConfig ) : PipelineConfig() +@Serializable +@SerialName("function granularity") data class FunctionPipelineConfig( - override val inputDir: String, - override val 
outputDir: String, - override val parser: ParserConfig, - override val filters: List, - override val problem: FunctionLevelProblem, - override val storage: StorageConfig + val inputDir: String, + val outputDir: String, + val parserConfig: ParserConfig, + val filterConfigs: List = emptyList(), + val problemConfig: FunctionProblemConfig, + val excludedNodeTypes: List = emptyList(), + val storageCreatorConfig: StorageCreatorConfig ) : PipelineConfig() +@Serializable data class ParserConfig( val type: String, val extensions: List diff --git a/src/main/kotlin/astminer/config/ProblemConfigs.kt b/src/main/kotlin/astminer/config/ProblemConfigs.kt new file mode 100644 index 00000000..f647fadc --- /dev/null +++ b/src/main/kotlin/astminer/config/ProblemConfigs.kt @@ -0,0 +1,33 @@ +package astminer.config + +import astminer.problem.* +import kotlinx.serialization.Serializable +import kotlinx.serialization.Transient + +@Serializable +sealed class FileProblemConfig { + abstract val problem: FileLevelProblem +} + +@Serializable +object FilePathExtractorConfig : FileProblemConfig() { + @Transient + override val problem = FilePathExtractor +} + +@Serializable +object FolderNameExtractorConfig : FileProblemConfig() { + @Transient + override val problem = FolderExtractor +} + +@Serializable +sealed class FunctionProblemConfig { + abstract val problem: FunctionLevelProblem +} + +@Serializable +object FunctionNamePredictionConfig : FunctionProblemConfig() { + @Transient + override val problem = FunctionNameProblem +} diff --git a/src/main/kotlin/astminer/config/StorageConfig.kt b/src/main/kotlin/astminer/config/StorageConfig.kt deleted file mode 100644 index 97b8e418..00000000 --- a/src/main/kotlin/astminer/config/StorageConfig.kt +++ /dev/null @@ -1,22 +0,0 @@ -package astminer.config - -import astminer.storage.TokenProcessor -import astminer.storage.path.PathBasedStorageConfig - -sealed class StorageConfig - -object CsvAstStorageConfig : StorageConfig() - -data class 
DotAstStorageConfig(val tokenProcessor: TokenProcessor = TokenProcessor.Normalize) : StorageConfig() - -data class Code2VecPathStorageConfig( - val maxPathLength: Int, - val maxPathWidth: Int, - val maxTokens: Long? = null, - val maxPaths: Long? = null, - val maxPathContextsPerEntity: Int? = null, - val tokenProcessor: TokenProcessor = TokenProcessor.Normalize -) : StorageConfig() { - fun toPathBasedConfig() = - PathBasedStorageConfig(maxPathLength, maxPathWidth, maxTokens, maxPaths, maxPathContextsPerEntity) -} diff --git a/src/main/kotlin/astminer/config/StorageCreatorConfig.kt b/src/main/kotlin/astminer/config/StorageCreatorConfig.kt new file mode 100644 index 00000000..3ebc7cf9 --- /dev/null +++ b/src/main/kotlin/astminer/config/StorageCreatorConfig.kt @@ -0,0 +1,43 @@ +package astminer.config + +import astminer.pipeline.* +import astminer.storage.TokenProcessor +import astminer.storage.path.PathBasedStorageConfig +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +@Serializable +sealed class StorageCreatorConfig { + abstract fun getCreator(outputFolderPath: String): StorageCreator +} + +@Serializable +@SerialName("csv ast") +object CsvAstStorageCreatorConfig : StorageCreatorConfig() { + override fun getCreator(outputFolderPath: String) = CsvAstStorageCreator(outputFolderPath) +} + +@Serializable +@SerialName("dot ast") +data class DotAstStorageCreatorConfig(val tokenProcessor: TokenProcessor = TokenProcessor.Normalize) : + StorageCreatorConfig() { + override fun getCreator(outputFolderPath: String) = DotAstStorageCreator(outputFolderPath, tokenProcessor) +} + + +@Serializable +@SerialName("code2vec paths") +data class Code2VecPathStorageCreatorConfig( + val maxPathLength: Int, + val maxPathWidth: Int, + val maxTokens: Long? = null, + val maxPaths: Long? = null, + val maxPathContextsPerEntity: Int? 
= null, + val tokenProcessor: TokenProcessor = TokenProcessor.Normalize +) : StorageCreatorConfig() { + override fun getCreator(outputFolderPath: String) = Code2VecStorageCreator( + outputFolderPath, + PathBasedStorageConfig(maxPathLength, maxPathWidth, maxTokens, maxPaths, maxPathContextsPerEntity), + tokenProcessor + ) +} diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 0455120f..70308a22 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -1,8 +1,6 @@ package astminer.examples -import astminer.config.Code2VecPathStorageConfig -import astminer.config.FunctionPipelineConfig -import astminer.config.ParserConfig +import astminer.config.* import astminer.pipeline.getFunctionPipeline import astminer.problem.FunctionNameProblem @@ -14,15 +12,14 @@ fun code2vecJavaMethods() { val outputDir = "out_examples/code2vecPathMining" val pipelineConfig = FunctionPipelineConfig( - folder, - outputDir, - ParserConfig( + inputDir = folder, + outputDir = outputDir, + parserConfig = ParserConfig( "antlr", listOf("java") ), - emptyList(), - FunctionNameProblem, - Code2VecPathStorageConfig( + problemConfig = FunctionNamePredictionConfig, + storageCreatorConfig = Code2VecPathStorageCreatorConfig( maxPathLength = 5, maxPathWidth = 5 ) diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt index d7c6e9dd..b56f77e4 100644 --- a/src/main/kotlin/astminer/filters/CommonFilters.kt +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -4,6 +4,7 @@ import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.model.ParseResult import astminer.featureextraction.treeSize +import kotlinx.serialization.Serializable abstract class TreeSizeFilter(private val maxSize: Int) : Filter { private fun isTreeFiltered(root: Node): Boolean { 
diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index a2c30e80..6d6f1ff9 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -4,6 +4,8 @@ import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.preOrder import astminer.common.splitToSubtokens +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable interface FunctionFilter : Filter> diff --git a/src/main/kotlin/astminer/pipeline/GetPipeline.kt b/src/main/kotlin/astminer/pipeline/GetPipeline.kt index e5cc8f27..2cd48e7d 100644 --- a/src/main/kotlin/astminer/pipeline/GetPipeline.kt +++ b/src/main/kotlin/astminer/pipeline/GetPipeline.kt @@ -20,14 +20,23 @@ fun getPipeline(pipelineConfig: PipelineConfig): Pipeline<*> { fun getFilePipeline(filePipelineConfig: FilePipelineConfig): Pipeline> = with(filePipelineConfig) { - val frontend = FilePipelineFrontend(inputDir, parser.type, parser.extensions) - val storageCreator = StorageCreatorImpl(storage, outputDir) - Pipeline(frontend, filters, problem, emptyList(), storageCreator) + Pipeline( + frontend = FilePipelineFrontend(inputDir, parserConfig.type, parserConfig.extensions), + filters = filterConfigs.map { it.filter }, + problem = problemConfig.problem, + excludedNodeTypes = excludedNodeTypes, + storageCreator = storageCreatorConfig.getCreator(outputDir) + ) } fun getFunctionPipeline(functionPipelineConfig: FunctionPipelineConfig): Pipeline> = with(functionPipelineConfig) { - val frontend = FunctionPipelineFrontend(inputDir, parser.type, parser.extensions) - val storageCreator = StorageCreatorImpl(storage, outputDir) - Pipeline(frontend, filters, problem, emptyList(), storageCreator) - } \ No newline at end of file + Pipeline( + frontend = FunctionPipelineFrontend(inputDir, parserConfig.type, parserConfig.extensions), + filters = filterConfigs.map { it.filter }, 
+ problem = problemConfig.problem, + excludedNodeTypes = excludedNodeTypes, + storageCreator = storageCreatorConfig.getCreator(outputDir) + ) + } + diff --git a/src/main/kotlin/astminer/pipeline/StorageCreator.kt b/src/main/kotlin/astminer/pipeline/StorageCreator.kt deleted file mode 100644 index 171a2411..00000000 --- a/src/main/kotlin/astminer/pipeline/StorageCreator.kt +++ /dev/null @@ -1,47 +0,0 @@ -package astminer.pipeline - -import astminer.config.Code2VecPathStorageConfig -import astminer.config.CsvAstStorageConfig -import astminer.config.DotAstStorageConfig -import astminer.config.StorageConfig -import astminer.storage.Storage -import astminer.storage.ast.CsvAstStorage -import astminer.storage.ast.DotAstStorage -import astminer.storage.path.Code2VecPathStorage -import java.io.File - -interface StorageCreator { - fun createStorageAndOutputFolder(extension: String): Storage -} - -/** - * Creates storage for each extension. - * @param config The config that defines that storage will be used and the params of that storage - * @param outputDirectoryPath Path to the base output directory where folders for each extension will be created - * (e.g 'py', 'java') - */ -class StorageCreatorImpl(private val config: StorageConfig, outputDirectoryPath: String) : StorageCreator { - private val outputDirectory = File(outputDirectoryPath) - - private fun createOutputPath(extension: String): String { - val outputDirectoryForExtension = outputDirectory.resolve(extension) - outputDirectoryForExtension.mkdir() - return outputDirectoryForExtension.path - } - - /** - * Creates folder [outputDirectoryPath]/[extension] and initializes the storage in that folder. 
- */ - override fun createStorageAndOutputFolder(extension: String): Storage { - val outputPath = createOutputPath(extension) - return when (config) { - is CsvAstStorageConfig -> CsvAstStorage(outputPath) - is DotAstStorageConfig -> DotAstStorage(outputPath, config.tokenProcessor) - is Code2VecPathStorageConfig -> Code2VecPathStorage( - outputPath, - config.toPathBasedConfig(), - config.tokenProcessor - ) - } - } -} diff --git a/src/main/kotlin/astminer/pipeline/StorageCreators.kt b/src/main/kotlin/astminer/pipeline/StorageCreators.kt new file mode 100644 index 00000000..1e66ea14 --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/StorageCreators.kt @@ -0,0 +1,54 @@ +package astminer.pipeline + +import astminer.storage.Storage +import astminer.storage.TokenProcessor +import astminer.storage.ast.CsvAstStorage +import astminer.storage.ast.DotAstStorage +import astminer.storage.path.Code2VecPathStorage +import astminer.storage.path.PathBasedStorageConfig +import java.io.File + +interface StorageCreator { + fun createStorageAndOutputFolder(extension: String): Storage +} + +abstract class AbstractStorageCreator(private val outputDirectoryPath: String) : StorageCreator { + private fun createOutputFolder(extension: String): File { + val outputDirectoryForExtension = File(outputDirectoryPath).resolve(extension) + outputDirectoryForExtension.mkdir() + return outputDirectoryForExtension + } + + abstract fun initializeStorage(outputFolderPath: String): Storage + + override fun createStorageAndOutputFolder(extension: String): Storage = + initializeStorage(createOutputFolder(extension).path) +} + +/** + * Creates CsvAstStorages + */ +class CsvAstStorageCreator(outputDirectoryPath: String) : AbstractStorageCreator(outputDirectoryPath) { + override fun initializeStorage(outputFolderPath: String) = CsvAstStorage(outputFolderPath) +} + +/** + * Creates DotAstStorages given [tokenProcessor] + */ +class DotAstStorageCreator(outputDirectoryPath: String, private val tokenProcessor: 
TokenProcessor) : + AbstractStorageCreator(outputDirectoryPath) { + override fun initializeStorage(outputFolderPath: String) = DotAstStorage(outputFolderPath, tokenProcessor) +} + +/** + * Creates Code2VecStorages given [config] and [tokenProcessor] + */ +class Code2VecStorageCreator( + outputDirectoryPath: String, + private val config: PathBasedStorageConfig, + private val tokenProcessor: TokenProcessor +) : AbstractStorageCreator(outputDirectoryPath) { + override fun initializeStorage(outputFolderPath: String) = + Code2VecPathStorage(outputFolderPath, config, tokenProcessor) +} + diff --git a/src/main/kotlin/astminer/problem/FileLevelProblems.kt b/src/main/kotlin/astminer/problem/FileLevelProblems.kt index 2141c737..54c0d88a 100644 --- a/src/main/kotlin/astminer/problem/FileLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FileLevelProblems.kt @@ -2,6 +2,8 @@ package astminer.problem import astminer.common.model.Node import astminer.common.model.ParseResult +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable import java.io.File interface FileLevelProblem : Problem> diff --git a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt index 463ee509..fc19e73e 100644 --- a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt @@ -4,6 +4,8 @@ import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.preOrder import astminer.common.setTechnicalToken +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable interface FunctionLevelProblem : Problem> diff --git a/src/test/kotlin/astminer/pipeline/AbstractStorageCreatorTest.kt b/src/test/kotlin/astminer/pipeline/AbstractStorageCreatorTest.kt new file mode 100644 index 00000000..7c27b18e --- /dev/null +++ b/src/test/kotlin/astminer/pipeline/AbstractStorageCreatorTest.kt @@ -0,0 +1,33 @@ 
+package astminer.pipeline + +import astminer.storage.Storage +import org.junit.Before +import org.junit.Test +import java.io.File +import java.nio.file.Files.createTempDirectory +import kotlin.test.assertEquals + +internal class AbstractStorageCreatorTest { + lateinit var tempDir: File + + @Before + fun init() { + tempDir = createTempDirectory("prefix").toFile() + } + + @Test + fun `test creating a StorageCreator should not alter the output directory`() { + AbstractStorageCreatorImpl(tempDir.path) + assertEquals(0, tempDir.listFiles()?.size, "There should be no files in the directory") + } + + @Test + fun `test StorageCreator's createStorageAndOutputFolder should create a subdirectory named after the file extension`() { + AbstractStorageCreatorImpl(tempDir.path).createStorageAndOutputFolder("file extension") + assertEquals(listOf("file extension"), tempDir.listFiles()?.map { it.name }) + } + + class AbstractStorageCreatorImpl(outputFolderPath: String) : AbstractStorageCreator(outputFolderPath) { + override fun initializeStorage(outputFolderPath: String): Storage = DummyStorage() + } +} diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt index 155a1961..a071de86 100644 --- a/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt +++ b/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt @@ -1,31 +1,33 @@ package astminer.pipeline import astminer.cli.util.verifyPathContextExtraction -import astminer.config.Code2VecPathStorageConfig +import astminer.config.Code2VecPathStorageCreatorConfig +import astminer.config.FilePathExtractorConfig import astminer.config.FilePipelineConfig import astminer.config.ParserConfig import astminer.problem.FilePathExtractor import org.junit.Test import java.io.File +import java.nio.file.Files.createTempDirectory internal class Code2VecExtractionPipelineTest { private val testDataDir = File("src/test/resources") 
@Test fun testDefaultExtraction() { - val extractedDataDir = createTempDir("extractedData") + val extractedDataDir = createTempDirectory("extractedData").toFile() + val languages = listOf("java", "python") val config = FilePipelineConfig( - testDataDir.path, - extractedDataDir.path, - ParserConfig( + inputDir = testDataDir.path, + outputDir = extractedDataDir.path, + parserConfig = ParserConfig( "gumtree", languages ), - emptyList(), - FilePathExtractor, - Code2VecPathStorageConfig( + problemConfig = FilePathExtractorConfig, + storageCreatorConfig = Code2VecPathStorageCreatorConfig( maxPathLength = 8, maxPathWidth = 3 ) diff --git a/src/test/kotlin/astminer/pipeline/StorageCreatorImplTest.kt b/src/test/kotlin/astminer/pipeline/StorageCreatorImplTest.kt deleted file mode 100644 index 0372d5aa..00000000 --- a/src/test/kotlin/astminer/pipeline/StorageCreatorImplTest.kt +++ /dev/null @@ -1,54 +0,0 @@ -package astminer.pipeline - -import astminer.config.Code2VecPathStorageConfig -import astminer.config.CsvAstStorageConfig -import astminer.config.DotAstStorageConfig -import astminer.storage.ast.CsvAstStorage -import astminer.storage.ast.DotAstStorage -import astminer.storage.path.Code2VecPathStorage -import org.junit.Before -import org.junit.Test -import java.io.File -import kotlin.test.assertEquals -import kotlin.test.assertTrue - -internal class StorageCreatorImplTest { - - lateinit var tempDir: File - - @Before - fun init() { - tempDir = createTempDir() - } - - @Test - fun `test creating a StorageCreator should not alter the output directory`() { - StorageCreatorImpl(CsvAstStorageConfig, tempDir.path) - assertEquals(0, tempDir.listFiles()?.size, "There should be no files in the directory") - } - - @Test - fun `test StorageCreator's createStorageAndOutputFolder should create a subdirectory named after the file extension`() { - StorageCreatorImpl(CsvAstStorageConfig, tempDir.path).createStorageAndOutputFolder("file extension") - assertEquals(listOf("file extension"), 
tempDir.listFiles()?.map { it.name }) - } - - @Test - fun `test StorageCreator should create CsvAstStorage if given CsvAstStorageConfig`() { - val storage = StorageCreatorImpl(CsvAstStorageConfig, tempDir.path).createStorageAndOutputFolder("a") - assertTrue { storage is CsvAstStorage } - } - - @Test - fun `test StorageCreator should create DotAstStorage if given DotAstStorageConfig`() { - val storage = StorageCreatorImpl(DotAstStorageConfig(), tempDir.path).createStorageAndOutputFolder("a") - assertTrue { storage is DotAstStorage } - } - - @Test - fun `test StorageCreator should create Code2VecPathStorage if given Code2VecPathStorageConfig`() { - val config = Code2VecPathStorageConfig(1, 1) - val storage = StorageCreatorImpl(config, tempDir.path).createStorageAndOutputFolder("a") - assertTrue { storage is Code2VecPathStorage } - } -} From dfaa9e9d2d22299c446ab92db0cde581591cca12 Mon Sep 17 00:00:00 2001 From: furetur Date: Sun, 9 May 2021 22:06:23 +0500 Subject: [PATCH 156/308] added working cli command (kinda working) --- configs/parse-files.json | 15 ++++++ src/main/kotlin/astminer/Main.kt | 47 ++++++++++++------- .../kotlin/astminer/cli/LabelExtractors.kt | 0 .../kotlin/astminer/config/FilterConfigs.kt | 2 +- .../kotlin/astminer/config/ProblemConfigs.kt | 10 ++-- .../astminer/config/StorageCreatorConfig.kt | 2 +- .../astminer/examples/Code2VecJavaMethods.kt | 2 +- .../astminer/pipeline/PipelineFrontend.kt | 5 ++ .../Code2VecExtractionPipelineTest.kt | 2 +- 9 files changed, 61 insertions(+), 24 deletions(-) create mode 100644 configs/parse-files.json delete mode 100644 src/main/kotlin/astminer/cli/LabelExtractors.kt diff --git a/configs/parse-files.json b/configs/parse-files.json new file mode 100644 index 00000000..e464aea5 --- /dev/null +++ b/configs/parse-files.json @@ -0,0 +1,15 @@ +{ + "type": "file granularity", + "inputDir": "src/test/resources/methodSplitting/", + "outputDir": "output", + "parserConfig": { + "type": "antlr", + "extensions": ["java"] + }, 
+ "problemConfig": { + "type": "filepath" + }, + "storageCreatorConfig": { + "type": "csv ast" + } +} diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index c49dceaf..f06a5f29 100644 --- a/src/main/kotlin/astminer/Main.kt +++ b/src/main/kotlin/astminer/Main.kt @@ -1,20 +1,33 @@ package astminer -//import astminer.cli.* +import astminer.config.PipelineConfig +import astminer.pipeline.getPipeline +import com.github.ajalt.clikt.core.CliktCommand +import com.github.ajalt.clikt.parameters.arguments.argument +import com.github.ajalt.clikt.parameters.types.file +import kotlinx.serialization.SerializationException +import kotlinx.serialization.decodeFromString +import kotlinx.serialization.json.Json +import java.io.File -//fun main(args: Array) { -// if (args.isEmpty()) { -// println(""" -// You should specify the task as the first argument ("preprocess", "parse", "pathContexts", or "code2vec"). -// For more information run `./cli.sh taskName --help` -// """.trimIndent()) -// } else { -// return when (args[0]) { -// "preprocess" -> ProjectPreprocessor().main(args.sliceArray(1 until args.size)) -// "parse" -> ProjectParser().main(args.sliceArray(1 until args.size)) -// "pathContexts" -> PathContextsExtractor().main(args.sliceArray(1 until args.size)) -// "code2vec" -> Code2VecExtractor().main(args.sliceArray(1 until args.size)) -// else -> throw Exception("The first argument should be task's name: either 'preprocess', 'parse', 'pathContexts', or 'code2vec'") -// } -// } -//} \ No newline at end of file + +class PipelineRunner : CliktCommand(name = "") { + val config: File by argument("config", help = "Path to config").file( + exists = true, + folderOkay = false, + readable = true + ) + + override fun run() { + val config = try { + Json.decodeFromString(config.readText()) + } catch (e: SerializationException) { + // TODO: should log it also + println("Error: $e") + return + } + getPipeline(config).run() + } +} + +fun main(args: Array) = 
PipelineRunner().main(args) diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt deleted file mode 100644 index e69de29b..00000000 diff --git a/src/main/kotlin/astminer/config/FilterConfigs.kt b/src/main/kotlin/astminer/config/FilterConfigs.kt index cfcee385..1f86df8b 100644 --- a/src/main/kotlin/astminer/config/FilterConfigs.kt +++ b/src/main/kotlin/astminer/config/FilterConfigs.kt @@ -33,7 +33,7 @@ data class AnnotationFilterConfig(val excludeAnnotations: List) : Functi } @Serializable -object ConstructorFilterConfig : FunctionFilterConfig() { +class ConstructorFilterConfig : FunctionFilterConfig() { @Transient override val filter = ConstructorFilter } diff --git a/src/main/kotlin/astminer/config/ProblemConfigs.kt b/src/main/kotlin/astminer/config/ProblemConfigs.kt index f647fadc..dbc628bf 100644 --- a/src/main/kotlin/astminer/config/ProblemConfigs.kt +++ b/src/main/kotlin/astminer/config/ProblemConfigs.kt @@ -1,6 +1,7 @@ package astminer.config import astminer.problem.* +import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable import kotlinx.serialization.Transient @@ -10,13 +11,15 @@ sealed class FileProblemConfig { } @Serializable -object FilePathExtractorConfig : FileProblemConfig() { +@SerialName("filepath") +class FilePathExtractorConfig : FileProblemConfig() { @Transient override val problem = FilePathExtractor } @Serializable -object FolderNameExtractorConfig : FileProblemConfig() { +@SerialName("foldername") +class FolderNameExtractorConfig : FileProblemConfig() { @Transient override val problem = FolderExtractor } @@ -27,7 +30,8 @@ sealed class FunctionProblemConfig { } @Serializable -object FunctionNamePredictionConfig : FunctionProblemConfig() { +@SerialName("function name prediction") +class FunctionNamePredictionConfig : FunctionProblemConfig() { @Transient override val problem = FunctionNameProblem } diff --git 
a/src/main/kotlin/astminer/config/StorageCreatorConfig.kt b/src/main/kotlin/astminer/config/StorageCreatorConfig.kt index 3ebc7cf9..d201b2a2 100644 --- a/src/main/kotlin/astminer/config/StorageCreatorConfig.kt +++ b/src/main/kotlin/astminer/config/StorageCreatorConfig.kt @@ -13,7 +13,7 @@ sealed class StorageCreatorConfig { @Serializable @SerialName("csv ast") -object CsvAstStorageCreatorConfig : StorageCreatorConfig() { +class CsvAstStorageCreatorConfig : StorageCreatorConfig() { override fun getCreator(outputFolderPath: String) = CsvAstStorageCreator(outputFolderPath) } diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 70308a22..d13fba7f 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -18,7 +18,7 @@ fun code2vecJavaMethods() { "antlr", listOf("java") ), - problemConfig = FunctionNamePredictionConfig, + problemConfig = FunctionNamePredictionConfig(), storageCreatorConfig = Code2VecPathStorageCreatorConfig( maxPathLength = 5, maxPathWidth = 5 diff --git a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt index 3301064f..00b3c4ec 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt @@ -3,6 +3,7 @@ package astminer.pipeline import astminer.common.getProjectFilesWithExtension import astminer.common.model.* import astminer.parse.getHandlerFactory +import mu.KotlinLogging import java.io.File /** @@ -26,6 +27,8 @@ interface PipelineFrontend { fun getEntities(): Sequence> } +private val logger = KotlinLogging.logger("PipelineFrontend") + /** * Base class for several PipelineFrontend implementations. * Finds parsers of type [parserType] for all the given languages by [extensions]. 
@@ -42,6 +45,8 @@ abstract class CompositePipelineFrontend( override fun getEntities(): Sequence> = sequence { val inputDirectory = File(inputDirectoryPath) + logger.info { "Reading ${inputDirectory.absolutePath}" } + for (extension in extensions) { val handlerFactory = try { getHandlerFactory(extension, parserType) diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt index a071de86..d99a0468 100644 --- a/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt +++ b/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt @@ -26,7 +26,7 @@ internal class Code2VecExtractionPipelineTest { "gumtree", languages ), - problemConfig = FilePathExtractorConfig, + problemConfig = FilePathExtractorConfig(), storageCreatorConfig = Code2VecPathStorageCreatorConfig( maxPathLength = 8, maxPathWidth = 3 From 2035f66797b589b32d833ab74d60f5ef5587063c Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 11 May 2021 11:27:49 +0500 Subject: [PATCH 157/308] fixed dependencies after merge --- build.gradle.kts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build.gradle.kts b/build.gradle.kts index 8767ae55..da30daef 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -25,9 +25,10 @@ dependencies { // ===== Parsers ===== antlr("org.antlr:antlr4:4.7.1") // https://mvnrepository.com/artifact/com.github.gumtreediff - api("com.github.gumtreediff", "core", "2.1.0") + api("com.github.gumtreediff", "core", "2.1.2") api("com.github.gumtreediff", "client", "2.1.0") api("com.github.gumtreediff", "gen.jdt", "2.1.0") + api("com.github.gumtreediff", "gen.python", "2.1.2") // https://mvnrepository.com/artifact/io.shiftleft/fuzzyc2cpg api("io.shiftleft", "fuzzyc2cpg_2.13", "1.2.9") From d2b2d7904782289d6c7ac73aadf2a4b4f1382b21 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 13 May 2021 17:10:47 +0300 Subject: [PATCH 158/308] Add python parser into docker 
image --- .circleci/config.yml | 0 .dockerignore | 15 +++++++++++++++ .gitignore | 1 + Dockerfile | 19 ++++++++++++++----- build.gradle.kts | 4 ++-- cli.sh | 2 +- src/jmh/kotlin/cli/BenchmarkResultWorker.kt | 2 +- .../javascript/JavaScriptMethodSplitter.kt | 2 +- .../python/GumTreePythonMethodSplitterTest.kt | 2 +- 9 files changed, 36 insertions(+), 11 deletions(-) delete mode 100644 .circleci/config.yml create mode 100644 .dockerignore diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index e69de29b..00000000 diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..d50294c6 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +*.iml +*.csv +asts/ + +.idea/ +.gradle/ +examples/out/ +src/main/generated/ +build/ + +.DS_Store + +.github +scripts/ + diff --git a/.gitignore b/.gitignore index f1c745c3..61ce55df 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.iml *.csv +asts/ .idea/ .gradle/ diff --git a/Dockerfile b/Dockerfile index 73b3a1e0..56ef99fd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,21 @@ -FROM alpine:3.13.5 +FROM ubuntu:20.04 LABEL desc="Docker container to run ASTMiner with all preinstalled requirements" -# Install java -RUN apk add openjdk8 +# Instal OpenJDK8 +RUN apt-get update && apt-get install -y openjdk-8-jdk # Install G++ (required for Fuzzy parser) -RUN apk add g++ +RUN apt-get update && apt-get install -y g++ + +# Install PythonParser for GumTree +RUN apt-get install -y --no-install-recommends -y python3.8 python3-pip git && \ + git clone https://github.com/JetBrains-Research/pythonparser && \ + pip3 install -r pythonparser/requirements.txt && \ + mv pythonparser/src/main/python/pythonparser/pythonparser_3.py /tmp/pythonparser && \ + chmod +x /tmp/pythonparser && \ + rm -rf pythonparser +ENV PATH="/tmp:${PATH}" # Copy astminer sources WORKDIR astminer @@ -15,4 +24,4 @@ COPY . . 
# Prepare shadow jar RUN ./gradlew shadowJar -ENTRYPOINT ["java", "-jar", "build/shadow/astminer.jar"] +CMD ["java", "-jar", "build/shadow/astminer.jar"] diff --git a/build.gradle.kts b/build.gradle.kts index 640f866a..97a93534 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -26,8 +26,8 @@ dependencies { antlr("org.antlr:antlr4:4.7.1") // https://mvnrepository.com/artifact/com.github.gumtreediff api("com.github.gumtreediff", "core", "2.1.2") - api("com.github.gumtreediff", "client", "2.1.0") - api("com.github.gumtreediff", "gen.jdt", "2.1.0") + api("com.github.gumtreediff", "client", "2.1.2") + api("com.github.gumtreediff", "gen.jdt", "2.1.2") api("com.github.gumtreediff", "gen.python", "2.1.2") // https://mvnrepository.com/artifact/io.shiftleft/fuzzyc2cpg diff --git a/cli.sh b/cli.sh index 526e0ebf..5a0c5005 100755 --- a/cli.sh +++ b/cli.sh @@ -12,6 +12,6 @@ if [[ "$(docker images -q $IMAGE_NAME 2> /dev/null)" == "" ]]; then java -jar $SHADOW_JAR_PATH "$@" else echo "Running astminer in docker" - docker run --rm voudy/astminer "$@" + docker run --rm $IMAGE_NAME "$@" fi diff --git a/src/jmh/kotlin/cli/BenchmarkResultWorker.kt b/src/jmh/kotlin/cli/BenchmarkResultWorker.kt index 44294238..2c67bf56 100644 --- a/src/jmh/kotlin/cli/BenchmarkResultWorker.kt +++ b/src/jmh/kotlin/cli/BenchmarkResultWorker.kt @@ -101,7 +101,7 @@ class BenchmarkResultWorker { } -fun main(args: Array) { +fun main() { val benchmarkResultWorker = BenchmarkResultWorker() val results = benchmarkResultWorker.parseCsvFile("src/jmh/benchmarks.csv") benchmarkResultWorker.saveToMarkdown(results, "src/jmh/results.md") diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt index 387ab198..b2b4e508 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt @@ -105,7 +105,7 @@ 
abstract class JavaScriptElement(private val element: AntlrNode) { return if (hasLastLabel(typeLabel)) { listOf(this) } else { - this.getChildrenOfType(typeLabel).mapNotNull { it as? AntlrNode } + this.getChildrenOfType(typeLabel).map { it } } } diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt index 6e1c6025..df9799fe 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt @@ -7,7 +7,7 @@ import java.io.File import kotlin.test.assertEquals import kotlin.test.assertNotNull -class GumTreeJavaMethodSplitterTest { +class GumTreePythonMethodSplitterTest { private fun parse(filename: String): GumTreeNode? = GumTreePythonParser().parseInputStream(File(filename).inputStream()) From 291371e44446f46a717970e3ea64b066e7223030 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 13 May 2021 18:35:52 +0300 Subject: [PATCH 159/308] Check installed parsers before running tests --- Dockerfile | 17 ++++++++++------- src/test/kotlin/astminer/Utils.kt | 14 ++++++++++++++ .../astminer/parse/cpp/FuzzyCppParserTest.kt | 6 ++++++ .../parse/cpp/FuzzyMethodSplitterTest.kt | 9 ++++++--- .../python/GumTreePythonMethodSplitterTest.kt | 6 ++++++ .../gumtree/python/GumTreePythonParserTest.kt | 3 +++ 6 files changed, 45 insertions(+), 10 deletions(-) create mode 100644 src/test/kotlin/astminer/Utils.kt diff --git a/Dockerfile b/Dockerfile index 56ef99fd..09a5733f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,13 +9,16 @@ RUN apt-get update && apt-get install -y openjdk-8-jdk RUN apt-get update && apt-get install -y g++ # Install PythonParser for GumTree -RUN apt-get install -y --no-install-recommends -y python3.8 python3-pip git && \ - git clone https://github.com/JetBrains-Research/pythonparser && \ - pip3 install -r pythonparser/requirements.txt 
&& \ - mv pythonparser/src/main/python/pythonparser/pythonparser_3.py /tmp/pythonparser && \ - chmod +x /tmp/pythonparser && \ - rm -rf pythonparser -ENV PATH="/tmp:${PATH}" +ARG PYTHONPARSER_REPO=https://raw.githubusercontent.com/JetBrains-Research/pythonparser/master +RUN apt-get update && \ + apt-get install -y --no-install-recommends -y python3.8 python3-pip git wget && \ + mkdir pythonparser && \ + cd pythonparser && \ + wget $PYTHONPARSER_REPO/requirements.txt && \ + wget $PYTHONPARSER_REPO/src/main/python/pythonparser/pythonparser_3.py -O pythonparser && \ + pip3 install -r requirements.txt && \ + chmod +x pythonparser +ENV PATH="/pythonparser:${PATH}" # Copy astminer sources WORKDIR astminer diff --git a/src/test/kotlin/astminer/Utils.kt b/src/test/kotlin/astminer/Utils.kt new file mode 100644 index 00000000..676ac076 --- /dev/null +++ b/src/test/kotlin/astminer/Utils.kt @@ -0,0 +1,14 @@ +package astminer + +import java.io.File + +fun checkExecutable(execName: String): Boolean { + val execFolders = System.getenv("PATH").split(File.pathSeparator) + execFolders.forEach { + val folderFiles = File(it).list() ?: return@forEach + if (folderFiles.contains(execName)) { + return true + } + } + return false +} \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 95a32049..6b6b567d 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -1,15 +1,21 @@ package astminer.parse.cpp +import astminer.checkExecutable import astminer.common.getProjectFilesWithExtension import astminer.examples.forFilesWithSuffix import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.parse.fuzzy.cpp.FuzzyNode import org.junit.Assert +import org.junit.Assume +import org.junit.Before import org.junit.Test import java.io.File class FuzzyCppParserTest { + @Before + fun checkGPP() = 
Assume.assumeTrue(checkExecutable("g++")) + @Test fun testNodeIsNotNull() { val parser = FuzzyCppParser() diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index 7190776f..e4b8c1f0 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -1,13 +1,15 @@ package astminer.parse.cpp +import astminer.checkExecutable import astminer.common.model.MethodInfo import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.parse.fuzzy.cpp.FuzzyMethodSplitter import astminer.parse.fuzzy.cpp.FuzzyNode +import org.junit.Assume +import org.junit.Before import org.junit.Test -import kotlin.test.assertEquals import java.io.File -import kotlin.test.BeforeTest +import kotlin.test.assertEquals import kotlin.test.assertNotNull import kotlin.test.assertNull @@ -21,8 +23,9 @@ class FuzzyMethodSplitterTest { var methodInfos: Collection> = listOf() - @BeforeTest + @Before fun parseTree() { + Assume.assumeTrue(checkExecutable("g++")) val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.cpp").inputStream()) assertNotNull(testTree) methodInfos = methodSplitter.splitIntoMethods(testTree) diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt index df9799fe..30303d03 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt @@ -1,7 +1,10 @@ package astminer.parse.gumtree.python +import astminer.checkExecutable import astminer.common.model.MethodInfo import astminer.parse.gumtree.GumTreeNode +import org.junit.Assume +import org.junit.Before import org.junit.Test import java.io.File import kotlin.test.assertEquals @@ -17,6 +20,9 @@ 
class GumTreePythonMethodSplitterTest { private fun createPath(file: String) = "src/test/resources/gumTreeMethodSplitter/$file" + @Before + fun checkPythonParser() = Assume.assumeTrue(checkExecutable("pythonparser")) + @Test fun methodsCountTest() { assertEquals(7, splitMethods(createPath("1.py")).size) diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt index 48be6f7d..d4e74db5 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt @@ -1,6 +1,8 @@ package astminer.parse.gumtree.python +import astminer.checkExecutable import org.junit.After +import org.junit.Assume import org.junit.Before import org.junit.Test import java.io.File @@ -16,6 +18,7 @@ class GumTreePythonParserTest { @Before fun mkdir() { + Assume.assumeTrue(checkExecutable("pythonparser")) testFolder.mkdirs() testFile.createNewFile() } From 0ceba2c292b7e109299155dfefcc94fc5c5f193e Mon Sep 17 00:00:00 2001 From: breandan Date: Fri, 14 May 2021 00:43:38 -0400 Subject: [PATCH 160/308] fix group name of maven artifact --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4e8f7333..1b91d646 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ repositories { } dependencies { - compile 'io.github.vovak.astminer:astminer:0.6.0' + compile 'io.github.vovak:astminer:0.6.0' } ``` @@ -111,7 +111,7 @@ repositories { } dependencies { - compile("io.github.vovak.astminer", "astminer", "0.6.0") + compile("io.github.vovak", "astminer", "0.6.0") } ``` From ca7582c7a8ea616cb48cec555f1d459369c0d9e2 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 17 May 2021 16:30:50 +0300 Subject: [PATCH 161/308] Update minor version --- README.md | 7 ++++--- build.gradle.kts | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md 
b/README.md index 1b91d646..bff273ea 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ [![JetBrains Research](https://jb.gg/badges/research.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub) +![astminer version](https://img.shields.io/badge/astminer-v0.6.3-blue) # `astminer` A library for mining of [path-based representations of code](https://arxiv.org/pdf/1803.09544.pdf) and more, supported by the [Machine Learning Methods for Software Engineering](https://research.jetbrains.org/groups/ml_methods) group at [JetBrains Research](https://research.jetbrains.org). @@ -100,7 +101,7 @@ repositories { } dependencies { - compile 'io.github.vovak:astminer:0.6.0' + compile 'io.github.vovak:astminer:' } ``` @@ -111,7 +112,7 @@ repositories { } dependencies { - compile("io.github.vovak", "astminer", "0.6.0") + compile("io.github.vovak", "astminer", ) } ``` @@ -160,7 +161,7 @@ See [JavaParser](src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt) or [Py If the language has a parsing tool that is available as Java library: 1. Add the library as a dependency in [build.gradle.kts](/build.gradle.kts); 2. Implement a wrapper for the parsing tool. -See [FuzzyCppParser](src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt) for an example of a wrapper. +See [FuzzyCppParser](src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt) for an example of a wrapper. ## Contribution We believe that `astminer` could find use beyond our own mining tasks. 
diff --git a/build.gradle.kts b/build.gradle.kts index 97a93534..4a9c1da4 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,7 +1,7 @@ import tanvd.kosogor.proxy.shadowJar group = "io.github.vovak" -version = "0.6.2" +version = "0.6.3" plugins { id("java") From 7a2eead474a6d18bf856e32e5a347efc4494436a Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 18 May 2021 12:06:24 +0500 Subject: [PATCH 162/308] removed circleci config + moved log from log/log.txt to log.txt --- .circleci/config.yml | 0 log/.gitkeep | 0 src/main/resources/simplelogger.properties | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 .circleci/config.yml delete mode 100644 log/.gitkeep diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index e69de29b..00000000 diff --git a/log/.gitkeep b/log/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/src/main/resources/simplelogger.properties b/src/main/resources/simplelogger.properties index 5fd256f3..bd4d1cd0 100644 --- a/src/main/resources/simplelogger.properties +++ b/src/main/resources/simplelogger.properties @@ -1 +1 @@ -org.slf4j.simpleLogger.logFile = log/log.txt +org.slf4j.simpleLogger.logFile = log.txt From 6cd908a207cd1926c71993454fa35ef32b88106d Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 18 May 2021 12:44:44 +0500 Subject: [PATCH 163/308] better config + config examples --- configs/function-name-prediction-ast.json | 28 +++++++++++++++++++ configs/parse-files-csv.json | 24 ++++++++++++++++ configs/parse-files.json | 15 ---------- configs/paths-from-files.json | 25 +++++++++++++++++ .../kotlin/astminer/config/FilterConfigs.kt | 15 +++++++--- .../kotlin/astminer/config/PipelineConfig.kt | 16 +++++------ .../kotlin/astminer/config/ProblemConfigs.kt | 4 +-- .../astminer/config/StorageCreatorConfig.kt | 22 +++++++++------ 8 files changed, 112 insertions(+), 37 deletions(-) create mode 100644 configs/function-name-prediction-ast.json create mode 100644 
configs/parse-files-csv.json delete mode 100644 configs/parse-files.json create mode 100644 configs/paths-from-files.json diff --git a/configs/function-name-prediction-ast.json b/configs/function-name-prediction-ast.json new file mode 100644 index 00000000..95517d44 --- /dev/null +++ b/configs/function-name-prediction-ast.json @@ -0,0 +1,28 @@ +{ + "type": "function granularity", + + "inputDir": "src/test/resources/methodSplitting/", + "outputDir": "output", + + "parser": { + "type": "antlr", + "extensions": ["java"] + }, + "filters": [ + { + "type": "by function name length", + "maxWordsNumber": 10 + }, + { + "type": "by length of any token", + "maxWordsNumber": 100 + } + ], + "problem": { + "type": "function name prediction" + }, + "storage": { + "type": "ast", + "format": "csv" + } +} diff --git a/configs/parse-files-csv.json b/configs/parse-files-csv.json new file mode 100644 index 00000000..e15ea6c1 --- /dev/null +++ b/configs/parse-files-csv.json @@ -0,0 +1,24 @@ +{ + "type": "file granularity", + + "inputDir": "src/test/resources/methodSplitting/", + "outputDir": "output", + + "parser": { + "type": "antlr", + "extensions": ["java", "js"] + }, + "filters": [ + { + "type": "max tree size", + "maxTreeSize": 1000 + } + ], + "problem": { + "type": "label with filepath" + }, + "storage": { + "type": "ast", + "format": "csv" + } +} diff --git a/configs/parse-files.json b/configs/parse-files.json deleted file mode 100644 index e464aea5..00000000 --- a/configs/parse-files.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "type": "file granularity", - "inputDir": "src/test/resources/methodSplitting/", - "outputDir": "output", - "parserConfig": { - "type": "antlr", - "extensions": ["java"] - }, - "problemConfig": { - "type": "filepath" - }, - "storageCreatorConfig": { - "type": "csv ast" - } -} diff --git a/configs/paths-from-files.json b/configs/paths-from-files.json new file mode 100644 index 00000000..91db0fa2 --- /dev/null +++ b/configs/paths-from-files.json @@ -0,0 +1,25 
@@ +{ + "type": "file granularity", + + "inputDir": "src/test/resources/methodSplitting/", + "outputDir": "output", + + "parser": { + "type": "antlr", + "extensions": ["java", "js"] + }, + "filters": [ + { + "type": "max tree size", + "maxTreeSize": 1000 + } + ], + "problem": { + "type": "label with filepath" + }, + "storage": { + "type": "code2vec paths", + "maxPathLength": 5, + "maxPathWidth": 5 + } +} diff --git a/src/main/kotlin/astminer/config/FilterConfigs.kt b/src/main/kotlin/astminer/config/FilterConfigs.kt index 1f86df8b..18f8aac8 100644 --- a/src/main/kotlin/astminer/config/FilterConfigs.kt +++ b/src/main/kotlin/astminer/config/FilterConfigs.kt @@ -1,6 +1,7 @@ package astminer.config import astminer.filters.* +import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable import kotlinx.serialization.Transient @@ -10,6 +11,7 @@ sealed class FileFilterConfig { } @Serializable +@SerialName("max tree size") data class FileTreeSizeFilterConfig(val maxTreeSize: Int) : FileFilterConfig() { @Transient override val filter = FileTreeSizeFilter(maxTreeSize) @@ -21,30 +23,35 @@ sealed class FunctionFilterConfig { } @Serializable -data class ModifierFilterConfig(val excludeModifiers: List) : FunctionFilterConfig() { +@SerialName("exclude functions with modifiers") +data class ModifierFilterConfig(val modifiers: List) : FunctionFilterConfig() { @Transient - override val filter = ModifierFilter(excludeModifiers) + override val filter = ModifierFilter(modifiers) } @Serializable -data class AnnotationFilterConfig(val excludeAnnotations: List) : FunctionFilterConfig() { +@SerialName("exclude functions with annotations") +data class AnnotationFilterConfig(val annotations: List) : FunctionFilterConfig() { @Transient - override val filter = AnnotationFilter(excludeAnnotations) + override val filter = AnnotationFilter(annotations) } @Serializable +@SerialName("exclude constructors") class ConstructorFilterConfig : FunctionFilterConfig() { @Transient 
override val filter = ConstructorFilter } @Serializable +@SerialName("by function name length") data class FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : FunctionFilterConfig() { @Transient override val filter = FunctionNameWordsNumberFilter(maxWordsNumber) } @Serializable +@SerialName("by length of any token") data class FunctionAnyNodeWordsNumberFilterConfig(val maxWordsNumber: Int) : FunctionFilterConfig() { @Transient override val filter = FunctionAnyNodeWordsNumberFilter(maxWordsNumber) diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt index 9aa579b7..3d39005c 100644 --- a/src/main/kotlin/astminer/config/PipelineConfig.kt +++ b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -11,11 +11,11 @@ sealed class PipelineConfig data class FilePipelineConfig( val inputDir: String, val outputDir: String, - val parserConfig: ParserConfig, - val filterConfigs: List = emptyList(), - val problemConfig: FileProblemConfig, + @SerialName("parser") val parserConfig: ParserConfig, + @SerialName("filters") val filterConfigs: List = emptyList(), + @SerialName("problem") val problemConfig: FileProblemConfig, val excludedNodeTypes: List = emptyList(), - val storageCreatorConfig: StorageCreatorConfig + @SerialName("storage") val storageCreatorConfig: StorageCreatorConfig ) : PipelineConfig() @Serializable @@ -23,11 +23,11 @@ data class FilePipelineConfig( data class FunctionPipelineConfig( val inputDir: String, val outputDir: String, - val parserConfig: ParserConfig, - val filterConfigs: List = emptyList(), - val problemConfig: FunctionProblemConfig, + @SerialName("parser") val parserConfig: ParserConfig, + @SerialName("filters") val filterConfigs: List = emptyList(), + @SerialName("problem") val problemConfig: FunctionProblemConfig, val excludedNodeTypes: List = emptyList(), - val storageCreatorConfig: StorageCreatorConfig + @SerialName("storage") val storageCreatorConfig: StorageCreatorConfig ) : 
PipelineConfig() @Serializable diff --git a/src/main/kotlin/astminer/config/ProblemConfigs.kt b/src/main/kotlin/astminer/config/ProblemConfigs.kt index dbc628bf..6a5d99b2 100644 --- a/src/main/kotlin/astminer/config/ProblemConfigs.kt +++ b/src/main/kotlin/astminer/config/ProblemConfigs.kt @@ -11,14 +11,14 @@ sealed class FileProblemConfig { } @Serializable -@SerialName("filepath") +@SerialName("label with filepath") class FilePathExtractorConfig : FileProblemConfig() { @Transient override val problem = FilePathExtractor } @Serializable -@SerialName("foldername") +@SerialName("label with folder name") class FolderNameExtractorConfig : FileProblemConfig() { @Transient override val problem = FolderExtractor diff --git a/src/main/kotlin/astminer/config/StorageCreatorConfig.kt b/src/main/kotlin/astminer/config/StorageCreatorConfig.kt index d201b2a2..5da4fc42 100644 --- a/src/main/kotlin/astminer/config/StorageCreatorConfig.kt +++ b/src/main/kotlin/astminer/config/StorageCreatorConfig.kt @@ -12,18 +12,24 @@ sealed class StorageCreatorConfig { } @Serializable -@SerialName("csv ast") -class CsvAstStorageCreatorConfig : StorageCreatorConfig() { - override fun getCreator(outputFolderPath: String) = CsvAstStorageCreator(outputFolderPath) +enum class AstStorageFormat { + @SerialName("dot") Dot, + @SerialName("csv") Csv } @Serializable -@SerialName("dot ast") -data class DotAstStorageCreatorConfig(val tokenProcessor: TokenProcessor = TokenProcessor.Normalize) : - StorageCreatorConfig() { - override fun getCreator(outputFolderPath: String) = DotAstStorageCreator(outputFolderPath, tokenProcessor) -} +@SerialName("ast") +data class AstStorageCreatorConfig( + val format: AstStorageFormat, + val splitTokens: Boolean = false +) : StorageCreatorConfig() { + private val tokenProcessor = if (splitTokens) TokenProcessor.Split else TokenProcessor.Normalize + override fun getCreator(outputFolderPath: String): StorageCreator = when (format) { + AstStorageFormat.Csv -> 
CsvAstStorageCreator(outputFolderPath) + AstStorageFormat.Dot -> DotAstStorageCreator(outputFolderPath, tokenProcessor) + } +} @Serializable @SerialName("code2vec paths") From 48cfbdbf8d30f393d506d7708837309df780905e Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 18 May 2021 13:26:29 +0500 Subject: [PATCH 164/308] renamed StorageCreator to StorageFactory --- .../common/model/FunctionInfoModel.kt | 2 +- .../kotlin/astminer/config/PipelineConfig.kt | 4 +-- ...eatorConfig.kt => StorageFactoryConfig.kt} | 20 ++++++------- .../astminer/examples/Code2VecJavaMethods.kt | 3 +- .../kotlin/astminer/pipeline/GetPipeline.kt | 4 +-- src/main/kotlin/astminer/pipeline/Pipeline.kt | 4 +-- ...StorageCreators.kt => StorageFactories.kt} | 14 +++++----- ...rTest.kt => AbstractStorageFactoryTest.kt} | 12 ++++---- .../Code2VecExtractionPipelineTest.kt | 5 ++-- src/test/kotlin/astminer/pipeline/Mocks.kt | 2 +- .../kotlin/astminer/pipeline/PipelineTest.kt | 28 +++++++++---------- 11 files changed, 48 insertions(+), 50 deletions(-) rename src/main/kotlin/astminer/config/{StorageCreatorConfig.kt => StorageFactoryConfig.kt} (69%) rename src/main/kotlin/astminer/pipeline/{StorageCreators.kt => StorageFactories.kt} (78%) rename src/test/kotlin/astminer/pipeline/{AbstractStorageCreatorTest.kt => AbstractStorageFactoryTest.kt} (65%) diff --git a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt index cd4d88a9..7ba2ede9 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt @@ -6,7 +6,7 @@ interface TreeFunctionSplitter { class FunctionInfoPropertyNotImplementedException(propertyName: String) : UnsupportedOperationException( - "The property $propertyName of FunctionInfo for this language and parser type is not implemented yet. " + + "The property `$propertyName` of FunctionInfo for this language and parser type is not implemented yet. 
" + "Consider implementing it." ) diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt index 3d39005c..7841136e 100644 --- a/src/main/kotlin/astminer/config/PipelineConfig.kt +++ b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -15,7 +15,7 @@ data class FilePipelineConfig( @SerialName("filters") val filterConfigs: List = emptyList(), @SerialName("problem") val problemConfig: FileProblemConfig, val excludedNodeTypes: List = emptyList(), - @SerialName("storage") val storageCreatorConfig: StorageCreatorConfig + @SerialName("storage") val storageFactoryConfig: StorageFactoryConfig ) : PipelineConfig() @Serializable @@ -27,7 +27,7 @@ data class FunctionPipelineConfig( @SerialName("filters") val filterConfigs: List = emptyList(), @SerialName("problem") val problemConfig: FunctionProblemConfig, val excludedNodeTypes: List = emptyList(), - @SerialName("storage") val storageCreatorConfig: StorageCreatorConfig + @SerialName("storage") val storageFactoryConfig: StorageFactoryConfig ) : PipelineConfig() @Serializable diff --git a/src/main/kotlin/astminer/config/StorageCreatorConfig.kt b/src/main/kotlin/astminer/config/StorageFactoryConfig.kt similarity index 69% rename from src/main/kotlin/astminer/config/StorageCreatorConfig.kt rename to src/main/kotlin/astminer/config/StorageFactoryConfig.kt index 5da4fc42..afce1712 100644 --- a/src/main/kotlin/astminer/config/StorageCreatorConfig.kt +++ b/src/main/kotlin/astminer/config/StorageFactoryConfig.kt @@ -7,8 +7,8 @@ import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable @Serializable -sealed class StorageCreatorConfig { - abstract fun getCreator(outputFolderPath: String): StorageCreator +sealed class StorageFactoryConfig { + abstract fun getCreator(outputFolderPath: String): StorageFactory } @Serializable @@ -19,29 +19,29 @@ enum class AstStorageFormat { @Serializable @SerialName("ast") -data class AstStorageCreatorConfig( +data class 
AstStorageFactoryConfig( val format: AstStorageFormat, val splitTokens: Boolean = false -) : StorageCreatorConfig() { +) : StorageFactoryConfig() { private val tokenProcessor = if (splitTokens) TokenProcessor.Split else TokenProcessor.Normalize - override fun getCreator(outputFolderPath: String): StorageCreator = when (format) { - AstStorageFormat.Csv -> CsvAstStorageCreator(outputFolderPath) - AstStorageFormat.Dot -> DotAstStorageCreator(outputFolderPath, tokenProcessor) + override fun getCreator(outputFolderPath: String): StorageFactory = when (format) { + AstStorageFormat.Csv -> CsvAstStorageFactory(outputFolderPath) + AstStorageFormat.Dot -> DotAstStorageFactory(outputFolderPath, tokenProcessor) } } @Serializable @SerialName("code2vec paths") -data class Code2VecPathStorageCreatorConfig( +data class Code2VecPathStorageFactoryConfig( val maxPathLength: Int, val maxPathWidth: Int, val maxTokens: Long? = null, val maxPaths: Long? = null, val maxPathContextsPerEntity: Int? = null, val tokenProcessor: TokenProcessor = TokenProcessor.Normalize -) : StorageCreatorConfig() { - override fun getCreator(outputFolderPath: String) = Code2VecStorageCreator( +) : StorageFactoryConfig() { + override fun getCreator(outputFolderPath: String) = Code2VecStorageFactory( outputFolderPath, PathBasedStorageConfig(maxPathLength, maxPathWidth, maxTokens, maxPaths, maxPathContextsPerEntity), tokenProcessor diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index d13fba7f..5aecaaee 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -2,7 +2,6 @@ package astminer.examples import astminer.config.* import astminer.pipeline.getFunctionPipeline -import astminer.problem.FunctionNameProblem //Retrieve paths from all Java files, using a GumTree parser. 
@@ -19,7 +18,7 @@ fun code2vecJavaMethods() { listOf("java") ), problemConfig = FunctionNamePredictionConfig(), - storageCreatorConfig = Code2VecPathStorageCreatorConfig( + storageFactoryConfig = Code2VecPathStorageFactoryConfig( maxPathLength = 5, maxPathWidth = 5 ) diff --git a/src/main/kotlin/astminer/pipeline/GetPipeline.kt b/src/main/kotlin/astminer/pipeline/GetPipeline.kt index 2cd48e7d..1f28e761 100644 --- a/src/main/kotlin/astminer/pipeline/GetPipeline.kt +++ b/src/main/kotlin/astminer/pipeline/GetPipeline.kt @@ -25,7 +25,7 @@ fun getFilePipeline(filePipelineConfig: FilePipelineConfig): Pipeline( private val filters: List> = emptyList(), private val problem: Problem, private val excludedNodeTypes: List = emptyList(), - private val storageCreator: StorageCreator + private val storageFactory: StorageFactory ) { private fun T.passesThroughFilters() = filters.all { filter -> filter.isFiltered(this) } @@ -23,7 +23,7 @@ class Pipeline( fun run() { for ((extension, entities) in frontend.getEntities()) { - storageCreator.createStorageAndOutputFolder(extension).use { storage -> + storageFactory.createStorageAndOutputFolder(extension).use { storage -> val labeledResults = entities .filter { functionInfo -> functionInfo.passesThroughFilters() } .mapNotNull { problem.process(it) } diff --git a/src/main/kotlin/astminer/pipeline/StorageCreators.kt b/src/main/kotlin/astminer/pipeline/StorageFactories.kt similarity index 78% rename from src/main/kotlin/astminer/pipeline/StorageCreators.kt rename to src/main/kotlin/astminer/pipeline/StorageFactories.kt index 1e66ea14..3c4a0f55 100644 --- a/src/main/kotlin/astminer/pipeline/StorageCreators.kt +++ b/src/main/kotlin/astminer/pipeline/StorageFactories.kt @@ -8,11 +8,11 @@ import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File -interface StorageCreator { +interface StorageFactory { fun createStorageAndOutputFolder(extension: String): Storage } -abstract class 
AbstractStorageCreator(private val outputDirectoryPath: String) : StorageCreator { +abstract class AbstractStorageFactory(private val outputDirectoryPath: String) : StorageFactory { private fun createOutputFolder(extension: String): File { val outputDirectoryForExtension = File(outputDirectoryPath).resolve(extension) outputDirectoryForExtension.mkdir() @@ -28,26 +28,26 @@ abstract class AbstractStorageCreator(private val outputDirectoryPath: String) : /** * Creates CsvAstStorages */ -class CsvAstStorageCreator(outputDirectoryPath: String) : AbstractStorageCreator(outputDirectoryPath) { +class CsvAstStorageFactory(outputDirectoryPath: String) : AbstractStorageFactory(outputDirectoryPath) { override fun initializeStorage(outputFolderPath: String) = CsvAstStorage(outputFolderPath) } /** * Creates DotAstStorages given [tokenProcessor] */ -class DotAstStorageCreator(outputDirectoryPath: String, private val tokenProcessor: TokenProcessor) : - AbstractStorageCreator(outputDirectoryPath) { +class DotAstStorageFactory(outputDirectoryPath: String, private val tokenProcessor: TokenProcessor) : + AbstractStorageFactory(outputDirectoryPath) { override fun initializeStorage(outputFolderPath: String) = DotAstStorage(outputFolderPath, tokenProcessor) } /** * Creates Code2VecStorages given [config] and [tokenProcessor] */ -class Code2VecStorageCreator( +class Code2VecStorageFactory( outputDirectoryPath: String, private val config: PathBasedStorageConfig, private val tokenProcessor: TokenProcessor -) : AbstractStorageCreator(outputDirectoryPath) { +) : AbstractStorageFactory(outputDirectoryPath) { override fun initializeStorage(outputFolderPath: String) = Code2VecPathStorage(outputFolderPath, config, tokenProcessor) } diff --git a/src/test/kotlin/astminer/pipeline/AbstractStorageCreatorTest.kt b/src/test/kotlin/astminer/pipeline/AbstractStorageFactoryTest.kt similarity index 65% rename from src/test/kotlin/astminer/pipeline/AbstractStorageCreatorTest.kt rename to 
src/test/kotlin/astminer/pipeline/AbstractStorageFactoryTest.kt index 7c27b18e..dd3ea8c4 100644 --- a/src/test/kotlin/astminer/pipeline/AbstractStorageCreatorTest.kt +++ b/src/test/kotlin/astminer/pipeline/AbstractStorageFactoryTest.kt @@ -7,7 +7,7 @@ import java.io.File import java.nio.file.Files.createTempDirectory import kotlin.test.assertEquals -internal class AbstractStorageCreatorTest { +internal class AbstractStorageFactoryTest { lateinit var tempDir: File @Before @@ -16,18 +16,18 @@ internal class AbstractStorageCreatorTest { } @Test - fun `test creating a StorageCreator should not alter the output directory`() { - AbstractStorageCreatorImpl(tempDir.path) + fun `test creating a StorageFactory should not alter the output directory`() { + AbstractStorageFactoryImpl(tempDir.path) assertEquals(0, tempDir.listFiles()?.size, "There should be no files in the directory") } @Test - fun `test StorageCreator's createStorageAndOutputFolder should create a subdirectory named after the file extension`() { - AbstractStorageCreatorImpl(tempDir.path).createStorageAndOutputFolder("file extension") + fun `test StorageFactory's createStorageAndOutputFolder should create a subdirectory named after the file extension`() { + AbstractStorageFactoryImpl(tempDir.path).createStorageAndOutputFolder("file extension") assertEquals(listOf("file extension"), tempDir.listFiles()?.map { it.name }) } - class AbstractStorageCreatorImpl(outputFolderPath: String) : AbstractStorageCreator(outputFolderPath) { + class AbstractStorageFactoryImpl(outputFolderPath: String) : AbstractStorageFactory(outputFolderPath) { override fun initializeStorage(outputFolderPath: String): Storage = DummyStorage() } } diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt index d99a0468..f5b77f1d 100644 --- a/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt +++ 
b/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt @@ -1,11 +1,10 @@ package astminer.pipeline import astminer.cli.util.verifyPathContextExtraction -import astminer.config.Code2VecPathStorageCreatorConfig +import astminer.config.Code2VecPathStorageFactoryConfig import astminer.config.FilePathExtractorConfig import astminer.config.FilePipelineConfig import astminer.config.ParserConfig -import astminer.problem.FilePathExtractor import org.junit.Test import java.io.File import java.nio.file.Files.createTempDirectory @@ -27,7 +26,7 @@ internal class Code2VecExtractionPipelineTest { languages ), problemConfig = FilePathExtractorConfig(), - storageCreatorConfig = Code2VecPathStorageCreatorConfig( + storageFactoryConfig = Code2VecPathStorageFactoryConfig( maxPathLength = 8, maxPathWidth = 3 ) diff --git a/src/test/kotlin/astminer/pipeline/Mocks.kt b/src/test/kotlin/astminer/pipeline/Mocks.kt index 40bb45d1..942c7958 100644 --- a/src/test/kotlin/astminer/pipeline/Mocks.kt +++ b/src/test/kotlin/astminer/pipeline/Mocks.kt @@ -41,7 +41,7 @@ class BambooLabelExtractor : Problem { override fun process(entity: DummyNode): LabeledResult = entity.labeledWith(getLabel(entity)) } -class DummyStorageCreator : StorageCreator { +class DummyStorageFactory : StorageFactory { private val storages = mutableMapOf() val results: Map> diff --git a/src/test/kotlin/astminer/pipeline/PipelineTest.kt b/src/test/kotlin/astminer/pipeline/PipelineTest.kt index 17173e85..5d5bfc04 100644 --- a/src/test/kotlin/astminer/pipeline/PipelineTest.kt +++ b/src/test/kotlin/astminer/pipeline/PipelineTest.kt @@ -10,11 +10,11 @@ internal class PipelineTest { val extensionsToNodeNames = mapOf("a" to "A", "b" to "B") } - lateinit var storageCreator: DummyStorageCreator + lateinit var storageFactory: DummyStorageFactory @Before fun init() { - storageCreator = DummyStorageCreator() + storageFactory = DummyStorageFactory() } @Test @@ -22,14 +22,14 @@ internal class PipelineTest { Pipeline( frontend 
= DummyPipelineFrontend(extensionsToNodeNames), problem = DummyLabelExtractor(), - storageCreator = storageCreator + storageFactory = storageFactory ).run() val expectedResults = mapOf( "a" to setOf("label A"), "b" to setOf("label B") ) - assertEquals(expectedResults, storageCreator.results) + assertEquals(expectedResults, storageFactory.results) } @Test @@ -38,14 +38,14 @@ internal class PipelineTest { frontend = DummyPipelineFrontend(extensionsToNodeNames), filters = listOf(DummyFilter("B")), problem = DummyLabelExtractor(), - storageCreator = storageCreator + storageFactory = storageFactory ).run() val expectedResults = mapOf( "a" to setOf("label A"), "b" to setOf() ) - assertEquals(expectedResults, storageCreator.results) + assertEquals(expectedResults, storageFactory.results) } @Test @@ -54,14 +54,14 @@ internal class PipelineTest { frontend = DummyPipelineFrontend(extensionsToNodeNames), filters = listOf(DummyFilter()), problem = DummyLabelExtractor("B"), - storageCreator = storageCreator + storageFactory = storageFactory ).run() val expectedResults = mapOf( "a" to setOf("label A"), "b" to setOf() ) - assertEquals(expectedResults, storageCreator.results) + assertEquals(expectedResults, storageFactory.results) } @Test @@ -70,14 +70,14 @@ internal class PipelineTest { frontend = DummyPipelineFrontend(extensionsToNodeNames), filters = listOf(DummyFilter("A")), problem = DummyLabelExtractor("B"), - storageCreator = storageCreator + storageFactory = storageFactory ).run() val expectedResults = mapOf( "a" to setOf(), "b" to setOf() ) - assertEquals(expectedResults, storageCreator.results) + assertEquals(expectedResults, storageFactory.results) } @Test @@ -87,13 +87,13 @@ internal class PipelineTest { Pipeline( frontend = SimplePipelineFrontend(listOf(node)), problem = BambooLabelExtractor(), - storageCreator = storageCreator + storageFactory = storageFactory ).run() val expectedResults = mapOf( "" to setOf("Root Date: Tue, 18 May 2021 14:24:31 +0500 Subject: [PATCH 
165/308] switched to yaml configs --- build.gradle.kts | 1 + configs/function-name-prediction-ast.json | 28 ----------------------- configs/function-name-prediction-ast.yml | 21 +++++++++++++++++ configs/parse-files-csv.json | 24 ------------------- configs/parse-files-csv.yml | 19 +++++++++++++++ configs/paths-from-files.json | 25 -------------------- configs/paths-from-files.yml | 21 +++++++++++++++++ src/main/kotlin/astminer/Main.kt | 7 +++++- 8 files changed, 68 insertions(+), 78 deletions(-) delete mode 100644 configs/function-name-prediction-ast.json create mode 100644 configs/function-name-prediction-ast.yml delete mode 100644 configs/parse-files-csv.json create mode 100644 configs/parse-files-csv.yml delete mode 100644 configs/paths-from-files.json create mode 100644 configs/paths-from-files.yml diff --git a/build.gradle.kts b/build.gradle.kts index fd30c6e8..14bf8f6a 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -69,6 +69,7 @@ dependencies { jmhImplementation("org.openjdk.jmh:jmh-core:1.21") jmhImplementation("org.openjdk.jmh:jmh-generator-annprocess:1.21") + implementation("com.charleskorn.kaml:kaml:0.33.0") } val shadowJar = shadowJar { diff --git a/configs/function-name-prediction-ast.json b/configs/function-name-prediction-ast.json deleted file mode 100644 index 95517d44..00000000 --- a/configs/function-name-prediction-ast.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "type": "function granularity", - - "inputDir": "src/test/resources/methodSplitting/", - "outputDir": "output", - - "parser": { - "type": "antlr", - "extensions": ["java"] - }, - "filters": [ - { - "type": "by function name length", - "maxWordsNumber": 10 - }, - { - "type": "by length of any token", - "maxWordsNumber": 100 - } - ], - "problem": { - "type": "function name prediction" - }, - "storage": { - "type": "ast", - "format": "csv" - } -} diff --git a/configs/function-name-prediction-ast.yml b/configs/function-name-prediction-ast.yml new file mode 100644 index 00000000..04b65311 
--- /dev/null +++ b/configs/function-name-prediction-ast.yml @@ -0,0 +1,21 @@ +type: 'function granularity' + +inputDir: 'src/test/resources/methodSplitting/' +outputDir: 'output' + +parser: + type: 'antlr' + extensions: ['java'] + +filters: + - type: 'by function name length' + maxWordsNumber: 10 + - type: 'by length of any token' + maxWordsNumber: 100 + +problem: + type: 'function name prediction' + +storage: + type: 'ast' + format: 'csv' diff --git a/configs/parse-files-csv.json b/configs/parse-files-csv.json deleted file mode 100644 index e15ea6c1..00000000 --- a/configs/parse-files-csv.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "type": "file granularity", - - "inputDir": "src/test/resources/methodSplitting/", - "outputDir": "output", - - "parser": { - "type": "antlr", - "extensions": ["java", "js"] - }, - "filters": [ - { - "type": "max tree size", - "maxTreeSize": 1000 - } - ], - "problem": { - "type": "label with filepath" - }, - "storage": { - "type": "ast", - "format": "csv" - } -} diff --git a/configs/parse-files-csv.yml b/configs/parse-files-csv.yml new file mode 100644 index 00000000..a0ff9403 --- /dev/null +++ b/configs/parse-files-csv.yml @@ -0,0 +1,19 @@ +type: 'file granularity' + +inputDir: 'src/test/resources/methodSplitting/' +outputDir: 'output' + +parser: + type: 'antlr' + extensions: ['java', 'js'] + +filters: + - type: 'max tree size' + maxTreeSize: 1000 + +problem: + type: 'label with filepath' + +storage: + type: 'ast' + format: 'csv' diff --git a/configs/paths-from-files.json b/configs/paths-from-files.json deleted file mode 100644 index 91db0fa2..00000000 --- a/configs/paths-from-files.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "type": "file granularity", - - "inputDir": "src/test/resources/methodSplitting/", - "outputDir": "output", - - "parser": { - "type": "antlr", - "extensions": ["java", "js"] - }, - "filters": [ - { - "type": "max tree size", - "maxTreeSize": 1000 - } - ], - "problem": { - "type": "label with filepath" - }, - "storage": 
{ - "type": "code2vec paths", - "maxPathLength": 5, - "maxPathWidth": 5 - } -} diff --git a/configs/paths-from-files.yml b/configs/paths-from-files.yml new file mode 100644 index 00000000..bf7b063b --- /dev/null +++ b/configs/paths-from-files.yml @@ -0,0 +1,21 @@ +type: 'file granularity' + +inputDir: 'src/test/resources/methodSplitting/' +outputDir: 'output' + +parser: + type: 'antlr' + extensions: ['java', 'js'] + +filters: + - type: 'max tree size' + maxTreeSize: 1000 + +problem: + type: 'label with filepath' + + +storage: + type: 'code2vec paths' + maxPathLength: 5 + maxPathWidth: 5 diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index f06a5f29..e35709f5 100644 --- a/src/main/kotlin/astminer/Main.kt +++ b/src/main/kotlin/astminer/Main.kt @@ -2,12 +2,15 @@ package astminer import astminer.config.PipelineConfig import astminer.pipeline.getPipeline +import com.charleskorn.kaml.PolymorphismStyle import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.arguments.argument import com.github.ajalt.clikt.parameters.types.file import kotlinx.serialization.SerializationException import kotlinx.serialization.decodeFromString import kotlinx.serialization.json.Json +import com.charleskorn.kaml.Yaml +import com.charleskorn.kaml.YamlConfiguration import java.io.File @@ -18,9 +21,11 @@ class PipelineRunner : CliktCommand(name = "") { readable = true ) + private val yaml = Yaml(configuration = YamlConfiguration(polymorphismStyle = PolymorphismStyle.Property)) + override fun run() { val config = try { - Json.decodeFromString(config.readText()) + yaml.decodeFromString(config.readText()) } catch (e: SerializationException) { // TODO: should log it also println("Error: $e") From 45d085c817dac0d840035541b6ce2b4725cf48fc Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 18 May 2021 15:02:54 +0500 Subject: [PATCH 166/308] fixed problems after merge --- .../python/GumTreePythonMethodSplitterTest.kt | 175 ++++++++++++++++++ 
1 file changed, 175 insertions(+) diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt index e69de29b..3d2d6685 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt @@ -0,0 +1,175 @@ +package astminer.parse.gumtree.python + +import astminer.checkExecutable +import astminer.common.model.FunctionInfo +import astminer.parse.gumtree.GumTreeNode +import org.junit.Assume +import org.junit.Before +import org.junit.Test +import java.io.File +import kotlin.test.assertEquals +import kotlin.test.assertNotNull + +class GumTreePythonMethodSplitterTest { + private fun parse(filename: String): GumTreeNode = + GumTreePythonParser().parseInputStream(File(filename).inputStream()) + + private fun splitMethods(filename: String): Collection> = + GumTreePythonFunctionSplitter().splitIntoMethods(parse(filename)) + + private fun createPath(file: String) = "src/test/resources/gumTreeMethodSplitter/$file" + + @Before + fun checkPythonParser() = Assume.assumeTrue(checkExecutable("pythonparser")) + + @Test + fun methodsCountTest() { + assertEquals(7, splitMethods(createPath("1.py")).size) + assertEquals(9, splitMethods(createPath("2.py")).size) + assertEquals(3, splitMethods(createPath("3.py")).size) + assertEquals(5, splitMethods(createPath("4.py")).size) + } + + @Test + fun funcNamesTest() { + val realNames = setOf( + "no_args_func", "with_args_no_typed", "with_typed_args", + "with_typed_return_no_args", "full_typed", + "func_dif_args_typed_return", "complex_args_full_typed" + ) + val methodInfos = splitMethods(createPath("1.py")) + val parsedNames = methodInfos.map { it.name }.toSet() + assertEquals(realNames, parsedNames) + } + + @Test + fun methodInfoTest1TypedArgs() { + val methodInfos = splitMethods(createPath("1.py")) + val method = 
methodInfos.firstOrNull { it.name == "complex_args_full_typed" } + assertNotNull(method) + with(method) { + assertEquals("complex_args_full_typed", name) + assertEquals(null, returnType) + assertEquals(1, parameters.size) + assertEquals(listOf("node"), parameters.map { it.name }.toList()) + assertEquals(listOf("JsonNodeType"), parameters.map { it.type }.toList()) + } + } + + @Test + fun methodInfoTest2ManyArgs() { + val methodInfos = splitMethods(createPath("1.py")) + val method = methodInfos.firstOrNull { it.name == "func_dif_args_typed_return" } + assertNotNull(method) + with(method) { + assertEquals("func_dif_args_typed_return", name) + assertEquals("int", returnType) + assertEquals(6, parameters.size) + assertEquals(listOf("a", "b", "c", "d", "e", "f"), parameters.map { it.name }.toList()) + assertEquals(emptyList(), parameters.mapNotNull { it.type }.toList()) + } + } + + @Test + fun methodInfoTest3EnclosingClass() { + val methodInfos = splitMethods(createPath("2.py")) + val method = methodInfos.firstOrNull { it.name == "foo_typed" } + assertNotNull(method) + with(method) { + assertEquals("foo_typed", name) + assertEquals("A", enclosingElement?.name) + assertEquals(null, returnType) + assertEquals(3, parameters.size) + assertEquals(listOf("self", "x", "y"), parameters.map { it.name }.toList()) + assertEquals(listOf(null, "int", "int"), parameters.map { it.type }.toList()) + } + } + + @Test + fun methodInfoTest4EnclosingClass() { + val methodInfos = splitMethods(createPath("2.py")) + val method = methodInfos.firstOrNull { it.name == "bar_typed" } + assertNotNull(method) + with(method) { + assertEquals("bar_typed", name) + assertEquals("C", enclosingElement?.name) + assertEquals(null, returnType) + assertEquals(2, parameters.size) + assertEquals(listOf("self", "x"), parameters.map { it.name }.toList()) + assertEquals(listOf(null, "int"), parameters.map { it.type }.toList()) + } + } + + @Test + fun methodInfoTest5AsyncDef() { + val methodInfos = 
splitMethods(createPath("3.py")) + val method = methodInfos.firstOrNull { it.name == "async_schrecklich_typed" } + assertNotNull(method) + with(method) { + assertEquals("async_schrecklich_typed", name) + assertEquals("AsyncFunctionDef", root.getTypeLabel()) + assertEquals(null, enclosingElement?.name) + assertEquals("int", returnType) + assertEquals(4, parameters.size) + assertEquals(listOf("event", "x", "args", "kwargs"), parameters.map { it.name }.toList()) + assertEquals(listOf("str", "int", null, null), parameters.map { it.type }.toList()) + } + } + + @Test + fun methodInfoTest6Doc() { + val methodInfos = splitMethods(createPath("3.py")) + val method = methodInfos.firstOrNull { it.name == "async_simple_no_typed" } + assertNotNull(method) + with(method) { + assertEquals("async_simple_no_typed", name) + assertEquals("AsyncFunctionDef", root.getTypeLabel()) + assertEquals(null, enclosingElement?.name) + assertEquals( + "\n async doc\n ", + root.getChildOfType("body") + ?.getChildOfType("Expr") + ?.getChildOfType("Constant-str") + ?.getToken() + ) + assertEquals(4, parameters.size) + assertEquals( + listOf("gh", "original_issue", "branch", "backport_pr_number"), + parameters.map { it.name }.toList() + ) + assertEquals(listOf(null, null, null, null), parameters.map { it.type }.toList()) + } + } + + @Test + fun methodInfoTest7InnerFunc() { + val methodInfos = splitMethods(createPath("4.py")) + val method = methodInfos.firstOrNull { it.name == "foo_2" } + assertNotNull(method) + with(method) { + assertEquals("foo_2", name) + assertEquals("foo_1", method.root.parent?.wrappedNode?.parent?.label) + assertEquals(null, enclosingElement?.name) + assertEquals("None", returnType) + assertEquals(1, parameters.size) + assertEquals(listOf("c"), parameters.map { it.name }.toList()) + assertEquals(listOf(null), parameters.map { it.type }.toList()) + } + } + + @Test + fun methodInfoTest8InnerFunc() { + val methodInfos = splitMethods(createPath("4.py")) + val method = 
methodInfos.firstOrNull { it.name == "bar_2" } + assertNotNull(method) + with(method) { + assertEquals("bar_2", name) + assertEquals("bar_1", method.root.parent?.wrappedNode?.parent?.label) + assertEquals(null, enclosingElement?.name) + assertEquals("int", returnType) + assertEquals(2, parameters.size) + assertEquals(listOf("d", "e"), parameters.map { it.name }.toList()) + assertEquals(listOf("int", "int"), parameters.map { it.type }.toList()) + } + } +} From 545052c00b6b6bc162a1f46e29ca0d6f836eacf6 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 18 May 2021 18:13:25 +0300 Subject: [PATCH 167/308] gumtreefunctioninfo return type added --- .../{GumTreeFunctionInfo.kt => GumTreePythonFunctionInfo.kt} | 4 +++- .../parse/gumtree/python/GumTreePythonFunctionSplitter.kt | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) rename src/main/kotlin/astminer/parse/gumtree/python/{GumTreeFunctionInfo.kt => GumTreePythonFunctionInfo.kt} (92%) diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreeFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt similarity index 92% rename from src/main/kotlin/astminer/parse/gumtree/python/GumTreeFunctionInfo.kt rename to src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt index 76df1ffd..68cd3e92 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreeFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -7,7 +7,7 @@ import astminer.common.model.FunctionInfoParameter import astminer.parse.findEnclosingElementBy import astminer.parse.gumtree.GumTreeNode -class GumTreeFunctionInfo(override val root: GumTreeNode) : FunctionInfo { +class GumTreePythonFunctionInfo(override val root: GumTreeNode) : FunctionInfo { companion object { private object TypeLabels { const val classDefinition = "ClassDef" @@ -35,11 +35,13 @@ class GumTreeFunctionInfo(override val root: GumTreeNode) : FunctionInfo = collectParameters() 
override val enclosingElement: EnclosingElement? = collectEnclosingClass() + override val returnType: String? = getElementType(root)?.getToken() private fun getElementType(node: GumTreeNode): GumTreeNode? { if (node.getTypeLabel() == TypeLabels.arg) { return node.getChildOfType(TypeLabels.nameLoad) } + // if return statement has "Constant-`Type`" return value => function type is `Type` if (TypeLabels.methodDefinitions.contains(node.getTypeLabel())) { return node.getChildOfType(TypeLabels.body)?.getChildOfType(TypeLabels.returnTypeLabel)?.let { it.getChildren().firstOrNull { child -> diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt index 0e168efa..a28dcbd7 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt @@ -15,6 +15,6 @@ class GumTreePythonFunctionSplitter : TreeFunctionSplitter { override fun splitIntoMethods(root: GumTreeNode): Collection> { val methodRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.getTypeLabel()) } - return methodRoots.map { GumTreeFunctionInfo(it as GumTreeNode) } + return methodRoots.map { GumTreePythonFunctionInfo(it as GumTreeNode) } } } From a07e440391a9068b1e01be9066aa742f7efe2569 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 18 May 2021 18:22:25 +0300 Subject: [PATCH 168/308] keyword Constant added to the tests --- .../gumtree/python/GumTreePythonMethodSplitterTest.kt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt index 3d2d6685..6b2447ca 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt +++ 
b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt @@ -63,7 +63,7 @@ class GumTreePythonMethodSplitterTest { assertNotNull(method) with(method) { assertEquals("func_dif_args_typed_return", name) - assertEquals("int", returnType) + assertEquals("Constant-int", returnType) assertEquals(6, parameters.size) assertEquals(listOf("a", "b", "c", "d", "e", "f"), parameters.map { it.name }.toList()) assertEquals(emptyList(), parameters.mapNotNull { it.type }.toList()) @@ -109,7 +109,7 @@ class GumTreePythonMethodSplitterTest { assertEquals("async_schrecklich_typed", name) assertEquals("AsyncFunctionDef", root.getTypeLabel()) assertEquals(null, enclosingElement?.name) - assertEquals("int", returnType) + assertEquals("Constant-int", returnType) assertEquals(4, parameters.size) assertEquals(listOf("event", "x", "args", "kwargs"), parameters.map { it.name }.toList()) assertEquals(listOf("str", "int", null, null), parameters.map { it.type }.toList()) @@ -150,7 +150,7 @@ class GumTreePythonMethodSplitterTest { assertEquals("foo_2", name) assertEquals("foo_1", method.root.parent?.wrappedNode?.parent?.label) assertEquals(null, enclosingElement?.name) - assertEquals("None", returnType) + assertEquals("Constant-NoneType", returnType) assertEquals(1, parameters.size) assertEquals(listOf("c"), parameters.map { it.name }.toList()) assertEquals(listOf(null), parameters.map { it.type }.toList()) @@ -166,7 +166,7 @@ class GumTreePythonMethodSplitterTest { assertEquals("bar_2", name) assertEquals("bar_1", method.root.parent?.wrappedNode?.parent?.label) assertEquals(null, enclosingElement?.name) - assertEquals("int", returnType) + assertEquals("Constant-int", returnType) assertEquals(2, parameters.size) assertEquals(listOf("d", "e"), parameters.map { it.name }.toList()) assertEquals(listOf("int", "int"), parameters.map { it.type }.toList()) From 00bc6c4ab30924793ac7bcf1124d115f0b7b6936 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 18 May 2021 18:27:14 +0300 
Subject: [PATCH 169/308] return type fix --- .../astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt index 68cd3e92..ab517137 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -35,7 +35,7 @@ class GumTreePythonFunctionInfo(override val root: GumTreeNode) : FunctionInfo = collectParameters() override val enclosingElement: EnclosingElement? = collectEnclosingClass() - override val returnType: String? = getElementType(root)?.getToken() + override val returnType: String? = getElementType(root)?.getTypeLabel() private fun getElementType(node: GumTreeNode): GumTreeNode? { if (node.getTypeLabel() == TypeLabels.arg) { From 45ee8e92bb67412c9f09c5d739dc0d98514926a8 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 18 May 2021 18:56:53 +0300 Subject: [PATCH 170/308] params possible fix --- .../astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt index ab517137..648ad25a 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -69,7 +69,7 @@ class GumTreePythonFunctionInfo(override val root: GumTreeNode) : FunctionInfo when (node.getTypeLabel()) { - in TypeLabels.funcArgsTypesNodes -> node.getChildren().flatMap { it.getChildren() } + in TypeLabels.funcArgsTypesNodes -> node.getChildren() .filter { it.getTypeLabel() == TypeLabels.arg } TypeLabels.vararg, TypeLabels.kwarg -> listOf(node) else -> 
emptyList() From 70538104c25cf3c5990adb8a25fe105beaa88e33 Mon Sep 17 00:00:00 2001 From: furetur Date: Wed, 19 May 2021 14:27:08 +0500 Subject: [PATCH 171/308] refactored pipeline and removed all redundant entities --- src/main/kotlin/astminer/Main.kt | 5 +- .../kotlin/astminer/config/PipelineConfig.kt | 41 ++++--- .../kotlin/astminer/config/StorageConfig.kt | 40 ++++++ .../astminer/config/StorageFactoryConfig.kt | 49 -------- .../astminer/examples/Code2VecJavaMethods.kt | 8 +- .../kotlin/astminer/pipeline/GetPipeline.kt | 42 ------- src/main/kotlin/astminer/pipeline/Pipeline.kt | 73 +++++++---- .../astminer/pipeline/PipelineBranch.kt | 42 +++++++ .../astminer/pipeline/PipelineFrontend.kt | 86 ------------- .../astminer/pipeline/StorageFactories.kt | 54 -------- .../pipeline/AbstractStorageFactoryTest.kt | 33 ----- .../Code2VecExtractionPipelineTest.kt | 41 ------- .../pipeline/CompositePipelineFrontendTest.kt | 23 ---- src/test/kotlin/astminer/pipeline/Mocks.kt | 69 ----------- .../kotlin/astminer/pipeline/PipelineTest.kt | 115 ------------------ src/test/kotlin/astminer/pipeline/Utils.kt | 20 --- 16 files changed, 159 insertions(+), 582 deletions(-) create mode 100644 src/main/kotlin/astminer/config/StorageConfig.kt delete mode 100644 src/main/kotlin/astminer/config/StorageFactoryConfig.kt delete mode 100644 src/main/kotlin/astminer/pipeline/GetPipeline.kt create mode 100644 src/main/kotlin/astminer/pipeline/PipelineBranch.kt delete mode 100644 src/main/kotlin/astminer/pipeline/PipelineFrontend.kt delete mode 100644 src/main/kotlin/astminer/pipeline/StorageFactories.kt delete mode 100644 src/test/kotlin/astminer/pipeline/AbstractStorageFactoryTest.kt delete mode 100644 src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt delete mode 100644 src/test/kotlin/astminer/pipeline/CompositePipelineFrontendTest.kt delete mode 100644 src/test/kotlin/astminer/pipeline/Mocks.kt delete mode 100644 src/test/kotlin/astminer/pipeline/PipelineTest.kt delete mode 
100644 src/test/kotlin/astminer/pipeline/Utils.kt diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index e35709f5..eb058ed4 100644 --- a/src/main/kotlin/astminer/Main.kt +++ b/src/main/kotlin/astminer/Main.kt @@ -1,14 +1,13 @@ package astminer import astminer.config.PipelineConfig -import astminer.pipeline.getPipeline +import astminer.pipeline.Pipeline import com.charleskorn.kaml.PolymorphismStyle import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.arguments.argument import com.github.ajalt.clikt.parameters.types.file import kotlinx.serialization.SerializationException import kotlinx.serialization.decodeFromString -import kotlinx.serialization.json.Json import com.charleskorn.kaml.Yaml import com.charleskorn.kaml.YamlConfiguration import java.io.File @@ -31,7 +30,7 @@ class PipelineRunner : CliktCommand(name = "") { println("Error: $e") return } - getPipeline(config).run() + Pipeline(config).run() } } diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt index 7841136e..55c19796 100644 --- a/src/main/kotlin/astminer/config/PipelineConfig.kt +++ b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -4,30 +4,41 @@ import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable @Serializable -sealed class PipelineConfig +sealed class PipelineConfig { + abstract val inputDir: String + abstract val outputDir: String + abstract val parserConfig: ParserConfig + abstract val storageConfig: StorageConfig +} @Serializable @SerialName("file granularity") data class FilePipelineConfig( - val inputDir: String, - val outputDir: String, - @SerialName("parser") val parserConfig: ParserConfig, - @SerialName("filters") val filterConfigs: List = emptyList(), - @SerialName("problem") val problemConfig: FileProblemConfig, - val excludedNodeTypes: List = emptyList(), - @SerialName("storage") val storageFactoryConfig: StorageFactoryConfig + 
override val inputDir: String, + override val outputDir: String, + @SerialName("parser") + override val parserConfig: ParserConfig, + @SerialName("filters") + val filterConfigs: List = emptyList(), + @SerialName("problem") + val problemConfig: FileProblemConfig, + @SerialName("storage") + override val storageConfig: StorageConfig ) : PipelineConfig() @Serializable @SerialName("function granularity") data class FunctionPipelineConfig( - val inputDir: String, - val outputDir: String, - @SerialName("parser") val parserConfig: ParserConfig, - @SerialName("filters") val filterConfigs: List = emptyList(), - @SerialName("problem") val problemConfig: FunctionProblemConfig, - val excludedNodeTypes: List = emptyList(), - @SerialName("storage") val storageFactoryConfig: StorageFactoryConfig + override val inputDir: String, + override val outputDir: String, + @SerialName("parser") + override val parserConfig: ParserConfig, + @SerialName("filters") + val filterConfigs: List = emptyList(), + @SerialName("problem") + val problemConfig: FunctionProblemConfig, + @SerialName("storage") + override val storageConfig: StorageConfig ) : PipelineConfig() @Serializable diff --git a/src/main/kotlin/astminer/config/StorageConfig.kt b/src/main/kotlin/astminer/config/StorageConfig.kt new file mode 100644 index 00000000..1960bfb5 --- /dev/null +++ b/src/main/kotlin/astminer/config/StorageConfig.kt @@ -0,0 +1,40 @@ +package astminer.config + +import astminer.storage.TokenProcessor +import astminer.storage.path.PathBasedStorageConfig +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable +import kotlinx.serialization.Transient + +@Serializable +sealed class StorageConfig + +@Serializable +enum class AstStorageFormat { + @SerialName("dot") + Dot, + @SerialName("csv") + Csv +} + +@Serializable +@SerialName("ast") +data class AstStorageConfig( + val format: AstStorageFormat, + val splitTokens: Boolean = false +) : StorageConfig() + +@Serializable +@SerialName("code2vec 
paths") +data class Code2VecPathStorageConfig( + val maxPathLength: Int, + val maxPathWidth: Int, + val maxTokens: Long? = null, + val maxPaths: Long? = null, + val maxPathContextsPerEntity: Int? = null, + val tokenProcessor: TokenProcessor = TokenProcessor.Normalize +) : StorageConfig() { + @Transient + val pathBasedStorageConfig = + PathBasedStorageConfig(maxPathLength, maxPathWidth, maxTokens, maxPaths, maxPathContextsPerEntity) +} diff --git a/src/main/kotlin/astminer/config/StorageFactoryConfig.kt b/src/main/kotlin/astminer/config/StorageFactoryConfig.kt deleted file mode 100644 index afce1712..00000000 --- a/src/main/kotlin/astminer/config/StorageFactoryConfig.kt +++ /dev/null @@ -1,49 +0,0 @@ -package astminer.config - -import astminer.pipeline.* -import astminer.storage.TokenProcessor -import astminer.storage.path.PathBasedStorageConfig -import kotlinx.serialization.SerialName -import kotlinx.serialization.Serializable - -@Serializable -sealed class StorageFactoryConfig { - abstract fun getCreator(outputFolderPath: String): StorageFactory -} - -@Serializable -enum class AstStorageFormat { - @SerialName("dot") Dot, - @SerialName("csv") Csv -} - -@Serializable -@SerialName("ast") -data class AstStorageFactoryConfig( - val format: AstStorageFormat, - val splitTokens: Boolean = false -) : StorageFactoryConfig() { - private val tokenProcessor = if (splitTokens) TokenProcessor.Split else TokenProcessor.Normalize - - override fun getCreator(outputFolderPath: String): StorageFactory = when (format) { - AstStorageFormat.Csv -> CsvAstStorageFactory(outputFolderPath) - AstStorageFormat.Dot -> DotAstStorageFactory(outputFolderPath, tokenProcessor) - } -} - -@Serializable -@SerialName("code2vec paths") -data class Code2VecPathStorageFactoryConfig( - val maxPathLength: Int, - val maxPathWidth: Int, - val maxTokens: Long? = null, - val maxPaths: Long? = null, - val maxPathContextsPerEntity: Int? 
= null, - val tokenProcessor: TokenProcessor = TokenProcessor.Normalize -) : StorageFactoryConfig() { - override fun getCreator(outputFolderPath: String) = Code2VecStorageFactory( - outputFolderPath, - PathBasedStorageConfig(maxPathLength, maxPathWidth, maxTokens, maxPaths, maxPathContextsPerEntity), - tokenProcessor - ) -} diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 5aecaaee..084c6bda 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -1,7 +1,7 @@ package astminer.examples import astminer.config.* -import astminer.pipeline.getFunctionPipeline +import astminer.pipeline.Pipeline //Retrieve paths from all Java files, using a GumTree parser. @@ -18,13 +18,11 @@ fun code2vecJavaMethods() { listOf("java") ), problemConfig = FunctionNamePredictionConfig(), - storageFactoryConfig = Code2VecPathStorageFactoryConfig( + storageConfig = Code2VecPathStorageConfig( maxPathLength = 5, maxPathWidth = 5 ) ) - val pipeline = getFunctionPipeline(pipelineConfig) - - pipeline.run() + Pipeline(pipelineConfig).run() } diff --git a/src/main/kotlin/astminer/pipeline/GetPipeline.kt b/src/main/kotlin/astminer/pipeline/GetPipeline.kt deleted file mode 100644 index 1f28e761..00000000 --- a/src/main/kotlin/astminer/pipeline/GetPipeline.kt +++ /dev/null @@ -1,42 +0,0 @@ -package astminer.pipeline - -import astminer.common.model.FunctionInfo -import astminer.common.model.Node -import astminer.common.model.ParseResult -import astminer.config.FilePipelineConfig -import astminer.config.FunctionPipelineConfig -import astminer.config.PipelineConfig - -/** - * Initializes the Pipeline given the [pipelineConfig]. - * This function must have no side effects! 
- */ -fun getPipeline(pipelineConfig: PipelineConfig): Pipeline<*> { - return when (pipelineConfig) { - is FilePipelineConfig -> getFilePipeline(pipelineConfig) - is FunctionPipelineConfig -> getFunctionPipeline(pipelineConfig) - } -} - -fun getFilePipeline(filePipelineConfig: FilePipelineConfig): Pipeline> = - with(filePipelineConfig) { - Pipeline( - frontend = FilePipelineFrontend(inputDir, parserConfig.type, parserConfig.extensions), - filters = filterConfigs.map { it.filter }, - problem = problemConfig.problem, - excludedNodeTypes = excludedNodeTypes, - storageFactory = storageFactoryConfig.getCreator(outputDir) - ) - } - -fun getFunctionPipeline(functionPipelineConfig: FunctionPipelineConfig): Pipeline> = - with(functionPipelineConfig) { - Pipeline( - frontend = FunctionPipelineFrontend(inputDir, parserConfig.type, parserConfig.extensions), - filters = filterConfigs.map { it.filter }, - problem = problemConfig.problem, - excludedNodeTypes = excludedNodeTypes, - storageFactory = storageFactoryConfig.getCreator(outputDir) - ) - } - diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index de69fc5d..f0ba2afa 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -1,37 +1,56 @@ package astminer.pipeline -import astminer.common.model.Node -import astminer.common.preOrder -import astminer.filters.Filter -import astminer.problem.LabeledResult -import astminer.problem.Problem - -class Pipeline( - private val frontend: PipelineFrontend, - private val filters: List> = emptyList(), - private val problem: Problem, - private val excludedNodeTypes: List = emptyList(), - private val storageFactory: StorageFactory -) { - private fun T.passesThroughFilters() = filters.all { filter -> filter.isFiltered(this) } - - private fun LabeledResult.excludeNodes() { - root.preOrder().forEach { node -> - excludedNodeTypes.forEach { node.removeChildrenOfType(it) } +import 
astminer.common.getProjectFilesWithExtension +import astminer.config.* +import astminer.parse.getHandlerFactory +import astminer.storage.Storage +import astminer.storage.TokenProcessor +import astminer.storage.ast.CsvAstStorage +import astminer.storage.ast.DotAstStorage +import astminer.storage.path.Code2VecPathStorage +import java.io.File + +class Pipeline(private val config: PipelineConfig) { + private val inputDirectory = File(config.inputDir) + private val outputDirectory = File(config.outputDir) + + private val branch = when (config) { + is FilePipelineConfig -> FilePipelineBranch(config) + is FunctionPipelineConfig -> FunctionPipelineBranch(config) + } + + private fun createStorageDirectory(extension: String): File { + val outputDirectoryForExtension = outputDirectory.resolve(extension) + outputDirectoryForExtension.mkdir() + return outputDirectoryForExtension + } + + private fun createStorage(extension: String): Storage = with(config.storageConfig) { + val storagePath = createStorageDirectory(extension).path + + // TODO: I should remove this later, once storage constructors have no side effects, and implement it like filters and problems + when (this) { + is AstStorageConfig -> { + val tokenProcessor = if (splitTokens) TokenProcessor.Split else TokenProcessor.Normalize + when (format) { + AstStorageFormat.Csv -> CsvAstStorage(storagePath) + AstStorageFormat.Dot -> DotAstStorage(storagePath, tokenProcessor) + } + } + is Code2VecPathStorageConfig -> { + Code2VecPathStorage(storagePath, pathBasedStorageConfig) + } } } fun run() { - for ((extension, entities) in frontend.getEntities()) { - storageFactory.createStorageAndOutputFolder(extension).use { storage -> - val labeledResults = entities - .filter { functionInfo -> functionInfo.passesThroughFilters() } - .mapNotNull { problem.process(it) } - - for (labeledResult in labeledResults) { - labeledResult.excludeNodes() - } + for (extension in config.parserConfig.extensions) { + val languageFactory = 
getHandlerFactory(extension, config.parserConfig.type) + + val files = getProjectFilesWithExtension(inputDirectory, extension).asSequence() + val labeledResults = files.map { languageFactory.createHandler(it) }.flatMap { branch.process(it) } + createStorage(extension).use { storage -> storage.store(labeledResults.asIterable()) } } diff --git a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt new file mode 100644 index 00000000..c189442a --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt @@ -0,0 +1,42 @@ +package astminer.pipeline + +import astminer.common.model.FunctionInfo +import astminer.common.model.LanguageHandler +import astminer.common.model.Node +import astminer.common.model.ParseResult +import astminer.config.FilePipelineConfig +import astminer.config.FunctionPipelineConfig +import astminer.problem.LabeledResult + +interface PipelineBranch { + fun process(languageHandler: LanguageHandler): Sequence> +} + +class FilePipelineBranch(config: FilePipelineConfig) : PipelineBranch { + val filters = config.filterConfigs.map { it.filter } + val problem = config.problemConfig.problem + + private fun ParseResult.passesThroughFilters() = filters.all { filter -> filter.isFiltered(this) } + + override fun process(languageHandler: LanguageHandler): Sequence> { + val parseResult = languageHandler.parseResult + return if (parseResult.passesThroughFilters()) { + val labeledResult = problem.process(parseResult) ?: return emptySequence() + sequenceOf(labeledResult) + } else { + emptySequence() + } + } +} + +class FunctionPipelineBranch(config: FunctionPipelineConfig) : PipelineBranch { + val filters = config.filterConfigs.map { it.filter } + val problem = config.problemConfig.problem + + private fun FunctionInfo.passesThroughFilters() = filters.all { filter -> filter.isFiltered(this) } + + override fun process(languageHandler: LanguageHandler): Sequence> = + 
languageHandler.splitIntoMethods().asSequence() + .filter { functionInfo -> functionInfo.passesThroughFilters() } + .mapNotNull { functionInfo -> problem.process(functionInfo) } +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt b/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt deleted file mode 100644 index 00b3c4ec..00000000 --- a/src/main/kotlin/astminer/pipeline/PipelineFrontend.kt +++ /dev/null @@ -1,86 +0,0 @@ -package astminer.pipeline - -import astminer.common.getProjectFilesWithExtension -import astminer.common.model.* -import astminer.parse.getHandlerFactory -import mu.KotlinLogging -import java.io.File - -/** - * A group of entities that come from the files with the same file extension. - * @param fileExtension The file extension that all entities share. - * @param entities The entities that are extracted from the files - * @see PipelineFrontend for the definition of "entity" - */ -data class EntitiesFromFiles(val fileExtension: String, val entities: Sequence) - -/** - * Extracts entities from files and groups them by file extensions. - * Entity -- anything that can be extracted from a file of code. - * @param T The type of entities - */ -interface PipelineFrontend { - /** - * Extract entities and group them by file extensions. - * @see PipelineFrontend for the definition of "entity". - */ - fun getEntities(): Sequence> -} - -private val logger = KotlinLogging.logger("PipelineFrontend") - -/** - * Base class for several PipelineFrontend implementations. - * Finds parsers of type [parserType] for all the given languages by [extensions]. - * Looks for files in [inputDirectoryPath]. 
- */ -abstract class CompositePipelineFrontend( - private val inputDirectoryPath: String, - private val parserType: String, - private val extensions: List -) : PipelineFrontend { - - protected abstract fun LanguageHandler.getEntities(): Sequence - - override fun getEntities(): Sequence> = sequence { - val inputDirectory = File(inputDirectoryPath) - - logger.info { "Reading ${inputDirectory.absolutePath}" } - - for (extension in extensions) { - val handlerFactory = try { - getHandlerFactory(extension, parserType) - } catch (e: UnsupportedOperationException) { - // TODO: log everything - println("Damn") - yield(EntitiesFromFiles(extension, emptySequence())) - continue - } - val files = getProjectFilesWithExtension(inputDirectory, extension).asSequence() - val entities = files.flatMap { file -> handlerFactory.createHandler(file).getEntities() } - yield(EntitiesFromFiles(extension, entities)) - } - } -} - -/** - * PipelineFrontend that extracts ParseResult from files. - * Basically, it parses the given files and returns the results. - * @see ParseResult - */ -class FilePipelineFrontend(inputDirectoryPath: String, parserType: String, extensions: List) : - CompositePipelineFrontend>(inputDirectoryPath, parserType, extensions) { - override fun LanguageHandler.getEntities(): Sequence> = sequenceOf(parseResult) -} - -/** - * PipelineFrontend that extracts FunctionInfo from files. - * It parses the files, finds functions in those files and collects information about the functions. 
- * @see FunctionInfo - */ -class FunctionPipelineFrontend(inputDirectoryPath: String, parserType: String, extensions: List) : - CompositePipelineFrontend>(inputDirectoryPath, parserType, extensions) { - - override fun LanguageHandler.getEntities(): Sequence> = - splitIntoMethods().asSequence() -} diff --git a/src/main/kotlin/astminer/pipeline/StorageFactories.kt b/src/main/kotlin/astminer/pipeline/StorageFactories.kt deleted file mode 100644 index 3c4a0f55..00000000 --- a/src/main/kotlin/astminer/pipeline/StorageFactories.kt +++ /dev/null @@ -1,54 +0,0 @@ -package astminer.pipeline - -import astminer.storage.Storage -import astminer.storage.TokenProcessor -import astminer.storage.ast.CsvAstStorage -import astminer.storage.ast.DotAstStorage -import astminer.storage.path.Code2VecPathStorage -import astminer.storage.path.PathBasedStorageConfig -import java.io.File - -interface StorageFactory { - fun createStorageAndOutputFolder(extension: String): Storage -} - -abstract class AbstractStorageFactory(private val outputDirectoryPath: String) : StorageFactory { - private fun createOutputFolder(extension: String): File { - val outputDirectoryForExtension = File(outputDirectoryPath).resolve(extension) - outputDirectoryForExtension.mkdir() - return outputDirectoryForExtension - } - - abstract fun initializeStorage(outputFolderPath: String): Storage - - override fun createStorageAndOutputFolder(extension: String): Storage = - initializeStorage(createOutputFolder(extension).path) -} - -/** - * Creates CsvAstStorages - */ -class CsvAstStorageFactory(outputDirectoryPath: String) : AbstractStorageFactory(outputDirectoryPath) { - override fun initializeStorage(outputFolderPath: String) = CsvAstStorage(outputFolderPath) -} - -/** - * Creates DotAstStorages given [tokenProcessor] - */ -class DotAstStorageFactory(outputDirectoryPath: String, private val tokenProcessor: TokenProcessor) : - AbstractStorageFactory(outputDirectoryPath) { - override fun 
initializeStorage(outputFolderPath: String) = DotAstStorage(outputFolderPath, tokenProcessor) -} - -/** - * Creates Code2VecStorages given [config] and [tokenProcessor] - */ -class Code2VecStorageFactory( - outputDirectoryPath: String, - private val config: PathBasedStorageConfig, - private val tokenProcessor: TokenProcessor -) : AbstractStorageFactory(outputDirectoryPath) { - override fun initializeStorage(outputFolderPath: String) = - Code2VecPathStorage(outputFolderPath, config, tokenProcessor) -} - diff --git a/src/test/kotlin/astminer/pipeline/AbstractStorageFactoryTest.kt b/src/test/kotlin/astminer/pipeline/AbstractStorageFactoryTest.kt deleted file mode 100644 index dd3ea8c4..00000000 --- a/src/test/kotlin/astminer/pipeline/AbstractStorageFactoryTest.kt +++ /dev/null @@ -1,33 +0,0 @@ -package astminer.pipeline - -import astminer.storage.Storage -import org.junit.Before -import org.junit.Test -import java.io.File -import java.nio.file.Files.createTempDirectory -import kotlin.test.assertEquals - -internal class AbstractStorageFactoryTest { - lateinit var tempDir: File - - @Before - fun init() { - tempDir = createTempDirectory("prefix").toFile() - } - - @Test - fun `test creating a StorageFactory should not alter the output directory`() { - AbstractStorageFactoryImpl(tempDir.path) - assertEquals(0, tempDir.listFiles()?.size, "There should be no files in the directory") - } - - @Test - fun `test StorageFactory's createStorageAndOutputFolder should create a subdirectory named after the file extension`() { - AbstractStorageFactoryImpl(tempDir.path).createStorageAndOutputFolder("file extension") - assertEquals(listOf("file extension"), tempDir.listFiles()?.map { it.name }) - } - - class AbstractStorageFactoryImpl(outputFolderPath: String) : AbstractStorageFactory(outputFolderPath) { - override fun initializeStorage(outputFolderPath: String): Storage = DummyStorage() - } -} diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt 
b/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt deleted file mode 100644 index f5b77f1d..00000000 --- a/src/test/kotlin/astminer/pipeline/Code2VecExtractionPipelineTest.kt +++ /dev/null @@ -1,41 +0,0 @@ -package astminer.pipeline - -import astminer.cli.util.verifyPathContextExtraction -import astminer.config.Code2VecPathStorageFactoryConfig -import astminer.config.FilePathExtractorConfig -import astminer.config.FilePipelineConfig -import astminer.config.ParserConfig -import org.junit.Test -import java.io.File -import java.nio.file.Files.createTempDirectory - -internal class Code2VecExtractionPipelineTest { - private val testDataDir = File("src/test/resources") - - @Test - fun testDefaultExtraction() { - val extractedDataDir = createTempDirectory("extractedData").toFile() - - val languages = listOf("java", "python") - - val config = FilePipelineConfig( - inputDir = testDataDir.path, - outputDir = extractedDataDir.path, - parserConfig = ParserConfig( - "gumtree", - languages - ), - problemConfig = FilePathExtractorConfig(), - storageFactoryConfig = Code2VecPathStorageFactoryConfig( - maxPathLength = 8, - maxPathWidth = 3 - ) - ) - - val pipeline = getFilePipeline(config) - - pipeline.run() - - verifyPathContextExtraction(extractedDataDir, languages, false) - } -} \ No newline at end of file diff --git a/src/test/kotlin/astminer/pipeline/CompositePipelineFrontendTest.kt b/src/test/kotlin/astminer/pipeline/CompositePipelineFrontendTest.kt deleted file mode 100644 index 01297f5c..00000000 --- a/src/test/kotlin/astminer/pipeline/CompositePipelineFrontendTest.kt +++ /dev/null @@ -1,23 +0,0 @@ -package astminer.pipeline - -import astminer.common.model.LanguageHandler -import astminer.common.model.Node -import org.junit.Test -import kotlin.test.assertEquals - -internal class CompositePipelineFrontendTest { - class DummyCompositeFrontend(inputDirectoryPath: String, parserType: String, extensions: List) : - CompositePipelineFrontend(inputDirectoryPath, 
parserType, extensions) { - - override fun LanguageHandler.getEntities(): Sequence = sequenceOf(Unit) - } - - @Test - fun `test should skip language if it is not supported`() { - val tempDir = createTempDirectoryWithEmptyFiles(mapOf("py" to 5)) - val frontend = DummyCompositeFrontend(tempDir.path, "antlr", listOf("py", "unsupported_language")) - val entitiesCounts = getExtractedEntitiesCounts(frontend.getEntities()) - - assertEquals(mapOf("py" to 5, "unsupported_language" to 0), entitiesCounts) - } -} diff --git a/src/test/kotlin/astminer/pipeline/Mocks.kt b/src/test/kotlin/astminer/pipeline/Mocks.kt deleted file mode 100644 index 942c7958..00000000 --- a/src/test/kotlin/astminer/pipeline/Mocks.kt +++ /dev/null @@ -1,69 +0,0 @@ -package astminer.pipeline - -import astminer.common.DummyNode -import astminer.common.labeledWith -import astminer.common.model.Node -import astminer.filters.Filter -import astminer.problem.LabeledResult -import astminer.problem.Problem -import astminer.storage.Storage - -class DummyPipelineFrontend(private val extensionsToNodeNames: Map) : PipelineFrontend { - override fun getEntities(): Sequence> = - extensionsToNodeNames.entries.map { (extension, nodeName) -> EntitiesFromFiles(extension, sequenceOf(DummyNode(nodeName))) } - .asSequence() -} - -class SimplePipelineFrontend(private val nodes: List) : PipelineFrontend { - override fun getEntities(): Sequence> = - sequenceOf(EntitiesFromFiles("", nodes.asSequence())) -} - -class DummyFilter(private val excludeName: String = "") : Filter { - override fun isFiltered(entity: DummyNode): Boolean = entity.getToken() != excludeName -} - - -class DummyLabelExtractor(private val excludeName: String = "") : Problem { - override fun process(entity: DummyNode): LabeledResult? 
= if (entity.data != excludeName) { - entity.labeledWith("label ${entity.data}") - } else { - null - } -} - -class BambooLabelExtractor : Problem { - private fun getLabel(entity: Node): String { - val firstChildLabel = entity.getChildren().firstOrNull()?.let { getLabel(it) } ?: "" - return "${entity.getTypeLabel()}<$firstChildLabel" - } - - override fun process(entity: DummyNode): LabeledResult = entity.labeledWith(getLabel(entity)) -} - -class DummyStorageFactory : StorageFactory { - private val storages = mutableMapOf() - - val results: Map> - get() = storages.mapValues { (_, storage) -> storage.labeledResults } - - override fun createStorageAndOutputFolder(extension: String): Storage { - val storage = DummyStorage() - storages[extension] = storage - return storage - } -} - -class DummyStorage : Storage { - override val outputDirectoryPath: String = "" - - val labeledResults = mutableSetOf() - - override fun store(labeledResult: LabeledResult) { - labeledResults.add(labeledResult.label) - } - - override fun close() { - /* no-op */ - } -} diff --git a/src/test/kotlin/astminer/pipeline/PipelineTest.kt b/src/test/kotlin/astminer/pipeline/PipelineTest.kt deleted file mode 100644 index 5d5bfc04..00000000 --- a/src/test/kotlin/astminer/pipeline/PipelineTest.kt +++ /dev/null @@ -1,115 +0,0 @@ -package astminer.pipeline - -import astminer.common.DummyNode -import org.junit.Before -import org.junit.Test -import kotlin.test.assertEquals - -internal class PipelineTest { - companion object { - val extensionsToNodeNames = mapOf("a" to "A", "b" to "B") - } - - lateinit var storageFactory: DummyStorageFactory - - @Before - fun init() { - storageFactory = DummyStorageFactory() - } - - @Test - fun `test pipeline saves all entities if none are filtered and no node types are excluded`() { - Pipeline( - frontend = DummyPipelineFrontend(extensionsToNodeNames), - problem = DummyLabelExtractor(), - storageFactory = storageFactory - ).run() - - val expectedResults = mapOf( - "a" to 
setOf("label A"), - "b" to setOf("label B") - ) - assertEquals(expectedResults, storageFactory.results) - } - - @Test - fun `test pipeline saves A if B is filtered out by a DummyFilter`() { - Pipeline( - frontend = DummyPipelineFrontend(extensionsToNodeNames), - filters = listOf(DummyFilter("B")), - problem = DummyLabelExtractor(), - storageFactory = storageFactory - ).run() - - val expectedResults = mapOf( - "a" to setOf("label A"), - "b" to setOf() - ) - assertEquals(expectedResults, storageFactory.results) - } - - @Test - fun `test pipeline saves A if B is filtered out by a DummyLabelExtractor`() { - Pipeline( - frontend = DummyPipelineFrontend(extensionsToNodeNames), - filters = listOf(DummyFilter()), - problem = DummyLabelExtractor("B"), - storageFactory = storageFactory - ).run() - - val expectedResults = mapOf( - "a" to setOf("label A"), - "b" to setOf() - ) - assertEquals(expectedResults, storageFactory.results) - } - - @Test - fun `test pipeline saves nothing if 'label A' is filtered by a filter and 'label B' is filtered by a problem`() { - Pipeline( - frontend = DummyPipelineFrontend(extensionsToNodeNames), - filters = listOf(DummyFilter("A")), - problem = DummyLabelExtractor("B"), - storageFactory = storageFactory - ).run() - - val expectedResults = mapOf( - "a" to setOf(), - "b" to setOf() - ) - assertEquals(expectedResults, storageFactory.results) - } - - @Test - fun `test pipeline should not remove any nodes from the tree by default`() { - val node = DummyNode("Root", mutableListOf(DummyNode("Child"))) - - Pipeline( - frontend = SimplePipelineFrontend(listOf(node)), - problem = BambooLabelExtractor(), - storageFactory = storageFactory - ).run() - - val expectedResults = mapOf( - "" to setOf("Root): File { - val dir = createTempDir() - for ((extension, fileCount) in fileCounts.entries) { - for (i in 1..fileCount) { - dir.resolve("$i.$extension").createNewFile() - } - } - return dir -} - -fun getExtractedEntitiesCounts(entitiesFromFiles: Sequence>): Map 
= - entitiesFromFiles.associate { it.fileExtension to it.entities.toList().size } From 82782d1c8764bbea217daff11ae807093e900820 Mon Sep 17 00:00:00 2001 From: furetur Date: Wed, 19 May 2021 14:39:17 +0500 Subject: [PATCH 172/308] reduced filters and problems complexity --- .../kotlin/astminer/config/FilterConfigs.kt | 9 ++++- .../kotlin/astminer/filters/CommonFilters.kt | 33 +++---------------- .../kotlin/astminer/filters/FileFilters.kt | 4 ++- src/main/kotlin/astminer/filters/Filter.kt | 5 --- .../astminer/filters/FunctionFilters.kt | 24 +++++++------- .../astminer/pipeline/PipelineBranch.kt | 12 +++---- .../astminer/problem/FileLevelProblems.kt | 14 ++++---- .../astminer/problem/FunctionLevelProblems.kt | 16 ++++----- src/main/kotlin/astminer/problem/Problem.kt | 15 --------- .../astminer/filters/FileFiltersTest.kt | 8 ++--- .../astminer/filters/FunctionFiltersTest.kt | 30 ++++++++--------- 11 files changed, 67 insertions(+), 103 deletions(-) delete mode 100644 src/main/kotlin/astminer/filters/Filter.kt delete mode 100644 src/main/kotlin/astminer/problem/Problem.kt diff --git a/src/main/kotlin/astminer/config/FilterConfigs.kt b/src/main/kotlin/astminer/config/FilterConfigs.kt index 18f8aac8..900902a8 100644 --- a/src/main/kotlin/astminer/config/FilterConfigs.kt +++ b/src/main/kotlin/astminer/config/FilterConfigs.kt @@ -14,7 +14,7 @@ sealed class FileFilterConfig { @SerialName("max tree size") data class FileTreeSizeFilterConfig(val maxTreeSize: Int) : FileFilterConfig() { @Transient - override val filter = FileTreeSizeFilter(maxTreeSize) + override val filter = TreeSizeFilter(maxTreeSize) } @Serializable @@ -22,6 +22,13 @@ sealed class FunctionFilterConfig { abstract val filter: FunctionFilter } +@Serializable +@SerialName("max tree size") +data class FunctionTreeSizeFilterConfig(val maxTreeSize: Int) : FunctionFilterConfig() { + @Transient + override val filter = TreeSizeFilter(maxTreeSize) +} + @Serializable @SerialName("exclude functions with modifiers") data 
class ModifierFilterConfig(val modifiers: List) : FunctionFilterConfig() { diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt index b56f77e4..3d886713 100644 --- a/src/main/kotlin/astminer/filters/CommonFilters.kt +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -4,36 +4,11 @@ import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.model.ParseResult import astminer.featureextraction.treeSize -import kotlinx.serialization.Serializable -abstract class TreeSizeFilter(private val maxSize: Int) : Filter { - private fun isTreeFiltered(root: Node): Boolean { - // TODO: this is not needed - return if (maxSize == -1) { - true - } else { - root.treeSize() <= maxSize - } - } +class TreeSizeFilter(private val maxSize: Int) : FileFilter, FunctionFilter { + private fun testTree(root: Node): Boolean = root.treeSize() <= maxSize - protected abstract val T.tree: Node + override fun test(functionInfo: FunctionInfo): Boolean = testTree(functionInfo.root) - override fun isFiltered(entity: T) = isTreeFiltered(entity.tree) -} - -/** - * Filter that excludes files that have ASTs bigger than [maxSize] - */ -class FileTreeSizeFilter(maxSize: Int) : TreeSizeFilter>(maxSize), FileFilter { - override val ParseResult.tree: Node - get() = root -} - -/** - * Filter that excludes functions that have ASTs bigger than [maxSize] - */ -class FunctionTreeSizeFilter(maxSize: Int) : TreeSizeFilter>(maxSize), - FunctionFilter { - override val FunctionInfo.tree: Node - get() = root + override fun test(parseResult: ParseResult): Boolean = testTree(parseResult.root) } diff --git a/src/main/kotlin/astminer/filters/FileFilters.kt b/src/main/kotlin/astminer/filters/FileFilters.kt index 63f6f862..0af6a808 100644 --- a/src/main/kotlin/astminer/filters/FileFilters.kt +++ b/src/main/kotlin/astminer/filters/FileFilters.kt @@ -3,4 +3,6 @@ package astminer.filters import astminer.common.model.Node 
import astminer.common.model.ParseResult -interface FileFilter: Filter> +interface FileFilter { + fun test(parseResult: ParseResult): Boolean +} diff --git a/src/main/kotlin/astminer/filters/Filter.kt b/src/main/kotlin/astminer/filters/Filter.kt deleted file mode 100644 index b50e30b6..00000000 --- a/src/main/kotlin/astminer/filters/Filter.kt +++ /dev/null @@ -1,5 +0,0 @@ -package astminer.filters - -interface Filter { - fun isFiltered(entity: T): Boolean -} diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index 6d6f1ff9..ad908e07 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -4,40 +4,40 @@ import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.preOrder import astminer.common.splitToSubtokens -import kotlinx.serialization.SerialName -import kotlinx.serialization.Serializable -interface FunctionFilter : Filter> +interface FunctionFilter { + fun test(functionInfo: FunctionInfo): Boolean +} /** * Filter that excludes functions that have at least one of modifiers from the [excludeModifiers] list. */ class ModifierFilter(private val excludeModifiers: List) : FunctionFilter { - override fun isFiltered(entity: FunctionInfo): Boolean = - !excludeModifiers.any { modifier -> modifier in entity.modifiers } + override fun test(functionInfo: FunctionInfo): Boolean = + !excludeModifiers.any { modifier -> modifier in functionInfo.modifiers } } /** * Filter that excludes functions that have at least one annotations from the [excludeAnnotations] list. 
*/ class AnnotationFilter(private val excludeAnnotations: List) : FunctionFilter { - override fun isFiltered(entity: FunctionInfo): Boolean = - !excludeAnnotations.any { annotation -> annotation in entity.annotations } + override fun test(functionInfo: FunctionInfo): Boolean = + !excludeAnnotations.any { annotation -> annotation in functionInfo.annotations } } /** * Filter that excludes constructors */ object ConstructorFilter : FunctionFilter { - override fun isFiltered(entity: FunctionInfo) = !entity.isConstructor + override fun test(functionInfo: FunctionInfo) = !functionInfo.isConstructor } /** * Filter that excludes functions that have more than [maxWordsNumber] words in their names. */ class FunctionNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { - override fun isFiltered(entity: FunctionInfo): Boolean { - val name = entity.name + override fun test(functionInfo: FunctionInfo): Boolean { + val name = functionInfo.name return name != null && splitToSubtokens(name).size <= maxWordsNumber } } @@ -46,7 +46,7 @@ class FunctionNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionF * Filter that excludes functions that have more words than [maxWordsNumber] in any token of their subtree. 
*/ class FunctionAnyNodeWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { - override fun isFiltered(entity: FunctionInfo): Boolean = - !entity.root.preOrder().any { node -> splitToSubtokens(node.getToken()).size > maxWordsNumber } + override fun test(functionInfo: FunctionInfo): Boolean = + !functionInfo.root.preOrder().any { node -> splitToSubtokens(node.getToken()).size > maxWordsNumber } } diff --git a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt index c189442a..3f4a022c 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt @@ -13,10 +13,10 @@ interface PipelineBranch { } class FilePipelineBranch(config: FilePipelineConfig) : PipelineBranch { - val filters = config.filterConfigs.map { it.filter } - val problem = config.problemConfig.problem + private val filters = config.filterConfigs.map { it.filter } + private val problem = config.problemConfig.problem - private fun ParseResult.passesThroughFilters() = filters.all { filter -> filter.isFiltered(this) } + private fun ParseResult.passesThroughFilters() = filters.all { filter -> filter.test(this) } override fun process(languageHandler: LanguageHandler): Sequence> { val parseResult = languageHandler.parseResult @@ -30,10 +30,10 @@ class FilePipelineBranch(config: FilePipelineConfig) : PipelineBranch { } class FunctionPipelineBranch(config: FunctionPipelineConfig) : PipelineBranch { - val filters = config.filterConfigs.map { it.filter } - val problem = config.problemConfig.problem + private val filters = config.filterConfigs.map { it.filter } + private val problem = config.problemConfig.problem - private fun FunctionInfo.passesThroughFilters() = filters.all { filter -> filter.isFiltered(this) } + private fun FunctionInfo.passesThroughFilters() = filters.all { filter -> filter.test(this) } override fun process(languageHandler: LanguageHandler): Sequence> = 
languageHandler.splitIntoMethods().asSequence() diff --git a/src/main/kotlin/astminer/problem/FileLevelProblems.kt b/src/main/kotlin/astminer/problem/FileLevelProblems.kt index 54c0d88a..7124705a 100644 --- a/src/main/kotlin/astminer/problem/FileLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FileLevelProblems.kt @@ -2,25 +2,25 @@ package astminer.problem import astminer.common.model.Node import astminer.common.model.ParseResult -import kotlinx.serialization.SerialName -import kotlinx.serialization.Serializable import java.io.File -interface FileLevelProblem : Problem> +interface FileLevelProblem { + fun process(parseResult: ParseResult): LabeledResult? +} /** * Labels files with folder names */ object FilePathExtractor : FileLevelProblem { - override fun process(entity: ParseResult): LabeledResult = entity.labeledWithFilePath() + override fun process(parseResult: ParseResult): LabeledResult = parseResult.labeledWithFilePath() } /** * Labels files with folder names */ object FolderExtractor : FileLevelProblem { - override fun process(entity: ParseResult): LabeledResult? { - val folderName = File(entity.filePath).parentFile?.name ?: return null - return entity.labeledWith(folderName) + override fun process(parseResult: ParseResult): LabeledResult? 
{ + val folderName = File(parseResult.filePath).parentFile?.name ?: return null + return parseResult.labeledWith(folderName) } } diff --git a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt index fc19e73e..7d26786a 100644 --- a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt @@ -4,10 +4,10 @@ import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.preOrder import astminer.common.setTechnicalToken -import kotlinx.serialization.SerialName -import kotlinx.serialization.Serializable -interface FunctionLevelProblem : Problem> +interface FunctionLevelProblem { + fun process(functionInfo: FunctionInfo): LabeledResult? +} /** * Labels functions with their names. @@ -17,14 +17,14 @@ object FunctionNameProblem : FunctionLevelProblem { const val TECHNICAL_METHOD_NAME = "METHOD_NAME" const val TECHNICAL_RECURSIVE_CALL = "SELF" - override fun process(entity: FunctionInfo): LabeledResult? { - val name = entity.name ?: return null - entity.root.preOrder().forEach { node -> + override fun process(functionInfo: FunctionInfo): LabeledResult? 
{ + val name = functionInfo.name ?: return null + functionInfo.root.preOrder().forEach { node -> if (node.getToken() == name) { node.setTechnicalToken(TECHNICAL_RECURSIVE_CALL) } } - entity.nameNode?.setTechnicalToken(TECHNICAL_METHOD_NAME) - return LabeledResult(entity.root, name, entity.filePath) + functionInfo.nameNode?.setTechnicalToken(TECHNICAL_METHOD_NAME) + return LabeledResult(functionInfo.root, name, functionInfo.filePath) } } diff --git a/src/main/kotlin/astminer/problem/Problem.kt b/src/main/kotlin/astminer/problem/Problem.kt deleted file mode 100644 index 4f76fe52..00000000 --- a/src/main/kotlin/astminer/problem/Problem.kt +++ /dev/null @@ -1,15 +0,0 @@ -package astminer.problem - -import astminer.common.model.Node - -/** - * A structural element of the pipeline. - * Extracts labels from entities and also may mutate them. - */ -interface Problem { - /** - * Extracts label from entity. - * If returns null then this entity will not be used further. - */ - fun process(entity: T): LabeledResult? 
-} diff --git a/src/test/kotlin/astminer/filters/FileFiltersTest.kt b/src/test/kotlin/astminer/filters/FileFiltersTest.kt index edb7b267..3fd94c92 100644 --- a/src/test/kotlin/astminer/filters/FileFiltersTest.kt +++ b/src/test/kotlin/astminer/filters/FileFiltersTest.kt @@ -8,14 +8,14 @@ import kotlin.test.assertTrue internal class FileFiltersTest { @Test - fun `test FileTreeSizeFilter for 100 should exclude bamboo of length 101`() { + fun `test TreeSizeFilter for 100 should exclude bamboo of length 101`() { val node = createBamboo(101).toParseResult() - assertFalse { FileTreeSizeFilter(100).isFiltered(node) } + assertFalse { TreeSizeFilter(100).test(node) } } @Test - fun `test FileTreeSizeFilter for 10 should not exclude bamboo of length 5`() { + fun `test TreeSizeFilter for 10 should not exclude bamboo of length 5`() { val node = createBamboo(5).toParseResult() - assertTrue { FileTreeSizeFilter(10).isFiltered(node) } + assertTrue { TreeSizeFilter(10).test(node) } } } diff --git a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt index 861b0fb6..5b4d09d9 100644 --- a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt +++ b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt @@ -15,7 +15,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val modifiers: List = listOf("b", "c") } - assertFalse { ModifierFilter(excludedModifiers).isFiltered(functionInfo) } + assertFalse { ModifierFilter(excludedModifiers).test(functionInfo) } } @Test @@ -24,7 +24,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val modifiers: List = listOf("c", "d") } - assertTrue { ModifierFilter(excludedModifiers).isFiltered(functionInfo) } + assertTrue { ModifierFilter(excludedModifiers).test(functionInfo) } } @Test @@ -33,7 +33,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val annotations: List = listOf("a", "c") } - 
assertFalse { AnnotationFilter(excludedModifiers).isFiltered(functionInfo) } + assertFalse { AnnotationFilter(excludedModifiers).test(functionInfo) } } @Test @@ -42,7 +42,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val annotations: List = listOf("y", "x") } - assertTrue { AnnotationFilter(excludedModifiers).isFiltered(functionInfo) } + assertTrue { AnnotationFilter(excludedModifiers).test(functionInfo) } } @Test @@ -50,7 +50,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val isConstructor = true } - assertFalse { ConstructorFilter.isFiltered(functionInfo) } + assertFalse { ConstructorFilter.test(functionInfo) } } @Test @@ -58,7 +58,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val isConstructor = false } - assertTrue { ConstructorFilter.isFiltered(functionInfo) } + assertTrue { ConstructorFilter.test(functionInfo) } } @Test @@ -66,7 +66,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val name = "Word".repeat(100) } - assertFalse { FunctionNameWordsNumberFilter(50).isFiltered(functionInfo) } + assertFalse { FunctionNameWordsNumberFilter(50).test(functionInfo) } } @Test @@ -74,7 +74,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val name = "Word".repeat(100) } - assertTrue { FunctionNameWordsNumberFilter(101).isFiltered(functionInfo) } + assertTrue { FunctionNameWordsNumberFilter(101).test(functionInfo) } } @Test @@ -82,7 +82,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val root = AntlrNode("", null, "Word".repeat(100)) } - assertFalse { FunctionAnyNodeWordsNumberFilter(50).isFiltered(functionInfo) } + assertFalse { FunctionAnyNodeWordsNumberFilter(50).test(functionInfo) } } @Test @@ -91,7 +91,7 @@ class FunctionFiltersTest { override val name = "Word".repeat(100) override val root = createBamboo(1) } - assertTrue { 
FunctionAnyNodeWordsNumberFilter(101).isFiltered(functionInfo) } + assertTrue { FunctionAnyNodeWordsNumberFilter(101).test(functionInfo) } } @Test @@ -103,22 +103,22 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val root = root } - assertFalse { FunctionAnyNodeWordsNumberFilter(2).isFiltered(functionInfo) } + assertFalse { FunctionAnyNodeWordsNumberFilter(2).test(functionInfo) } } @Test - fun `test FunctionTreeSizeFilter for 100 should exclude bamboo of length 101`() { + fun `test TreeSizeFilter for 100 should exclude bamboo of length 101`() { val functionInfo = object : FunctionInfo { override val root = createBamboo(101) } - assertFalse { FunctionTreeSizeFilter(100).isFiltered(functionInfo) } + assertFalse { TreeSizeFilter(100).test(functionInfo) } } @Test - fun `test FunctionTreeSizeFilter for 10 should not exclude bamboo of length 5`() { + fun `test TreeSizeFilter for 10 should not exclude bamboo of length 5`() { val functionInfo = object : FunctionInfo { override val root = createBamboo(5) } - assertTrue { FunctionTreeSizeFilter(10).isFiltered(functionInfo) } + assertTrue { TreeSizeFilter(10).test(functionInfo) } } } \ No newline at end of file From 3fea69533fa55642bb349559493d4a610771af99 Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 20 May 2021 11:59:23 +0500 Subject: [PATCH 173/308] fixed issues after merge except GumTreePythonMethodSplitterTest.kt --- .../javascript/JavaScriptMethodSplitter.kt | 184 --------- .../astminer/cli/Code2VecExtractorTest.kt | 27 -- .../astminer/cli/PathContextsExtractorTest.kt | 25 -- src/test/kotlin/astminer/cli/util/CliArgs.kt | 44 --- .../astminer/cli/util/OutputVerification.kt | 48 --- src/test/kotlin/astminer/cli/util/Util.kt | 3 - .../python/GumTreePythonMethodSplitterTest.kt | 351 +++++++++--------- 7 files changed, 176 insertions(+), 506 deletions(-) delete mode 100644 src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt delete mode 100644 
src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt delete mode 100644 src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt delete mode 100644 src/test/kotlin/astminer/cli/util/CliArgs.kt delete mode 100644 src/test/kotlin/astminer/cli/util/OutputVerification.kt delete mode 100644 src/test/kotlin/astminer/cli/util/Util.kt diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt deleted file mode 100644 index b2b4e508..00000000 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ /dev/null @@ -1,184 +0,0 @@ -package astminer.parse.antlr.javascript - -import astminer.common.model.* -import astminer.common.preOrder -import astminer.parse.antlr.AntlrNode -import astminer.parse.antlr.decompressTypeLabel - -/** - * Get all methods (in JavaScript there are divided into functions, arrow functions and methods) and information - * about their names, enclosing elements and parameters. 
- */ -class JavaScriptMethodSplitter : TreeMethodSplitter { - companion object { - private const val METHOD_NODE = "methodDefinition" - private const val ARROW_NODE = "ARROW" - private const val FUNCTION_NODE = "Function" - } - - override fun splitIntoMethods(root: AntlrNode): Collection> { - val methodRoots: List = root.preOrder().map { node -> - when { - node.isArrowElement() -> ArrowElement(node as AntlrNode) - node.isFunctionElement() -> FunctionElement(node as AntlrNode) - node.isMethodElement() -> MethodElement(node as AntlrNode) - else -> null - } - }.filterNotNull() - - return methodRoots.map { it.getElementInfo() } - } - - private fun Node.isArrowElement() = this.getChildOfType(ARROW_NODE) != null - private fun Node.isFunctionElement() = this.getChildOfType(FUNCTION_NODE) != null - private fun Node.isMethodElement() = decompressTypeLabel(this.getTypeLabel()).last() == METHOD_NODE -} - -/** - Base class for describing JavaScript methods, functions or arrow functions. - */ -abstract class JavaScriptElement(private val element: AntlrNode) { - companion object { - private val ENCLOSING_ELEMENT_NODES = listOf("functionDeclaration", "variableDeclaration", "classDeclaration", "methodDefinition") - private const val ENCLOSING_ELEMENT_NAME_NODE = "Identifier" - - private const val SINGLE_PARAMETER_NODE = "formalParameterArg" - private const val PARAMETER_NAME_NODE = "Identifier" - } - - /** - * Gets [element]'s information about its root, name, enclosing elements and list of parameters. - * @return element info - */ - fun getElementInfo() : MethodInfo { - val enclosingRoot = getEnclosingElementRoot(element.getParent() as AntlrNode) - return MethodInfo( - MethodNode(element, null, getElementName()), - ElementNode(enclosingRoot, getEnclosingElementName(enclosingRoot)), - getElementParametersList(getElementParametersRoot()) - ) - } - - /** - * Gets root of [element]'s enclosing element as first one with typeLabel from [ENCLOSING_ELEMENT_NAME_NODE]. 
- * @param node for checking if it is root of enclosing element - * @return root of enclosing element - */ - open fun getEnclosingElementRoot(node: AntlrNode?): AntlrNode? { - if (node == null || decompressTypeLabel(node.getTypeLabel()).intersect(ENCLOSING_ELEMENT_NODES).isNotEmpty()) { - return node - } - return getEnclosingElementRoot(node.getParent() as? AntlrNode) - } - - /** - * Gets name node of [element]'s enclosing element. - * @param enclosingRoot - root of enclosing element - * @return name node of enclosing element - */ - open fun getEnclosingElementName(enclosingRoot: AntlrNode?) : AntlrNode? { - return enclosingRoot?.getChildren()?.firstOrNull { - decompressTypeLabel(it.getTypeLabel()).last() == ENCLOSING_ELEMENT_NAME_NODE - } - } - - /** - * Gets list of [element]'s parameters by looking for them among [parameterRoot]'s children. - * @param parameterRoot - parent node of all parameter's nodes - * @return list of [element]'s parameters - */ - open fun getElementParametersList(parameterRoot: AntlrNode?): List> { - return when { - parameterRoot == null -> emptyList() - parameterRoot.hasLastLabel(PARAMETER_NAME_NODE) -> listOf(ParameterNode(parameterRoot, null, parameterRoot)) - else -> parameterRoot.getItOrChildrenOfType(SINGLE_PARAMETER_NODE).map { - ParameterNode(it, null, it.getItOrChildrenOfType(PARAMETER_NAME_NODE).firstOrNull()) - } - } - } - - private fun Node.hasLastLabel(typeLabel: String): Boolean { - return decompressTypeLabel(getTypeLabel()).last() == typeLabel - } - - private fun AntlrNode.getItOrChildrenOfType(typeLabel: String) : List { - return if (hasLastLabel(typeLabel)) { - listOf(this) - } else { - this.getChildrenOfType(typeLabel).map { it } - } - } - - /** - * Gets name of [element]. - * @return [element]'s name node - */ - abstract fun getElementName(): AntlrNode? - - /** - * Gets parent node of all [element]'s parameter nodes. - * @return parameters' parent node - */ - abstract fun getElementParametersRoot(): AntlrNode? 
-} - - -class ArrowElement(private val element: AntlrNode) : JavaScriptElement(element) { - companion object { - private const val ARROW_NAME_NODE = "Identifier" - private const val ARROW_PARAMETER_NODE = "arrowFunctionParameters" - private const val ARROW_PARAMETER_INNER_NODE = "formalParameterList" - } - - override fun getElementName(): AntlrNode? { - return element.getChildren().firstOrNull { - it.getTypeLabel() == ARROW_NAME_NODE - } - } - - override fun getElementParametersRoot(): AntlrNode? { - val parameterRoot = element.getChildOfType(ARROW_PARAMETER_NODE) - return parameterRoot?.getChildOfType(ARROW_PARAMETER_INNER_NODE) ?: parameterRoot - } -} - - -class FunctionElement(private val element: AntlrNode) : JavaScriptElement(element) { - companion object { - private const val FUNCTION_NAME_NODE = "Identifier" - private const val FUNCTION_PARAMETER_NODE = "formalParameterList" - } - - override fun getElementName(): AntlrNode? { - return element.getChildren().firstOrNull { - it.getTypeLabel() == FUNCTION_NAME_NODE - } - } - - override fun getElementParametersRoot(): AntlrNode? { - return element.getChildOfType(FUNCTION_PARAMETER_NODE) - } -} - - -class MethodElement(private val element: AntlrNode) : JavaScriptElement(element) { - companion object { - private val METHOD_GETTERS_SETTERS = listOf("getter", "setter") - private const val METHOD_NAME_NODE = "identifierName" - private const val METHOD_PARAMETER_NODE = "formalParameterList" - } - - override fun getElementName(): AntlrNode? { - val methodNameParent = element.getChildren().firstOrNull { - METHOD_GETTERS_SETTERS.contains(it.getTypeLabel()) - } ?: element - - return methodNameParent.getChildren().firstOrNull { - decompressTypeLabel(it.getTypeLabel()).contains(METHOD_NAME_NODE) - } - } - - override fun getElementParametersRoot(): AntlrNode? 
{ - return element.getChildOfType(METHOD_PARAMETER_NODE) - } -} \ No newline at end of file diff --git a/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt b/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt deleted file mode 100644 index ab46a4b7..00000000 --- a/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt +++ /dev/null @@ -1,27 +0,0 @@ -package astminer.cli - -import astminer.cli.util.CliArgs -import astminer.cli.util.languagesToString -import astminer.cli.util.verifyPathContextExtraction -import org.junit.Test -import java.io.File -import java.nio.file.Files.createTempDirectory - -internal class Code2VecExtractorTest { - private val testDataDir = File("src/test/resources") - private val code2VecExtractor = Code2VecExtractor() - - @Test - fun testDefaultExtraction() { - val extractedDataDir = createTempDirectory("extractedData") - val languages = listOf("java", "py") - val cliArgs = CliArgs.Builder(testDataDir, extractedDataDir.toFile()) - .extensions(languagesToString(languages)) - .build() - - code2VecExtractor.main(cliArgs.args) - verifyPathContextExtraction(extractedDataDir.toFile(), languages, false) - } -} - - diff --git a/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt b/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt deleted file mode 100644 index f98bbe24..00000000 --- a/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt +++ /dev/null @@ -1,25 +0,0 @@ -package astminer.cli - -import astminer.cli.util.CliArgs -import astminer.cli.util.languagesToString -import astminer.cli.util.verifyPathContextExtraction -import org.junit.Test -import java.io.File -import java.nio.file.Files.createTempDirectory - -internal class PathContextsExtractorTest { - private val testDataDir = File("src/test/resources") - private val pathContextsExtractor = PathContextsExtractor() - - @Test - fun testDefaultExtraction() { - val extractedDataDir = createTempDirectory("extractedData") - val languages = listOf("java", "py") - val cliArgs = 
CliArgs.Builder(testDataDir, extractedDataDir.toFile()) - .extensions(languagesToString(languages)) - .build() - - pathContextsExtractor.main(cliArgs.args) - verifyPathContextExtraction(extractedDataDir.toFile(), languages, false) - } -} \ No newline at end of file diff --git a/src/test/kotlin/astminer/cli/util/CliArgs.kt b/src/test/kotlin/astminer/cli/util/CliArgs.kt deleted file mode 100644 index 734db41b..00000000 --- a/src/test/kotlin/astminer/cli/util/CliArgs.kt +++ /dev/null @@ -1,44 +0,0 @@ -package astminer.cli.util - -import java.io.File - -class CliArgs private constructor(val args: List) { - - data class Builder(val testDataDir: File, val extractedDataDir: File) { - val args = mutableListOf( - "--project", testDataDir.path, - "--output", extractedDataDir.path - ) - fun extensions(extensions: String) = apply { - args.add("--lang") - args.add(extensions) - } - - fun maxPathLength(l: Int) = apply { - args.add("--maxL") - args.add(l.toString()) - } - - fun maxPathWidth(w: Int) = apply { - args.add("--maxW") - args.add(w.toString()) - } - - fun maxPathContexts(maxPC: Int)= apply { - args.add("--maxContexts") - args.add(maxPC.toString()) - } - - fun maxTokens(nTokens: Long) = apply { - args.add("--maxTokens") - args.add(nTokens.toString()) - } - - fun maxPaths(nPaths: Long) = apply { - args.add("--maxPaths") - args.add(nPaths.toString()) - } - - fun build() = CliArgs(args) - } -} diff --git a/src/test/kotlin/astminer/cli/util/OutputVerification.kt b/src/test/kotlin/astminer/cli/util/OutputVerification.kt deleted file mode 100644 index 89c75d32..00000000 --- a/src/test/kotlin/astminer/cli/util/OutputVerification.kt +++ /dev/null @@ -1,48 +0,0 @@ -package astminer.cli.util - -import java.io.File -import kotlin.test.assertTrue - -/** - * Directory with extracted data should contain a directory for each specified language - */ -internal fun checkExtractedDir(extractedDataDir: File, languages: List) { - val metLanguages = mutableSetOf() - 
extractedDataDir.listFiles()?.forEach { file -> - with(file) { - assertTrue(isDirectory, "Extracted data directory should not contain file $name") - assertTrue(languages.contains(file.name), "Unexpected directory $name") - metLanguages.add(name) - } - } - languages.forEach { language -> - assertTrue(metLanguages.contains(language), "Did not find directory for $language") - } -} - -internal fun validPathContextsFile(name: String, batching: Boolean): Boolean { - return if (batching) { - name.startsWith("path_contexts_") && name.endsWith(".csv") - } else { - name == "path_contexts.csv" - } -} - -internal fun checkPathContextsDir(languageDir: File, batching: Boolean) { - val expectedFiles = listOf("tokens.csv", "paths.csv", "node_types.csv") - languageDir.listFiles()?.forEach { file -> - with(file) { - assertTrue( - expectedFiles.contains(name) || validPathContextsFile(name, batching), - "Unexpected file $name in ${languageDir.name}" - ) - } - } -} - -internal fun verifyPathContextExtraction(extractedDataDir: File, languages: List, batching: Boolean) { - checkExtractedDir(extractedDataDir, languages) - languages.forEach { language -> - checkPathContextsDir(extractedDataDir.resolve(language), batching) - } -} diff --git a/src/test/kotlin/astminer/cli/util/Util.kt b/src/test/kotlin/astminer/cli/util/Util.kt deleted file mode 100644 index 30d119fd..00000000 --- a/src/test/kotlin/astminer/cli/util/Util.kt +++ /dev/null @@ -1,3 +0,0 @@ -package astminer.cli.util - -fun languagesToString(languages: List) = languages.joinToString(",") diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt index 30303d03..34fe2c65 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt @@ -1,176 +1,177 @@ package astminer.parse.gumtree.python - 
-import astminer.checkExecutable -import astminer.common.model.MethodInfo -import astminer.parse.gumtree.GumTreeNode -import org.junit.Assume -import org.junit.Before -import org.junit.Test -import java.io.File -import kotlin.test.assertEquals -import kotlin.test.assertNotNull - -class GumTreePythonMethodSplitterTest { - private fun parse(filename: String): GumTreeNode? = - GumTreePythonParser().parseInputStream(File(filename).inputStream()) - - private fun splitMethods(filename: String): Collection> = parse(filename)?.let { - GumTreePythonMethodSplitter().splitIntoMethods(it) - } ?: emptyList() - - private fun createPath(file: String) = "src/test/resources/gumTreeMethodSplitter/$file" - - @Before - fun checkPythonParser() = Assume.assumeTrue(checkExecutable("pythonparser")) - - @Test - fun methodsCountTest() { - assertEquals(7, splitMethods(createPath("1.py")).size) - assertEquals(9, splitMethods(createPath("2.py")).size) - assertEquals(3, splitMethods(createPath("3.py")).size) - assertEquals(5, splitMethods(createPath("4.py")).size) - } - - @Test - fun funcNamesTest() { - val realNames = setOf( - "no_args_func", "with_args_no_typed", "with_typed_args", - "with_typed_return_no_args", "full_typed", - "func_dif_args_typed_return", "complex_args_full_typed" - ) - val methodInfos = splitMethods(createPath("1.py")) - val parsedNames = methodInfos.map { it.name() }.toSet() - assertEquals(realNames, parsedNames) - } - - @Test - fun methodInfoTest1TypedArgs() { - val methodInfos = splitMethods(createPath("1.py")) - val method = methodInfos.firstOrNull { it.name() == "complex_args_full_typed" } - assertNotNull(method) - with(method) { - assertEquals("complex_args_full_typed", name()) - assertEquals(null, this.method.returnTypeNode) - assertEquals(1, methodParameters.size) - assertEquals(listOf("node"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("JsonNodeType"), methodParameters.map { it.returnType() }.toList()) - } - } - - @Test - fun 
methodInfoTest2ManyArgs() { - val methodInfos = splitMethods(createPath("1.py")) - val method = methodInfos.firstOrNull { it.name() == "func_dif_args_typed_return" } - assertNotNull(method) - with(method) { - assertEquals("func_dif_args_typed_return", name()) - assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) - assertEquals(6, methodParameters.size) - assertEquals(listOf("a", "b", "c", "d", "e", "f"), methodParameters.map { it.name() }.toList()) - assertEquals(emptyList(), methodParameters.mapNotNull { it.returnType() }.toList()) - } - } - - @Test - fun methodInfoTest3EnclosingClass() { - val methodInfos = splitMethods(createPath("2.py")) - val method = methodInfos.firstOrNull { it.name() == "foo_typed" } - assertNotNull(method) - with(method) { - assertEquals("foo_typed", name()) - assertEquals("A", enclosingElementName()) - assertEquals(null, this.method.returnTypeNode) - assertEquals(3, methodParameters.size) - assertEquals(listOf("self", "x", "y"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf(null, "int", "int"), methodParameters.map { it.returnType() }.toList()) - } - } - - @Test - fun methodInfoTest4EnclosingClass() { - val methodInfos = splitMethods(createPath("2.py")) - val method = methodInfos.firstOrNull { it.name() == "bar_typed" } - assertNotNull(method) - with(method) { - assertEquals("bar_typed", name()) - assertEquals("C", enclosingElementName()) - assertEquals(null, this.method.returnTypeNode) - assertEquals(2, methodParameters.size) - assertEquals(listOf("self", "x"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf(null, "int"), methodParameters.map { it.returnType() }.toList()) - } - } - - @Test - fun methodInfoTest5AsyncDef() { - val methodInfos = splitMethods(createPath("3.py")) - val method = methodInfos.firstOrNull { it.name() == "async_schrecklich_typed" } - assertNotNull(method) - with(method) { - assertEquals("async_schrecklich_typed", name()) - 
assertEquals("AsyncFunctionDef", this.method.root.getTypeLabel()) - assertEquals(null, enclosingElementName()) - assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) - assertEquals(4, methodParameters.size) - assertEquals(listOf("event", "x", "args", "kwargs"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("str", "int", null, null), methodParameters.map { it.returnType() }.toList()) - } - } - - @Test - fun methodInfoTest6Doc() { - val methodInfos = splitMethods(createPath("3.py")) - val method = methodInfos.firstOrNull { it.name() == "async_simple_no_typed" } - assertNotNull(method) - with(method) { - assertEquals("async_simple_no_typed", name()) - assertEquals("AsyncFunctionDef", this.method.root.getTypeLabel()) - assertEquals(null, enclosingElementName()) - assertEquals( - "\n async doc\n ", - this.method.root.getChildOfType("body") - ?.getChildOfType("Expr") - ?.getChildOfType("Constant-str") - ?.getToken() - ) - assertEquals(4, methodParameters.size) - assertEquals( - listOf("gh", "original_issue", "branch", "backport_pr_number"), - methodParameters.map { it.name() }.toList() - ) - assertEquals(listOf(null, null, null, null), methodParameters.map { it.returnType() }.toList()) - } - } - - @Test - fun methodInfoTest7InnerFunc() { - val methodInfos = splitMethods(createPath("4.py")) - val method = methodInfos.firstOrNull { it.name() == "foo_2" } - assertNotNull(method) - with(method) { - assertEquals("foo_2", name()) - assertEquals("foo_1", method.method.root.parent?.wrappedNode?.parent?.label) - assertEquals(null, enclosingElementName()) - assertEquals("Constant-NoneType", this.method.returnTypeNode?.getTypeLabel()) - assertEquals(1, methodParameters.size) - assertEquals(listOf("c"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf(null), methodParameters.map { it.returnType() }.toList()) - } - } - - @Test - fun methodInfoTest8InnerFunc() { - val methodInfos = splitMethods(createPath("4.py")) - val method 
= methodInfos.firstOrNull { it.name() == "bar_2" } - assertNotNull(method) - with(method) { - assertEquals("bar_2", name()) - assertEquals("bar_1", method.method.root.parent?.wrappedNode?.parent?.label) - assertEquals(null, enclosingElementName()) - assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) - assertEquals(2, methodParameters.size) - assertEquals(listOf("d", "e"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("int", "int"), methodParameters.map { it.returnType() }.toList()) - } - } -} +// +//import astminer.checkExecutable +//import astminer.common.model.FunctionInfo +//import astminer.common.model.MethodInfo +//import astminer.parse.gumtree.GumTreeNode +//import org.junit.Assume +//import org.junit.Before +//import org.junit.Test +//import java.io.File +//import kotlin.test.assertEquals +//import kotlin.test.assertNotNull +// +//class GumTreePythonMethodSplitterTest { +// private fun parse(filename: String): GumTreeNode = +// GumTreePythonParser().parseInputStream(File(filename).inputStream()) +// +// private fun splitMethods(filename: String): Collection> = parse(filename)?.let { +// GumTreePythonFunctionSplitter().splitIntoMethods(it) +// } ?: emptyList() +// +// private fun createPath(file: String) = "src/test/resources/gumTreeMethodSplitter/$file" +// +// @Before +// fun checkPythonParser() = Assume.assumeTrue(checkExecutable("pythonparser")) +// +// @Test +// fun methodsCountTest() { +// assertEquals(7, splitMethods(createPath("1.py")).size) +// assertEquals(9, splitMethods(createPath("2.py")).size) +// assertEquals(3, splitMethods(createPath("3.py")).size) +// assertEquals(5, splitMethods(createPath("4.py")).size) +// } +// +// @Test +// fun funcNamesTest() { +// val realNames = setOf( +// "no_args_func", "with_args_no_typed", "with_typed_args", +// "with_typed_return_no_args", "full_typed", +// "func_dif_args_typed_return", "complex_args_full_typed" +// ) +// val methodInfos = 
splitMethods(createPath("1.py")) +// val parsedNames = methodInfos.map { it.name }.toSet() +// assertEquals(realNames, parsedNames) +// } +// +// @Test +// fun methodInfoTest1TypedArgs() { +// val methodInfos = splitMethods(createPath("1.py")) +// val method = methodInfos.firstOrNull { it.name == "complex_args_full_typed" } +// assertNotNull(method) +// with(method) { +// assertEquals("complex_args_full_typed", name) +// assertEquals(null, this.method.returnTypeNode) +// assertEquals(1, methodParameters.size) +// assertEquals(listOf("node"), methodParameters.map { it.name }.toList()) +// assertEquals(listOf("JsonNodeType"), methodParameters.map { it.returnType() }.toList()) +// } +// } +// +// @Test +// fun methodInfoTest2ManyArgs() { +// val methodInfos = splitMethods(createPath("1.py")) +// val method = methodInfos.firstOrNull { it.name == "func_dif_args_typed_return" } +// assertNotNull(method) +// with(method) { +// assertEquals("func_dif_args_typed_return", name) +// assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) +// assertEquals(6, methodParameters.size) +// assertEquals(listOf("a", "b", "c", "d", "e", "f"), methodParameters.map { it.name }.toList()) +// assertEquals(emptyList(), methodParameters.mapNotNull { it.returnType }.toList()) +// } +// } +// +// @Test +// fun methodInfoTest3EnclosingClass() { +// val methodInfos = splitMethods(createPath("2.py")) +// val method = methodInfos.firstOrNull { it.name == "foo_typed" } +// assertNotNull(method) +// with(method) { +// assertEquals("foo_typed", name) +// assertEquals("A", enclosingElementName) +// assertEquals(null, this.method.returnTypeNode) +// assertEquals(3, methodParameters.size) +// assertEquals(listOf("self", "x", "y"), methodParameters.map { it.name }.toList()) +// assertEquals(listOf(null, "int", "int"), methodParameters.map { it.returnType }.toList()) +// } +// } +// +// @Test +// fun methodInfoTest4EnclosingClass() { +// val methodInfos = splitMethods(createPath("2.py")) 
+// val method = methodInfos.firstOrNull { it.name() == "bar_typed" } +// assertNotNull(method) +// with(method) { +// assertEquals("bar_typed", name) +// assertEquals("C", enclosingElementName) +// assertEquals(null, this.method.returnTypeNode) +// assertEquals(2, methodParameters.size) +// assertEquals(listOf("self", "x"), methodParameters.map { it.name }.toList()) +// assertEquals(listOf(null, "int"), methodParameters.map { it.returnType }.toList()) +// } +// } +// +// @Test +// fun methodInfoTest5AsyncDef() { +// val methodInfos = splitMethods(createPath("3.py")) +// val method = methodInfos.firstOrNull { it.name == "async_schrecklich_typed" } +// assertNotNull(method) +// with(method) { +// assertEquals("async_schrecklich_typed", name) +// assertEquals("AsyncFunctionDef", this.method.root.getTypeLabel()) +// assertEquals(null, enclosingElementName()) +// assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) +// assertEquals(4, methodParameters.size) +// assertEquals(listOf("event", "x", "args", "kwargs"), methodParameters.map { it.name() }.toList()) +// assertEquals(listOf("str", "int", null, null), methodParameters.map { it.returnType() }.toList()) +// } +// } +// +// @Test +// fun methodInfoTest6Doc() { +// val methodInfos = splitMethods(createPath("3.py")) +// val method = methodInfos.firstOrNull { it.name() == "async_simple_no_typed" } +// assertNotNull(method) +// with(method) { +// assertEquals("async_simple_no_typed", name()) +// assertEquals("AsyncFunctionDef", this.method.root.getTypeLabel()) +// assertEquals(null, enclosingElementName()) +// assertEquals( +// "\n async doc\n ", +// this.method.root.getChildOfType("body") +// ?.getChildOfType("Expr") +// ?.getChildOfType("Constant-str") +// ?.getToken() +// ) +// assertEquals(4, methodParameters.size) +// assertEquals( +// listOf("gh", "original_issue", "branch", "backport_pr_number"), +// methodParameters.map { it.name() }.toList() +// ) +// assertEquals(listOf(null, null, null, 
null), methodParameters.map { it.returnType() }.toList()) +// } +// } +// +// @Test +// fun methodInfoTest7InnerFunc() { +// val methodInfos = splitMethods(createPath("4.py")) +// val method = methodInfos.firstOrNull { it.name() == "foo_2" } +// assertNotNull(method) +// with(method) { +// assertEquals("foo_2", name()) +// assertEquals("foo_1", method.method.root.parent?.wrappedNode?.parent?.label) +// assertEquals(null, enclosingElementName()) +// assertEquals("Constant-NoneType", this.method.returnTypeNode?.getTypeLabel()) +// assertEquals(1, methodParameters.size) +// assertEquals(listOf("c"), methodParameters.map { it.name() }.toList()) +// assertEquals(listOf(null), methodParameters.map { it.returnType() }.toList()) +// } +// } +// +// @Test +// fun methodInfoTest8InnerFunc() { +// val methodInfos = splitMethods(createPath("4.py")) +// val method = methodInfos.firstOrNull { it.name() == "bar_2" } +// assertNotNull(method) +// with(method) { +// assertEquals("bar_2", name()) +// assertEquals("bar_1", method.method.root.parent?.wrappedNode?.parent?.label) +// assertEquals(null, enclosingElementName()) +// assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) +// assertEquals(2, methodParameters.size) +// assertEquals(listOf("d", "e"), methodParameters.map { it.name() }.toList()) +// assertEquals(listOf("int", "int"), methodParameters.map { it.returnType() }.toList()) +// } +// } +//} From 17f6edde189ef723bec1b7779bc4d0dfc85bf219 Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 20 May 2021 12:14:48 +0500 Subject: [PATCH 174/308] fixed naming issues and leftover code --- .../kotlin/astminer/cli/LabelExtractors.kt | 28 ++-- .../common/model/FunctionInfoModel.kt | 2 +- .../astminer/common/model/HandlerModel.kt | 2 +- .../kotlin/astminer/examples/AllJavaFiles.kt | 2 +- .../astminer/examples/AllJavaMethods.kt | 2 +- .../astminer/examples/AllPythonMethods.kt | 2 +- .../astminer/examples/Code2VecJavaMethods.kt | 2 +- 
.../parse/antlr/java/JavaMethodSplitter.kt | 2 +- .../javascript/JavaScriptFunctionSplitter.kt | 2 +- .../antlr/python/PythonFunctionSplitter.kt | 2 +- .../parse/fuzzy/cpp/FuzzyFunctionSplitter.kt | 2 +- .../gumtree/java/GumTreeJavaMethodSplitter.kt | 2 +- .../python/GumTreePythonFunctionSplitter.kt | 6 +- .../kotlin/astminer/cli/LabelExtractorTest.kt | 22 +-- .../antlr/java/JavaMethodSplitterTest.kt | 2 +- ...t.kt => JavaScriptFunctionSplitterTest.kt} | 6 +- .../python/PythonFunctionSplitterTest.kt | 158 ++++++++++++++++++ .../antlr/python/PythonMethodSplitterTest.kt | 158 ------------------ .../parse/cpp/FuzzyMethodSplitterTest.kt | 2 +- .../java/GumTreeJavaMethodSplitterTest.kt | 37 ++-- ...t => GumTreePythonFunctionSplitterTest.kt} | 86 +++++----- 21 files changed, 256 insertions(+), 271 deletions(-) rename src/test/kotlin/astminer/parse/antlr/javascript/{JavaScriptMethodSplitterTest.kt => JavaScriptFunctionSplitterTest.kt} (92%) create mode 100644 src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt delete mode 100644 src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt rename src/test/kotlin/astminer/parse/gumtree/python/{GumTreePythonMethodSplitterTest.kt => GumTreePythonFunctionSplitterTest.kt} (66%) diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 8aa6343c..1ae72b5d 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -54,20 +54,20 @@ abstract class MethodLabelExtractor( ): List> { val (root, filePath) = parseResult val fileExtension = File(filePath).extension - val methodInfos = when (fileExtension) { + val functionInfos = when (fileExtension) { "c", "cpp" -> { - val methodSplitter = FuzzyFunctionSplitter() - methodSplitter.splitIntoMethods(root as FuzzyNode) + val functionSplitter = FuzzyFunctionSplitter() + functionSplitter.splitIntoFunctions(root as FuzzyNode) } "java" -> { when 
(javaParser) { "gumtree" -> { val methodSplitter = GumTreeJavaMethodSplitter() - methodSplitter.splitIntoMethods(root as GumTreeNode) + methodSplitter.splitIntoFunctions(root as GumTreeNode) } "antlr" -> { val methodSplitter = JavaMethodSplitter() - methodSplitter.splitIntoMethods(root as AntlrNode) + methodSplitter.splitIntoFunctions(root as AntlrNode) } else -> { throw UnsupportedOperationException("Unsupported parser $javaParser") @@ -77,12 +77,12 @@ abstract class MethodLabelExtractor( "py" -> { when (pythonParser) { "gumtree" -> { - val methodSplitter = GumTreePythonFunctionSplitter() - methodSplitter.splitIntoMethods(root as GumTreeNode) + val functionSplitter = GumTreePythonFunctionSplitter() + functionSplitter.splitIntoFunctions(root as GumTreeNode) } "antlr" -> { - val methodSplitter = PythonFunctionSplitter() - methodSplitter.splitIntoMethods(root as AntlrNode) + val functionSplitter = PythonFunctionSplitter() + functionSplitter.splitIntoFunctions(root as AntlrNode) } else -> { throw UnsupportedOperationException("Unsupported parser $pythonParser") @@ -90,16 +90,16 @@ abstract class MethodLabelExtractor( } } "js" -> { - val methodSplitter = JavaScriptFunctionSplitter() - methodSplitter.splitIntoMethods(root as AntlrNode) + val functionSplitter = JavaScriptFunctionSplitter() + functionSplitter.splitIntoFunctions(root as AntlrNode) } else -> throw UnsupportedOperationException("Unsupported extension $fileExtension") - }.filter { methodInfo -> + }.filter { functionInfo -> filterPredicates.all { predicate -> - predicate.isFiltered(methodInfo) + predicate.isFiltered(functionInfo) } } - return methodInfos.mapNotNull { + return functionInfos.mapNotNull { val label = extractLabel(it, filePath) ?: return@mapNotNull null LabeledResult(it.root, label, filePath) } diff --git a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt index cd4d88a9..32e25036 100644 --- 
a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt @@ -1,7 +1,7 @@ package astminer.common.model interface TreeFunctionSplitter { - fun splitIntoMethods(root: T): Collection> + fun splitIntoFunctions(root: T): Collection> } class FunctionInfoPropertyNotImplementedException(propertyName: String) : diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index 4c59bef0..6bca0f6d 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -12,6 +12,6 @@ abstract class LanguageHandler { fun splitIntoMethods(): Collection> { val root = parseResult.root - return splitter.splitIntoMethods(root) + return splitter.splitIntoFunctions(root) } } diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index 4595305e..8cc4820c 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -17,7 +17,7 @@ fun allJavaFiles() { File(inputDir).forFilesWithSuffix("11.java") { file -> val node = JavaParser().parseInputStream(file.inputStream()) node.prettyPrint() - JavaMethodSplitter().splitIntoMethods(node).forEach { + JavaMethodSplitter().splitIntoFunctions(node).forEach { println(it.name) println(it.returnType) println(it.enclosingElement?.name) diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 293f9944..e9c981be 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -32,7 +32,7 @@ fun allJavaMethods() { val fileNode = GumTreeJavaParser().parseInputStream(file.inputStream()) //extract method nodes - val methodNodes = GumTreeJavaMethodSplitter().splitIntoMethods(fileNode) + val methodNodes = 
GumTreeJavaMethodSplitter().splitIntoFunctions(fileNode) methodNodes.forEach { methodInfo -> //Retrieve a method identifier diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 41a41e43..c54c9dcf 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -27,7 +27,7 @@ fun allPythonMethods() { val fileNode = GumTreePythonParser().parseInputStream(file.inputStream()) // extract method nodes - val methodNodes = GumTreePythonFunctionSplitter().splitIntoMethods(fileNode) + val methodNodes = GumTreePythonFunctionSplitter().splitIntoFunctions(fileNode) methodNodes.forEach { methodInfo -> // Retrieve a method identifier diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index e3ad3df4..b4d405fc 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -24,7 +24,7 @@ fun code2vecJavaMethods() { val fileNode = JavaParser().parseInputStream(file.inputStream()) //extract method nodes - val methods = JavaMethodSplitter().splitIntoMethods(fileNode) + val methods = JavaMethodSplitter().splitIntoFunctions(fileNode) val labelExtractor = MethodNameExtractor() diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt index c943fe4f..d9c9aa44 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt @@ -8,7 +8,7 @@ import astminer.parse.antlr.hasLastLabel class JavaMethodSplitter : TreeFunctionSplitter { private val methodNodeType = "methodDeclaration" - override fun splitIntoMethods(root: AntlrNode): Collection> { + override fun splitIntoFunctions(root: AntlrNode): Collection> { val methodRoots = 
root.preOrder().filter { (it as AntlrNode).hasLastLabel(methodNodeType) } diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt index 5b5a6b3b..003f35c8 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt @@ -16,7 +16,7 @@ class JavaScriptFunctionSplitter : TreeFunctionSplitter { private const val FUNCTION_NODE = "Function" } - override fun splitIntoMethods(root: AntlrNode): Collection> { + override fun splitIntoFunctions(root: AntlrNode): Collection> { return root.preOrder().mapNotNull { node -> when { node.isArrowElement() -> JavaScriptArrowInfo(node as AntlrNode) diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt index f89ec721..4541947c 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt @@ -9,7 +9,7 @@ import astminer.parse.antlr.hasLastLabel class PythonFunctionSplitter : TreeFunctionSplitter { private val methodNode = "funcdef" - override fun splitIntoMethods(root: AntlrNode): Collection> { + override fun splitIntoFunctions(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { (it as AntlrNode).hasLastLabel(methodNode) } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt index 3a2467c8..9d41aa2e 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt @@ -6,7 +6,7 @@ import astminer.common.model.* class FuzzyFunctionSplitter : TreeFunctionSplitter { private val methodNode = "METHOD" - override fun 
splitIntoMethods(root: FuzzyNode): Collection> { + override fun splitIntoFunctions(root: FuzzyNode): Collection> { val methodRoots = root.preOrder().filter { it.getTypeLabel() == methodNode } return methodRoots.map { FuzzyCppFunctionInfo(it as FuzzyNode) } } diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt index 1ec56414..fa915e21 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt @@ -7,7 +7,7 @@ import astminer.parse.gumtree.GumTreeNode class GumTreeJavaMethodSplitter : TreeFunctionSplitter { private val methodDeclaration = "MethodDeclaration" - override fun splitIntoMethods(root: GumTreeNode): Collection> { + override fun splitIntoFunctions(root: GumTreeNode): Collection> { val methodRoots = root.preOrder().filter { it.getTypeLabel() == methodDeclaration } return methodRoots.map { GumTreeJavaFunctionInfo(it as GumTreeNode) } } diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt index a28dcbd7..22687e67 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt @@ -13,8 +13,8 @@ class GumTreePythonFunctionSplitter : TreeFunctionSplitter { } } - override fun splitIntoMethods(root: GumTreeNode): Collection> { - val methodRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.getTypeLabel()) } - return methodRoots.map { GumTreePythonFunctionInfo(it as GumTreeNode) } + override fun splitIntoFunctions(root: GumTreeNode): Collection> { + val functionRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.getTypeLabel()) } + return functionRoots.map { 
GumTreePythonFunctionInfo(it as GumTreeNode) } } } diff --git a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt index 620d111b..05827a6e 100644 --- a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt +++ b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt @@ -6,8 +6,6 @@ import astminer.parse.antlr.AntlrNode import org.junit.Before import org.junit.Test import kotlin.test.assertEquals -import kotlin.test.assertNull -import kotlin.test.assertTrue internal class LabelExtractorTest { @@ -20,7 +18,7 @@ internal class LabelExtractorTest { private var dummyRoot = AntlrNode("", null, null) - private fun makeMethodInfo(nameNode: AntlrNode) = object : FunctionInfo { + private fun makeFunctionInfo(nameNode: AntlrNode) = object : FunctionInfo { override val root: AntlrNode = dummyRoot override val nameNode: AntlrNode = nameNode } @@ -55,28 +53,18 @@ internal class LabelExtractorTest { @Test fun `test method name extractor extracts correct method name`() { val nameNode = AntlrNode("", dummyRoot, METHOD_NAME) -// val methodInfo = MethodInfo( -// MethodNode(dummyRoot, null, nameNode), -// ElementNode(null, null), -// emptyList() -// ) - val method = makeMethodInfo(nameNode) + val functionInfo = makeFunctionInfo(nameNode) val methodNameExtractor = MethodNameExtractor() - val label = methodNameExtractor.extractLabel(method, PATH_STRING) + val label = methodNameExtractor.extractLabel(functionInfo, PATH_STRING) assertEquals(METHOD_NAME, label) } @Test fun `test method name extractor hides method name with technical token`() { val nameNode = AntlrNode("", dummyRoot, METHOD_NAME) -// val methodInfo = MethodInfo( -// MethodNode(dummyRoot, null, nameNode), -// ElementNode(null, null), -// emptyList() -// ) - val methodInfo = makeMethodInfo(nameNode) + val functionInfo = makeFunctionInfo(nameNode) val methodNameExtractor = MethodNameExtractor() - methodNameExtractor.extractLabel(methodInfo, PATH_STRING) + 
methodNameExtractor.extractLabel(functionInfo, PATH_STRING) assertEquals("METHOD_NAME", nameNode.getTechnicalToken()) } } diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt index 8c0f8e6c..3b950bf8 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt @@ -21,7 +21,7 @@ class JavaMethodSplitterTest { fun parseTree() { val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.java").inputStream()) assertNotNull(testTree) - functionInfos = methodSplitter.splitIntoMethods(testTree) + functionInfos = methodSplitter.splitIntoFunctions(testTree) } @Test diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt similarity index 92% rename from src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt rename to src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt index 07051799..bf559f25 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt @@ -10,11 +10,11 @@ import kotlin.test.assertEquals import kotlin.test.assertNotNull -class JavaScriptMethodSplitterTest { +class JavaScriptFunctionSplitterTest { companion object { const val N_METHODS = 47 const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.js" - val methodSplitter = JavaScriptFunctionSplitter() + val functionSplitter = JavaScriptFunctionSplitter() val parser = JavaScriptParser() } @@ -24,7 +24,7 @@ class JavaScriptMethodSplitterTest { fun parseTree() { val testTree = parser.parseInputStream(File(testFilePath).inputStream()) assertNotNull(testTree) - 
functionInfos = methodSplitter.splitIntoMethods(testTree) + functionInfos = functionSplitter.splitIntoFunctions(testTree) } @Test diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt new file mode 100644 index 00000000..6605d9fa --- /dev/null +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt @@ -0,0 +1,158 @@ +package astminer.parse.antlr.python + +import astminer.common.model.EnclosingElementType +import astminer.common.model.FunctionInfo +import astminer.parse.antlr.AntlrNode +import org.junit.Test +import kotlin.test.assertEquals +import java.io.File +import kotlin.test.BeforeTest +import kotlin.test.assertNotNull +import kotlin.test.assertNull + +class PythonFunctionSplitterTest { + companion object { + const val N_FUNCTIONS = 17 + val functionSplitter = PythonFunctionSplitter() + val parser = PythonParser() + } + + var functionInfos: Collection> = listOf() + + @BeforeTest + fun parseTree() { + val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.py").inputStream()) + assertNotNull(testTree) + functionInfos = functionSplitter.splitIntoFunctions(testTree) + } + + @Test + fun testValidSplitting() { + assertEquals(N_FUNCTIONS, functionInfos.size, "Test file contains $N_FUNCTIONS methods") + } + + @Test + fun testFunctionNotInClass() { + val functionClass = functionInfos.find { it.name == "fun_with_no_class" } + assertNotNull(functionClass) + assertNull(functionClass.enclosingElement) + } + + @Test + fun testFunctionInClass() { + val functionClass = functionInfos.find { it.name == "fun_in_class1" } + assertNotNull(functionClass) + assertEquals(EnclosingElementType.Class, functionClass.enclosingElement?.type) + assertEquals( "Class1", functionClass.enclosingElement?.name) + } + + @Test + fun testFunctionInNestedClass() { + val functionClass = functionInfos.find { it.name == 
"fun_in_class2" } + assertNotNull(functionClass) + assertEquals(EnclosingElementType.Class, functionClass.enclosingElement?.type) + assertEquals( "Class2", functionClass.enclosingElement?.name) + } + + @Test + fun testNoParameters() { + val functionNoParameters = functionInfos.find { it.name == "function_with_no_parameters" } + assertNotNull(functionNoParameters) + assertEquals(0, functionNoParameters.parameters.size) + } + + @Test + fun testOneParameter() { + val functionOneParameter = functionInfos.find { it.name == "function_with_one_parameter" } + assertNotNull(functionOneParameter) + assertEquals(1, functionOneParameter.parameters.size) + val parameter = functionOneParameter.parameters[0] + assertEquals("p1", parameter.name) + } + + @Test + fun testOneTypedParameter() { + val functionOneTypedParameter = functionInfos.find { it.name == "function_with_one_typed_parameter" } + assertNotNull(functionOneTypedParameter) + assertEquals(1, functionOneTypedParameter.parameters.size) + val parameter = functionOneTypedParameter.parameters[0] + assertEquals("p1", parameter.name) + assertEquals("int", parameter.type) + } + + @Test + fun functionWithComplexParameter() { + val functionOneTypedParameter = functionInfos.find { it.name == "function_with_complex_parameter" } + assertNotNull(functionOneTypedParameter) + assertEquals(1, functionOneTypedParameter.parameters.size) + val parameter = functionOneTypedParameter.parameters[0] + assertEquals("p1", parameter.name) + assertEquals("List[int]", parameter.type) + } + + @Test + fun testThreeParameters() { + val functionThreeParameters = functionInfos.find { it.name == "function_with_three_parameters" } + assertNotNull(functionThreeParameters) + assertEquals(3, functionThreeParameters.parameters.size) + val parameters = functionThreeParameters.parameters + assertEquals("p1", parameters[0].name) + + assertEquals("p2", parameters[1].name) + + assertEquals("p3", parameters[2].name) + assertEquals("int", parameters[2].type) + } + + 
@Test + fun testParameterInClass() { + val functionOneParameter = functionInfos.find { it.name == "fun_with_parameter_in_class" } + assertNotNull(functionOneParameter) + assertEquals(2, functionOneParameter.parameters.size) + val parameter = functionOneParameter.parameters[1] + assertEquals("p1", parameter.name) + } + + @Test + fun testTypedParameterInClass() { + val functionOneTypedParameter = functionInfos.find { it.name == "fun_with_typed_parameter_in_class" } + assertNotNull(functionOneTypedParameter) + assertEquals(2, functionOneTypedParameter.parameters.size) + val parameter = functionOneTypedParameter.parameters[1] + assertEquals("p1", parameter.name) + assertEquals("int", parameter.type) + } + + @Test + fun testEnclosingFunction() { + val functionInsideFunction = functionInfos.find { it.name == "function_inside_function" } + assertNotNull(functionInsideFunction) + val enclosingElement = functionInsideFunction.enclosingElement + + assertNotNull(enclosingElement) + assertEquals("function_containing_function", enclosingElement.name) + assertEquals(EnclosingElementType.Function, enclosingElement.type) + } + + @Test + fun testEnclosingMethod() { + val functionInsideMethod = functionInfos.find { it.name == "function_inside_method" } + assertNotNull(functionInsideMethod) + val enclosingElement = functionInsideMethod.enclosingElement + + assertNotNull(enclosingElement) + assertEquals("some_method", enclosingElement.name) + assertEquals(EnclosingElementType.Method, enclosingElement.type) + } + + @Test + fun testEnclosingFunctionInsideMethod() { + val funInsideFunInsideMethod = functionInfos.find { it.name == "fun_inside_fun_inside_method" } + assertNotNull(funInsideFunInsideMethod) + val enclosingElement = funInsideFunInsideMethod.enclosingElement + + assertNotNull(enclosingElement) + assertEquals("second_function_inside_method", enclosingElement.name) + assertEquals(EnclosingElementType.Function, enclosingElement.type) + } +} \ No newline at end of file diff --git 
a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt deleted file mode 100644 index bd1c4b6c..00000000 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt +++ /dev/null @@ -1,158 +0,0 @@ -package astminer.parse.antlr.python - -import astminer.common.model.EnclosingElementType -import astminer.common.model.FunctionInfo -import astminer.parse.antlr.AntlrNode -import org.junit.Test -import kotlin.test.assertEquals -import java.io.File -import kotlin.test.BeforeTest -import kotlin.test.assertNotNull -import kotlin.test.assertNull - -class PythonMethodSplitterTest { - companion object { - const val N_FUNCTIONS = 17 - val methodSplitter = PythonFunctionSplitter() - val parser = PythonParser() - } - - var methodInfos: Collection> = listOf() - - @BeforeTest - fun parseTree() { - val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.py").inputStream()) - assertNotNull(testTree) - methodInfos = methodSplitter.splitIntoMethods(testTree) - } - - @Test - fun testValidSplitting() { - assertEquals(N_FUNCTIONS, methodInfos.size, "Test file contains $N_FUNCTIONS methods") - } - - @Test - fun testFunctionNotInClass() { - val methodClass = methodInfos.find { it.name == "fun_with_no_class" } - assertNotNull(methodClass) - assertNull(methodClass.enclosingElement) - } - - @Test - fun testFunctionInClass() { - val methodClass = methodInfos.find { it.name == "fun_in_class1" } - assertNotNull(methodClass) - assertEquals(EnclosingElementType.Class, methodClass.enclosingElement?.type) - assertEquals( "Class1", methodClass.enclosingElement?.name) - } - - @Test - fun testFunctionInNestedClass() { - val methodClass = methodInfos.find { it.name == "fun_in_class2" } - assertNotNull(methodClass) - assertEquals(EnclosingElementType.Class, methodClass.enclosingElement?.type) - assertEquals( "Class2", methodClass.enclosingElement?.name) 
- } - - @Test - fun testNoParameters() { - val methodNoParameters = methodInfos.find { it.name == "function_with_no_parameters" } - assertNotNull(methodNoParameters) - assertEquals(0, methodNoParameters.parameters.size) - } - - @Test - fun testOneParameter() { - val methodOneParameter = methodInfos.find { it.name == "function_with_one_parameter" } - assertNotNull(methodOneParameter) - assertEquals(1, methodOneParameter.parameters.size) - val parameter = methodOneParameter.parameters[0] - assertEquals("p1", parameter.name) - } - - @Test - fun testOneTypedParameter() { - val methodOneTypedParameter = methodInfos.find { it.name == "function_with_one_typed_parameter" } - assertNotNull(methodOneTypedParameter) - assertEquals(1, methodOneTypedParameter.parameters.size) - val parameter = methodOneTypedParameter.parameters[0] - assertEquals("p1", parameter.name) - assertEquals("int", parameter.type) - } - - @Test - fun functionWithComplexParameter() { - val methodOneTypedParameter = methodInfos.find { it.name == "function_with_complex_parameter" } - assertNotNull(methodOneTypedParameter) - assertEquals(1, methodOneTypedParameter.parameters.size) - val parameter = methodOneTypedParameter.parameters[0] - assertEquals("p1", parameter.name) - assertEquals("List[int]", parameter.type) - } - - @Test - fun testThreeParameters() { - val methodThreeParameters = methodInfos.find { it.name == "function_with_three_parameters" } - assertNotNull(methodThreeParameters) - assertEquals(3, methodThreeParameters.parameters.size) - val parameters = methodThreeParameters.parameters - assertEquals("p1", parameters[0].name) - - assertEquals("p2", parameters[1].name) - - assertEquals("p3", parameters[2].name) - assertEquals("int", parameters[2].type) - } - - @Test - fun testParameterInClass() { - val methodOneParameter = methodInfos.find { it.name == "fun_with_parameter_in_class" } - assertNotNull(methodOneParameter) - assertEquals(2, methodOneParameter.parameters.size) - val parameter = 
methodOneParameter.parameters[1] - assertEquals("p1", parameter.name) - } - - @Test - fun testTypedParameterInClass() { - val methodOneTypedParameter = methodInfos.find { it.name == "fun_with_typed_parameter_in_class" } - assertNotNull(methodOneTypedParameter) - assertEquals(2, methodOneTypedParameter.parameters.size) - val parameter = methodOneTypedParameter.parameters[1] - assertEquals("p1", parameter.name) - assertEquals("int", parameter.type) - } - - @Test - fun testEnclosingFunction() { - val functionInsideFunction = methodInfos.find { it.name == "function_inside_function" } - assertNotNull(functionInsideFunction) - val enclosingElement = functionInsideFunction.enclosingElement - - assertNotNull(enclosingElement) - assertEquals("function_containing_function", enclosingElement.name) - assertEquals(EnclosingElementType.Function, enclosingElement.type) - } - - @Test - fun testEnclosingMethod() { - val functionInsideMethod = methodInfos.find { it.name == "function_inside_method" } - assertNotNull(functionInsideMethod) - val enclosingElement = functionInsideMethod.enclosingElement - - assertNotNull(enclosingElement) - assertEquals("some_method", enclosingElement.name) - assertEquals(EnclosingElementType.Method, enclosingElement.type) - } - - @Test - fun testEnclosingFunctionInsideMethod() { - val funInsideFunInsideMethod = methodInfos.find { it.name == "fun_inside_fun_inside_method" } - assertNotNull(funInsideFunInsideMethod) - val enclosingElement = funInsideFunInsideMethod.enclosingElement - - assertNotNull(enclosingElement) - assertEquals("second_function_inside_method", enclosingElement.name) - assertEquals(EnclosingElementType.Function, enclosingElement.type) - } -} \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index e80a2ddc..1f7ff46f 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ 
b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -28,7 +28,7 @@ class FuzzyMethodSplitterTest { Assume.assumeTrue(checkExecutable("g++")) val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.cpp").inputStream()) assertNotNull(testTree) - methodInfos = methodSplitter.splitIntoMethods(testTree) + methodInfos = methodSplitter.splitIntoFunctions(testTree) } @Test diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt index 5309dda0..ee708bd5 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt @@ -6,22 +6,19 @@ import org.junit.Test import java.io.File import kotlin.test.assertEquals -private fun createTree(filename: String): GumTreeNode { - val parser = GumTreeJavaParser() - return parser.parseInputStream(File(filename).inputStream()) as GumTreeNode -} +private fun createTree(filename: String): GumTreeNode = + GumTreeJavaParser().parseInputStream(File(filename).inputStream()) -private fun createAndSplitTree(filename: String): Collection> { - return GumTreeJavaMethodSplitter().splitIntoMethods(createTree(filename)) -} +private fun createAndSplitTree(filename: String): Collection> = + GumTreeJavaMethodSplitter().splitIntoFunctions(createTree(filename)) class GumTreeJavaMethodSplitterTest { @Test fun testMethodExtraction1() { - val methodInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/1.java") + val functionInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/1.java") - assertEquals(1, methodInfos.size) - with(methodInfos.first()) { + assertEquals(1, functionInfos.size) + with(functionInfos.first()) { assertEquals("fun", name) assertEquals("void", returnType) assertEquals("SingleFunction", enclosingElement?.name) @@ -33,10 +30,10 @@ class 
GumTreeJavaMethodSplitterTest { @Test fun testMethodExtraction2() { - val methodInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/2.java") + val functionInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/2.java") - assertEquals(1, methodInfos.size) - with(methodInfos.first()) { + assertEquals(1, functionInfos.size) + with(functionInfos.first()) { assertEquals("main", name) assertEquals("void", returnType) assertEquals("InnerClass", enclosingElement?.name) @@ -47,17 +44,17 @@ class GumTreeJavaMethodSplitterTest { @Test fun testMethodExtraction3() { - val methodInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/3.java") + val functionInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/3.java") - assertEquals(2, methodInfos.size) - with(methodInfos.first()) { + assertEquals(2, functionInfos.size) + with(functionInfos.first()) { assertEquals("main", name) assertEquals("void", returnType) assertEquals("InnerClass", enclosingElement?.name) assertEquals(listOf("args"), parameters.map { it.name }) assertEquals(listOf("String[]"), parameters.map { it.type }) } - with(methodInfos.last()) { + with(functionInfos.last()) { assertEquals("fun", name) assertEquals("void", returnType) assertEquals("SingleMethodInnerClass", enclosingElement?.name) @@ -68,10 +65,10 @@ class GumTreeJavaMethodSplitterTest { @Test fun testMethodExtraction4() { - val methodInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/4.java") + val functionInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/4.java") - assertEquals(1, methodInfos.size) - with(methodInfos.first()) { + assertEquals(1, functionInfos.size) + with(functionInfos.first()) { assertEquals("fun", name) assertEquals("int", returnType) assertEquals("SingleFunction", enclosingElement?.name) diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt 
b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt similarity index 66% rename from src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt rename to src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt index 6b2447ca..a5c57f66 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt @@ -10,12 +10,12 @@ import java.io.File import kotlin.test.assertEquals import kotlin.test.assertNotNull -class GumTreePythonMethodSplitterTest { +class GumTreePythonFunctionSplitterTest { private fun parse(filename: String): GumTreeNode = GumTreePythonParser().parseInputStream(File(filename).inputStream()) - private fun splitMethods(filename: String): Collection> = - GumTreePythonFunctionSplitter().splitIntoMethods(parse(filename)) + private fun splitFunctions(filename: String): Collection> = + GumTreePythonFunctionSplitter().splitIntoFunctions(parse(filename)) private fun createPath(file: String) = "src/test/resources/gumTreeMethodSplitter/$file" @@ -24,10 +24,10 @@ class GumTreePythonMethodSplitterTest { @Test fun methodsCountTest() { - assertEquals(7, splitMethods(createPath("1.py")).size) - assertEquals(9, splitMethods(createPath("2.py")).size) - assertEquals(3, splitMethods(createPath("3.py")).size) - assertEquals(5, splitMethods(createPath("4.py")).size) + assertEquals(7, splitFunctions(createPath("1.py")).size) + assertEquals(9, splitFunctions(createPath("2.py")).size) + assertEquals(3, splitFunctions(createPath("3.py")).size) + assertEquals(5, splitFunctions(createPath("4.py")).size) } @Test @@ -37,17 +37,17 @@ class GumTreePythonMethodSplitterTest { "with_typed_return_no_args", "full_typed", "func_dif_args_typed_return", "complex_args_full_typed" ) - val methodInfos = splitMethods(createPath("1.py")) - val parsedNames = methodInfos.map { it.name }.toSet() + val 
functionInfos = splitFunctions(createPath("1.py")) + val parsedNames = functionInfos.map { it.name }.toSet() assertEquals(realNames, parsedNames) } @Test fun methodInfoTest1TypedArgs() { - val methodInfos = splitMethods(createPath("1.py")) - val method = methodInfos.firstOrNull { it.name == "complex_args_full_typed" } - assertNotNull(method) - with(method) { + val functionInfos = splitFunctions(createPath("1.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "complex_args_full_typed" } + assertNotNull(functionInfo) + with(functionInfo) { assertEquals("complex_args_full_typed", name) assertEquals(null, returnType) assertEquals(1, parameters.size) @@ -58,10 +58,10 @@ class GumTreePythonMethodSplitterTest { @Test fun methodInfoTest2ManyArgs() { - val methodInfos = splitMethods(createPath("1.py")) - val method = methodInfos.firstOrNull { it.name == "func_dif_args_typed_return" } - assertNotNull(method) - with(method) { + val functionInfos = splitFunctions(createPath("1.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "func_dif_args_typed_return" } + assertNotNull(functionInfo) + with(functionInfo) { assertEquals("func_dif_args_typed_return", name) assertEquals("Constant-int", returnType) assertEquals(6, parameters.size) @@ -72,10 +72,10 @@ class GumTreePythonMethodSplitterTest { @Test fun methodInfoTest3EnclosingClass() { - val methodInfos = splitMethods(createPath("2.py")) - val method = methodInfos.firstOrNull { it.name == "foo_typed" } - assertNotNull(method) - with(method) { + val functionInfos = splitFunctions(createPath("2.py")) + val function = functionInfos.firstOrNull { it.name == "foo_typed" } + assertNotNull(function) + with(function) { assertEquals("foo_typed", name) assertEquals("A", enclosingElement?.name) assertEquals(null, returnType) @@ -87,10 +87,10 @@ class GumTreePythonMethodSplitterTest { @Test fun methodInfoTest4EnclosingClass() { - val methodInfos = splitMethods(createPath("2.py")) - val method = 
methodInfos.firstOrNull { it.name == "bar_typed" } - assertNotNull(method) - with(method) { + val functionInfos = splitFunctions(createPath("2.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "bar_typed" } + assertNotNull(functionInfo) + with(functionInfo) { assertEquals("bar_typed", name) assertEquals("C", enclosingElement?.name) assertEquals(null, returnType) @@ -102,10 +102,10 @@ class GumTreePythonMethodSplitterTest { @Test fun methodInfoTest5AsyncDef() { - val methodInfos = splitMethods(createPath("3.py")) - val method = methodInfos.firstOrNull { it.name == "async_schrecklich_typed" } - assertNotNull(method) - with(method) { + val functionInfos = splitFunctions(createPath("3.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "async_schrecklich_typed" } + assertNotNull(functionInfo) + with(functionInfo) { assertEquals("async_schrecklich_typed", name) assertEquals("AsyncFunctionDef", root.getTypeLabel()) assertEquals(null, enclosingElement?.name) @@ -118,10 +118,10 @@ class GumTreePythonMethodSplitterTest { @Test fun methodInfoTest6Doc() { - val methodInfos = splitMethods(createPath("3.py")) - val method = methodInfos.firstOrNull { it.name == "async_simple_no_typed" } - assertNotNull(method) - with(method) { + val functionInfos = splitFunctions(createPath("3.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "async_simple_no_typed" } + assertNotNull(functionInfo) + with(functionInfo) { assertEquals("async_simple_no_typed", name) assertEquals("AsyncFunctionDef", root.getTypeLabel()) assertEquals(null, enclosingElement?.name) @@ -143,12 +143,12 @@ class GumTreePythonMethodSplitterTest { @Test fun methodInfoTest7InnerFunc() { - val methodInfos = splitMethods(createPath("4.py")) - val method = methodInfos.firstOrNull { it.name == "foo_2" } - assertNotNull(method) - with(method) { + val functionInfos = splitFunctions(createPath("4.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "foo_2" } + 
assertNotNull(functionInfo) + with(functionInfo) { assertEquals("foo_2", name) - assertEquals("foo_1", method.root.parent?.wrappedNode?.parent?.label) + assertEquals("foo_1", functionInfo.root.parent?.wrappedNode?.parent?.label) assertEquals(null, enclosingElement?.name) assertEquals("Constant-NoneType", returnType) assertEquals(1, parameters.size) @@ -159,12 +159,12 @@ class GumTreePythonMethodSplitterTest { @Test fun methodInfoTest8InnerFunc() { - val methodInfos = splitMethods(createPath("4.py")) - val method = methodInfos.firstOrNull { it.name == "bar_2" } - assertNotNull(method) - with(method) { + val functionInfos = splitFunctions(createPath("4.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "bar_2" } + assertNotNull(functionInfo) + with(functionInfo) { assertEquals("bar_2", name) - assertEquals("bar_1", method.root.parent?.wrappedNode?.parent?.label) + assertEquals("bar_1", functionInfo.root.parent?.wrappedNode?.parent?.label) assertEquals(null, enclosingElement?.name) assertEquals("Constant-int", returnType) assertEquals(2, parameters.size) From 24f30bb6164d047afce9e0441f29836d3372259a Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 20 May 2021 12:18:20 +0500 Subject: [PATCH 175/308] renamed java method splitters to function splitters --- src/main/kotlin/astminer/cli/LabelExtractors.kt | 4 ++-- src/main/kotlin/astminer/examples/AllJavaMethods.kt | 4 ++-- src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt | 4 ++-- ...eeJavaMethodSplitter.kt => GumTreeJavaFunctionSplitter.kt} | 2 +- ...thodSplitterTest.kt => GumTreeJavaFunctionSplitterTest.kt} | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) rename src/main/kotlin/astminer/parse/gumtree/java/{GumTreeJavaMethodSplitter.kt => GumTreeJavaFunctionSplitter.kt} (86%) rename src/test/kotlin/astminer/parse/gumtree/java/{GumTreeJavaMethodSplitterTest.kt => GumTreeJavaFunctionSplitterTest.kt} (96%) diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt 
b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 1ae72b5d..40eddede 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -12,7 +12,7 @@ import astminer.parse.antlr.python.PythonFunctionSplitter import astminer.parse.fuzzy.cpp.FuzzyFunctionSplitter import astminer.parse.fuzzy.cpp.FuzzyNode import astminer.parse.gumtree.GumTreeNode -import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter +import astminer.parse.gumtree.java.GumTreeJavaFunctionSplitter import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter import java.io.File @@ -62,7 +62,7 @@ abstract class MethodLabelExtractor( "java" -> { when (javaParser) { "gumtree" -> { - val methodSplitter = GumTreeJavaMethodSplitter() + val methodSplitter = GumTreeJavaFunctionSplitter() methodSplitter.splitIntoFunctions(root as GumTreeNode) } "antlr" -> { diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index e9c981be..2064e90d 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -4,7 +4,7 @@ import astminer.cli.LabeledResult import astminer.common.model.FunctionInfo import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.java.GumTreeJavaParser -import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter +import astminer.parse.gumtree.java.GumTreeJavaFunctionSplitter import astminer.storage.* import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig @@ -32,7 +32,7 @@ fun allJavaMethods() { val fileNode = GumTreeJavaParser().parseInputStream(file.inputStream()) //extract method nodes - val methodNodes = GumTreeJavaMethodSplitter().splitIntoFunctions(fileNode) + val methodNodes = GumTreeJavaFunctionSplitter().splitIntoFunctions(fileNode) methodNodes.forEach { methodInfo -> //Retrieve a method identifier diff --git 
a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt index 85723a52..e16349d9 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt @@ -4,7 +4,7 @@ import astminer.common.model.ParseResult import astminer.common.model.HandlerFactory import astminer.common.model.LanguageHandler import astminer.parse.gumtree.java.GumTreeJavaParser -import astminer.parse.gumtree.java.GumTreeJavaMethodSplitter +import astminer.parse.gumtree.java.GumTreeJavaFunctionSplitter import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter import astminer.parse.gumtree.python.GumTreePythonParser import java.io.File @@ -13,7 +13,7 @@ object GumtreeJavaHandlerFactory : HandlerFactory { override fun createHandler(file: File): LanguageHandler = JavaGumtreeHandler(file) class JavaGumtreeHandler(file: File) : LanguageHandler() { - override val splitter = GumTreeJavaMethodSplitter() + override val splitter = GumTreeJavaFunctionSplitter() override val parseResult: ParseResult = GumTreeJavaParser().parseFile(file) } } diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt similarity index 86% rename from src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt rename to src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt index fa915e21..fec61007 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt @@ -4,7 +4,7 @@ import astminer.common.model.* import astminer.common.preOrder import astminer.parse.gumtree.GumTreeNode -class GumTreeJavaMethodSplitter : TreeFunctionSplitter { +class GumTreeJavaFunctionSplitter : TreeFunctionSplitter { private val methodDeclaration = "MethodDeclaration" override fun 
splitIntoFunctions(root: GumTreeNode): Collection> { diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt similarity index 96% rename from src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt rename to src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt index ee708bd5..9f77e250 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt @@ -10,9 +10,9 @@ private fun createTree(filename: String): GumTreeNode = GumTreeJavaParser().parseInputStream(File(filename).inputStream()) private fun createAndSplitTree(filename: String): Collection> = - GumTreeJavaMethodSplitter().splitIntoFunctions(createTree(filename)) + GumTreeJavaFunctionSplitter().splitIntoFunctions(createTree(filename)) -class GumTreeJavaMethodSplitterTest { +class GumTreeJavaFunctionSplitterTest { @Test fun testMethodExtraction1() { val functionInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/1.java") From 42b205de2ecb7dd491773f27c222990f677a8193 Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 20 May 2021 13:29:05 +0500 Subject: [PATCH 176/308] fixed remaining naming issues and warnings --- src/main/kotlin/astminer/cli/Code2VecExtractor.kt | 1 - src/main/kotlin/astminer/cli/LabelExtractors.kt | 4 ++-- src/main/kotlin/astminer/cli/ProjectParser.kt | 1 - src/main/kotlin/astminer/cli/utils.kt | 1 - src/main/kotlin/astminer/examples/AllJavaFiles.kt | 4 ++-- .../kotlin/astminer/examples/Code2VecJavaMethods.kt | 10 +++++----- .../kotlin/astminer/featureextraction/TreeFeature.kt | 2 +- src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt | 4 ++-- src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt | 2 +- ...{JavaMethodSplitter.kt => JavaFunctionSplitter.kt} | 2 +- 
src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt | 2 +- ...hodSplitterTest.kt => JavaFunctionSplitterTest.kt} | 6 +++--- .../kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt | 11 +---------- 13 files changed, 19 insertions(+), 31 deletions(-) rename src/main/kotlin/astminer/parse/antlr/java/{JavaMethodSplitter.kt => JavaFunctionSplitter.kt} (89%) rename src/test/kotlin/astminer/parse/antlr/java/{JavaMethodSplitterTest.kt => JavaFunctionSplitterTest.kt} (95%) diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt index 56c290c8..12b60a9f 100644 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt @@ -166,7 +166,6 @@ class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null val labelExtractor = customLabelExtractor ?: getLabelExtractor( granularityLevel, javaParser, - isMethodNameHide, excludeModifiers, excludeAnnotations, filterConstructors, diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 40eddede..51478075 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -6,7 +6,7 @@ import astminer.common.model.FunctionInfo import astminer.common.preOrder import astminer.common.setTechnicalToken import astminer.parse.antlr.AntlrNode -import astminer.parse.antlr.java.JavaMethodSplitter +import astminer.parse.antlr.java.JavaFunctionSplitter import astminer.parse.antlr.javascript.JavaScriptFunctionSplitter import astminer.parse.antlr.python.PythonFunctionSplitter import astminer.parse.fuzzy.cpp.FuzzyFunctionSplitter @@ -66,7 +66,7 @@ abstract class MethodLabelExtractor( methodSplitter.splitIntoFunctions(root as GumTreeNode) } "antlr" -> { - val methodSplitter = JavaMethodSplitter() + val methodSplitter = JavaFunctionSplitter() methodSplitter.splitIntoFunctions(root as AntlrNode) } else -> { diff --git 
a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt index cccf4c16..543fc60f 100644 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ b/src/main/kotlin/astminer/cli/ProjectParser.kt @@ -147,7 +147,6 @@ class ProjectParser(private val customLabelExtractor: LabelExtractor? = null) : val labelExtractor = customLabelExtractor ?: getLabelExtractor( granularityLevel, javaParser, - isMethodNameHide, excludeModifiers, excludeAnnotations, filterConstructors, diff --git a/src/main/kotlin/astminer/cli/utils.kt b/src/main/kotlin/astminer/cli/utils.kt index 44265aff..f5f21dc2 100644 --- a/src/main/kotlin/astminer/cli/utils.kt +++ b/src/main/kotlin/astminer/cli/utils.kt @@ -36,7 +36,6 @@ fun getParser( fun getLabelExtractor( granularityLevel: String, javaParser: String, - hideMethodNames: Boolean, excludeModifiers: List, excludeAnnotations: List, filterConstructors: Boolean, diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index 8cc4820c..acb14f45 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -1,7 +1,7 @@ package astminer.examples import astminer.cli.LabeledResult -import astminer.parse.antlr.java.JavaMethodSplitter +import astminer.parse.antlr.java.JavaFunctionSplitter import astminer.parse.antlr.java.JavaParser import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig @@ -17,7 +17,7 @@ fun allJavaFiles() { File(inputDir).forFilesWithSuffix("11.java") { file -> val node = JavaParser().parseInputStream(file.inputStream()) node.prettyPrint() - JavaMethodSplitter().splitIntoFunctions(node).forEach { + JavaFunctionSplitter().splitIntoFunctions(node).forEach { println(it.name) println(it.returnType) println(it.enclosingElement?.name) diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt 
b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index b4d405fc..8baa9f81 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -3,7 +3,7 @@ package astminer.examples import astminer.cli.LabeledResult import astminer.cli.MethodNameExtractor import astminer.common.* -import astminer.parse.antlr.java.JavaMethodSplitter +import astminer.parse.antlr.java.JavaFunctionSplitter import astminer.parse.antlr.java.JavaParser import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig @@ -24,16 +24,16 @@ fun code2vecJavaMethods() { val fileNode = JavaParser().parseInputStream(file.inputStream()) //extract method nodes - val methods = JavaMethodSplitter().splitIntoFunctions(fileNode) + val functions = JavaFunctionSplitter().splitIntoFunctions(fileNode) val labelExtractor = MethodNameExtractor() - methods.forEach { methodInfo -> - val label = labelExtractor.extractLabel(methodInfo, file.absolutePath) ?: return@forEach + functions.forEach { functionInfo -> + val label = labelExtractor.extractLabel(functionInfo, file.absolutePath) ?: return@forEach // TODO: this is ugly maybe label should be normalized by default val normalizedLabel = splitToSubtokens(label).joinToString("|") // Retrieve paths from every node individually and store them - storage.store(LabeledResult(methodInfo.root, normalizedLabel, file.absolutePath)) + storage.store(LabeledResult(functionInfo.root, normalizedLabel, file.absolutePath)) } } diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt index c34c8d10..ace40398 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt +++ b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt @@ -48,7 +48,7 @@ object BranchingFactor : TreeFeature { */ object NumberOfNodes : TreeFeature { override fun compute(tree: Node): Int { - return 
tree.getChildren().map { compute(it) }.sum() + 1 + return tree.getChildren().sumOf { compute(it) } + 1 } } diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt index 866fa1fe..2a53e548 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt @@ -3,7 +3,7 @@ package astminer.parse.antlr import astminer.common.model.ParseResult import astminer.common.model.HandlerFactory import astminer.common.model.LanguageHandler -import astminer.parse.antlr.java.JavaMethodSplitter +import astminer.parse.antlr.java.JavaFunctionSplitter import astminer.parse.antlr.java.JavaParser import astminer.parse.antlr.javascript.JavaScriptFunctionSplitter import astminer.parse.antlr.javascript.JavaScriptParser @@ -16,7 +16,7 @@ object AntlrJavaHandlerFactory : HandlerFactory { class AntlrJavaHandler(file: File) : LanguageHandler() { override val parseResult: ParseResult = JavaParser().parseFile(file) - override val splitter = JavaMethodSplitter() + override val splitter = JavaFunctionSplitter() } } diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 0eae4ec7..20f2bf3b 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -98,6 +98,6 @@ fun AntlrNode.getItOrChildrenOfType(typeLabel: String) : List { return if (hasLastLabel(typeLabel)) { listOf(this) } else { - this.getChildrenOfType(typeLabel).mapNotNull { it as? 
AntlrNode } + this.getChildrenOfType(typeLabel).map { it } } } diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt similarity index 89% rename from src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt rename to src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt index d9c9aa44..0898b3f6 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt @@ -5,7 +5,7 @@ import astminer.common.model.* import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.hasLastLabel -class JavaMethodSplitter : TreeFunctionSplitter { +class JavaFunctionSplitter : TreeFunctionSplitter { private val methodNodeType = "methodDeclaration" override fun splitIntoFunctions(root: AntlrNode): Collection> { diff --git a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt index 61355a17..3910ba8b 100644 --- a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt @@ -15,7 +15,7 @@ class AntrlUtilTest { val node = parser.parseInputStream(FileInputStream(file)) var adoptedNodesSize = 0 - node?.preOrder()?.forEach { curNode -> + node.preOrder().forEach { curNode -> adoptedNodesSize += curNode.getChildren().filter { it.getParent() != curNode }.size } Assert.assertEquals("There should be no children with different parent", 0, adoptedNodesSize) diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt similarity index 95% rename from src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt rename to src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt index 3b950bf8..702e9576 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt +++ 
b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt @@ -8,10 +8,10 @@ import java.io.File import kotlin.test.BeforeTest import kotlin.test.assertNotNull -class JavaMethodSplitterTest { +class JavaFunctionSplitterTest { companion object { const val N_FUNCTIONS = 9 - val methodSplitter = JavaMethodSplitter() + val functionSplitter = JavaFunctionSplitter() val parser = JavaParser() } @@ -21,7 +21,7 @@ class JavaMethodSplitterTest { fun parseTree() { val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.java").inputStream()) assertNotNull(testTree) - functionInfos = methodSplitter.splitIntoFunctions(testTree) + functionInfos = functionSplitter.splitIntoFunctions(testTree) } @Test diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 6b6b567d..41622f8a 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -16,15 +16,6 @@ class FuzzyCppParserTest { @Before fun checkGPP() = Assume.assumeTrue(checkExecutable("g++")) - @Test - fun testNodeIsNotNull() { - val parser = FuzzyCppParser() - val file = File("src/test/resources/fuzzy/test.cpp") - - val nodes = parser.parseFile(file) - Assert.assertTrue("Parse tree for a valid file should not be null", nodes.root != null) - } - @Test fun testInputStreamParsing() { val folder = File("src/test/resources/fuzzy/") @@ -33,7 +24,7 @@ class FuzzyCppParserTest { val parser = FuzzyCppParser() folder.forFilesWithSuffix(".cpp") { file -> n++ - parser.parseInputStream(file.inputStream())?.let { nodes.add(it) } + parser.parseInputStream(file.inputStream()).let { nodes.add(it) } } Assert.assertEquals(n, nodes.size) } From 3561075a06f471ceecbbd33291414c0da2933ac6 Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 20 May 2021 13:51:13 +0500 Subject: [PATCH 177/308] fixed label extractor --- 
src/main/kotlin/astminer/cli/LabelExtractors.kt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 51478075..b04e22b6 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -14,6 +14,7 @@ import astminer.parse.fuzzy.cpp.FuzzyNode import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.java.GumTreeJavaFunctionSplitter import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter +import astminer.storage.TokenProcessor import java.io.File @@ -127,14 +128,16 @@ class MethodNameExtractor( ) : MethodLabelExtractor(filterPredicates, javaParser, pythonParser) { override fun extractLabel(functionInfo: FunctionInfo, filePath: String): String? { + // TODO: the normalization situation is getting out of control. It should be a separate step in the pipeline + val normalizedName = functionInfo.nameNode?.let { TokenProcessor.Normalize.getPresentableToken(it) } val name = functionInfo.name ?: return null + functionInfo.root.preOrder().forEach { node -> if (node.getToken() == name) { node.setTechnicalToken("SELF") } } functionInfo.nameNode?.setTechnicalToken("METHOD_NAME") - // TODO: for some reason it is not normalized, check if something is wrong. 
Maybe storages normalize the label - return name + return normalizedName } } From 47cd4523b81b3b906135df7670cabfd96638c2cb Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 20 May 2021 13:52:58 +0500 Subject: [PATCH 178/308] fixed Code2VecJavaMethods.kt --- src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 8baa9f81..806705bb 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -30,10 +30,8 @@ fun code2vecJavaMethods() { functions.forEach { functionInfo -> val label = labelExtractor.extractLabel(functionInfo, file.absolutePath) ?: return@forEach - // TODO: this is ugly maybe label should be normalized by default - val normalizedLabel = splitToSubtokens(label).joinToString("|") // Retrieve paths from every node individually and store them - storage.store(LabeledResult(functionInfo.root, normalizedLabel, file.absolutePath)) + storage.store(LabeledResult(functionInfo.root, label, file.absolutePath)) } } From a0d7f6f7dd2430232a4cc8903bd96652047dc1d6 Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 20 May 2021 14:20:33 +0500 Subject: [PATCH 179/308] migrated old tests --- .../pipeline/Code2VecExtractionTest.kt | 33 +++++++++++++ .../pipeline/util/OutputVerification.kt | 48 +++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt create mode 100644 src/test/kotlin/astminer/pipeline/util/OutputVerification.kt diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt new file mode 100644 index 00000000..f6fd38e5 --- /dev/null +++ b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt @@ -0,0 +1,33 @@ +package astminer.pipeline + +import 
astminer.config.Code2VecPathStorageConfig +import astminer.config.FilePathExtractorConfig +import astminer.config.FilePipelineConfig +import astminer.config.ParserConfig +import astminer.pipeline.util.verifyPathContextExtraction +import org.junit.Test +import java.io.File +import java.nio.file.Files + +internal class Code2VecExtractionTest { + private val testDataDir = File("src/test/resources") + + // TODO: this test should probably be moved to Code2VecPathStorage + @Test + fun `test code2vec path extraction from files generates correct folders and files`() { + val extractedDataDir = Files.createTempDirectory("extractedData") + + val languages = listOf("java", "py") + + val config = FilePipelineConfig( + inputDir = testDataDir.path, + outputDir = extractedDataDir.toAbsolutePath().toString(), + parserConfig = ParserConfig("antlr", languages), + problemConfig = FilePathExtractorConfig(), + storageConfig = Code2VecPathStorageConfig(8, 3) + ) + Pipeline(config).run() + + verifyPathContextExtraction(extractedDataDir.toFile(), languages, false) + } +} diff --git a/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt b/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt new file mode 100644 index 00000000..f257b364 --- /dev/null +++ b/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt @@ -0,0 +1,48 @@ +package astminer.pipeline.util + +import java.io.File +import kotlin.test.assertTrue + +/** + * Directory with extracted data should contain a directory for each specified language + */ +internal fun checkExtractedDir(extractedDataDir: File, languages: List) { + val metLanguages = mutableSetOf() + extractedDataDir.listFiles()?.forEach { file -> + with(file) { + assertTrue(isDirectory, "Extracted data directory should not contain file $name") + assertTrue(languages.contains(file.name), "Unexpected directory $name") + metLanguages.add(name) + } + } + languages.forEach { language -> + assertTrue(metLanguages.contains(language), "Did not find 
directory for $language") + } +} + +internal fun validPathContextsFile(name: String, batching: Boolean): Boolean { + return if (batching) { + name.startsWith("path_contexts_") && name.endsWith(".csv") + } else { + name == "path_contexts.csv" + } +} + +internal fun checkPathContextsDir(languageDir: File, batching: Boolean) { + val expectedFiles = listOf("tokens.csv", "paths.csv", "node_types.csv") + languageDir.listFiles()?.forEach { file -> + with(file) { + assertTrue( + expectedFiles.contains(name) || validPathContextsFile(name, batching), + "Unexpected file $name in ${languageDir.name}" + ) + } + } +} + +internal fun verifyPathContextExtraction(extractedDataDir: File, languages: List, batching: Boolean) { + checkExtractedDir(extractedDataDir, languages) + languages.forEach { language -> + checkPathContextsDir(extractedDataDir.resolve(language), batching) + } +} From 8ded188c39fbd585f6bec13ba753c018e8583019 Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 20 May 2021 14:37:08 +0500 Subject: [PATCH 180/308] renamed stuff --- .../kotlin/astminer/config/PipelineConfig.kt | 28 +++++++------------ .../astminer/examples/Code2VecJavaMethods.kt | 6 ++-- src/main/kotlin/astminer/pipeline/Pipeline.kt | 8 +++--- .../astminer/pipeline/PipelineBranch.kt | 11 ++++---- .../kotlin/astminer/problem/LabeledResult.kt | 2 -- .../astminer/storage/ast/DotAstStorage.kt | 1 - .../pipeline/Code2VecExtractionTest.kt | 6 ++-- 7 files changed, 25 insertions(+), 37 deletions(-) diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt index 55c19796..88572be0 100644 --- a/src/main/kotlin/astminer/config/PipelineConfig.kt +++ b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -7,8 +7,8 @@ import kotlinx.serialization.Serializable sealed class PipelineConfig { abstract val inputDir: String abstract val outputDir: String - abstract val parserConfig: ParserConfig - abstract val storageConfig: StorageConfig + abstract val parser: 
ParserConfig + abstract val storage: StorageConfig } @Serializable @@ -16,14 +16,10 @@ sealed class PipelineConfig { data class FilePipelineConfig( override val inputDir: String, override val outputDir: String, - @SerialName("parser") - override val parserConfig: ParserConfig, - @SerialName("filters") - val filterConfigs: List = emptyList(), - @SerialName("problem") - val problemConfig: FileProblemConfig, - @SerialName("storage") - override val storageConfig: StorageConfig + override val parser: ParserConfig, + val filters: List = emptyList(), + val problem: FileProblemConfig, + override val storage: StorageConfig ) : PipelineConfig() @Serializable @@ -31,14 +27,10 @@ data class FilePipelineConfig( data class FunctionPipelineConfig( override val inputDir: String, override val outputDir: String, - @SerialName("parser") - override val parserConfig: ParserConfig, - @SerialName("filters") - val filterConfigs: List = emptyList(), - @SerialName("problem") - val problemConfig: FunctionProblemConfig, - @SerialName("storage") - override val storageConfig: StorageConfig + override val parser: ParserConfig, + val filters: List = emptyList(), + val problem: FunctionProblemConfig, + override val storage: StorageConfig ) : PipelineConfig() @Serializable diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 084c6bda..8db1bd65 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -13,12 +13,12 @@ fun code2vecJavaMethods() { val pipelineConfig = FunctionPipelineConfig( inputDir = folder, outputDir = outputDir, - parserConfig = ParserConfig( + parser = ParserConfig( "antlr", listOf("java") ), - problemConfig = FunctionNamePredictionConfig(), - storageConfig = Code2VecPathStorageConfig( + problem = FunctionNamePredictionConfig(), + storage = Code2VecPathStorageConfig( maxPathLength = 5, maxPathWidth = 5 ) diff --git 
a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index f0ba2afa..6d20e6c5 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -25,10 +25,10 @@ class Pipeline(private val config: PipelineConfig) { return outputDirectoryForExtension } - private fun createStorage(extension: String): Storage = with(config.storageConfig) { + private fun createStorage(extension: String): Storage = with(config.storage) { val storagePath = createStorageDirectory(extension).path - // TODO: I should remove this later, once storage constructors have no side effects, and implement it like filters and problems + // TODO: should be removed this later and be implemented like filters and problems, once storage constructors have no side effects when (this) { is AstStorageConfig -> { val tokenProcessor = if (splitTokens) TokenProcessor.Split else TokenProcessor.Normalize @@ -44,8 +44,8 @@ class Pipeline(private val config: PipelineConfig) { } fun run() { - for (extension in config.parserConfig.extensions) { - val languageFactory = getHandlerFactory(extension, config.parserConfig.type) + for (extension in config.parser.extensions) { + val languageFactory = getHandlerFactory(extension, config.parser.type) val files = getProjectFilesWithExtension(inputDirectory, extension).asSequence() val labeledResults = files.map { languageFactory.createHandler(it) }.flatMap { branch.process(it) } diff --git a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt index 3f4a022c..04783061 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt @@ -13,16 +13,15 @@ interface PipelineBranch { } class FilePipelineBranch(config: FilePipelineConfig) : PipelineBranch { - private val filters = config.filterConfigs.map { it.filter } - private val problem = config.problemConfig.problem + private val 
filters = config.filters.map { it.filter } + private val problem = config.problem.problem private fun ParseResult.passesThroughFilters() = filters.all { filter -> filter.test(this) } override fun process(languageHandler: LanguageHandler): Sequence> { val parseResult = languageHandler.parseResult return if (parseResult.passesThroughFilters()) { - val labeledResult = problem.process(parseResult) ?: return emptySequence() - sequenceOf(labeledResult) + problem.process(parseResult)?.let { labeledResult -> sequenceOf(labeledResult) } ?: emptySequence() } else { emptySequence() } @@ -30,8 +29,8 @@ class FilePipelineBranch(config: FilePipelineConfig) : PipelineBranch { } class FunctionPipelineBranch(config: FunctionPipelineConfig) : PipelineBranch { - private val filters = config.filterConfigs.map { it.filter } - private val problem = config.problemConfig.problem + private val filters = config.filters.map { it.filter } + private val problem = config.problem.problem private fun FunctionInfo.passesThroughFilters() = filters.all { filter -> filter.test(this) } diff --git a/src/main/kotlin/astminer/problem/LabeledResult.kt b/src/main/kotlin/astminer/problem/LabeledResult.kt index 3fc49fe9..1b4b9b8c 100644 --- a/src/main/kotlin/astminer/problem/LabeledResult.kt +++ b/src/main/kotlin/astminer/problem/LabeledResult.kt @@ -9,5 +9,3 @@ import astminer.common.model.Node * @property filePath The path to the source file where the AST is from. 
*/ data class LabeledResult(val root: T, val label: String, val filePath: String) - - diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index 310b87a2..1fa8470c 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -101,5 +101,4 @@ class DotAstStorage( val fileObject = File(fullPath) return FilePath(fileObject.parentFile?.path ?: "", fileObject.name) } - } diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt index f6fd38e5..2bb3a7db 100644 --- a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt +++ b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt @@ -22,9 +22,9 @@ internal class Code2VecExtractionTest { val config = FilePipelineConfig( inputDir = testDataDir.path, outputDir = extractedDataDir.toAbsolutePath().toString(), - parserConfig = ParserConfig("antlr", languages), - problemConfig = FilePathExtractorConfig(), - storageConfig = Code2VecPathStorageConfig(8, 3) + parser = ParserConfig("antlr", languages), + problem = FilePathExtractorConfig(), + storage = Code2VecPathStorageConfig(8, 3) ) Pipeline(config).run() From deefe6d7f4c0e306125d30e0bb86d0d73bc82daf Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 20 May 2021 15:10:48 +0500 Subject: [PATCH 181/308] removed several todos --- build.gradle.kts | 2 -- src/main/java/astminer/examples/AllJavaFiles.java | 4 +--- .../me/vovak/antlr/parser/JavaScriptBaseLexer.java | 4 ---- src/main/kotlin/astminer/Main.kt | 6 ++++-- .../kotlin/astminer/common/model/ParsingModel.kt | 11 +++-------- src/main/kotlin/astminer/pipeline/Pipeline.kt | 13 ++++++++++++- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 6e2f5deb..88dfad6c 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -55,8 +55,6 @@ dependencies { 
jmhImplementation("org.jetbrains.kotlin:kotlin-reflect:1.4.32") jmhImplementation("org.openjdk.jmh:jmh-core:1.21") jmhImplementation("org.openjdk.jmh:jmh-generator-annprocess:1.21") - - } val generatedSourcesPath = "src/main/generated" diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 7dfa2d08..eaf5cd0c 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -8,7 +8,6 @@ import astminer.storage.path.PathBasedStorage; import astminer.storage.path.PathBasedStorageConfig; import org.jetbrains.annotations.NotNull; - import java.io.FileInputStream; import java.io.IOException; import java.nio.file.*; @@ -26,9 +25,8 @@ public static void runExample() { final Path inputFolder = Paths.get(INPUT_FOLDER); FileVisitor fileVisitor = new SimpleFileVisitor() { - @NotNull @Override - public FileVisitResult visitFile(@NotNull Path file, BasicFileAttributes attributes) throws IOException { + public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) throws IOException { Node fileTree = new GumTreeJavaParser().parseInputStream(new FileInputStream(file.toFile())); String filePath = file.toAbsolutePath().toString(); diff --git a/src/main/java/me/vovak/antlr/parser/JavaScriptBaseLexer.java b/src/main/java/me/vovak/antlr/parser/JavaScriptBaseLexer.java index 1e48aa18..a0f0ef49 100644 --- a/src/main/java/me/vovak/antlr/parser/JavaScriptBaseLexer.java +++ b/src/main/java/me/vovak/antlr/parser/JavaScriptBaseLexer.java @@ -2,8 +2,6 @@ import me.vovak.antlr.parser.JavaScriptLexer; import org.antlr.v4.runtime.*; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; import java.util.Stack; @@ -17,10 +15,8 @@ public abstract class JavaScriptBaseLexer extends Lexer * Stores values of nested modes. 
By default mode is strict or * defined externally (useStrictDefault) */ - @NotNull private Stack scopeStrictModes = new Stack(); - @Nullable private Token lastToken = null; /** * Default value of strict mode diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index eb058ed4..4ce51c6d 100644 --- a/src/main/kotlin/astminer/Main.kt +++ b/src/main/kotlin/astminer/Main.kt @@ -10,8 +10,10 @@ import kotlinx.serialization.SerializationException import kotlinx.serialization.decodeFromString import com.charleskorn.kaml.Yaml import com.charleskorn.kaml.YamlConfiguration +import mu.KotlinLogging import java.io.File +private val logger = KotlinLogging.logger("Main") class PipelineRunner : CliktCommand(name = "") { val config: File by argument("config", help = "Path to config").file( @@ -26,8 +28,8 @@ class PipelineRunner : CliktCommand(name = "") { val config = try { yaml.decodeFromString(config.readText()) } catch (e: SerializationException) { - // TODO: should log it also - println("Error: $e") + logger.error(e) { "Could not read config" } + println("Could not read config: $e") return } Pipeline(config).run() diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 734a8c3b..e1fc3bc1 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -6,9 +6,6 @@ import mu.KotlinLogging import java.io.File import java.io.InputStream -// TODO: later move this logger to Pipeline -private val logger = KotlinLogging.logger("ParsingModel") - interface Node { fun getTypeLabel(): String fun getChildren(): List @@ -55,13 +52,11 @@ interface Parser { * @param files files to parse * @param handleResult handler to invoke on each file parse result */ + // TODO: this function should be removed. 
it is only used in tests and examples + @Deprecated("Use getHandlerFactory instead") fun parseFiles(files: List, handleResult: (ParseResult) -> Any?) { for (file in files) { - try { - handleResult(parseFile(file)) - } catch (parsingException: ParsingException) { - logger.error(parsingException) { "Failed to parse file ${file.path}" } - } + handleResult(parseFile(file)) } } } diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 6d20e6c5..423eaacd 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -2,14 +2,18 @@ package astminer.pipeline import astminer.common.getProjectFilesWithExtension import astminer.config.* +import astminer.parse.ParsingException import astminer.parse.getHandlerFactory import astminer.storage.Storage import astminer.storage.TokenProcessor import astminer.storage.ast.CsvAstStorage import astminer.storage.ast.DotAstStorage import astminer.storage.path.Code2VecPathStorage +import mu.KotlinLogging import java.io.File +private val logger = KotlinLogging.logger("Pipeline") + class Pipeline(private val config: PipelineConfig) { private val inputDirectory = File(config.inputDir) private val outputDirectory = File(config.outputDir) @@ -48,7 +52,14 @@ class Pipeline(private val config: PipelineConfig) { val languageFactory = getHandlerFactory(extension, config.parser.type) val files = getProjectFilesWithExtension(inputDirectory, extension).asSequence() - val labeledResults = files.map { languageFactory.createHandler(it) }.flatMap { branch.process(it) } + val labeledResults = files.mapNotNull { file -> + try { + languageFactory.createHandler(file) + } catch (e: ParsingException) { + logger.error(e) { "Failed to parse file ${file.path}" } + null + } + }.flatMap { branch.process(it) } createStorage(extension).use { storage -> storage.store(labeledResults.asIterable()) From 96ec814f4065bb4ea3c638c67ba9bae679e717db Mon Sep 17 00:00:00 
2001 From: furetur Date: Thu, 20 May 2021 15:12:29 +0500 Subject: [PATCH 182/308] made the TODO more descriptive --- .../astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt index 389786df..dfab01c9 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -65,7 +65,7 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo? { val enclosingNode = root.findEnclosingElementBy { it.lastLabelIn(POSSIBLE_ENCLOSING_ELEMENTS) } ?: return null val type = when { From c01501bbe8109face174be038d5d803a667421ca Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 20 May 2021 15:51:17 +0500 Subject: [PATCH 183/308] added pipeline documentation --- .../kotlin/astminer/config/FilterConfigs.kt | 27 +++++++++++++++++++ .../{PipelineConfig.kt => PipelineConfigs.kt} | 22 ++++++++++++++- .../kotlin/astminer/config/ProblemConfigs.kt | 15 +++++++++++ .../{StorageConfig.kt => StorageConfigs.kt} | 14 ++++++++++ src/main/kotlin/astminer/pipeline/Pipeline.kt | 7 +++++ .../astminer/pipeline/PipelineBranch.kt | 20 +++++++++++++- src/main/kotlin/astminer/storage/Storage.kt | 5 ++++ 7 files changed, 108 insertions(+), 2 deletions(-) rename src/main/kotlin/astminer/config/{PipelineConfig.kt => PipelineConfigs.kt} (56%) rename src/main/kotlin/astminer/config/{StorageConfig.kt => StorageConfigs.kt} (72%) diff --git a/src/main/kotlin/astminer/config/FilterConfigs.kt b/src/main/kotlin/astminer/config/FilterConfigs.kt index 900902a8..d744a929 100644 --- a/src/main/kotlin/astminer/config/FilterConfigs.kt +++ b/src/main/kotlin/astminer/config/FilterConfigs.kt @@ -5,11 +5,17 @@ import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable import 
kotlinx.serialization.Transient +/** + * A config for filter that tests files (ParseResult) + */ @Serializable sealed class FileFilterConfig { abstract val filter: FileFilter } +/** + * @see TreeSizeFilter + */ @Serializable @SerialName("max tree size") data class FileTreeSizeFilterConfig(val maxTreeSize: Int) : FileFilterConfig() { @@ -17,11 +23,17 @@ data class FileTreeSizeFilterConfig(val maxTreeSize: Int) : FileFilterConfig() { override val filter = TreeSizeFilter(maxTreeSize) } +/** + * A config for filter that tests functions (FunctionInfo) + */ @Serializable sealed class FunctionFilterConfig { abstract val filter: FunctionFilter } +/** + * @see TreeSizeFilter + */ @Serializable @SerialName("max tree size") data class FunctionTreeSizeFilterConfig(val maxTreeSize: Int) : FunctionFilterConfig() { @@ -29,6 +41,9 @@ data class FunctionTreeSizeFilterConfig(val maxTreeSize: Int) : FunctionFilterCo override val filter = TreeSizeFilter(maxTreeSize) } +/** + * @see ModifierFilter + */ @Serializable @SerialName("exclude functions with modifiers") data class ModifierFilterConfig(val modifiers: List) : FunctionFilterConfig() { @@ -36,6 +51,9 @@ data class ModifierFilterConfig(val modifiers: List) : FunctionFilterCon override val filter = ModifierFilter(modifiers) } +/** + * @see AnnotationFilter + */ @Serializable @SerialName("exclude functions with annotations") data class AnnotationFilterConfig(val annotations: List) : FunctionFilterConfig() { @@ -43,6 +61,9 @@ data class AnnotationFilterConfig(val annotations: List) : FunctionFilte override val filter = AnnotationFilter(annotations) } +/** + * @see ConstructorFilter + */ @Serializable @SerialName("exclude constructors") class ConstructorFilterConfig : FunctionFilterConfig() { @@ -50,6 +71,9 @@ class ConstructorFilterConfig : FunctionFilterConfig() { override val filter = ConstructorFilter } +/** + * @see FunctionNameWordsNumberFilter + */ @Serializable @SerialName("by function name length") data class 
FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : FunctionFilterConfig() { @@ -57,6 +81,9 @@ data class FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : Functi override val filter = FunctionNameWordsNumberFilter(maxWordsNumber) } +/** + * @see FunctionAnyNodeWordsNumberFilter + */ @Serializable @SerialName("by length of any token") data class FunctionAnyNodeWordsNumberFilterConfig(val maxWordsNumber: Int) : FunctionFilterConfig() { diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfigs.kt similarity index 56% rename from src/main/kotlin/astminer/config/PipelineConfig.kt rename to src/main/kotlin/astminer/config/PipelineConfigs.kt index 88572be0..dbb49b43 100644 --- a/src/main/kotlin/astminer/config/PipelineConfig.kt +++ b/src/main/kotlin/astminer/config/PipelineConfigs.kt @@ -3,6 +3,9 @@ package astminer.config import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable +/** + * Base class for pipeline configs + */ @Serializable sealed class PipelineConfig { abstract val inputDir: String @@ -11,6 +14,11 @@ sealed class PipelineConfig { abstract val storage: StorageConfig } +/** + * Pipeline config for pipeline with file-level granularity. + * In other words, [filters] are used to filter parsed files + * and [problem] processes and extracts label from parsed files. + */ @Serializable @SerialName("file granularity") data class FilePipelineConfig( @@ -22,6 +30,11 @@ data class FilePipelineConfig( override val storage: StorageConfig ) : PipelineConfig() +/** + * Pipeline config for pipeline with function-level granularity. 
+ * In other words, [filters] are used to test functions + * and [problem] processes and extracts labels from functions + */ @Serializable @SerialName("function granularity") data class FunctionPipelineConfig( @@ -33,9 +46,16 @@ data class FunctionPipelineConfig( override val storage: StorageConfig ) : PipelineConfig() +/** + * This config is used to select the parsers that should be used + * If given type = "antlr" and extensions = ["py", "java"] + * then 2 ANTLR parsers will be used (java antler parser and python antlr parser) + * @param type Type of the parser (`antlr` or `gumtree` or `fuzzy` ...) + * @param extensions File extensions that should be parsed + */ @Serializable data class ParserConfig( - val type: String, + val type: String, // TODO: should be an enum val extensions: List ) diff --git a/src/main/kotlin/astminer/config/ProblemConfigs.kt b/src/main/kotlin/astminer/config/ProblemConfigs.kt index 6a5d99b2..2d241bba 100644 --- a/src/main/kotlin/astminer/config/ProblemConfigs.kt +++ b/src/main/kotlin/astminer/config/ProblemConfigs.kt @@ -5,11 +5,17 @@ import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable import kotlinx.serialization.Transient +/** + * A config for problem that processes and extracts label from files + */ @Serializable sealed class FileProblemConfig { abstract val problem: FileLevelProblem } +/** + * @see FilePathExtractor + */ @Serializable @SerialName("label with filepath") class FilePathExtractorConfig : FileProblemConfig() { @@ -17,6 +23,9 @@ class FilePathExtractorConfig : FileProblemConfig() { override val problem = FilePathExtractor } +/** + * @see FolderExtractor + */ @Serializable @SerialName("label with folder name") class FolderNameExtractorConfig : FileProblemConfig() { @@ -24,11 +33,17 @@ class FolderNameExtractorConfig : FileProblemConfig() { override val problem = FolderExtractor } +/** + * A config for problem that processes and extracts label from functions + */ @Serializable sealed class 
FunctionProblemConfig { abstract val problem: FunctionLevelProblem } +/** + * @see FunctionNameProblem + */ @Serializable @SerialName("function name prediction") class FunctionNamePredictionConfig : FunctionProblemConfig() { diff --git a/src/main/kotlin/astminer/config/StorageConfig.kt b/src/main/kotlin/astminer/config/StorageConfigs.kt similarity index 72% rename from src/main/kotlin/astminer/config/StorageConfig.kt rename to src/main/kotlin/astminer/config/StorageConfigs.kt index 1960bfb5..bdc2a1e2 100644 --- a/src/main/kotlin/astminer/config/StorageConfig.kt +++ b/src/main/kotlin/astminer/config/StorageConfigs.kt @@ -6,9 +6,17 @@ import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable import kotlinx.serialization.Transient +/** + * Config for storage that saves the results on disk + */ @Serializable sealed class StorageConfig +/** + * Format in which the ASTs should be saved + * [Dot] is for [astminer.storage.ast.DotAstStorage] + * [Csv] is for [astminer.storage.ast.CsvAstStorage] + */ @Serializable enum class AstStorageFormat { @SerialName("dot") @@ -17,6 +25,9 @@ enum class AstStorageFormat { Csv } +/** + * Config for [astminer.storage.ast.DotAstStorage] and [astminer.storage.ast.CsvAstStorage] + */ @Serializable @SerialName("ast") data class AstStorageConfig( @@ -24,6 +35,9 @@ data class AstStorageConfig( val splitTokens: Boolean = false ) : StorageConfig() +/** + * Config for [astminer.storage.path.Code2VecPathStorage] + */ @Serializable @SerialName("code2vec paths") data class Code2VecPathStorageConfig( diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 423eaacd..0f6f2b6e 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -14,6 +14,10 @@ import java.io.File private val logger = KotlinLogging.logger("Pipeline") +/** + * Pipeline runs all the steps needed to parse, process and save data. 
+ * @param config The pipeline config that defines the pipeline + */ class Pipeline(private val config: PipelineConfig) { private val inputDirectory = File(config.inputDir) private val outputDirectory = File(config.outputDir) @@ -47,6 +51,9 @@ class Pipeline(private val config: PipelineConfig) { } } + /** + * Runs the pipeline that is defined in the [config] + */ fun run() { for (extension in config.parser.extensions) { val languageFactory = getHandlerFactory(extension, config.parser.type) diff --git a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt index 04783061..ef64f882 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt @@ -8,10 +8,23 @@ import astminer.config.FilePipelineConfig import astminer.config.FunctionPipelineConfig import astminer.problem.LabeledResult +/** + * PipelineBranch is a part of the pipeline that can be completely different depending on the granularity (pipeline type) + * It accepts parsed files (LanguageHandler) and returns labeled results. + */ interface PipelineBranch { + /** + * Extracts labeled results from LanguageHandler + * May mutate the AST. + * Should have no other side-effects + */ fun process(languageHandler: LanguageHandler): Sequence> } +/** + * PipelineBranch for pipeline with file-level granularity (FilePipelineConfig). + * Works with files as a whole. Tests parsed files with filters and extracts a label from them. + */ class FilePipelineBranch(config: FilePipelineConfig) : PipelineBranch { private val filters = config.filters.map { it.filter } private val problem = config.problem.problem @@ -28,6 +41,11 @@ class FilePipelineBranch(config: FilePipelineConfig) : PipelineBranch { } } +/** + * PipelineBranch for pipeline with function-level granularity (FunctionPipelineConfig). + * Extracts functions from the parsed files. 
+ * Then tests functions with filters, processes them and extracts labels from each function. + */ class FunctionPipelineBranch(config: FunctionPipelineConfig) : PipelineBranch { private val filters = config.filters.map { it.filter } private val problem = config.problem.problem @@ -38,4 +56,4 @@ class FunctionPipelineBranch(config: FunctionPipelineConfig) : PipelineBranch { languageHandler.splitIntoMethods().asSequence() .filter { functionInfo -> functionInfo.passesThroughFilters() } .mapNotNull { functionInfo -> problem.process(functionInfo) } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/storage/Storage.kt b/src/main/kotlin/astminer/storage/Storage.kt index 117bcddc..703cd37f 100644 --- a/src/main/kotlin/astminer/storage/Storage.kt +++ b/src/main/kotlin/astminer/storage/Storage.kt @@ -4,6 +4,11 @@ import astminer.problem.LabeledResult import astminer.common.model.Node import java.io.Closeable +/** + * Storage saves labeled results to disk in a specified format. + * Storage might extract any data from a labeled result. 
+ * For instance, it might extract paths from trees + */ interface Storage : Closeable { val outputDirectoryPath: String From 059ff0741cc370c2ffb8be0530fcdaee09fc768b Mon Sep 17 00:00:00 2001 From: furetur Date: Thu, 20 May 2021 16:30:29 +0500 Subject: [PATCH 184/308] converted parser types and file extensions into enums --- .../java/astminer/examples/AllJavaFiles.java | 1 - .../astminer/common/model/HandlerModel.kt | 2 +- .../kotlin/astminer/config/ParserConfig.kt | 33 +++++++++++++++++++ .../kotlin/astminer/config/PipelineConfigs.kt | 14 -------- .../astminer/examples/Code2VecJavaMethods.kt | 5 +-- src/main/kotlin/astminer/parse/factory.kt | 29 ++++++++-------- src/main/kotlin/astminer/pipeline/Pipeline.kt | 8 ++--- .../astminer/pipeline/PipelineBranch.kt | 2 +- .../pipeline/Code2VecExtractionTest.kt | 11 +++---- 9 files changed, 59 insertions(+), 46 deletions(-) create mode 100644 src/main/kotlin/astminer/config/ParserConfig.kt diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index eaf5cd0c..725f306b 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -7,7 +7,6 @@ import astminer.storage.path.Code2VecPathStorage; import astminer.storage.path.PathBasedStorage; import astminer.storage.path.PathBasedStorageConfig; -import org.jetbrains.annotations.NotNull; import java.io.FileInputStream; import java.io.IOException; import java.nio.file.*; diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index ae9b80a6..159a8fed 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -10,7 +10,7 @@ abstract class LanguageHandler { abstract val parseResult: ParseResult protected abstract val splitter: TreeFunctionSplitter - fun splitIntoMethods(): Collection> { + fun splitIntoFunctions(): Collection> { 
return splitter.splitIntoFunctions(parseResult.root) } } diff --git a/src/main/kotlin/astminer/config/ParserConfig.kt b/src/main/kotlin/astminer/config/ParserConfig.kt new file mode 100644 index 00000000..28363890 --- /dev/null +++ b/src/main/kotlin/astminer/config/ParserConfig.kt @@ -0,0 +1,33 @@ +package astminer.config + +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +/** + * This config is used to select the parsers that should be used + * If given type = "antlr" and extensions = ["py", "java"] + * then 2 ANTLR parsers will be used (Java ANTLR parser and Python ANTLR parser) + * @param type Type of the parser + * @param extensions File extensions that should be parsed + */ +@Serializable +data class ParserConfig( + val type: ParserType, + val extensions: List<FileExtension> +) + +@Serializable +enum class ParserType { + @SerialName("antlr") Antlr, + @SerialName("gumtree") GumTree, + @SerialName("fuzzy") Fuzzy +} + +@Serializable +enum class FileExtension(val fileExtension: String) { + @SerialName("py") Python("py"), + @SerialName("java") Java("java"), + @SerialName("js") JavaScript("js"), + @SerialName("c") C("c"), + @SerialName("cpp") Cpp("cpp") +} diff --git a/src/main/kotlin/astminer/config/PipelineConfigs.kt b/src/main/kotlin/astminer/config/PipelineConfigs.kt index dbb49b43..357597ec 100644 --- a/src/main/kotlin/astminer/config/PipelineConfigs.kt +++ b/src/main/kotlin/astminer/config/PipelineConfigs.kt @@ -45,17 +45,3 @@ data class FunctionPipelineConfig( val problem: FunctionProblemConfig, override val storage: StorageConfig ) : PipelineConfig() - -/** - * This config is used to select the parsers that should be used - * If given type = "antlr" and extensions = ["py", "java"] - * then 2 ANTLR parsers will be used (java antler parser and python antlr parser) - * @param type Type of the parser (`antlr` or `gumtree` or `fuzzy` ...) 
- * @param extensions File extensions that should be parsed - */ -@Serializable -data class ParserConfig( - val type: String, // TODO: should be an enum - val extensions: List -) - diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 8db1bd65..422d2f9f 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -13,10 +13,7 @@ fun code2vecJavaMethods() { val pipelineConfig = FunctionPipelineConfig( inputDir = folder, outputDir = outputDir, - parser = ParserConfig( - "antlr", - listOf("java") - ), + parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Java)), problem = FunctionNamePredictionConfig(), storage = Code2VecPathStorageConfig( maxPathLength = 5, diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt index e54cfd59..f35ec011 100644 --- a/src/main/kotlin/astminer/parse/factory.kt +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -1,41 +1,42 @@ package astminer.parse import astminer.common.model.HandlerFactory +import astminer.config.FileExtension +import astminer.config.ParserType import astminer.parse.antlr.AntlrJavaHandlerFactory import astminer.parse.antlr.AntlrJavascriptHandlerFactory import astminer.parse.antlr.AntlrPythonHandlerFactory import astminer.parse.gumtree.GumtreeJavaHandlerFactory import astminer.parse.gumtree.GumtreePythonHandlerFactory -fun getHandlerFactory(extension: String, parserType: String): HandlerFactory { +fun getHandlerFactory(extension: FileExtension, parserType: ParserType): HandlerFactory { return when (parserType) { - "gumtree" -> getGumtreeHandlerFactory(extension) - "antlr" -> getAntlrHandlerFactory(extension) - "fuzzy" -> getFuzzyHandlerFactory(extension) - else -> throw UnsupportedOperationException() + ParserType.GumTree -> getGumtreeHandlerFactory(extension) + ParserType.Antlr -> getAntlrHandlerFactory(extension) 
+ ParserType.Fuzzy -> getFuzzyHandlerFactory(extension) } } -private fun getGumtreeHandlerFactory(extension: String): HandlerFactory { +private fun getGumtreeHandlerFactory(extension: FileExtension): HandlerFactory { return when (extension) { - "java" -> GumtreeJavaHandlerFactory - "py" -> GumtreePythonHandlerFactory + FileExtension.Java -> GumtreeJavaHandlerFactory + FileExtension.Python -> GumtreePythonHandlerFactory else -> throw UnsupportedOperationException() } } -private fun getAntlrHandlerFactory(extension: String): HandlerFactory { +private fun getAntlrHandlerFactory(extension: FileExtension): HandlerFactory { return when (extension) { - "java" -> AntlrJavaHandlerFactory - "js" -> AntlrJavascriptHandlerFactory - "py" -> AntlrPythonHandlerFactory + FileExtension.Java -> AntlrJavaHandlerFactory + FileExtension.JavaScript -> AntlrJavascriptHandlerFactory + FileExtension.Python -> AntlrPythonHandlerFactory else -> throw UnsupportedOperationException() } } -private fun getFuzzyHandlerFactory(extension: String): HandlerFactory { +private fun getFuzzyHandlerFactory(extension: FileExtension): HandlerFactory { return when (extension) { - "c", "cpp" -> FuzzyCppHandler + FileExtension.C, FileExtension.Cpp -> FuzzyCppHandler else -> throw UnsupportedOperationException() } } diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 0f6f2b6e..489a07e7 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -27,13 +27,13 @@ class Pipeline(private val config: PipelineConfig) { is FunctionPipelineConfig -> FunctionPipelineBranch(config) } - private fun createStorageDirectory(extension: String): File { - val outputDirectoryForExtension = outputDirectory.resolve(extension) + private fun createStorageDirectory(extension: FileExtension): File { + val outputDirectoryForExtension = outputDirectory.resolve(extension.fileExtension) outputDirectoryForExtension.mkdir() 
return outputDirectoryForExtension } - private fun createStorage(extension: String): Storage = with(config.storage) { + private fun createStorage(extension: FileExtension): Storage = with(config.storage) { val storagePath = createStorageDirectory(extension).path // TODO: should be removed this later and be implemented like filters and problems, once storage constructors have no side effects @@ -58,7 +58,7 @@ class Pipeline(private val config: PipelineConfig) { for (extension in config.parser.extensions) { val languageFactory = getHandlerFactory(extension, config.parser.type) - val files = getProjectFilesWithExtension(inputDirectory, extension).asSequence() + val files = getProjectFilesWithExtension(inputDirectory, extension.fileExtension).asSequence() val labeledResults = files.mapNotNull { file -> try { languageFactory.createHandler(file) diff --git a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt index ef64f882..b96086d0 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt @@ -53,7 +53,7 @@ class FunctionPipelineBranch(config: FunctionPipelineConfig) : PipelineBranch { private fun FunctionInfo.passesThroughFilters() = filters.all { filter -> filter.test(this) } override fun process(languageHandler: LanguageHandler): Sequence> = - languageHandler.splitIntoMethods().asSequence() + languageHandler.splitIntoFunctions().asSequence() .filter { functionInfo -> functionInfo.passesThroughFilters() } .mapNotNull { functionInfo -> problem.process(functionInfo) } } diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt index 2bb3a7db..2d364e1d 100644 --- a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt +++ b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt @@ -1,9 +1,6 @@ package astminer.pipeline -import astminer.config.Code2VecPathStorageConfig 
-import astminer.config.FilePathExtractorConfig -import astminer.config.FilePipelineConfig -import astminer.config.ParserConfig +import astminer.config.* import astminer.pipeline.util.verifyPathContextExtraction import org.junit.Test import java.io.File @@ -17,17 +14,17 @@ internal class Code2VecExtractionTest { fun `test code2vec path extraction from files generates correct folders and files`() { val extractedDataDir = Files.createTempDirectory("extractedData") - val languages = listOf("java", "py") + val languages = listOf(FileExtension.Java, FileExtension.Python) val config = FilePipelineConfig( inputDir = testDataDir.path, outputDir = extractedDataDir.toAbsolutePath().toString(), - parser = ParserConfig("antlr", languages), + parser = ParserConfig(ParserType.Antlr, languages), problem = FilePathExtractorConfig(), storage = Code2VecPathStorageConfig(8, 3) ) Pipeline(config).run() - verifyPathContextExtraction(extractedDataDir.toFile(), languages, false) + verifyPathContextExtraction(extractedDataDir.toFile(), languages.map { it.fileExtension }, false) } } From 0bd26b9a9f4a5e56c152369bbdf43a6b41cea0c4 Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 20 May 2021 22:04:18 +0300 Subject: [PATCH 185/308] conflicts resolved --- .../kotlin/astminer/cli/FilterPredicates.kt | 2 +- .../common/model/FunctionInfoModel.kt | 2 +- .../kotlin/astminer/parse/FindingUtils.kt | 4 +-- .../kotlin/astminer/parse/antlr/AntlrUtil.kt | 8 +++--- .../parse/antlr/java/AntlrJavaFunctionInfo.kt | 6 ++--- .../parse/antlr/java/JavaFunctionSplitter.kt | 4 +-- .../javascript/AntlrJavaScriptElementInfo.kt | 20 +++++++------- .../javascript/JavaScriptFunctionSplitter.kt | 9 +++---- .../antlr/python/AntlrPythonFunctionInfo.kt | 14 +++++----- .../antlr/python/PythonFunctionSplitter.kt | 4 +-- .../parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt | 10 +++---- .../parse/fuzzy/cpp/FuzzyFunctionSplitter.kt | 4 +-- .../gumtree/java/GumTreeJavaFunctionInfo.kt | 14 +++++----- .../java/GumTreeJavaFunctionSplitter.kt 
| 5 ++-- .../python/GumTreePythonFunctionInfo.kt | 26 +++++++++---------- .../python/GumTreePythonFunctionSplitter.kt | 5 ++-- .../astminer/parse/antlr/AntrlUtilTest.kt | 2 +- .../GumTreePythonFunctionSplitterTest.kt | 6 ++--- 18 files changed, 71 insertions(+), 74 deletions(-) diff --git a/src/main/kotlin/astminer/cli/FilterPredicates.kt b/src/main/kotlin/astminer/cli/FilterPredicates.kt index f646ae65..3fb642db 100644 --- a/src/main/kotlin/astminer/cli/FilterPredicates.kt +++ b/src/main/kotlin/astminer/cli/FilterPredicates.kt @@ -38,7 +38,7 @@ class MethodAnyNodeWordsNumberFilter(private val maxWordsNumber: Int) : MethodFi return if (maxWordsNumber == -1) { true } else { - !functionInfo.root.preOrder().any { node -> splitToSubtokens(node.getToken()).size > maxWordsNumber } + !functionInfo.root.preOrder().any { node -> splitToSubtokens(node.token).size > maxWordsNumber } } } } diff --git a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt index 32e25036..f37bf262 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt @@ -16,7 +16,7 @@ interface FunctionInfo { val nameNode: T? get() = notImplemented("nameNode") val name: String? - get() = nameNode?.getToken() + get() = nameNode?.token val root: T get() = notImplemented("root") val filePath: String diff --git a/src/main/kotlin/astminer/parse/FindingUtils.kt b/src/main/kotlin/astminer/parse/FindingUtils.kt index aa8bc1e5..84f5b78d 100644 --- a/src/main/kotlin/astminer/parse/FindingUtils.kt +++ b/src/main/kotlin/astminer/parse/FindingUtils.kt @@ -3,9 +3,9 @@ package astminer.parse import astminer.common.model.Node inline fun T.findEnclosingElementBy(condition: (T) -> Boolean): T? 
{ - var curNode = this.getParent() + var curNode = this.parent while (!(curNode == null || condition(curNode as T))) { - curNode = curNode.getParent() + curNode = curNode.parent } return curNode as T? } diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 9674dc38..236e6d1d 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -73,9 +73,9 @@ fun compressTree(root: AntlrNode): AntlrNode { fun decompressTypeLabel(typeLabel: String) = typeLabel.split("|") -fun AntlrNode.lastLabel() = decompressTypeLabel(getTypeLabel()).last() +fun AntlrNode.lastLabel() = decompressTypeLabel(typeLabel).last() -fun AntlrNode.firstLabel() = decompressTypeLabel(getTypeLabel()).first() +fun AntlrNode.firstLabel() = decompressTypeLabel(typeLabel).first() fun AntlrNode.hasLastLabel(label: String): Boolean = lastLabel() == label @@ -87,9 +87,9 @@ fun AntlrNode.firstLabelIn(labels: List): Boolean = labels.contains(firs fun Node.getTokensFromSubtree(): String { if (isLeaf()) { - return getToken() + return token } - return getChildren().joinToString(separator = "") { child -> + return children.joinToString(separator = "") { child -> child.getTokensFromSubtree() } } diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt index de0be107..84f1de35 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -37,7 +37,7 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo getParameterInfo(singleParameter) } } @@ -59,7 +59,7 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo { override fun splitIntoFunctions(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { - (it as AntlrNode).hasLastLabel(methodNodeType) + 
(it).hasLastLabel(methodNodeType) } - return methodRoots.map { AntlrJavaFunctionInfo(it as AntlrNode) } + return methodRoots.map { AntlrJavaFunctionInfo(it) } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt index 47a94c49..8ced7fcd 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt @@ -29,17 +29,17 @@ abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode) : Functi } private fun AntlrNode.containsLabelIn(labels: List): Boolean { - return decompressTypeLabel(getTypeLabel()).intersect(labels).isNotEmpty() + return decompressTypeLabel(typeLabel).intersect(labels).isNotEmpty() } private fun getEnclosingElementName(enclosingRoot: AntlrNode?): String? { - return enclosingRoot?.getChildren()?.firstOrNull { + return enclosingRoot?.children?.firstOrNull { it.hasLastLabel(ENCLOSING_ELEMENT_NAME_NODE) - }?.getToken() + }?.token } private fun getEnclosingElementType(enclosingRoot: AntlrNode): EnclosingElementType { - return when (decompressTypeLabel(enclosingRoot.getTypeLabel()).last()) { + return when (decompressTypeLabel(enclosingRoot.typeLabel).last()) { "functionDeclaration" -> EnclosingElementType.Function "classDeclaration" -> EnclosingElementType.Class "methodDefinition" -> EnclosingElementType.Method @@ -56,13 +56,13 @@ abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode) : Functi //Have only one parameter, which is indicated only by its name parametersRoot.hasLastLabel(PARAMETER_NAME_NODE) -> listOf( - FunctionInfoParameter(name = parametersRoot.getToken(), type = null) + FunctionInfoParameter(name = parametersRoot.token, type = null) ) //Have many parameters or one indicated not only by it's name else -> 
parametersRoot.getItOrChildrenOfType(SINGLE_PARAMETER_NODE).map { val nameNode = it.getChildOfType(PARAMETER_NAME_NODE) ?: it - FunctionInfoParameter(name = nameNode.getToken(), type = null) + FunctionInfoParameter(name = nameNode.token, type = null) } } } @@ -99,12 +99,12 @@ class JavaScriptMethodInfo(override val root: AntlrNode) : AntlrJavaScriptElemen override val nameNode: AntlrNode? = collectNameNode() private fun collectNameNode(): AntlrNode? { - val methodNameParent = root.getChildren().firstOrNull { - METHOD_GETTERS_SETTERS.contains(it.getTypeLabel()) + val methodNameParent = root.children.firstOrNull { + METHOD_GETTERS_SETTERS.contains(it.typeLabel) } ?: root - return methodNameParent.getChildren().firstOrNull { - decompressTypeLabel(it.getTypeLabel()).contains(METHOD_NAME_NODE) + return methodNameParent.children.firstOrNull { + decompressTypeLabel(it.typeLabel).contains(METHOD_NAME_NODE) } } diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt index 003f35c8..fb14d649 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt @@ -1,7 +1,6 @@ package astminer.parse.antlr.javascript import astminer.common.model.* -import astminer.common.preOrder import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.decompressTypeLabel @@ -19,9 +18,9 @@ class JavaScriptFunctionSplitter : TreeFunctionSplitter { override fun splitIntoFunctions(root: AntlrNode): Collection> { return root.preOrder().mapNotNull { node -> when { - node.isArrowElement() -> JavaScriptArrowInfo(node as AntlrNode) - node.isFunctionElement() -> JavaScriptFunctionInfo(node as AntlrNode) - node.isMethodElement() -> JavaScriptMethodInfo(node as AntlrNode) + node.isArrowElement() -> JavaScriptArrowInfo(node) + node.isFunctionElement() -> 
JavaScriptFunctionInfo(node) + node.isMethodElement() -> JavaScriptMethodInfo(node) else -> null } } @@ -29,5 +28,5 @@ class JavaScriptFunctionSplitter : TreeFunctionSplitter { private fun Node.isArrowElement() = this.getChildOfType(ARROW_NODE) != null private fun Node.isFunctionElement() = this.getChildOfType(FUNCTION_NODE) != null - private fun Node.isMethodElement() = decompressTypeLabel(this.getTypeLabel()).last() == METHOD_NODE + private fun Node.isMethodElement() = decompressTypeLabel(this.typeLabel).last() == METHOD_NODE } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt index dfab01c9..41501b46 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -51,9 +51,9 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo enclosingNode.getChildOfType(CLASS_NAME_NODE) EnclosingElementType.Method, EnclosingElementType.Function -> enclosingNode.getChildOfType(FUNCTION_NAME_NODE) else -> throw IllegalStateException("Enclosing node can only be function or class") - }?.getToken() + }?.token return EnclosingElement( type = type, name = name, @@ -91,13 +91,13 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo { override fun splitIntoFunctions(root: AntlrNode): Collection> { val methodRoots = root.preOrder().filter { - (it as AntlrNode).hasLastLabel(methodNode) + (it).hasLastLabel(methodNode) } - return methodRoots.map { AntlrPythonFunctionInfo(it as AntlrNode) } + return methodRoots.map { AntlrPythonFunctionInfo(it) } } } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt index b5d6e11c..2dad89d5 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt +++ 
b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt @@ -30,7 +30,7 @@ class FuzzyCppFunctionInfo(override val root: FuzzyNode): FunctionInfo? { @@ -44,18 +44,18 @@ class FuzzyCppFunctionInfo(override val root: FuzzyNode): FunctionInfo { val parameters = root.getChildrenOfType(METHOD_PARAMETER_NODE) return parameters.map { param -> - val type = param.getChildOfType(PARAMETER_TYPE_NODE)?.getToken() - val name = param.getChildOfType(PARAMETER_NAME_NODE)?.getToken() ?: "" + val type = param.getChildOfType(PARAMETER_TYPE_NODE)?.token + val name = param.getChildOfType(PARAMETER_NAME_NODE)?.token ?: "" FunctionInfoParameter(name, type) } } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt index 9d41aa2e..7d84b0c6 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt @@ -7,7 +7,7 @@ class FuzzyFunctionSplitter : TreeFunctionSplitter { private val methodNode = "METHOD" override fun splitIntoFunctions(root: FuzzyNode): Collection> { - val methodRoots = root.preOrder().filter { it.getTypeLabel() == methodNode } - return methodRoots.map { FuzzyCppFunctionInfo(it as FuzzyNode) } + val methodRoots = root.preOrder().filter { it.typeLabel == methodNode } + return methodRoots.map { FuzzyCppFunctionInfo(it) } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt index fbec6aa0..93153f6f 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -21,8 +21,8 @@ class GumTreeJavaFunctionInfo(override val root: GumTreeNode) : FunctionInfo? = collectEnclosingClass() private fun collectEnclosingClass(): EnclosingElement? 
{ - val enclosingClassNode = getEnclosingClassNode(root.getParent() as GumTreeNode?) ?: return null - val enclosingClassName = enclosingClassNode.getChildOfType(TypeLabels.simpleName)?.getToken() + val enclosingClassNode = getEnclosingClassNode(root.parent) ?: return null + val enclosingClassName = enclosingClassNode.getChildOfType(TypeLabels.simpleName)?.token return EnclosingElement( root = enclosingClassNode, type = EnclosingElementType.Class, @@ -31,10 +31,10 @@ class GumTreeJavaFunctionInfo(override val root: GumTreeNode) : FunctionInfo { @@ -48,13 +48,13 @@ class GumTreeJavaFunctionInfo(override val root: GumTreeNode) : FunctionInfo { private val methodDeclaration = "MethodDeclaration" override fun splitIntoFunctions(root: GumTreeNode): Collection> { - val methodRoots = root.preOrder().filter { it.getTypeLabel() == methodDeclaration } - return methodRoots.map { GumTreeJavaFunctionInfo(it as GumTreeNode) } + val methodRoots = root.preOrder().filter { it.typeLabel == methodDeclaration } + return methodRoots.map { GumTreeJavaFunctionInfo(it) } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt index 648ad25a..bed0598f 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -35,17 +35,17 @@ class GumTreePythonFunctionInfo(override val root: GumTreeNode) : FunctionInfo = collectParameters() override val enclosingElement: EnclosingElement? = collectEnclosingClass() - override val returnType: String? = getElementType(root)?.getTypeLabel() + override val returnType: String? = getElementType(root)?.typeLabel private fun getElementType(node: GumTreeNode): GumTreeNode? 
{ - if (node.getTypeLabel() == TypeLabels.arg) { + if (node.typeLabel == TypeLabels.arg) { return node.getChildOfType(TypeLabels.nameLoad) } // if return statement has "Constant-`Type`" return value => function type is `Type` - if (TypeLabels.methodDefinitions.contains(node.getTypeLabel())) { + if (TypeLabels.methodDefinitions.contains(node.typeLabel)) { return node.getChildOfType(TypeLabels.body)?.getChildOfType(TypeLabels.returnTypeLabel)?.let { - it.getChildren().firstOrNull { child -> - child.getTypeLabel().startsWith(TypeLabels.constantType) + it.children.firstOrNull { child -> + child.typeLabel.startsWith(TypeLabels.constantType) } } } @@ -56,29 +56,29 @@ class GumTreePythonFunctionInfo(override val root: GumTreeNode) : FunctionInfo { - val arguments = root.getChildrenOfType(TypeLabels.arguments).flatMap { it.getChildren() } + val arguments = root.getChildrenOfType(TypeLabels.arguments).flatMap { it.children } val params = arguments.flatMap { node -> - when (node.getTypeLabel()) { - in TypeLabels.funcArgsTypesNodes -> node.getChildren() - .filter { it.getTypeLabel() == TypeLabels.arg } + when (node.typeLabel) { + in TypeLabels.funcArgsTypesNodes -> node.children + .filter { it.typeLabel == TypeLabels.arg } TypeLabels.vararg, TypeLabels.kwarg -> listOf(node) else -> emptyList() } } return params.map { node-> FunctionInfoParameter( - name = node.getToken(), - type = getElementType(node)?.getToken() + name = node.token, + type = getElementType(node)?.token ) } } diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt index 22687e67..1f006bb7 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt @@ -1,7 +1,6 @@ package astminer.parse.gumtree.python import astminer.common.model.* -import astminer.common.preOrder import 
astminer.parse.gumtree.GumTreeNode class GumTreePythonFunctionSplitter : TreeFunctionSplitter { @@ -14,7 +13,7 @@ class GumTreePythonFunctionSplitter : TreeFunctionSplitter { } override fun splitIntoFunctions(root: GumTreeNode): Collection> { - val functionRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.getTypeLabel()) } - return functionRoots.map { GumTreePythonFunctionInfo(it as GumTreeNode) } + val functionRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.typeLabel) } + return functionRoots.map { GumTreePythonFunctionInfo(it) } } } diff --git a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt index 1646d014..ff5fb556 100644 --- a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt @@ -14,7 +14,7 @@ class AntrlUtilTest { val node = parser.parseInputStream(FileInputStream(file)) var adoptedNodesSize = 0 - node?.preOrder()?.forEach { curNode -> + node.preOrder().forEach { curNode -> adoptedNodesSize += curNode.children.filter { it.parent != node }.size } Assert.assertEquals("There should be no children with different parent", 0, adoptedNodesSize) diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt index a5c57f66..857b7bb6 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt @@ -107,7 +107,7 @@ class GumTreePythonFunctionSplitterTest { assertNotNull(functionInfo) with(functionInfo) { assertEquals("async_schrecklich_typed", name) - assertEquals("AsyncFunctionDef", root.getTypeLabel()) + assertEquals("AsyncFunctionDef", root.typeLabel) assertEquals(null, enclosingElement?.name) assertEquals("Constant-int", returnType) assertEquals(4, 
parameters.size) @@ -123,14 +123,14 @@ class GumTreePythonFunctionSplitterTest { assertNotNull(functionInfo) with(functionInfo) { assertEquals("async_simple_no_typed", name) - assertEquals("AsyncFunctionDef", root.getTypeLabel()) + assertEquals("AsyncFunctionDef", root.typeLabel) assertEquals(null, enclosingElement?.name) assertEquals( "\n async doc\n ", root.getChildOfType("body") ?.getChildOfType("Expr") ?.getChildOfType("Constant-str") - ?.getToken() + ?.token ) assertEquals(4, parameters.size) assertEquals( From b068af0072529aab0963ea0a688813aca728ce7b Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 20 May 2021 22:06:33 +0300 Subject: [PATCH 186/308] antlr util test fixed --- src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt index ff5fb556..69dfe9a1 100644 --- a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt @@ -15,7 +15,7 @@ class AntrlUtilTest { val node = parser.parseInputStream(FileInputStream(file)) var adoptedNodesSize = 0 node.preOrder().forEach { curNode -> - adoptedNodesSize += curNode.children.filter { it.parent != node }.size + adoptedNodesSize += curNode.children.filter { it.parent != curNode }.size } Assert.assertEquals("There should be no children with different parent", 0, adoptedNodesSize) } From f91a401c0bfcd63eafd4ab1814c50b55ef369722 Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 21 May 2021 12:58:45 +0300 Subject: [PATCH 187/308] comment removed --- src/main/kotlin/astminer/common/TreeUtil.kt | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index ef76d964..2120236d 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -10,32 +10,17 @@ 
fun Node.postOrderIterator(): Iterator { return postOrder().listIterator() } -/*fun Node.preOrderIterator(): Iterator { - return preOrder().listIterator() -}*/ - fun doTraversePostOrder(node: Node, resultList: MutableList) { node.children.forEach { doTraversePostOrder(it, resultList) } resultList.add(node) } -/*fun doTraversePreOrder(node: Node, resultList: MutableList) { - resultList.add(node) - node.children.forEach { doTraversePreOrder(it, resultList) } -}*/ - fun Node.postOrder(): List { val result: MutableList = ArrayList() doTraversePostOrder(this, result) return result } -/*fun Node.preOrder(): List { - val result: MutableList = ArrayList() - doTraversePreOrder(this, result) - return result -}*/ - const val DEFAULT_TOKEN = "EMPTY_TOKEN" const val TECHNICAL_TOKEN_KEY = "technical_token" From 6e1328aa59ba7836fa82bae1db92a8b18a712eea Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 21 May 2021 13:29:33 +0300 Subject: [PATCH 188/308] post order iterator implemented properly --- src/main/kotlin/astminer/common/TreeUtil.kt | 16 -------------- .../astminer/common/model/ParsingModel.kt | 21 +++++++++++++++---- src/main/kotlin/astminer/paths/PathWorker.kt | 1 - .../astminer/paths/PathWorkerTestBase.kt | 1 - .../astminer/paths/PathWorkerTestUtil.kt | 1 - 5 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index 2120236d..4e0f802c 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -5,22 +5,6 @@ import astminer.storage.TokenProcessor import java.util.ArrayList -fun Node.postOrderIterator(): Iterator { - //TODO implement properly - return postOrder().listIterator() -} - -fun doTraversePostOrder(node: Node, resultList: MutableList) { - node.children.forEach { doTraversePostOrder(it, resultList) } - resultList.add(node) -} - -fun Node.postOrder(): List { - val result: MutableList = ArrayList() - 
doTraversePostOrder(this, result) - return result -} - const val DEFAULT_TOKEN = "EMPTY_TOKEN" const val TECHNICAL_TOKEN_KEY = "technical_token" diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 377077e2..ee9d898f 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -38,6 +38,9 @@ abstract class Node{ fun preOrderIterator(): Iterator = PreOrderIterator(this) open fun preOrder(): List = PreOrderIterator(this).asSequence().toList() + + fun postOrderIterator(): Iterator = PostOrderIterator(this) + open fun postOrder(): List = PostOrderIterator(this).asSequence().toList() } class PreOrderIterator(root: Node): Iterator { @@ -59,15 +62,25 @@ class PreOrderIterator(root: Node): Iterator { } class PostOrderIterator(root: Node): Iterator { + private data class NodeWrapper(val node: Node, var isChecked: Boolean = false) - override fun hasNext(): Boolean { - TODO("Not yet implemented") + private val tree = mutableListOf(NodeWrapper(root)) + + private fun fillWithChildren(wrapper: NodeWrapper){ + if (!wrapper.isChecked) { + tree.addAll(wrapper.node.children.asReversed().map { NodeWrapper(it) }) + wrapper.isChecked = true + } } + override fun hasNext(): Boolean = tree.isNotEmpty() + override fun next(): Node { - TODO("Not yet implemented") + while (!tree.last().isChecked) { + fillWithChildren(tree.last()) + } + return tree.removeLast().node } - } interface Parser { diff --git a/src/main/kotlin/astminer/paths/PathWorker.kt b/src/main/kotlin/astminer/paths/PathWorker.kt index 501123bc..451d7b05 100644 --- a/src/main/kotlin/astminer/paths/PathWorker.kt +++ b/src/main/kotlin/astminer/paths/PathWorker.kt @@ -3,7 +3,6 @@ package astminer.paths import astminer.common.model.ASTPath import astminer.common.model.Node import astminer.common.model.PathPiece -import astminer.common.postOrderIterator import kotlin.math.min class 
PathWorker { diff --git a/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt b/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt index ba41566b..484dcccd 100644 --- a/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt +++ b/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt @@ -1,7 +1,6 @@ package astminer.paths import astminer.common.model.Node -import astminer.common.postOrder import org.junit.Assert import org.junit.Test diff --git a/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt b/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt index d8d7bdf0..0c51f32e 100644 --- a/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt +++ b/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt @@ -2,7 +2,6 @@ package astminer.paths import astminer.common.model.ASTPath import astminer.common.model.Node -import astminer.common.postOrder import astminer.parse.antlr.AntlrNode import org.junit.Assert From 39934371e2b779b695b3c180ad171b5470c8c9cc Mon Sep 17 00:00:00 2001 From: furetur Date: Sun, 23 May 2021 15:25:18 +0500 Subject: [PATCH 189/308] improved readability of a few lines of code --- src/main/kotlin/astminer/pipeline/PipelineBranch.kt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt index b96086d0..d6fa0a26 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt @@ -34,7 +34,8 @@ class FilePipelineBranch(config: FilePipelineConfig) : PipelineBranch { override fun process(languageHandler: LanguageHandler): Sequence> { val parseResult = languageHandler.parseResult return if (parseResult.passesThroughFilters()) { - problem.process(parseResult)?.let { labeledResult -> sequenceOf(labeledResult) } ?: emptySequence() + val labeledResult = problem.process(parseResult) ?: return emptySequence() + sequenceOf(labeledResult) } else { emptySequence() } From 
2e354d409da6954b89a2a0a14dbe2c14c7145a0f Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 23 May 2021 16:54:05 +0300 Subject: [PATCH 190/308] now token uses proper constant --- src/main/kotlin/astminer/parse/antlr/AntlrNode.kt | 3 ++- src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt index 238377aa..fbf2ca61 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt @@ -1,12 +1,13 @@ package astminer.parse.antlr +import astminer.common.DEFAULT_TOKEN import astminer.common.model.Node class AntlrNode(override val typeLabel: String, override var parent: AntlrNode?, token: String?) : Node() { override val children: MutableList = mutableListOf() - override var token: String = token ?: "null" + override var token: String = token ?: DEFAULT_TOKEN fun replaceChildren(newChildren: List) { children.clear() diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt index 44474d7f..2c6bc0a5 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt @@ -1,5 +1,6 @@ package astminer.parse.fuzzy.cpp +import astminer.common.DEFAULT_TOKEN import astminer.common.model.Node import com.google.common.collect.TreeMultiset @@ -20,7 +21,7 @@ class FuzzyNode(override val typeLabel: String,token: String?, order: Int?) 
: No override val children get() = childrenMultiset.toList() - override var token: String = token ?: "null" + override var token: String = token ?: DEFAULT_TOKEN fun addChild(node: FuzzyNode) { childrenMultiset.add(node) From 7401a8e98e110e75e2dc72ea8f3c1021fb75f016 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 24 May 2021 02:00:28 +0300 Subject: [PATCH 191/308] Update minor version --- README.md | 2 +- build.gradle.kts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bff273ea..e51aeb06 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![JetBrains Research](https://jb.gg/badges/research.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub) -![astminer version](https://img.shields.io/badge/astminer-v0.6.3-blue) +![astminer version](https://img.shields.io/badge/astminer-v0.6.4-blue) # `astminer` A library for mining of [path-based representations of code](https://arxiv.org/pdf/1803.09544.pdf) and more, supported by the [Machine Learning Methods for Software Engineering](https://research.jetbrains.org/groups/ml_methods) group at [JetBrains Research](https://research.jetbrains.org). 
diff --git a/build.gradle.kts b/build.gradle.kts index 6d4531e0..7eeabe66 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,7 +1,7 @@ import tanvd.kosogor.proxy.shadowJar group = "io.github.vovak" -version = "0.6.3" +version = "0.6.4" plugins { id("java") From 5bf7c734ff72f47ecbaee6a86c026c7a17d3f689 Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 24 May 2021 15:25:43 +0500 Subject: [PATCH 192/308] reimplemented configs --- .../kotlin/astminer/config/FilterConfigs.kt | 65 ++----- .../kotlin/astminer/config/PipelineConfigs.kt | 50 +---- .../kotlin/astminer/config/ProblemConfigs.kt | 42 ++--- .../kotlin/astminer/config/StorageConfigs.kt | 21 +-- .../astminer/examples/Code2VecJavaMethods.kt | 4 +- .../astminer/featureextraction/TreeFeature.kt | 2 - .../kotlin/astminer/filters/CommonFilters.kt | 32 +++- .../kotlin/astminer/filters/FileFilters.kt | 2 +- .../astminer/filters/FunctionFilters.kt | 19 +- src/main/kotlin/astminer/pipeline/Pipeline.kt | 22 +-- .../astminer/pipeline/PipelineBranch.kt | 71 +++++-- .../astminer/problem/FileLevelProblems.kt | 2 +- .../astminer/filters/FileFiltersTest.kt | 16 +- .../astminer/filters/FunctionFiltersTest.kt | 55 +++--- .../python/GumTreePythonMethodSplitterTest.kt | 177 ------------------ .../pipeline/Code2VecExtractionTest.kt | 4 +- .../problem/FolderNameExtractorTest.kt | 4 +- 17 files changed, 205 insertions(+), 383 deletions(-) delete mode 100644 src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt diff --git a/src/main/kotlin/astminer/config/FilterConfigs.kt b/src/main/kotlin/astminer/config/FilterConfigs.kt index d744a929..8a68e514 100644 --- a/src/main/kotlin/astminer/config/FilterConfigs.kt +++ b/src/main/kotlin/astminer/config/FilterConfigs.kt @@ -3,72 +3,49 @@ package astminer.config import astminer.filters.* import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable -import kotlinx.serialization.Transient /** - * A config for filter that tests files 
(ParseResult) + * Base class for all filter configs. See below */ @Serializable -sealed class FileFilterConfig { - abstract val filter: FileFilter +sealed class FilterConfig { + abstract val serialName: String } /** * @see TreeSizeFilter */ @Serializable -@SerialName("max tree size") -data class FileTreeSizeFilterConfig(val maxTreeSize: Int) : FileFilterConfig() { - @Transient - override val filter = TreeSizeFilter(maxTreeSize) -} - -/** - * A config for filter that tests functions (FunctionInfo) - */ -@Serializable -sealed class FunctionFilterConfig { - abstract val filter: FunctionFilter -} - -/** - * @see TreeSizeFilter - */ -@Serializable -@SerialName("max tree size") -data class FunctionTreeSizeFilterConfig(val maxTreeSize: Int) : FunctionFilterConfig() { - @Transient - override val filter = TreeSizeFilter(maxTreeSize) +@SerialName("by tree size") +data class TreeSizeFilterConfig(val maxTreeSize: Int) : FilterConfig() { + override val serialName = "by tree size" } /** * @see ModifierFilter */ @Serializable -@SerialName("exclude functions with modifiers") -data class ModifierFilterConfig(val modifiers: List) : FunctionFilterConfig() { - @Transient - override val filter = ModifierFilter(modifiers) +@SerialName("by modifiers") +data class ModifierFilterConfig(val modifiers: List) : FilterConfig() { + override val serialName = "by modifiers" } /** * @see AnnotationFilter */ @Serializable -@SerialName("exclude functions with annotations") -data class AnnotationFilterConfig(val annotations: List) : FunctionFilterConfig() { - @Transient - override val filter = AnnotationFilter(annotations) +@SerialName("by annotations") +data class AnnotationFilterConfig(val annotations: List) : FilterConfig() { + override val serialName = "by annotations" } /** * @see ConstructorFilter */ @Serializable -@SerialName("exclude constructors") -class ConstructorFilterConfig : FunctionFilterConfig() { - @Transient - override val filter = ConstructorFilter +@SerialName("no constructors") 
+object ConstructorFilterConfig : FilterConfig() { + override val serialName = "no constructors" } /** @@ -76,17 +53,15 @@ class ConstructorFilterConfig : FunctionFilterConfig() { */ @Serializable @SerialName("by function name length") -data class FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : FunctionFilterConfig() { - @Transient - override val filter = FunctionNameWordsNumberFilter(maxWordsNumber) +data class FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : FilterConfig() { + override val serialName = "by function name length" } /** - * @see FunctionAnyNodeWordsNumberFilter + * @see WordsNumberFilter */ @Serializable @SerialName("by length of any token") -data class FunctionAnyNodeWordsNumberFilterConfig(val maxWordsNumber: Int) : FunctionFilterConfig() { - @Transient - override val filter = FunctionAnyNodeWordsNumberFilter(maxWordsNumber) +data class WordsNumberFilterConfig(val maxWordsNumber: Int) : FilterConfig() { + override val serialName = "by length of any token" } diff --git a/src/main/kotlin/astminer/config/PipelineConfigs.kt b/src/main/kotlin/astminer/config/PipelineConfigs.kt index 357597ec..81749e50 100644 --- a/src/main/kotlin/astminer/config/PipelineConfigs.kt +++ b/src/main/kotlin/astminer/config/PipelineConfigs.kt @@ -1,47 +1,17 @@ package astminer.config -import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable /** - * Base class for pipeline configs + * Config which defines the pipeline + * @see astminer.pipeline.Pipeline */ @Serializable -sealed class PipelineConfig { - abstract val inputDir: String - abstract val outputDir: String - abstract val parser: ParserConfig - abstract val storage: StorageConfig -} - -/** - * Pipeline config for pipeline with file-level granularity. - * In other words, [filters] are used to filter parsed files - * and [problem] processes and extracts label from parsed files. 
- */ -@Serializable -@SerialName("file granularity") -data class FilePipelineConfig( - override val inputDir: String, - override val outputDir: String, - override val parser: ParserConfig, - val filters: List = emptyList(), - val problem: FileProblemConfig, - override val storage: StorageConfig -) : PipelineConfig() - -/** - * Pipeline config for pipeline with function-level granularity. - * In other words, [filters] are used to test functions - * and [problem] processes and extracts labels from functions - */ -@Serializable -@SerialName("function granularity") -data class FunctionPipelineConfig( - override val inputDir: String, - override val outputDir: String, - override val parser: ParserConfig, - val filters: List = emptyList(), - val problem: FunctionProblemConfig, - override val storage: StorageConfig -) : PipelineConfig() +data class PipelineConfig( + val inputDir: String, + val outputDir: String, + val parser: ParserConfig, + val filters: List = emptyList(), + val problem: ProblemConfig, + val storage: StorageConfig +) diff --git a/src/main/kotlin/astminer/config/ProblemConfigs.kt b/src/main/kotlin/astminer/config/ProblemConfigs.kt index 2d241bba..17e76933 100644 --- a/src/main/kotlin/astminer/config/ProblemConfigs.kt +++ b/src/main/kotlin/astminer/config/ProblemConfigs.kt @@ -3,50 +3,44 @@ package astminer.config import astminer.problem.* import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable -import kotlinx.serialization.Transient /** - * A config for problem that processes and extracts label from files + * Problems that have [File] granularity process and extract labels from *whole files*. + * Problems that have [Function] granularity process and extract labels from *functions* (that are collected from files). 
*/ -@Serializable -sealed class FileProblemConfig { - abstract val problem: FileLevelProblem +enum class Granularity { + File, + Function } -/** - * @see FilePathExtractor - */ @Serializable -@SerialName("label with filepath") -class FilePathExtractorConfig : FileProblemConfig() { - @Transient - override val problem = FilePathExtractor +sealed class ProblemConfig { + abstract val granularity: Granularity } /** - * @see FolderExtractor + * @see FilePathExtractor */ @Serializable -@SerialName("label with folder name") -class FolderNameExtractorConfig : FileProblemConfig() { - @Transient - override val problem = FolderExtractor +@SerialName("file name") +object FileNameExtractorConfig : ProblemConfig() { + override val granularity = Granularity.File } /** - * A config for problem that processes and extracts label from functions + * @see FolderNameExtractor */ @Serializable -sealed class FunctionProblemConfig { - abstract val problem: FunctionLevelProblem +@SerialName("folder name") +object FolderNameExtractorConfig : ProblemConfig() { + override val granularity = Granularity.File } /** * @see FunctionNameProblem */ @Serializable -@SerialName("function name prediction") -class FunctionNamePredictionConfig : FunctionProblemConfig() { - @Transient - override val problem = FunctionNameProblem +@SerialName("function name") +object FunctionNameProblemConfig : ProblemConfig() { + override val granularity = Granularity.Function } diff --git a/src/main/kotlin/astminer/config/StorageConfigs.kt b/src/main/kotlin/astminer/config/StorageConfigs.kt index bdc2a1e2..b748d6b7 100644 --- a/src/main/kotlin/astminer/config/StorageConfigs.kt +++ b/src/main/kotlin/astminer/config/StorageConfigs.kt @@ -13,27 +13,18 @@ import kotlinx.serialization.Transient sealed class StorageConfig /** - * Format in which the ASTs should be saved - * [Dot] is for [astminer.storage.ast.DotAstStorage] - * [Csv] is for [astminer.storage.ast.CsvAstStorage] + * @see astminer.storage.ast.CsvAstStorage */ 
@Serializable -enum class AstStorageFormat { - @SerialName("dot") - Dot, - @SerialName("csv") - Csv -} +@SerialName("CsvAST") +object CsvAstStorageConfig : StorageConfig() /** - * Config for [astminer.storage.ast.DotAstStorage] [astminer.storage.ast.CsvAstStorage] + * @see astminer.storage.ast.DotAstStorage */ @Serializable -@SerialName("ast") -data class AstStorageConfig( - val format: AstStorageFormat, - val splitTokens: Boolean = false -) : StorageConfig() +@SerialName("DotAST") +object DotAstStorageConfig : StorageConfig() /** * Config for [astminer.storage.path.Code2VecPathStorage] diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 422d2f9f..db47a803 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -10,11 +10,11 @@ fun code2vecJavaMethods() { val folder = "src/test/resources/code2vecPathMining" val outputDir = "out_examples/code2vecPathMining" - val pipelineConfig = FunctionPipelineConfig( + val pipelineConfig = PipelineConfig( inputDir = folder, outputDir = outputDir, parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Java)), - problem = FunctionNamePredictionConfig(), + problem = FunctionNameProblemConfig, storage = Code2VecPathStorageConfig( maxPathLength = 5, maxPathWidth = 5 diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt index ce71e9ab..ace40398 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt +++ b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt @@ -52,8 +52,6 @@ object NumberOfNodes : TreeFeature { } } -fun Node.treeSize() = NumberOfNodes.compute(this) - /** * Tree feature for computing list of all node tokens from a given tree. 
*/ diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt index 3d886713..9911ea45 100644 --- a/src/main/kotlin/astminer/filters/CommonFilters.kt +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -3,12 +3,34 @@ package astminer.filters import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.model.ParseResult -import astminer.featureextraction.treeSize +import astminer.common.preOrder +import astminer.common.splitToSubtokens +import astminer.featureextraction.NumberOfNodes -class TreeSizeFilter(private val maxSize: Int) : FileFilter, FunctionFilter { - private fun testTree(root: Node): Boolean = root.treeSize() <= maxSize +/** + * Filter that excludes trees which do not satisfy [minSize] <= tree size <= [maxSize] + * @param minSize The minimum size of trees that pass the filter + * @param maxSize The maximum size of trees that pass the filter. Set it to null if there should be no upper bound. + */ +class TreeSizeFilter(private val minSize: Int = 0, private val maxSize: Int? = null) : FileFilter, FunctionFilter { + private fun Node.treeSize() = NumberOfNodes.compute(this) - override fun test(functionInfo: FunctionInfo): Boolean = testTree(functionInfo.root) + private fun validateTree(root: Node): Boolean = + minSize <= root.treeSize() && (maxSize == null || root.treeSize() <= maxSize) - override fun test(parseResult: ParseResult): Boolean = testTree(parseResult.root) + override fun validate(functionInfo: FunctionInfo): Boolean = validateTree(functionInfo.root) + + override fun validate(parseResult: ParseResult): Boolean = validateTree(parseResult.root) +} + +/** + * Filter that excludes trees that have more words than [maxWordsNumber] in any token of their node. 
+ */ +class WordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter, FileFilter { + private fun validateTree(root: Node) = + !root.preOrder().any { node -> splitToSubtokens(node.getToken()).size > maxWordsNumber } + + override fun validate(functionInfo: FunctionInfo) = validateTree(functionInfo.root) + + override fun validate(parseResult: ParseResult) = validateTree(parseResult.root) } diff --git a/src/main/kotlin/astminer/filters/FileFilters.kt b/src/main/kotlin/astminer/filters/FileFilters.kt index 0af6a808..27e6be02 100644 --- a/src/main/kotlin/astminer/filters/FileFilters.kt +++ b/src/main/kotlin/astminer/filters/FileFilters.kt @@ -4,5 +4,5 @@ import astminer.common.model.Node import astminer.common.model.ParseResult interface FileFilter { - fun test(parseResult: ParseResult): Boolean + fun validate(parseResult: ParseResult): Boolean } diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index ad908e07..5bf320c1 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -2,18 +2,17 @@ package astminer.filters import astminer.common.model.FunctionInfo import astminer.common.model.Node -import astminer.common.preOrder import astminer.common.splitToSubtokens interface FunctionFilter { - fun test(functionInfo: FunctionInfo): Boolean + fun validate(functionInfo: FunctionInfo): Boolean } /** * Filter that excludes functions that have at least one of modifiers from the [excludeModifiers] list. 
*/ class ModifierFilter(private val excludeModifiers: List) : FunctionFilter { - override fun test(functionInfo: FunctionInfo): Boolean = + override fun validate(functionInfo: FunctionInfo): Boolean = !excludeModifiers.any { modifier -> modifier in functionInfo.modifiers } } @@ -21,7 +20,7 @@ class ModifierFilter(private val excludeModifiers: List) : FunctionFilte * Filter that excludes functions that have at least one annotations from the [excludeAnnotations] list. */ class AnnotationFilter(private val excludeAnnotations: List) : FunctionFilter { - override fun test(functionInfo: FunctionInfo): Boolean = + override fun validate(functionInfo: FunctionInfo): Boolean = !excludeAnnotations.any { annotation -> annotation in functionInfo.annotations } } @@ -29,24 +28,16 @@ class AnnotationFilter(private val excludeAnnotations: List) : FunctionF * Filter that excludes constructors */ object ConstructorFilter : FunctionFilter { - override fun test(functionInfo: FunctionInfo) = !functionInfo.isConstructor + override fun validate(functionInfo: FunctionInfo) = !functionInfo.isConstructor } /** * Filter that excludes functions that have more than [maxWordsNumber] words in their names. */ class FunctionNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { - override fun test(functionInfo: FunctionInfo): Boolean { + override fun validate(functionInfo: FunctionInfo): Boolean { val name = functionInfo.name return name != null && splitToSubtokens(name).size <= maxWordsNumber } } -/** - * Filter that excludes functions that have more words than [maxWordsNumber] in any token of their subtree. 
- */ -class FunctionAnyNodeWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { - override fun test(functionInfo: FunctionInfo): Boolean = - !functionInfo.root.preOrder().any { node -> splitToSubtokens(node.getToken()).size > maxWordsNumber } - -} diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 489a07e7..3d4cb0ec 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -2,6 +2,8 @@ package astminer.pipeline import astminer.common.getProjectFilesWithExtension import astminer.config.* +import astminer.filters.ModifierFilter +import astminer.filters.TreeSizeFilter import astminer.parse.ParsingException import astminer.parse.getHandlerFactory import astminer.storage.Storage @@ -22,9 +24,9 @@ class Pipeline(private val config: PipelineConfig) { private val inputDirectory = File(config.inputDir) private val outputDirectory = File(config.outputDir) - private val branch = when (config) { - is FilePipelineConfig -> FilePipelineBranch(config) - is FunctionPipelineConfig -> FunctionPipelineBranch(config) + private val branch = when (config.problem.granularity) { + Granularity.File -> FilePipelineBranch(config) + Granularity.Function -> FunctionPipelineBranch(config) } private fun createStorageDirectory(extension: FileExtension): File { @@ -36,18 +38,10 @@ class Pipeline(private val config: PipelineConfig) { private fun createStorage(extension: FileExtension): Storage = with(config.storage) { val storagePath = createStorageDirectory(extension).path - // TODO: should be removed this later and be implemented like filters and problems, once storage constructors have no side effects when (this) { - is AstStorageConfig -> { - val tokenProcessor = if (splitTokens) TokenProcessor.Split else TokenProcessor.Normalize - when (format) { - AstStorageFormat.Csv -> CsvAstStorage(storagePath) - AstStorageFormat.Dot -> DotAstStorage(storagePath, 
tokenProcessor) - } - } - is Code2VecPathStorageConfig -> { - Code2VecPathStorage(storagePath, pathBasedStorageConfig) - } + is CsvAstStorageConfig -> CsvAstStorage(storagePath) + is DotAstStorageConfig -> DotAstStorage(storagePath, TokenProcessor.Split) + is Code2VecPathStorageConfig -> Code2VecPathStorage(storagePath, pathBasedStorageConfig) } } diff --git a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt index d6fa0a26..514459dc 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt @@ -4,9 +4,10 @@ import astminer.common.model.FunctionInfo import astminer.common.model.LanguageHandler import astminer.common.model.Node import astminer.common.model.ParseResult -import astminer.config.FilePipelineConfig -import astminer.config.FunctionPipelineConfig -import astminer.problem.LabeledResult +import astminer.config.* +import astminer.filters.* +import astminer.problem.* +import mu.KotlinLogging /** * PipelineBranch is a part of the pipeline that can be completely different depending on the granularity (pipeline type) @@ -25,15 +26,30 @@ interface PipelineBranch { * PipelineBranch for pipeline with file-level granularity (FilePipelineConfig). * Works with files as a whole. Tests parsed files with filters and extracts a label from them. 
*/ -class FilePipelineBranch(config: FilePipelineConfig) : PipelineBranch { - private val filters = config.filters.map { it.filter } - private val problem = config.problem.problem +class FilePipelineBranch(config: PipelineConfig) : PipelineBranch { + private val filters: List = config.filters.mapNotNull { filterConfig -> + when (filterConfig) { + is TreeSizeFilterConfig -> TreeSizeFilter(filterConfig.maxTreeSize) + is WordsNumberFilterConfig -> WordsNumberFilter(filterConfig.maxWordsNumber) + else -> { + println("Filter ${filterConfig.serialName} is not supported for this problem") + null + } + } + } + + private val problem: FileLevelProblem = when (config.problem) { + is FileNameExtractorConfig -> FilePathExtractor + is FolderNameExtractorConfig -> FolderNameExtractor + else -> throw ProblemNotFoundException(Granularity.File, "FilePipelineBranch") + } - private fun ParseResult.passesThroughFilters() = filters.all { filter -> filter.test(this) } + private fun passesThroughFilters(parseResult: ParseResult) = + filters.all { filter -> filter.validate(parseResult) } override fun process(languageHandler: LanguageHandler): Sequence> { val parseResult = languageHandler.parseResult - return if (parseResult.passesThroughFilters()) { + return if (passesThroughFilters(parseResult)) { val labeledResult = problem.process(parseResult) ?: return emptySequence() sequenceOf(labeledResult) } else { @@ -47,14 +63,43 @@ class FilePipelineBranch(config: FilePipelineConfig) : PipelineBranch { * Extracts functions from the parsed files. * Then tests functions with filters, processes them and extracts labels from each function. 
*/ -class FunctionPipelineBranch(config: FunctionPipelineConfig) : PipelineBranch { - private val filters = config.filters.map { it.filter } - private val problem = config.problem.problem +class FunctionPipelineBranch(config: PipelineConfig) : + PipelineBranch { + private val filters: List = config.filters.mapNotNull { filterConfig -> + when (filterConfig) { + is TreeSizeFilterConfig -> TreeSizeFilter(filterConfig.maxTreeSize) + is WordsNumberFilterConfig -> WordsNumberFilter(filterConfig.maxWordsNumber) + is ModifierFilterConfig -> ModifierFilter(filterConfig.modifiers) + is AnnotationFilterConfig -> AnnotationFilter(filterConfig.annotations) + is ConstructorFilterConfig -> ConstructorFilter + is FunctionNameWordsNumberFilterConfig -> FunctionNameWordsNumberFilter(filterConfig.maxWordsNumber) + else -> { + println("Filter ${filterConfig.serialName} is not supported for this problem") + null + } + } + } - private fun FunctionInfo.passesThroughFilters() = filters.all { filter -> filter.test(this) } + private val problem: FunctionLevelProblem = when (config.problem) { + is FunctionNameProblemConfig -> FunctionNameProblem + else -> throw ProblemNotFoundException(Granularity.Function, "FunctionPipelineBranch") + } + + private fun passesThroughFilters(functionInfo: FunctionInfo) = + filters.all { filter -> filter.validate(functionInfo) } override fun process(languageHandler: LanguageHandler): Sequence> = languageHandler.splitIntoFunctions().asSequence() - .filter { functionInfo -> functionInfo.passesThroughFilters() } + .filter { functionInfo -> passesThroughFilters(functionInfo) } .mapNotNull { functionInfo -> problem.process(functionInfo) } } + +/** + * This exception is thrown when problem granularity is implemented incorrectly or the problem is not specified + * inside the correct pipeline branch. 
+ */ +class ProblemNotFoundException(granularity: Granularity, branchName: String) : + IllegalStateException( + "The specified problem with granularity $granularity is not implemented inside of branch $branchName. " + + "This should never happen!" + ) \ No newline at end of file diff --git a/src/main/kotlin/astminer/problem/FileLevelProblems.kt b/src/main/kotlin/astminer/problem/FileLevelProblems.kt index 7124705a..c185405f 100644 --- a/src/main/kotlin/astminer/problem/FileLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FileLevelProblems.kt @@ -18,7 +18,7 @@ object FilePathExtractor : FileLevelProblem { /** * Labels files with folder names */ -object FolderExtractor : FileLevelProblem { +object FolderNameExtractor : FileLevelProblem { override fun process(parseResult: ParseResult): LabeledResult? { val folderName = File(parseResult.filePath).parentFile?.name ?: return null return parseResult.labeledWith(folderName) diff --git a/src/test/kotlin/astminer/filters/FileFiltersTest.kt b/src/test/kotlin/astminer/filters/FileFiltersTest.kt index 3fd94c92..cb90db85 100644 --- a/src/test/kotlin/astminer/filters/FileFiltersTest.kt +++ b/src/test/kotlin/astminer/filters/FileFiltersTest.kt @@ -10,12 +10,24 @@ internal class FileFiltersTest { @Test fun `test TreeSizeFilter for 100 should exclude bamboo of length 101`() { val node = createBamboo(101).toParseResult() - assertFalse { TreeSizeFilter(100).test(node) } + assertFalse { TreeSizeFilter(maxSize = 100).validate(node) } } @Test fun `test TreeSizeFilter for 10 should not exclude bamboo of length 5`() { val node = createBamboo(5).toParseResult() - assertTrue { TreeSizeFilter(10).test(node) } + assertTrue { TreeSizeFilter(maxSize = 10).validate(node) } + } + + @Test + fun `test TreeSizeFilter for minSize 10 should exclude bamboo of size 5`() { + val node = createBamboo(5).toParseResult() + assertFalse { TreeSizeFilter(minSize = 10).validate(node) } + } + + @Test + fun `test TreeSizeFilter for minSize 10 should not 
exclude bamboo of size 100`() { + val node = createBamboo(100).toParseResult() + assertTrue { TreeSizeFilter(minSize = 10).validate(node) } } } diff --git a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt index 5b4d09d9..8ee9caf9 100644 --- a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt +++ b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt @@ -15,7 +15,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val modifiers: List = listOf("b", "c") } - assertFalse { ModifierFilter(excludedModifiers).test(functionInfo) } + assertFalse { ModifierFilter(excludedModifiers).validate(functionInfo) } } @Test @@ -24,7 +24,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val modifiers: List = listOf("c", "d") } - assertTrue { ModifierFilter(excludedModifiers).test(functionInfo) } + assertTrue { ModifierFilter(excludedModifiers).validate(functionInfo) } } @Test @@ -33,7 +33,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val annotations: List = listOf("a", "c") } - assertFalse { AnnotationFilter(excludedModifiers).test(functionInfo) } + assertFalse { AnnotationFilter(excludedModifiers).validate(functionInfo) } } @Test @@ -42,7 +42,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val annotations: List = listOf("y", "x") } - assertTrue { AnnotationFilter(excludedModifiers).test(functionInfo) } + assertTrue { AnnotationFilter(excludedModifiers).validate(functionInfo) } } @Test @@ -50,7 +50,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val isConstructor = true } - assertFalse { ConstructorFilter.test(functionInfo) } + assertFalse { ConstructorFilter.validate(functionInfo) } } @Test @@ -58,7 +58,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val isConstructor = false } - assertTrue { 
ConstructorFilter.test(functionInfo) } + assertTrue { ConstructorFilter.validate(functionInfo) } } @Test @@ -66,36 +66,27 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val name = "Word".repeat(100) } - assertFalse { FunctionNameWordsNumberFilter(50).test(functionInfo) } + assertFalse { FunctionNameWordsNumberFilter(50).validate(functionInfo) } } @Test - fun `test FunctionNameWordsNumberFilter for 101 should not exclude function with name of 100 words`() { + fun `test WordsNumberFilter for 101 should not exclude function with name of 100 words`() { val functionInfo = object : FunctionInfo { override val name = "Word".repeat(100) } - assertTrue { FunctionNameWordsNumberFilter(101).test(functionInfo) } + assertTrue { FunctionNameWordsNumberFilter(101).validate(functionInfo) } } @Test - fun `test FunctionAnyNodeWordsNumberFilter for 50 should exclude function with name of 100 words`() { + fun `test WordsNumberFilter for 50 should exclude function with name of 100 words`() { val functionInfo = object : FunctionInfo { override val root = AntlrNode("", null, "Word".repeat(100)) } - assertFalse { FunctionAnyNodeWordsNumberFilter(50).test(functionInfo) } + assertFalse { WordsNumberFilter(50).validate(functionInfo) } } @Test - fun `test FunctionAnyNodeWordsNumberFilter for 101 should not exclude function with name of 100 words`() { - val functionInfo = object : FunctionInfo { - override val name = "Word".repeat(100) - override val root = createBamboo(1) - } - assertTrue { FunctionAnyNodeWordsNumberFilter(101).test(functionInfo) } - } - - @Test - fun `test FunctionAnyNodeWordsNumberFilter for 2 should exclude function that has a child of 3 words`() { + fun `test WordsNumberFilter for 2 should exclude function that has a child of 3 words`() { val root = AntlrNode("", null, "word") val child = AntlrNode("", root, "wordWordWord") root.setChildren(listOf(child)) @@ -103,7 +94,7 @@ class FunctionFiltersTest { val functionInfo = object : 
FunctionInfo { override val root = root } - assertFalse { FunctionAnyNodeWordsNumberFilter(2).test(functionInfo) } + assertFalse { WordsNumberFilter(2).validate(functionInfo) } } @Test @@ -111,7 +102,7 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val root = createBamboo(101) } - assertFalse { TreeSizeFilter(100).test(functionInfo) } + assertFalse { TreeSizeFilter(maxSize = 100).validate(functionInfo) } } @Test @@ -119,6 +110,22 @@ class FunctionFiltersTest { val functionInfo = object : FunctionInfo { override val root = createBamboo(5) } - assertTrue { TreeSizeFilter(10).test(functionInfo) } + assertTrue { TreeSizeFilter(maxSize = 10).validate(functionInfo) } + } + + @Test + fun `test TreeSizeFilter for minSize 100 should exclude bamboo of length 5`() { + val functionInfo = object : FunctionInfo { + override val root = createBamboo(5) + } + assertFalse { TreeSizeFilter(minSize = 100).validate(functionInfo) } + } + + @Test + fun `test TreeSizeFilter for (10, 100) should not exclude bambo of size 50 `() { + val functionInfo = object : FunctionInfo { + override val root = createBamboo(50) + } + assertTrue { TreeSizeFilter(10, 100).validate(functionInfo) } } } \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt deleted file mode 100644 index 34fe2c65..00000000 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonMethodSplitterTest.kt +++ /dev/null @@ -1,177 +0,0 @@ -package astminer.parse.gumtree.python -// -//import astminer.checkExecutable -//import astminer.common.model.FunctionInfo -//import astminer.common.model.MethodInfo -//import astminer.parse.gumtree.GumTreeNode -//import org.junit.Assume -//import org.junit.Before -//import org.junit.Test -//import java.io.File -//import kotlin.test.assertEquals -//import kotlin.test.assertNotNull -// -//class 
GumTreePythonMethodSplitterTest { -// private fun parse(filename: String): GumTreeNode = -// GumTreePythonParser().parseInputStream(File(filename).inputStream()) -// -// private fun splitMethods(filename: String): Collection> = parse(filename)?.let { -// GumTreePythonFunctionSplitter().splitIntoMethods(it) -// } ?: emptyList() -// -// private fun createPath(file: String) = "src/test/resources/gumTreeMethodSplitter/$file" -// -// @Before -// fun checkPythonParser() = Assume.assumeTrue(checkExecutable("pythonparser")) -// -// @Test -// fun methodsCountTest() { -// assertEquals(7, splitMethods(createPath("1.py")).size) -// assertEquals(9, splitMethods(createPath("2.py")).size) -// assertEquals(3, splitMethods(createPath("3.py")).size) -// assertEquals(5, splitMethods(createPath("4.py")).size) -// } -// -// @Test -// fun funcNamesTest() { -// val realNames = setOf( -// "no_args_func", "with_args_no_typed", "with_typed_args", -// "with_typed_return_no_args", "full_typed", -// "func_dif_args_typed_return", "complex_args_full_typed" -// ) -// val methodInfos = splitMethods(createPath("1.py")) -// val parsedNames = methodInfos.map { it.name }.toSet() -// assertEquals(realNames, parsedNames) -// } -// -// @Test -// fun methodInfoTest1TypedArgs() { -// val methodInfos = splitMethods(createPath("1.py")) -// val method = methodInfos.firstOrNull { it.name == "complex_args_full_typed" } -// assertNotNull(method) -// with(method) { -// assertEquals("complex_args_full_typed", name) -// assertEquals(null, this.method.returnTypeNode) -// assertEquals(1, methodParameters.size) -// assertEquals(listOf("node"), methodParameters.map { it.name }.toList()) -// assertEquals(listOf("JsonNodeType"), methodParameters.map { it.returnType() }.toList()) -// } -// } -// -// @Test -// fun methodInfoTest2ManyArgs() { -// val methodInfos = splitMethods(createPath("1.py")) -// val method = methodInfos.firstOrNull { it.name == "func_dif_args_typed_return" } -// assertNotNull(method) -// with(method) { 
-// assertEquals("func_dif_args_typed_return", name) -// assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) -// assertEquals(6, methodParameters.size) -// assertEquals(listOf("a", "b", "c", "d", "e", "f"), methodParameters.map { it.name }.toList()) -// assertEquals(emptyList(), methodParameters.mapNotNull { it.returnType }.toList()) -// } -// } -// -// @Test -// fun methodInfoTest3EnclosingClass() { -// val methodInfos = splitMethods(createPath("2.py")) -// val method = methodInfos.firstOrNull { it.name == "foo_typed" } -// assertNotNull(method) -// with(method) { -// assertEquals("foo_typed", name) -// assertEquals("A", enclosingElementName) -// assertEquals(null, this.method.returnTypeNode) -// assertEquals(3, methodParameters.size) -// assertEquals(listOf("self", "x", "y"), methodParameters.map { it.name }.toList()) -// assertEquals(listOf(null, "int", "int"), methodParameters.map { it.returnType }.toList()) -// } -// } -// -// @Test -// fun methodInfoTest4EnclosingClass() { -// val methodInfos = splitMethods(createPath("2.py")) -// val method = methodInfos.firstOrNull { it.name() == "bar_typed" } -// assertNotNull(method) -// with(method) { -// assertEquals("bar_typed", name) -// assertEquals("C", enclosingElementName) -// assertEquals(null, this.method.returnTypeNode) -// assertEquals(2, methodParameters.size) -// assertEquals(listOf("self", "x"), methodParameters.map { it.name }.toList()) -// assertEquals(listOf(null, "int"), methodParameters.map { it.returnType }.toList()) -// } -// } -// -// @Test -// fun methodInfoTest5AsyncDef() { -// val methodInfos = splitMethods(createPath("3.py")) -// val method = methodInfos.firstOrNull { it.name == "async_schrecklich_typed" } -// assertNotNull(method) -// with(method) { -// assertEquals("async_schrecklich_typed", name) -// assertEquals("AsyncFunctionDef", this.method.root.getTypeLabel()) -// assertEquals(null, enclosingElementName()) -// assertEquals("Constant-int", 
this.method.returnTypeNode?.getTypeLabel()) -// assertEquals(4, methodParameters.size) -// assertEquals(listOf("event", "x", "args", "kwargs"), methodParameters.map { it.name() }.toList()) -// assertEquals(listOf("str", "int", null, null), methodParameters.map { it.returnType() }.toList()) -// } -// } -// -// @Test -// fun methodInfoTest6Doc() { -// val methodInfos = splitMethods(createPath("3.py")) -// val method = methodInfos.firstOrNull { it.name() == "async_simple_no_typed" } -// assertNotNull(method) -// with(method) { -// assertEquals("async_simple_no_typed", name()) -// assertEquals("AsyncFunctionDef", this.method.root.getTypeLabel()) -// assertEquals(null, enclosingElementName()) -// assertEquals( -// "\n async doc\n ", -// this.method.root.getChildOfType("body") -// ?.getChildOfType("Expr") -// ?.getChildOfType("Constant-str") -// ?.getToken() -// ) -// assertEquals(4, methodParameters.size) -// assertEquals( -// listOf("gh", "original_issue", "branch", "backport_pr_number"), -// methodParameters.map { it.name() }.toList() -// ) -// assertEquals(listOf(null, null, null, null), methodParameters.map { it.returnType() }.toList()) -// } -// } -// -// @Test -// fun methodInfoTest7InnerFunc() { -// val methodInfos = splitMethods(createPath("4.py")) -// val method = methodInfos.firstOrNull { it.name() == "foo_2" } -// assertNotNull(method) -// with(method) { -// assertEquals("foo_2", name()) -// assertEquals("foo_1", method.method.root.parent?.wrappedNode?.parent?.label) -// assertEquals(null, enclosingElementName()) -// assertEquals("Constant-NoneType", this.method.returnTypeNode?.getTypeLabel()) -// assertEquals(1, methodParameters.size) -// assertEquals(listOf("c"), methodParameters.map { it.name() }.toList()) -// assertEquals(listOf(null), methodParameters.map { it.returnType() }.toList()) -// } -// } -// -// @Test -// fun methodInfoTest8InnerFunc() { -// val methodInfos = splitMethods(createPath("4.py")) -// val method = methodInfos.firstOrNull { it.name() 
== "bar_2" } -// assertNotNull(method) -// with(method) { -// assertEquals("bar_2", name()) -// assertEquals("bar_1", method.method.root.parent?.wrappedNode?.parent?.label) -// assertEquals(null, enclosingElementName()) -// assertEquals("Constant-int", this.method.returnTypeNode?.getTypeLabel()) -// assertEquals(2, methodParameters.size) -// assertEquals(listOf("d", "e"), methodParameters.map { it.name() }.toList()) -// assertEquals(listOf("int", "int"), methodParameters.map { it.returnType() }.toList()) -// } -// } -//} diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt index 2d364e1d..8c9ab256 100644 --- a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt +++ b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt @@ -16,11 +16,11 @@ internal class Code2VecExtractionTest { val languages = listOf(FileExtension.Java, FileExtension.Python) - val config = FilePipelineConfig( + val config = PipelineConfig( inputDir = testDataDir.path, outputDir = extractedDataDir.toAbsolutePath().toString(), parser = ParserConfig(ParserType.Antlr, languages), - problem = FilePathExtractorConfig(), + problem = FileNameExtractorConfig, storage = Code2VecPathStorageConfig(8, 3) ) Pipeline(config).run() diff --git a/src/test/kotlin/astminer/problem/FolderNameExtractorTest.kt b/src/test/kotlin/astminer/problem/FolderNameExtractorTest.kt index 9e5da1b4..482cbead 100644 --- a/src/test/kotlin/astminer/problem/FolderNameExtractorTest.kt +++ b/src/test/kotlin/astminer/problem/FolderNameExtractorTest.kt @@ -16,7 +16,7 @@ class FolderNameExtractorTest { @Test fun `test folder extractor returns null when folder is empty or not found`() { val nonEmptyParseResult = ParseResult(dummyRoot, "") - val labeledParseResult = FolderExtractor.process(nonEmptyParseResult) + val labeledParseResult = FolderNameExtractor.process(nonEmptyParseResult) assertNull(labeledParseResult) } @@ -24,7 +24,7 @@ class 
FolderNameExtractorTest { @Test fun `test folder extractor extracts folder when it is not empty`() { val nonEmptyParseResult = ParseResult(dummyRoot, PATH) - val labeledParseResult = FolderExtractor.process(nonEmptyParseResult) + val labeledParseResult = FolderNameExtractor.process(nonEmptyParseResult) assertEquals(LabeledResult(dummyRoot, FOLDER, PATH), labeledParseResult) } From dbe64fe7e236be3295f1b082c488beb104a77ed3 Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 24 May 2021 15:32:49 +0500 Subject: [PATCH 193/308] implemented FileNameExtractor --- src/main/kotlin/astminer/config/ProblemConfigs.kt | 2 +- src/main/kotlin/astminer/pipeline/PipelineBranch.kt | 6 ++---- src/main/kotlin/astminer/problem/FileLevelProblems.kt | 6 ++++-- ...FilePathExtractorTest.kt => FileNameExtractorTest.kt} | 9 +++++---- 4 files changed, 12 insertions(+), 11 deletions(-) rename src/test/kotlin/astminer/problem/{FilePathExtractorTest.kt => FileNameExtractorTest.kt} (60%) diff --git a/src/main/kotlin/astminer/config/ProblemConfigs.kt b/src/main/kotlin/astminer/config/ProblemConfigs.kt index 17e76933..2c1b24d1 100644 --- a/src/main/kotlin/astminer/config/ProblemConfigs.kt +++ b/src/main/kotlin/astminer/config/ProblemConfigs.kt @@ -19,7 +19,7 @@ sealed class ProblemConfig { } /** - * @see FilePathExtractor + * @see FileNameExtractor */ @Serializable @SerialName("file name") diff --git a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt index 514459dc..fd43021c 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt @@ -7,7 +7,6 @@ import astminer.common.model.ParseResult import astminer.config.* import astminer.filters.* import astminer.problem.* -import mu.KotlinLogging /** * PipelineBranch is a part of the pipeline that can be completely different depending on the granularity (pipeline type) @@ -39,7 +38,7 @@ class FilePipelineBranch(config: PipelineConfig) : 
PipelineBranch { } private val problem: FileLevelProblem = when (config.problem) { - is FileNameExtractorConfig -> FilePathExtractor + is FileNameExtractorConfig -> FileNameExtractor is FolderNameExtractorConfig -> FolderNameExtractor else -> throw ProblemNotFoundException(Granularity.File, "FilePipelineBranch") } @@ -63,8 +62,7 @@ class FilePipelineBranch(config: PipelineConfig) : PipelineBranch { * Extracts functions from the parsed files. * Then tests functions with filters, processes them and extracts labels from each function. */ -class FunctionPipelineBranch(config: PipelineConfig) : - PipelineBranch { +class FunctionPipelineBranch(config: PipelineConfig) : PipelineBranch { private val filters: List = config.filters.mapNotNull { filterConfig -> when (filterConfig) { is TreeSizeFilterConfig -> TreeSizeFilter(filterConfig.maxTreeSize) diff --git a/src/main/kotlin/astminer/problem/FileLevelProblems.kt b/src/main/kotlin/astminer/problem/FileLevelProblems.kt index c185405f..d8faa229 100644 --- a/src/main/kotlin/astminer/problem/FileLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FileLevelProblems.kt @@ -11,8 +11,10 @@ interface FileLevelProblem { /** * Labels files with folder names */ -object FilePathExtractor : FileLevelProblem { - override fun process(parseResult: ParseResult): LabeledResult = parseResult.labeledWithFilePath() +object FileNameExtractor : FileLevelProblem { + override fun process(parseResult: ParseResult): LabeledResult = + parseResult.labeledWith(File(parseResult.filePath).name) + } /** diff --git a/src/test/kotlin/astminer/problem/FilePathExtractorTest.kt b/src/test/kotlin/astminer/problem/FileNameExtractorTest.kt similarity index 60% rename from src/test/kotlin/astminer/problem/FilePathExtractorTest.kt rename to src/test/kotlin/astminer/problem/FileNameExtractorTest.kt index ef560019..a6c0f7a8 100644 --- a/src/test/kotlin/astminer/problem/FilePathExtractorTest.kt +++ b/src/test/kotlin/astminer/problem/FileNameExtractorTest.kt @@ -5,17 
+5,18 @@ import astminer.parse.antlr.AntlrNode import org.junit.Test import kotlin.test.assertEquals -class FilePathExtractorTest { +class FileNameExtractorTest { companion object { - private const val PATH = "random/folder/file.txt" + private const val FILE_NAME = "file.txt" + private const val PATH = "random/folder/$FILE_NAME" private var dummyRoot = AntlrNode("", null, null) } @Test fun `test file path extractor returns the same root and file path and labels with file path`() { val nonEmptyParseResult = ParseResult(dummyRoot, PATH) - val labeledParseResult = FilePathExtractor.process(nonEmptyParseResult) + val labeledParseResult = FileNameExtractor.process(nonEmptyParseResult) - assertEquals(LabeledResult(dummyRoot, PATH, PATH), labeledParseResult) + assertEquals(LabeledResult(dummyRoot, FILE_NAME, PATH), labeledParseResult) } } \ No newline at end of file From 262e5cc86314584fbc41343c0511f3c7a5e02d13 Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 24 May 2021 16:01:12 +0500 Subject: [PATCH 194/308] removed the deprecated function --- .../astminer/common/model/ParsingModel.kt | 15 ---------- .../kotlin/astminer/examples/AllCppFiles.kt | 28 +++++++++---------- .../kotlin/astminer/examples/AllJavaAst.kt | 21 +++++++------- .../astminer/examples/AllJavaFilesGumTree.kt | 21 +++++++------- .../astminer/examples/AllJavaScriptFiles.kt | 28 ++++++++----------- .../astminer/examples/AllPythonFiles.kt | 20 ++++++------- src/main/kotlin/astminer/pipeline/Pipeline.kt | 1 - src/test/kotlin/astminer/parse/Utils.kt | 7 +++++ .../parse/antlr/java/ANTLRJavaParserTest.kt | 5 ++-- .../antlr/python/ANTLRPythonParserTest.kt | 4 +-- .../astminer/parse/cpp/FuzzyCppParserTest.kt | 15 ++++------ .../gumtree/java/GumTreeJavaParserTest.kt | 7 ++--- 12 files changed, 74 insertions(+), 98 deletions(-) create mode 100644 src/test/kotlin/astminer/parse/Utils.kt diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 
e1fc3bc1..2285acfd 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -46,23 +46,8 @@ interface Parser { * @return ParseResult instance */ fun parseFile(file: File) = ParseResult(parseInputStream(file.inputStream()), file.path) - - /** - * Parse list of files. - * @param files files to parse - * @param handleResult handler to invoke on each file parse result - */ - // TODO: this function should be removed. it is only used in tests and examples - @Deprecated("Use getHandlerFactory instead") - fun parseFiles(files: List, handleResult: (ParseResult) -> Any?) { - for (file in files) { - handleResult(parseFile(file)) - } - } } data class ParseResult(val root: T, val filePath: String) { fun labeledWith(label: String): LabeledResult = LabeledResult(root, label, filePath) - - fun labeledWithFilePath(): LabeledResult = labeledWith(filePath) } diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index 1a45b003..5a0aa345 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -2,28 +2,26 @@ package astminer.examples -import astminer.common.getProjectFilesWithExtension +import astminer.config.* import astminer.parse.fuzzy.cpp.FuzzyCppParser -import astminer.storage.path.Code2VecPathStorage -import astminer.storage.path.PathBasedStorageConfig +import astminer.pipeline.Pipeline import java.io.File // Retrieve paths from .cpp preprocessed files, using a fuzzyc2cpg parser. 
fun allCppFiles() { val inputDir = File("src/test/resources/examples/cpp") - - val outputDir = "out_examples/allCppFiles" - val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) + val preprocessedDir = File("preprocessed") + // TODO: preprocessing should once become a part of the pipeline val parser = FuzzyCppParser() - val preprocOutputFolder = File("preprocessed") - - parser.preprocessProject(inputDir, preprocOutputFolder) - - val files = getProjectFilesWithExtension(preprocOutputFolder, "cpp") + parser.preprocessProject(inputDir, preprocessedDir) - parser.parseFiles(files) { parseResult -> - storage.store(parseResult.labeledWithFilePath()) - } + val config = PipelineConfig( + inputDir = preprocessedDir.path, + outputDir = "out_examples/allCppFiles", + parser = ParserConfig(ParserType.Fuzzy, listOf(FileExtension.Cpp)), + problem = FileNameExtractorConfig, + storage = Code2VecPathStorageConfig(5, 5) + ) - storage.close() + Pipeline(config).run() } diff --git a/src/main/kotlin/astminer/examples/AllJavaAst.kt b/src/main/kotlin/astminer/examples/AllJavaAst.kt index 25a88b48..6b8c5d29 100644 --- a/src/main/kotlin/astminer/examples/AllJavaAst.kt +++ b/src/main/kotlin/astminer/examples/AllJavaAst.kt @@ -1,20 +1,21 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension +import astminer.config.* import astminer.storage.ast.CsvAstStorage import astminer.parse.antlr.java.JavaParser +import astminer.pipeline.Pipeline import java.io.File // Retrieve ASTs from Java files, using a generated parser. 
fun allJavaAsts() { - val folder = "src/test/resources/examples/" + val config = PipelineConfig( + inputDir = "src/test/resources/examples/", + outputDir = "out_examples/allJavaAstsAntlr", + parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Java)), + problem = FileNameExtractorConfig, + storage = CsvAstStorageConfig, + ) - val storage = CsvAstStorage("out_examples/allJavaAstsAntlr") - - val files = getProjectFilesWithExtension(File(folder), "java") - JavaParser().parseFiles(files) { parseResult -> - storage.store(parseResult.labeledWithFilePath()) - } - - storage.close() -} \ No newline at end of file + Pipeline(config).run() +} diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index c8236abf..bc305344 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -1,22 +1,21 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension +import astminer.config.* import astminer.parse.gumtree.java.GumTreeJavaParser +import astminer.pipeline.Pipeline import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File //Retrieve paths from Java files, using a GumTree parser. 
fun allJavaFilesGumTree() { - val inputDir = "src/test/resources/gumTreeMethodSplitter/" - - val outputDir = "out_examples/allJavaFilesGumTree" - val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) - - val files = getProjectFilesWithExtension(File(inputDir), "java") - GumTreeJavaParser().parseFiles(files) { parseResult -> - storage.store(parseResult.labeledWithFilePath()) - } - - storage.close() + val config = PipelineConfig( + inputDir = "src/test/resources/gumTreeMethodSplitter/", + outputDir = "out_examples/allJavaFilesGumTree", + parser = ParserConfig(ParserType.GumTree, listOf(FileExtension.Java)), + problem = FileNameExtractorConfig, + storage = Code2VecPathStorageConfig(5, 5) + ) + Pipeline(config).run() } diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index f3a835e5..cfe2ea0e 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -1,22 +1,16 @@ package astminer.examples -import astminer.common.getProjectFilesWithExtension -import astminer.parse.antlr.javascript.JavaScriptParser -import astminer.storage.path.PathBasedStorageConfig -import astminer.storage.TokenProcessor -import astminer.storage.path.Code2VecPathStorage -import java.io.File +import astminer.config.* +import astminer.pipeline.Pipeline fun allJavaScriptFiles() { - val folder = "src/test/resources/examples" - val outputDir = "out_examples/allJavaScriptFilesAntlr" + val config = PipelineConfig( + inputDir = "src/test/resources/examples", + outputDir = "out_examples/allJavaScriptFilesAntlr", + parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.JavaScript)), + problem = FileNameExtractorConfig, + storage = Code2VecPathStorageConfig(5, 5) + ) - val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) - - val files = getProjectFilesWithExtension(File(folder), "js") - 
JavaScriptParser().parseFiles(files) { parseResult -> - storage.store(parseResult.labeledWithFilePath()) - } - - storage.close() -} \ No newline at end of file + Pipeline(config).run() +} diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index 91f6fe88..b0ca0b47 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -1,7 +1,9 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension +import astminer.config.* import astminer.parse.antlr.python.PythonParser +import astminer.pipeline.Pipeline import astminer.storage.path.PathBasedStorageConfig import astminer.storage.TokenProcessor import astminer.storage.path.Code2VecPathStorage @@ -9,15 +11,13 @@ import java.io.File fun allPythonFiles() { - val inputDir = "src/test/resources/examples/" + val config = PipelineConfig( + inputDir = "src/test/resources/examples", + outputDir = "out_examples/allPythonFiles", + parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Python)), + problem = FileNameExtractorConfig, + storage = Code2VecPathStorageConfig(5, 5) + ) - val outputDir = "out_examples/allPythonFiles" - val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) - - val files = getProjectFilesWithExtension(File(inputDir), "py") - PythonParser().parseFiles(files) { parseResult -> - storage.store(parseResult.labeledWithFilePath()) - } - - storage.close() + Pipeline(config).run() } diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 3d4cb0ec..e33c1859 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -37,7 +37,6 @@ class Pipeline(private val config: PipelineConfig) { private fun createStorage(extension: FileExtension): Storage = with(config.storage) { val storagePath = 
createStorageDirectory(extension).path - when (this) { is CsvAstStorageConfig -> CsvAstStorage(storagePath) is DotAstStorageConfig -> DotAstStorage(storagePath, TokenProcessor.Split) diff --git a/src/test/kotlin/astminer/parse/Utils.kt b/src/test/kotlin/astminer/parse/Utils.kt new file mode 100644 index 00000000..4ae7b169 --- /dev/null +++ b/src/test/kotlin/astminer/parse/Utils.kt @@ -0,0 +1,7 @@ +package astminer.parse + +import astminer.common.model.Node +import astminer.common.model.Parser +import java.io.File + +fun Parser.parseFiles(files: List) = files.map { parseFile(it).root } diff --git a/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt b/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt index 49efc0db..a3b66f12 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt @@ -1,7 +1,7 @@ package astminer.parse.antlr.java import astminer.common.getProjectFilesWithExtension -import astminer.common.model.Node +import astminer.parse.parseFiles import org.junit.Assert import org.junit.Test import java.io.File @@ -58,8 +58,7 @@ class ANTLRJavaParserTest { fun testProjectParsing() { val parser = JavaParser() val projectRoot = File("src/test/resources/arrayCalls") - val trees = mutableListOf() - parser.parseFiles(getProjectFilesWithExtension(projectRoot, "java")) { trees.add(it.root) } + val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "java")) Assert.assertEquals("There is only 5 file with .java extension in 'testData/arrayCalls' folder",5, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } diff --git a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt index dd45298c..d054fce9 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt +++ 
b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt @@ -2,6 +2,7 @@ package astminer.parse.antlr.python import astminer.common.getProjectFilesWithExtension import astminer.common.model.Node +import astminer.parse.parseFiles import org.junit.Assert import org.junit.Test import java.io.File @@ -21,8 +22,7 @@ class ANTLRPythonParserTest { fun testProjectParsing() { val parser = PythonParser() val projectRoot = File("src/test/resources/examples") - val trees = mutableListOf() - parser.parseFiles(getProjectFilesWithExtension(projectRoot, "py")) { trees.add(it.root) } + val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "py")) Assert.assertEquals("There is only 1 file with .py extension in 'testData/examples' folder",1, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 41622f8a..95df406a 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -5,6 +5,7 @@ import astminer.common.getProjectFilesWithExtension import astminer.examples.forFilesWithSuffix import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.parse.fuzzy.cpp.FuzzyNode +import astminer.parse.parseFiles import org.junit.Assert import org.junit.Assume import org.junit.Before @@ -33,14 +34,11 @@ class FuzzyCppParserTest { fun testProjectParsing() { val folder = File("src/test/resources/fuzzy/") val parser = FuzzyCppParser() - val nodes = mutableListOf() - parser.parseFiles(getProjectFilesWithExtension(folder, "cpp")) { - nodes.add(it.root) - } + val nodes = parser.parseFiles(getProjectFilesWithExtension(folder, "cpp")) Assert.assertEquals( "There is only 3 file with .cpp extension in 'testData/examples' folder", 3, - nodes.filterNotNull().size + nodes.size ) } @@ -88,15 +86,12 @@ class 
FuzzyCppParserTest { val parser = FuzzyCppParser() parser.preprocessProject(projectRoot, preprocessedRoot) - val nodes = mutableListOf() - parser.parseFiles(getProjectFilesWithExtension(projectRoot, "cpp")) { - nodes.add(it.root) - } + val nodes = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "cpp")) Assert.assertEquals( "Parse tree for a valid file should not be null. There are 5 files in example project.", 5, - nodes.filterNotNull().size + nodes.size ) preprocessedRoot.deleteRecursively() } diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt index 5ee1b47d..02c11511 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt @@ -1,7 +1,7 @@ package astminer.parse.gumtree.java import astminer.common.getProjectFilesWithExtension -import astminer.common.model.Node +import astminer.parse.parseFiles import org.junit.Assert import org.junit.Test import java.io.* @@ -20,9 +20,8 @@ class GumTreeJavaParserTest { fun testProjectParsing() { val parser = GumTreeJavaParser() val projectRoot = File("src/test/resources/examples") - - val trees = mutableListOf() - parser.parseFiles(getProjectFilesWithExtension(projectRoot, "java")) { trees.add(it.root) } + + val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "java")) Assert.assertEquals("There is only 2 file with .java extension in 'testData/examples' folder",2, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } From 4483cdbed2a840baf3f060ed1d0ccff0cb3c7832 Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 24 May 2021 16:18:40 +0500 Subject: [PATCH 195/308] resolved errors after conflicts --- src/main/kotlin/astminer/common/model/ParsingModel.kt | 2 -- src/main/kotlin/astminer/filters/CommonFilters.kt | 3 +-- 
src/main/kotlin/astminer/problem/FunctionLevelProblems.kt | 3 +-- src/test/kotlin/astminer/common/TestUtils.kt | 5 ++++- src/test/kotlin/astminer/filters/FunctionFiltersTest.kt | 2 +- src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt | 2 +- 6 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index ee2d6318..a20d9c74 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -1,8 +1,6 @@ package astminer.common.model import astminer.problem.LabeledResult -import astminer.parse.ParsingException -import mu.KotlinLogging import java.io.File import java.io.InputStream import java.util.* diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt index 9911ea45..587e1fba 100644 --- a/src/main/kotlin/astminer/filters/CommonFilters.kt +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -3,7 +3,6 @@ package astminer.filters import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.model.ParseResult -import astminer.common.preOrder import astminer.common.splitToSubtokens import astminer.featureextraction.NumberOfNodes @@ -28,7 +27,7 @@ class TreeSizeFilter(private val minSize: Int = 0, private val maxSize: Int? 
= n */ class WordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter, FileFilter { private fun validateTree(root: Node) = - !root.preOrder().any { node -> splitToSubtokens(node.getToken()).size > maxWordsNumber } + !root.preOrder().any { node -> splitToSubtokens(node.token).size > maxWordsNumber } override fun validate(functionInfo: FunctionInfo) = validateTree(functionInfo.root) diff --git a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt index 7d26786a..65325107 100644 --- a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt @@ -2,7 +2,6 @@ package astminer.problem import astminer.common.model.FunctionInfo import astminer.common.model.Node -import astminer.common.preOrder import astminer.common.setTechnicalToken interface FunctionLevelProblem { @@ -20,7 +19,7 @@ object FunctionNameProblem : FunctionLevelProblem { override fun process(functionInfo: FunctionInfo): LabeledResult? { val name = functionInfo.name ?: return null functionInfo.root.preOrder().forEach { node -> - if (node.getToken() == name) { + if (node.token == name) { node.setTechnicalToken(TECHNICAL_RECURSIVE_CALL) } } diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index 003736ca..a1703614 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -5,7 +5,10 @@ import astminer.common.model.Node import astminer.common.model.ParseResult -class DummyNode(override val typeLabel: String, override val children: MutableList) : Node() { +class DummyNode( + override val typeLabel: String, + override val children: MutableList = mutableListOf() +) : Node() { //TODO("not implemented") override val parent: Node? 
= null diff --git a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt index 8ee9caf9..727bed95 100644 --- a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt +++ b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt @@ -89,7 +89,7 @@ class FunctionFiltersTest { fun `test WordsNumberFilter for 2 should exclude function that has a child of 3 words`() { val root = AntlrNode("", null, "word") val child = AntlrNode("", root, "wordWordWord") - root.setChildren(listOf(child)) + root.replaceChildren(listOf(child)) val functionInfo = object : FunctionInfo { override val root = root diff --git a/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt b/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt index 9cb507f9..b6505208 100644 --- a/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt +++ b/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt @@ -51,7 +51,7 @@ class FunctionNameProblemTest { @Test fun `test function name problem should hide recursive call tokens with SELF`() { FunctionNameProblem.process(functionInfo) - val recursiveCallNode = functionInfo.root.getChildren().firstOrNull()?.getChildren()?.firstOrNull() + val recursiveCallNode = functionInfo.root.children.firstOrNull()?.children?.firstOrNull() assertEquals("SELF", recursiveCallNode?.getTechnicalToken()) } } \ No newline at end of file From 9664112a75f23ed743c47cb05d06c6694e6c373d Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 24 May 2021 17:12:51 +0500 Subject: [PATCH 196/308] fixed config examples and logging --- configs/function-name-prediction-ast.yml | 10 ++++----- configs/parse-files-csv.yml | 12 +++++----- configs/paths-from-files.yml | 7 ++---- src/main/kotlin/astminer/Main.kt | 22 ++++++++++++++----- .../kotlin/astminer/config/FilterConfigs.kt | 6 ++--- .../astminer/pipeline/PipelineBranch.kt | 19 ++++++++++------ 6 files changed, 43 insertions(+), 33 deletions(-) diff --git 
a/configs/function-name-prediction-ast.yml b/configs/function-name-prediction-ast.yml index 04b65311..d1da0dd7 100644 --- a/configs/function-name-prediction-ast.yml +++ b/configs/function-name-prediction-ast.yml @@ -1,5 +1,3 @@ -type: 'function granularity' - inputDir: 'src/test/resources/methodSplitting/' outputDir: 'output' @@ -10,12 +8,12 @@ parser: filters: - type: 'by function name length' maxWordsNumber: 10 - - type: 'by length of any token' - maxWordsNumber: 100 + - type: 'by words number' + maxTokenWordsNumber: 100 problem: - type: 'function name prediction' + type: 'function name' storage: - type: 'ast' + type: 'CsvAST' format: 'csv' diff --git a/configs/parse-files-csv.yml b/configs/parse-files-csv.yml index a0ff9403..f63f733a 100644 --- a/configs/parse-files-csv.yml +++ b/configs/parse-files-csv.yml @@ -1,5 +1,3 @@ -type: 'file granularity' - inputDir: 'src/test/resources/methodSplitting/' outputDir: 'output' @@ -8,12 +6,14 @@ parser: extensions: ['java', 'js'] filters: - - type: 'max tree size' + - type: 'by tree size' maxTreeSize: 1000 + - type: 'by words number' + maxTokenWordsNumber: 1000 + problem: - type: 'label with filepath' + type: 'file name' storage: - type: 'ast' - format: 'csv' + type: 'CsvAST' diff --git a/configs/paths-from-files.yml b/configs/paths-from-files.yml index bf7b063b..cadc8192 100644 --- a/configs/paths-from-files.yml +++ b/configs/paths-from-files.yml @@ -1,5 +1,3 @@ -type: 'file granularity' - inputDir: 'src/test/resources/methodSplitting/' outputDir: 'output' @@ -8,12 +6,11 @@ parser: extensions: ['java', 'js'] filters: - - type: 'max tree size' + - type: 'by tree size' maxTreeSize: 1000 problem: - type: 'label with filepath' - + type: 'file name' storage: type: 'code2vec paths' diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index 4ce51c6d..ba69e332 100644 --- a/src/main/kotlin/astminer/Main.kt +++ b/src/main/kotlin/astminer/Main.kt @@ -1,7 +1,9 @@ package astminer +import 
astminer.common.model.FunctionInfoPropertyNotImplementedException import astminer.config.PipelineConfig import astminer.pipeline.Pipeline +import astminer.pipeline.ProblemDefinitionException import com.charleskorn.kaml.PolymorphismStyle import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.arguments.argument @@ -25,14 +27,22 @@ class PipelineRunner : CliktCommand(name = "") { private val yaml = Yaml(configuration = YamlConfiguration(polymorphismStyle = PolymorphismStyle.Property)) override fun run() { - val config = try { - yaml.decodeFromString(config.readText()) + try { + val config = yaml.decodeFromString(config.readText()) + Pipeline(config).run() } catch (e: SerializationException) { - logger.error(e) { "Could not read config" } - println("Could not read config: $e") - return + report("Could not read config", e) + println("\nBe sure to check types of filters and problems for misprints!") + } catch (e: ProblemDefinitionException) { + report("Problem is defined incorrectly", e) + } catch (e: FunctionInfoPropertyNotImplementedException) { + report("Currently astminer cannot fulfill your request", e) } - Pipeline(config).run() + } + + private fun report(message: String, e: Exception) { + logger.error(e) { message } + println("$message:\n$e") } } diff --git a/src/main/kotlin/astminer/config/FilterConfigs.kt b/src/main/kotlin/astminer/config/FilterConfigs.kt index 8a68e514..e1d027e2 100644 --- a/src/main/kotlin/astminer/config/FilterConfigs.kt +++ b/src/main/kotlin/astminer/config/FilterConfigs.kt @@ -61,7 +61,7 @@ data class FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : Filter * @see WordsNumberFilter */ @Serializable -@SerialName("by length of any token") -data class WordsNumberFilterConfig(val maxWordsNumber: Int) : FilterConfig() { - override val serialName = "by length of any token" +@SerialName("by words number") +data class WordsNumberFilterConfig(val maxTokenWordsNumber: Int) : FilterConfig() { + override 
val serialName = "by words number" } diff --git a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt index fd43021c..54f57349 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt @@ -7,6 +7,7 @@ import astminer.common.model.ParseResult import astminer.config.* import astminer.filters.* import astminer.problem.* +import mu.KotlinLogging /** * PipelineBranch is a part of the pipeline that can be completely different depending on the granularity (pipeline type) @@ -21,6 +22,8 @@ interface PipelineBranch { fun process(languageHandler: LanguageHandler): Sequence> } +private val logger = KotlinLogging.logger("PipelineBranch") + /** * PipelineBranch for pipeline with file-level granularity (FilePipelineConfig). * Works with files as a whole. Tests parsed files with filters and extracts a label from them. @@ -29,9 +32,10 @@ class FilePipelineBranch(config: PipelineConfig) : PipelineBranch { private val filters: List = config.filters.mapNotNull { filterConfig -> when (filterConfig) { is TreeSizeFilterConfig -> TreeSizeFilter(filterConfig.maxTreeSize) - is WordsNumberFilterConfig -> WordsNumberFilter(filterConfig.maxWordsNumber) + is WordsNumberFilterConfig -> WordsNumberFilter(filterConfig.maxTokenWordsNumber) else -> { - println("Filter ${filterConfig.serialName} is not supported for this problem") + println("Filter `${filterConfig.serialName}` is not supported for this problem") + logger.info { "Filter `${filterConfig.serialName}` is not supported for this problem" } null } } @@ -40,7 +44,7 @@ class FilePipelineBranch(config: PipelineConfig) : PipelineBranch { private val problem: FileLevelProblem = when (config.problem) { is FileNameExtractorConfig -> FileNameExtractor is FolderNameExtractorConfig -> FolderNameExtractor - else -> throw ProblemNotFoundException(Granularity.File, "FilePipelineBranch") + else -> throw 
ProblemDefinitionException(Granularity.File, "FilePipelineBranch") } private fun passesThroughFilters(parseResult: ParseResult) = @@ -66,13 +70,14 @@ class FunctionPipelineBranch(config: PipelineConfig) : PipelineBranch { private val filters: List = config.filters.mapNotNull { filterConfig -> when (filterConfig) { is TreeSizeFilterConfig -> TreeSizeFilter(filterConfig.maxTreeSize) - is WordsNumberFilterConfig -> WordsNumberFilter(filterConfig.maxWordsNumber) + is WordsNumberFilterConfig -> WordsNumberFilter(filterConfig.maxTokenWordsNumber) is ModifierFilterConfig -> ModifierFilter(filterConfig.modifiers) is AnnotationFilterConfig -> AnnotationFilter(filterConfig.annotations) is ConstructorFilterConfig -> ConstructorFilter is FunctionNameWordsNumberFilterConfig -> FunctionNameWordsNumberFilter(filterConfig.maxWordsNumber) else -> { - println("Filter ${filterConfig.serialName} is not supported for this problem") + println("Filter `${filterConfig.serialName}` is not supported for this problem") + logger.info { "Filter `${filterConfig.serialName}` is not supported for this problem" } null } } @@ -80,7 +85,7 @@ class FunctionPipelineBranch(config: PipelineConfig) : PipelineBranch { private val problem: FunctionLevelProblem = when (config.problem) { is FunctionNameProblemConfig -> FunctionNameProblem - else -> throw ProblemNotFoundException(Granularity.Function, "FunctionPipelineBranch") + else -> throw ProblemDefinitionException(Granularity.Function, "FunctionPipelineBranch") } private fun passesThroughFilters(functionInfo: FunctionInfo) = @@ -96,7 +101,7 @@ class FunctionPipelineBranch(config: PipelineConfig) : PipelineBranch { * This exception is thrown when problem granularity is implemented incorrectly or the problem is not specified * inside the correct pipeline branch. 
*/ -class ProblemNotFoundException(granularity: Granularity, branchName: String) : +class ProblemDefinitionException(granularity: Granularity, branchName: String) : IllegalStateException( "The specified problem with granularity $granularity is not implemented inside of branch $branchName. " + "This should never happen!" From 5a70bfd4f886a84ea1727d6aa71cc9cbf83ac32c Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 24 May 2021 17:17:12 +0500 Subject: [PATCH 197/308] changed code2vec config serial name --- src/main/kotlin/astminer/config/StorageConfigs.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/config/StorageConfigs.kt b/src/main/kotlin/astminer/config/StorageConfigs.kt index b748d6b7..dc951fdb 100644 --- a/src/main/kotlin/astminer/config/StorageConfigs.kt +++ b/src/main/kotlin/astminer/config/StorageConfigs.kt @@ -30,7 +30,7 @@ object DotAstStorageConfig : StorageConfig() * Config for [astminer.storage.path.Code2VecPathStorage] */ @Serializable -@SerialName("code2vec paths") +@SerialName("Code2vec") data class Code2VecPathStorageConfig( val maxPathLength: Int, val maxPathWidth: Int, From 5fcede8d28faa47a82802c9808ac62a2ccbd0137 Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 24 May 2021 17:38:12 +0500 Subject: [PATCH 198/308] extracted logging logic in PipelineBranch.kt into filterNotSupported() --- src/main/kotlin/astminer/pipeline/PipelineBranch.kt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt index 54f57349..3cdfd5ba 100644 --- a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt @@ -24,6 +24,11 @@ interface PipelineBranch { private val logger = KotlinLogging.logger("PipelineBranch") +private fun filterNotSupported(filterConfig: FilterConfig) { + println("Filter `${filterConfig.serialName}` is not supported for this 
problem") + logger.info { "Filter `${filterConfig.serialName}` is not supported for this problem" } +} + /** * PipelineBranch for pipeline with file-level granularity (FilePipelineConfig). * Works with files as a whole. Tests parsed files with filters and extracts a label from them. @@ -34,8 +39,7 @@ class FilePipelineBranch(config: PipelineConfig) : PipelineBranch { is TreeSizeFilterConfig -> TreeSizeFilter(filterConfig.maxTreeSize) is WordsNumberFilterConfig -> WordsNumberFilter(filterConfig.maxTokenWordsNumber) else -> { - println("Filter `${filterConfig.serialName}` is not supported for this problem") - logger.info { "Filter `${filterConfig.serialName}` is not supported for this problem" } + filterNotSupported(filterConfig) null } } @@ -76,8 +80,7 @@ class FunctionPipelineBranch(config: PipelineConfig) : PipelineBranch { is ConstructorFilterConfig -> ConstructorFilter is FunctionNameWordsNumberFilterConfig -> FunctionNameWordsNumberFilter(filterConfig.maxWordsNumber) else -> { - println("Filter `${filterConfig.serialName}` is not supported for this problem") - logger.info { "Filter `${filterConfig.serialName}` is not supported for this problem" } + filterNotSupported(filterConfig) null } } From a9c080054d78ccb8a96ca8f0cd46bb86fb3837c9 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 24 May 2021 19:48:01 +0300 Subject: [PATCH 199/308] Replace token processor with automatic handling of node token --- .../java/astminer/examples/AllJavaFiles.java | 2 +- .../kotlin/astminer/cli/Code2VecExtractor.kt | 9 +-- .../kotlin/astminer/cli/LabelExtractors.kt | 15 ++-- .../astminer/cli/PathContextsExtractor.kt | 2 +- src/main/kotlin/astminer/cli/ProjectParser.kt | 6 +- src/main/kotlin/astminer/common/TreeUtil.kt | 23 +----- .../common/model/FunctionInfoModel.kt | 2 +- .../astminer/common/model/ParsingModel.kt | 27 ++++--- .../astminer/examples/AllJavaMethods.kt | 2 +- .../astminer/examples/AllJavaScriptFiles.kt | 3 +- .../astminer/examples/AllPythonFiles.kt | 3 +- 
.../kotlin/astminer/parse/antlr/AntlrNode.kt | 9 ++- .../kotlin/astminer/parse/antlr/AntlrUtil.kt | 24 ++---- .../parse/antlr/java/AntlrJavaFunctionInfo.kt | 4 +- .../javascript/AntlrJavaScriptElementInfo.kt | 24 +++--- .../antlr/python/AntlrPythonFunctionInfo.kt | 9 +-- .../parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt | 8 +- .../astminer/parse/fuzzy/cpp/FuzzyNode.kt | 11 +-- .../astminer/parse/gumtree/GumTreeNode.kt | 3 +- .../gumtree/java/GumTreeJavaFunctionInfo.kt | 6 +- .../python/GumTreePythonFunctionInfo.kt | 6 +- .../kotlin/astminer/storage/TokenProcessor.kt | 39 ---------- .../astminer/storage/ast/CsvAstStorage.kt | 5 +- .../astminer/storage/ast/DotAstStorage.kt | 10 +-- .../storage/path/Code2VecPathStorage.kt | 9 +-- .../astminer/storage/path/PathBasedStorage.kt | 15 ++-- .../kotlin/astminer/cli/LabelExtractorTest.kt | 3 +- src/test/kotlin/astminer/common/TestUtils.kt | 3 +- .../kotlin/astminer/common/TreeUtilTest.kt | 2 +- .../featureextraction/TreeFeatureTestUtil.kt | 2 +- .../astminer/storage/TokenProcessorTest.kt | 78 ------------------- 31 files changed, 103 insertions(+), 261 deletions(-) delete mode 100644 src/main/kotlin/astminer/storage/TokenProcessor.kt delete mode 100644 src/test/kotlin/astminer/storage/TokenProcessorTest.kt diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 5f705206..a1ff35f6 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -21,7 +21,7 @@ public class AllJavaFiles { public static void runExample() { final PathBasedStorageConfig config = new PathBasedStorageConfig(5, 5, null, null, null); - final PathBasedStorage pathStorage = new Code2VecPathStorage(OUTPUT_FOLDER, config, TokenProcessor.Normalize); + final PathBasedStorage pathStorage = new Code2VecPathStorage(OUTPUT_FOLDER, config); final Path inputFolder = Paths.get(INPUT_FOLDER); diff --git 
a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt index 12b60a9f..51e1b5f7 100644 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt @@ -3,7 +3,6 @@ package astminer.cli import astminer.common.getProjectFilesWithExtension import astminer.common.model.Node import astminer.common.model.ParseResult -import astminer.storage.* import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import com.github.ajalt.clikt.core.CliktCommand @@ -139,14 +138,8 @@ class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null for (extension in extensions) { val outputDirForLanguage = outputDir.resolve(extension) outputDirForLanguage.mkdir() - // Choose how to process tokens - val tokenProcessor = if (isTokenSplitted) { - TokenProcessor.Split - } else { - TokenProcessor.Normalize - } // Choose type of storage - val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig, tokenProcessor) + val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig) // Choose type of parser val parser = getParser( extension, diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index eb7154f2..6054c229 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -3,7 +3,6 @@ package astminer.cli import astminer.common.model.Node import astminer.common.model.ParseResult import astminer.common.model.FunctionInfo -import astminer.common.setTechnicalToken import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.java.JavaFunctionSplitter import astminer.parse.antlr.javascript.JavaScriptFunctionSplitter @@ -13,7 +12,6 @@ import astminer.parse.fuzzy.cpp.FuzzyNode import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.java.GumTreeJavaFunctionSplitter import 
astminer.parse.gumtree.python.GumTreePythonFunctionSplitter -import astminer.storage.TokenProcessor import java.io.File @@ -128,15 +126,18 @@ class MethodNameExtractor( override fun extractLabel(functionInfo: FunctionInfo, filePath: String): String? { // TODO: the normalization situation is getting out of control. It should be a separate step in the pipeline - val normalizedName = functionInfo.nameNode?.let { TokenProcessor.Normalize.getPresentableToken(it) } + val normalizedName = functionInfo.nameNode?.normalizedToken val name = functionInfo.name ?: return null functionInfo.root.preOrder().forEach { node -> - if (node.token == name) { - node.setTechnicalToken("SELF") - } + if (node.token == name) node.technicalToken = SELF_CALL_TOKEN } - functionInfo.nameNode?.setTechnicalToken("METHOD_NAME") + functionInfo.nameNode?.technicalToken = METHOD_NAME_TOKEN return normalizedName } + + companion object { + const val METHOD_NAME_TOKEN = "METHOD_NAME" + const val SELF_CALL_TOKEN = "SELF" + } } diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index 2e74aae7..f14b1280 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -113,7 +113,7 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? 
= val outputDirForLanguage = outputDir.resolve(extension) outputDirForLanguage.mkdir() - val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig, TokenProcessor.Split) + val storage = Code2VecPathStorage(outputDirForLanguage.path, storageConfig) val files = getProjectFilesWithExtension(File(projectRoot), extension) parser.parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt index b205695a..06ca89e5 100644 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ b/src/main/kotlin/astminer/cli/ProjectParser.kt @@ -4,7 +4,6 @@ import astminer.storage.ast.CsvAstStorage import astminer.storage.ast.DotAstStorage import astminer.common.getProjectFilesWithExtension import astminer.storage.Storage -import astminer.storage.TokenProcessor import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.* import com.github.ajalt.clikt.parameters.types.int @@ -102,10 +101,7 @@ class ProjectParser(private val customLabelExtractor: LabelExtractor? 
= null) : private fun getStorage(storageType: String, directoryPath: String): Storage { return when (storageType) { "csv" -> CsvAstStorage(directoryPath) - "dot" -> DotAstStorage( - directoryPath, - if (isTokenSplitted) TokenProcessor.Split else TokenProcessor.Normalize - ) + "dot" -> DotAstStorage(directoryPath) else -> { throw UnsupportedOperationException("Unsupported AST storage $storageType") } diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index 4e0f802c..0999f7b1 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -1,27 +1,6 @@ package astminer.common -import astminer.common.model.Node -import astminer.storage.TokenProcessor -import java.util.ArrayList - - -const val DEFAULT_TOKEN = "EMPTY_TOKEN" -const val TECHNICAL_TOKEN_KEY = "technical_token" - -/** - * Sets a node's technical token. - * Technical tokens do not have to represent original tokens. - * @see TokenProcessor and how it treats technical tokens - */ -fun Node.setTechnicalToken(token: String) { - metadata[TECHNICAL_TOKEN_KEY] = token -} - -/** - * Get a node's technical token. - * @see setTechnicalToken for more - */ -fun Node.getTechnicalToken(): String? = metadata[TECHNICAL_TOKEN_KEY]?.toString() +const val DEFAULT_TOKEN = "EMPTY" /** * The function was adopted from the original code2vec implementation in order to match their behavior: diff --git a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt index f37bf262..d62a0407 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt @@ -16,7 +16,7 @@ interface FunctionInfo { val nameNode: T? get() = notImplemented("nameNode") val name: String? 
- get() = nameNode?.token + get() = nameNode?.originalToken val root: T get() = notImplemented("root") val filePath: String diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index ee9d898f..d5d87e76 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -1,33 +1,42 @@ package astminer.common.model import astminer.cli.LabeledResult +import astminer.common.DEFAULT_TOKEN +import astminer.common.splitToSubtokens import astminer.parse.ParsingException import mu.KotlinLogging import java.io.File import java.io.InputStream import java.util.* -import kotlin.collections.HashMap // TODO: later move this logger to Pipeline private val logger = KotlinLogging.logger("ParsingModel") -abstract class Node{ +abstract class Node { abstract val typeLabel: String abstract val children: List abstract val parent: Node? - abstract val token: String + abstract val originalToken: String? + + val normalizedToken: String? by lazy { + originalToken?.let { + val subtokens = splitToSubtokens(it) + if (subtokens.isEmpty()) null + else subtokens.joinToString("|") + } + } + var technicalToken: String? 
= null + + val token: String + get() = listOfNotNull(technicalToken, normalizedToken, originalToken).firstOrNull() ?: DEFAULT_TOKEN val metadata: MutableMap = HashMap() fun isLeaf() = children.isEmpty() + override fun toString(): String = "$typeLabel : $token" fun prettyPrint(indent: Int = 0, indentSymbol: String = "--") { repeat(indent) { print(indentSymbol) } - print(typeLabel) - if (token.isNotEmpty()) { - println(" : $token") - } else { - println() - } + println(this) children.forEach { it.prettyPrint(indent + 1, indentSymbol) } } diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 2064e90d..916f038e 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -25,7 +25,7 @@ fun allJavaMethods() { val inputDir = "src/test/resources/gumTreeMethodSplitter" val outputDir = "out_examples/allJavaMethods" - val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) File(inputDir).forFilesWithSuffix(".java") { file -> //parse file diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index f3a835e5..4b139a4a 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -3,7 +3,6 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.antlr.javascript.JavaScriptParser import astminer.storage.path.PathBasedStorageConfig -import astminer.storage.TokenProcessor import astminer.storage.path.Code2VecPathStorage import java.io.File @@ -11,7 +10,7 @@ fun allJavaScriptFiles() { val folder = "src/test/resources/examples" val outputDir = "out_examples/allJavaScriptFilesAntlr" - val storage = Code2VecPathStorage(outputDir, 
PathBasedStorageConfig(5, 5), TokenProcessor.Split) + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) val files = getProjectFilesWithExtension(File(folder), "js") JavaScriptParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index 91f6fe88..00908e69 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -3,7 +3,6 @@ package astminer.examples import astminer.common.getProjectFilesWithExtension import astminer.parse.antlr.python.PythonParser import astminer.storage.path.PathBasedStorageConfig -import astminer.storage.TokenProcessor import astminer.storage.path.Code2VecPathStorage import java.io.File @@ -12,7 +11,7 @@ fun allPythonFiles() { val inputDir = "src/test/resources/examples/" val outputDir = "out_examples/allPythonFiles" - val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5), TokenProcessor.Split) + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) val files = getProjectFilesWithExtension(File(inputDir), "py") PythonParser().parseFiles(files) { parseResult -> diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt index fbf2ca61..4f738482 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt @@ -1,14 +1,15 @@ package astminer.parse.antlr -import astminer.common.DEFAULT_TOKEN import astminer.common.model.Node -class AntlrNode(override val typeLabel: String, override var parent: AntlrNode?, token: String?) : Node() { +class AntlrNode( + override val typeLabel: String, + override var parent: AntlrNode?, + override val originalToken: String? 
+) : Node() { override val children: MutableList = mutableListOf() - override var token: String = token ?: DEFAULT_TOKEN - fun replaceChildren(newChildren: List) { children.clear() newChildren.forEach { it.parent = this } diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 236e6d1d..055072e8 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -1,5 +1,6 @@ package astminer.parse.antlr +import astminer.common.DEFAULT_TOKEN import astminer.common.model.Node import org.antlr.v4.runtime.ParserRuleContext import org.antlr.v4.runtime.Vocabulary @@ -60,7 +61,7 @@ fun compressTree(root: AntlrNode): AntlrNode { val compressedNode = AntlrNode( root.typeLabel + "|" + child.typeLabel, root.parent, - child.token + child.originalToken ) compressedNode.replaceChildren(child.children) compressedNode @@ -85,19 +86,10 @@ fun AntlrNode.hasFirstLabel(label: String): Boolean = firstLabel() == label fun AntlrNode.firstLabelIn(labels: List): Boolean = labels.contains(firstLabel()) -fun Node.getTokensFromSubtree(): String { - if (isLeaf()) { - return token - } - return children.joinToString(separator = "") { child -> - child.getTokensFromSubtree() - } -} +fun Node.getTokensFromSubtree(): String = + if (isLeaf()) originalToken ?: DEFAULT_TOKEN + else children.joinToString(separator = "") { child -> child.getTokensFromSubtree() } -fun AntlrNode.getItOrChildrenOfType(typeLabel: String) : List { - return if (hasLastLabel(typeLabel)) { - listOf(this) - } else { - this.getChildrenOfType(typeLabel).map { it } - } -} +fun AntlrNode.getItOrChildrenOfType(typeLabel: String) : List = + if (hasLastLabel(typeLabel)) listOf(this) + else this.getChildrenOfType(typeLabel).map { it } diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt index 84f1de35..7003724f 100644 --- 
a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -37,7 +37,7 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo { val parametersRoot = getParametersRoot() - return when { - //No parameters found + val parameterNameNodes = when { + // No parameters found parametersRoot == null -> emptyList() - //Have only one parameter, which is indicated only by its name - parametersRoot.hasLastLabel(PARAMETER_NAME_NODE) -> listOf( - FunctionInfoParameter(name = parametersRoot.token, type = null) - ) + // Have only one parameter, which is indicated only by its name + parametersRoot.hasLastLabel(PARAMETER_NAME_NODE) -> listOf(parametersRoot) - //Have many parameters or one indicated not only by it's name - else -> parametersRoot.getItOrChildrenOfType(SINGLE_PARAMETER_NODE).map { - val nameNode = it.getChildOfType(PARAMETER_NAME_NODE) ?: it - FunctionInfoParameter(name = nameNode.token, type = null) + // Have many parameters or one indicated not only by it's name + else -> parametersRoot + .getItOrChildrenOfType(SINGLE_PARAMETER_NODE) + .map { it.getChildOfType(PARAMETER_NAME_NODE) ?: it } } + return parameterNameNodes.map { + val parameterName = it.originalToken ?: throw IllegalStateException("Parameter name wasn't found") + FunctionInfoParameter(name = parameterName, type = null) } } diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt index 41501b46..ee98b2be 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -50,11 +50,8 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode) : FunctionInfo enclosingNode.getChildOfType(CLASS_NAME_NODE) EnclosingElementType.Method, EnclosingElementType.Function -> 
enclosingNode.getChildOfType(FUNCTION_NAME_NODE) else -> throw IllegalStateException("Enclosing node can only be function or class") - }?.token + }?.originalToken return EnclosingElement( type = type, name = name, diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt index 2dad89d5..9806112e 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt @@ -30,7 +30,7 @@ class FuzzyCppFunctionInfo(override val root: FuzzyNode): FunctionInfo? { @@ -48,14 +48,14 @@ class FuzzyCppFunctionInfo(override val root: FuzzyNode): FunctionInfo { val parameters = root.getChildrenOfType(METHOD_PARAMETER_NODE) return parameters.map { param -> - val type = param.getChildOfType(PARAMETER_TYPE_NODE)?.token - val name = param.getChildOfType(PARAMETER_NAME_NODE)?.token ?: "" + val type = param.getChildOfType(PARAMETER_TYPE_NODE)?.originalToken + val name = param.getChildOfType(PARAMETER_NAME_NODE)?.originalToken ?: "" FunctionInfoParameter(name, type) } } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt index 2c6bc0a5..d4a24011 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt @@ -1,16 +1,19 @@ package astminer.parse.fuzzy.cpp -import astminer.common.DEFAULT_TOKEN import astminer.common.model.Node import com.google.common.collect.TreeMultiset /** * Node for AST, created by fuzzyc2cpg. * @param typeLabel - node's label - * @param token - node's token + * @param originalToken - node's token * @param order - node's order, which used to express the ordering of children in the AST when it matters */ -class FuzzyNode(override val typeLabel: String,token: String?, order: Int?) 
: Node() { +class FuzzyNode( + override val typeLabel: String, + override val originalToken: String?, + order: Int? +) : Node() { private val order = order ?: -1 override var parent: Node? = null private val childrenMultiset = TreeMultiset.create(compareBy( @@ -21,8 +24,6 @@ class FuzzyNode(override val typeLabel: String,token: String?, order: Int?) : No override val children get() = childrenMultiset.toList() - override var token: String = token ?: DEFAULT_TOKEN - fun addChild(node: FuzzyNode) { childrenMultiset.add(node) node.parent = this diff --git a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt index 211c06cf..dd6b29db 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt @@ -11,8 +11,7 @@ class GumTreeNode(val wrappedNode: ITree, val context: TreeContext,override var override val children: MutableList by lazy { wrappedNode.children.map { GumTreeNode(it, context, this) }.toMutableList() } - override val token: String - get() = wrappedNode.label + override val originalToken: String = wrappedNode.label override fun removeChildrenOfType(typeLabel: String) { children.removeIf { it.typeLabel == typeLabel } diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt index 93153f6f..c9cae9e2 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -22,7 +22,7 @@ class GumTreeJavaFunctionInfo(override val root: GumTreeNode) : FunctionInfo? 
{ val enclosingClassNode = getEnclosingClassNode(root.parent) ?: return null - val enclosingClassName = enclosingClassNode.getChildOfType(TypeLabels.simpleName)?.token + val enclosingClassName = enclosingClassNode.getChildOfType(TypeLabels.simpleName)?.originalToken return EnclosingElement( root = enclosingClassNode, type = EnclosingElementType.Class, @@ -48,12 +48,12 @@ class GumTreeJavaFunctionInfo(override val root: GumTreeNode) : FunctionInfo FunctionInfoParameter( - name = node.token, - type = getElementType(node)?.token + name = node.originalToken, + type = getElementType(node)?.originalToken ) } } diff --git a/src/main/kotlin/astminer/storage/TokenProcessor.kt b/src/main/kotlin/astminer/storage/TokenProcessor.kt deleted file mode 100644 index dc418db2..00000000 --- a/src/main/kotlin/astminer/storage/TokenProcessor.kt +++ /dev/null @@ -1,39 +0,0 @@ -package astminer.storage - -import astminer.common.DEFAULT_TOKEN -import astminer.common.getTechnicalToken -import astminer.common.model.Node -import astminer.common.normalizeToken -import astminer.common.splitToSubtokens - -/** - * Each TokenProcessor processes a node's token and returns a new representation of it. *It respects technical tokens*. - * Before saving a token on the disk one usually processes the token with a TokenProcessor. - */ -enum class TokenProcessor { - /** - * Splits the token into subtokens (words). - * For example, "getFull_name" --> "get|full|name" - */ - Split { - private fun separateToken(token: String): String { - return splitToSubtokens(token).joinToString("|") - } - - override fun processToken(node: Node): String = separateToken(node.token) - }, - - /** - * Processes the token according to the original code2vec implementation in order to match their behavior. 
- */ - Normalize { - override fun processToken(node: Node): String = normalizeToken(node.token, DEFAULT_TOKEN) - }; - - protected abstract fun processToken(node: Node): String - - /** - * Returns technical token, if technical token is set. Returns processed original token otherwise. - */ - fun getPresentableToken(node: Node) = node.getTechnicalToken() ?: processToken(node) -} diff --git a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt index f466bf76..3d3524d1 100644 --- a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -2,7 +2,10 @@ package astminer.storage.ast import astminer.cli.LabeledResult import astminer.common.model.Node -import astminer.common.storage.* +import astminer.common.storage.RankedIncrementalIdStorage +import astminer.common.storage.dumpIdStorageToCsv +import astminer.common.storage.nodeTypeToCsvString +import astminer.common.storage.tokenToCsvString import astminer.storage.Storage import java.io.File import java.io.PrintWriter diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index aa838ddd..4e448724 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -4,7 +4,6 @@ import astminer.cli.LabeledResult import astminer.common.model.Node import astminer.common.storage.RankedIncrementalIdStorage import astminer.storage.Storage -import astminer.storage.TokenProcessor import java.io.File import java.io.PrintWriter @@ -12,10 +11,7 @@ import java.io.PrintWriter * Stores multiple ASTs in dot format (https://en.wikipedia.org/wiki/DOT_(graph_description_language)) * Output consist of separate .dot files for each AST and one full description in .csv format */ -class DotAstStorage( - override val outputDirectoryPath: String, - val tokenProcessor: TokenProcessor = 
TokenProcessor.Normalize -) : Storage { +class DotAstStorage(override val outputDirectoryPath: String) : Storage { internal data class FilePath(val parentPath: String, val fileName: String) @@ -34,8 +30,6 @@ class DotAstStorage( descriptionFileStream.write("dot_file,source_file,label,node_id,token,type\n") } - private fun Node.getPresentableToken(): String = tokenProcessor.getPresentableToken(this) - override fun store(labeledResult: LabeledResult) { // Use filename as a label for ast // TODO: save full signature for method @@ -48,7 +42,7 @@ class DotAstStorage( descriptionFileStream.write( nodeDescriptionFormat.format( nodesMap.getId(node) - 1, - node.getPresentableToken(), + node.token, node.typeLabel ) + "\n" ) diff --git a/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt b/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt index 51f694dc..f69fb560 100644 --- a/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt +++ b/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt @@ -1,14 +1,9 @@ package astminer.storage.path import astminer.common.model.PathContextId -import astminer.storage.TokenProcessor -class Code2VecPathStorage( - outputDirectoryPath: String, - config: PathBasedStorageConfig, - tokenProcessor: TokenProcessor = TokenProcessor.Normalize -) : - PathBasedStorage(outputDirectoryPath, config, tokenProcessor) { +class Code2VecPathStorage(outputDirectoryPath: String, config: PathBasedStorageConfig) : + PathBasedStorage(outputDirectoryPath, config) { override fun pathContextIdsToString(pathContextIds: List, label: String): String { val joinedPathContexts = pathContextIds.joinToString(" ") { pathContextId -> diff --git a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index e998543c..bdf5572e 100644 --- a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -7,7 +7,6 @@ import 
astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import astminer.paths.toPathContext import astminer.storage.Storage -import astminer.storage.TokenProcessor import java.io.File import java.io.PrintWriter @@ -33,12 +32,10 @@ data class PathBasedStorageConfig( * Base class for all path storages. Extracts paths from given LabellingResult and stores it in a specified format. * @property outputDirectoryPath The path to the output directory. * @property config The config that contains hyperparameters for path extraction. - * @property tokenProcessor The token processor that is used to extract tokens from nodes. */ abstract class PathBasedStorage( final override val outputDirectoryPath: String, private val config: PathBasedStorageConfig, - private val tokenProcessor: TokenProcessor ) : Storage { private val pathMiner = PathMiner(PathRetrievalSettings(config.maxPathLength, config.maxPathWidth)) @@ -59,8 +56,6 @@ abstract class PathBasedStorage( abstract fun pathContextIdsToString(pathContextIds: List, label: String): String - private fun Node.getPresentableToken(): String = tokenProcessor.getPresentableToken(this) - private fun dumpPathContexts(labeledPathContextIds: LabeledPathContextIds) { val pathContextIdsString = labeledPathContextIds.pathContexts.filter { val isNumberOfTokensValid = config.maxTokens == null || @@ -91,7 +86,7 @@ abstract class PathBasedStorage( private fun retrieveLabeledPathContexts(labeledResult: LabeledResult): LabeledPathContexts { val paths = retrievePaths(labeledResult.root) return LabeledPathContexts(labeledResult.label, paths.map { astPath -> - toPathContext(astPath) { node -> node.getPresentableToken() } + toPathContext(astPath) { it.token } }) } @@ -121,7 +116,13 @@ abstract class PathBasedStorage( orientedNodeToCsvString, File("$outputDirectoryPath/node_types.csv") ) - dumpIdStorageToCsv(pathsMap, "path", pathToCsvString, File("$outputDirectoryPath/paths.csv"), config.maxPaths) + dumpIdStorageToCsv( + pathsMap, + "path", 
+ pathToCsvString, + File("$outputDirectoryPath/paths.csv"), + config.maxPaths + ) labeledPathContextIdsWriter.close() } diff --git a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt index 05827a6e..3d4d8a5e 100644 --- a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt +++ b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt @@ -1,6 +1,5 @@ package astminer.cli -import astminer.common.getTechnicalToken import astminer.common.model.* import astminer.parse.antlr.AntlrNode import org.junit.Before @@ -65,6 +64,6 @@ internal class LabelExtractorTest { val functionInfo = makeFunctionInfo(nameNode) val methodNameExtractor = MethodNameExtractor() methodNameExtractor.extractLabel(functionInfo, PATH_STRING) - assertEquals("METHOD_NAME", nameNode.getTechnicalToken()) + assertEquals("METHOD_NAME", nameNode.technicalToken) } } diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index 6122f576..90e590e5 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -9,8 +9,7 @@ class DummyNode(override val typeLabel: String, override val children: MutableLi //TODO("not implemented") override val parent: Node? 
= null - override val token: String - get() = typeLabel + override val originalToken: String = typeLabel override fun removeChildrenOfType(typeLabel: String) { children.removeIf { it.typeLabel == typeLabel } diff --git a/src/test/kotlin/astminer/common/TreeUtilTest.kt b/src/test/kotlin/astminer/common/TreeUtilTest.kt index d6dbd410..631ac3ad 100644 --- a/src/test/kotlin/astminer/common/TreeUtilTest.kt +++ b/src/test/kotlin/astminer/common/TreeUtilTest.kt @@ -20,7 +20,7 @@ class TreeUtilTest { Assert.assertArrayEquals(arrayOf("1", "2", "4", "5", "6", "3", "7", "8"), dataList.toList().toTypedArray()) } - private val defaultToken = "EMPTY_TOKEN" + private val defaultToken = "EMPTY" @Test fun testNormalizeTokenCleaning() { diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt index e6caf732..8b1cc345 100644 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt +++ b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt @@ -2,7 +2,7 @@ package astminer.featureextraction import astminer.common.model.Node -class PrettyNode(override val typeLabel: String, override val token: String) : Node() { +class PrettyNode(override val typeLabel: String, override val originalToken: String) : Node() { override var children: MutableList = ArrayList() override var parent: PrettyNode? 
= null set(value) { diff --git a/src/test/kotlin/astminer/storage/TokenProcessorTest.kt b/src/test/kotlin/astminer/storage/TokenProcessorTest.kt deleted file mode 100644 index b85a3803..00000000 --- a/src/test/kotlin/astminer/storage/TokenProcessorTest.kt +++ /dev/null @@ -1,78 +0,0 @@ -package astminer.storage - -import astminer.common.DEFAULT_TOKEN -import astminer.common.DummyNode -import astminer.common.setTechnicalToken -import org.junit.Assert -import org.junit.Test - - -internal class TokenProcessorTest { - private fun normalizeToken(token: String): String { - val node = DummyNode(token, mutableListOf()) - return TokenProcessor.Normalize.getPresentableToken(node) - } - - private fun splitToken(token: String): String { - val node = DummyNode(token, mutableListOf()) - return TokenProcessor.Split.getPresentableToken(node) - } - - @Test - fun testNormalizeTokenCleaning() { - val token = " Token THAT \n contains Whi\"t,es''pace characters!!!and pu.n.c.t.u.a.tion \n" - val expectedToken = "token" + "that" + "contains" + "whitespace" + "characters" + "and" + "punctuation" - Assert.assertEquals( - "All whitespace characters and punctuation should be removed, keeping only letters", - expectedToken, - normalizeToken(token) - ) - } - - @Test - fun testNormalizeTokenWithoutLetters() { - val token = "* *\n" - val expectedToken = "*_*" - Assert.assertEquals( - "Token without letters have whitespaces replaced with underscores", - expectedToken, - normalizeToken(token) - ) - } - - @Test - fun testNormalizeEmptyToken() { - val token = "\n\n" - val expectedToken = DEFAULT_TOKEN - Assert.assertEquals( - "Token without letters have whitespaces replaced with underscores", - expectedToken, - normalizeToken(token) - ) - } - - @Test - fun testTokenSplit() { - val token = "fun_withReallyLong_And_ComplicatedName" - val expectedToken = "fun|with|really|long|and|complicated|name" - Assert.assertEquals( - "Token with snake, camel and combined case should be split into list of its 
parts", - expectedToken, - splitToken(token) - ) - } - - @Test - fun `test Normalize respects technical token`() { - val node = DummyNode("tokenName", mutableListOf()) - node.setTechnicalToken("technical token") - Assert.assertEquals("technical token", TokenProcessor.Normalize.getPresentableToken(node)) - } - - @Test - fun `test Split respects technical token`() { - val node = DummyNode("tokenName", mutableListOf()) - node.setTechnicalToken("technical token") - Assert.assertEquals("technical token", TokenProcessor.Split.getPresentableToken(node)) - } -} \ No newline at end of file From 6f56364a971b544f09db53040ff1c763fa3d3d14 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 24 May 2021 22:10:20 +0300 Subject: [PATCH 200/308] Fix splitting test, accurate matching of self calls --- src/main/kotlin/astminer/cli/FilterPredicates.kt | 4 +++- src/main/kotlin/astminer/cli/LabelExtractors.kt | 5 ++--- .../gumtree/python/GumTreePythonFunctionSplitterTest.kt | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/kotlin/astminer/cli/FilterPredicates.kt b/src/main/kotlin/astminer/cli/FilterPredicates.kt index 3fb642db..fc27abce 100644 --- a/src/main/kotlin/astminer/cli/FilterPredicates.kt +++ b/src/main/kotlin/astminer/cli/FilterPredicates.kt @@ -38,7 +38,9 @@ class MethodAnyNodeWordsNumberFilter(private val maxWordsNumber: Int) : MethodFi return if (maxWordsNumber == -1) { true } else { - !functionInfo.root.preOrder().any { node -> splitToSubtokens(node.token).size > maxWordsNumber } + !functionInfo.root.preOrder().any { node -> + node.normalizedToken?.let { it.split("|").size > maxWordsNumber } ?: false + } } } } diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 6054c229..6ed310cc 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -125,12 +125,11 @@ class MethodNameExtractor( ) : 
MethodLabelExtractor(filterPredicates, javaParser, pythonParser) { override fun extractLabel(functionInfo: FunctionInfo, filePath: String): String? { - // TODO: the normalization situation is getting out of control. It should be a separate step in the pipeline val normalizedName = functionInfo.nameNode?.normalizedToken - val name = functionInfo.name ?: return null + functionInfo.name ?: return null functionInfo.root.preOrder().forEach { node -> - if (node.token == name) node.technicalToken = SELF_CALL_TOKEN + if (node.originalToken == functionInfo.nameNode?.originalToken) node.technicalToken = SELF_CALL_TOKEN } functionInfo.nameNode?.technicalToken = METHOD_NAME_TOKEN return normalizedName diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt index 857b7bb6..dd3fcb7e 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt @@ -130,7 +130,7 @@ class GumTreePythonFunctionSplitterTest { root.getChildOfType("body") ?.getChildOfType("Expr") ?.getChildOfType("Constant-str") - ?.token + ?.originalToken ) assertEquals(4, parameters.size) assertEquals( From c31ea83123c81d555a360e4f6a5533c71aed1d69 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 24 May 2021 22:25:24 +0300 Subject: [PATCH 201/308] Shield new line symbol --- src/main/kotlin/astminer/storage/path/PathBasedStorage.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index bdf5572e..836b70ad 100644 --- a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -86,7 +86,7 @@ abstract class PathBasedStorage( private fun 
retrieveLabeledPathContexts(labeledResult: LabeledResult): LabeledPathContexts { val paths = retrievePaths(labeledResult.root) return LabeledPathContexts(labeledResult.label, paths.map { astPath -> - toPathContext(astPath) { it.token } + toPathContext(astPath) { it.token.replace("\n", "\\n") } }) } From 42f80ba76d9ff4c2c2f3866a1cca3304ec0389ed Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 25 May 2021 11:33:11 +0500 Subject: [PATCH 202/308] resolved merge conflicts --- src/main/kotlin/astminer/config/StorageConfigs.kt | 2 -- src/main/kotlin/astminer/filters/CommonFilters.kt | 3 ++- src/main/kotlin/astminer/pipeline/Pipeline.kt | 5 +---- .../astminer/problem/FunctionLevelProblems.kt | 15 +++++++-------- .../astminer/problem/FunctionNameProblemTest.kt | 7 +++---- 5 files changed, 13 insertions(+), 19 deletions(-) diff --git a/src/main/kotlin/astminer/config/StorageConfigs.kt b/src/main/kotlin/astminer/config/StorageConfigs.kt index dc951fdb..3d51a358 100644 --- a/src/main/kotlin/astminer/config/StorageConfigs.kt +++ b/src/main/kotlin/astminer/config/StorageConfigs.kt @@ -1,6 +1,5 @@ package astminer.config -import astminer.storage.TokenProcessor import astminer.storage.path.PathBasedStorageConfig import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable @@ -37,7 +36,6 @@ data class Code2VecPathStorageConfig( val maxTokens: Long? = null, val maxPaths: Long? = null, val maxPathContextsPerEntity: Int? = null, - val tokenProcessor: TokenProcessor = TokenProcessor.Normalize ) : StorageConfig() { @Transient val pathBasedStorageConfig = diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt index 587e1fba..fbbb67ef 100644 --- a/src/main/kotlin/astminer/filters/CommonFilters.kt +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -26,8 +26,9 @@ class TreeSizeFilter(private val minSize: Int = 0, private val maxSize: Int? 
= n * Filter that excludes trees that have more words than [maxWordsNumber] in any token of their node. */ class WordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter, FileFilter { + // TODO: splitting the token here is not the best choice. For instance, if delimiter is changed or other internal logic then this will have to be rewritten private fun validateTree(root: Node) = - !root.preOrder().any { node -> splitToSubtokens(node.token).size > maxWordsNumber } + !root.preOrder().any { node -> node.token.split("|").size > maxWordsNumber } override fun validate(functionInfo: FunctionInfo) = validateTree(functionInfo.root) diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index e33c1859..692ef102 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -2,12 +2,9 @@ package astminer.pipeline import astminer.common.getProjectFilesWithExtension import astminer.config.* -import astminer.filters.ModifierFilter -import astminer.filters.TreeSizeFilter import astminer.parse.ParsingException import astminer.parse.getHandlerFactory import astminer.storage.Storage -import astminer.storage.TokenProcessor import astminer.storage.ast.CsvAstStorage import astminer.storage.ast.DotAstStorage import astminer.storage.path.Code2VecPathStorage @@ -39,7 +36,7 @@ class Pipeline(private val config: PipelineConfig) { val storagePath = createStorageDirectory(extension).path when (this) { is CsvAstStorageConfig -> CsvAstStorage(storagePath) - is DotAstStorageConfig -> DotAstStorage(storagePath, TokenProcessor.Split) + is DotAstStorageConfig -> DotAstStorage(storagePath) is Code2VecPathStorageConfig -> Code2VecPathStorage(storagePath, pathBasedStorageConfig) } } diff --git a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt index 65325107..b22e8496 100644 --- 
a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt @@ -2,7 +2,6 @@ package astminer.problem import astminer.common.model.FunctionInfo import astminer.common.model.Node -import astminer.common.setTechnicalToken interface FunctionLevelProblem { fun process(functionInfo: FunctionInfo): LabeledResult? @@ -13,17 +12,17 @@ interface FunctionLevelProblem { * Hides the name of the function in the subtree and also all in the recursive calls. */ object FunctionNameProblem : FunctionLevelProblem { - const val TECHNICAL_METHOD_NAME = "METHOD_NAME" - const val TECHNICAL_RECURSIVE_CALL = "SELF" + const val HIDDEN_METHOD_NAME_TOKEN = "METHOD_NAME" + const val RECURSIVE_CALL_TOKEN = "SELF" override fun process(functionInfo: FunctionInfo): LabeledResult? { - val name = functionInfo.name ?: return null + val normalizedName = functionInfo.nameNode?.normalizedToken ?: return null functionInfo.root.preOrder().forEach { node -> - if (node.token == name) { - node.setTechnicalToken(TECHNICAL_RECURSIVE_CALL) + if (node.originalToken == functionInfo.name) { + node.technicalToken = RECURSIVE_CALL_TOKEN } } - functionInfo.nameNode?.setTechnicalToken(TECHNICAL_METHOD_NAME) - return LabeledResult(functionInfo.root, name, functionInfo.filePath) + functionInfo.nameNode?.technicalToken = HIDDEN_METHOD_NAME_TOKEN + return LabeledResult(functionInfo.root, normalizedName, functionInfo.filePath) } } diff --git a/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt b/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt index b6505208..e30c3792 100644 --- a/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt +++ b/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt @@ -1,7 +1,6 @@ package astminer.problem import astminer.common.DummyNode -import astminer.common.getTechnicalToken import astminer.common.model.FunctionInfo import astminer.common.model.Node import org.junit.Before @@ -39,19 +38,19 @@ class 
FunctionNameProblemTest { @Test fun `test FunctionNameProblem hides function name node token with METHOD_NAME`() { FunctionNameProblem.process(functionInfo) - assertEquals("METHOD_NAME", functionInfo.nameNode?.getTechnicalToken()) + assertEquals("METHOD_NAME", functionInfo.nameNode?.token) } @Test fun `test FunctionNameProblem hides function root token with METHOD_NAME if it is the name node`() { FunctionNameProblem.process(functionInfo) - assertEquals("METHOD_NAME", functionInfo.root.getTechnicalToken()) + assertEquals("METHOD_NAME", functionInfo.root.token) } @Test fun `test function name problem should hide recursive call tokens with SELF`() { FunctionNameProblem.process(functionInfo) val recursiveCallNode = functionInfo.root.children.firstOrNull()?.children?.firstOrNull() - assertEquals("SELF", recursiveCallNode?.getTechnicalToken()) + assertEquals("SELF", recursiveCallNode?.token) } } \ No newline at end of file From 73ad122c3d4946c6e359ebd944ea05bd7a53794f Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 25 May 2021 11:37:18 +0500 Subject: [PATCH 203/308] empty commit From 4377d843c28d5767d295867763f58982dec86f1a Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Tue, 25 May 2021 15:28:13 +0300 Subject: [PATCH 204/308] Add log file to gitignore --- .gitignore | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 61ce55df..013b6c8c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,11 @@ -*.iml -*.csv -asts/ - .idea/ .gradle/ -examples/out/ +.DS_Store + src/main/generated/ build/ +examples/out/ +asts/ +*.csv +log.txt -.DS_Store From 633115c4195574629fb7dbca86d4533a964a393e Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Tue, 25 May 2021 15:28:45 +0300 Subject: [PATCH 205/308] Use jar execution as docker entry point --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 09a5733f..85f6b050 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ 
RUN apt-get update && apt-get install -y g++ # Install PythonParser for GumTree ARG PYTHONPARSER_REPO=https://raw.githubusercontent.com/JetBrains-Research/pythonparser/master RUN apt-get update && \ - apt-get install -y --no-install-recommends -y python3.8 python3-pip git wget && \ + apt-get install -y python3.8 python3-pip git wget && \ mkdir pythonparser && \ cd pythonparser && \ wget $PYTHONPARSER_REPO/requirements.txt && \ @@ -27,4 +27,4 @@ COPY . . # Prepare shadow jar RUN ./gradlew shadowJar -CMD ["java", "-jar", "build/shadow/astminer.jar"] +ENTRYPOINT ["java", "-jar", "build/shadow/astminer.jar"] From ff82ca6250ef8cbf52c56a6d48858cf60192b17d Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 28 May 2021 22:04:59 +0300 Subject: [PATCH 206/308] php lexer and parser added --- src/main/antlr/PhpLexer.g4 | 347 ++++++++++++++ src/main/antlr/PhpParser.g4 | 925 ++++++++++++++++++++++++++++++++++++ 2 files changed, 1272 insertions(+) create mode 100644 src/main/antlr/PhpLexer.g4 create mode 100644 src/main/antlr/PhpParser.g4 diff --git a/src/main/antlr/PhpLexer.g4 b/src/main/antlr/PhpLexer.g4 new file mode 100644 index 00000000..02152399 --- /dev/null +++ b/src/main/antlr/PhpLexer.g4 @@ -0,0 +1,347 @@ +/* +PHP grammar. +The MIT License (MIT). +Copyright (c) 2015-2020, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies. +Copyright (c) 2019, Thierry Marianne (thierry.marianne@weaving-the-web.org) +Copyright (c) 2019-2020, Student Main for php7, php8 support. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +lexer grammar PhpLexer; + +channels { PhpComments, ErrorLexem, SkipChannel } + +options { + superClass=PhpLexerBase; +} + +SeaWhitespace: [ \t\r\n]+ -> channel(HIDDEN); +HtmlText: ~[<#]+; +XmlStart: ' pushMode(XML); +PHPStartEcho: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStart: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +HtmlScriptOpen: ' pushMode(INSIDE); +HtmlStyleOpen: ' pushMode(INSIDE); +HtmlComment: '' -> channel(HIDDEN); +HtmlDtd: ''; +HtmlOpen: '<' -> pushMode(INSIDE); +Shebang + : '#' { this.IsNewLineOrStart(-2) }? '!' ~[\r\n]* + ; +NumberSign: '#' ~'<'* -> more; +Error: . -> channel(ErrorLexem); + +// TODO: parse xml attributes. +mode XML; + +XmlText: ~'?'+; +XmlClose: '?>' -> popMode; +XmlText2: '?' 
-> type(XmlText); + +mode INSIDE; + +PHPStartEchoInside: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartInside: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +HtmlClose: '>' { this.PushModeOnHtmlClose(); }; +HtmlSlashClose: '/>' -> popMode; +HtmlSlash: '/'; +HtmlEquals: '='; + +HtmlStartQuoteString: '\\'? '\'' -> pushMode(HtmlQuoteStringMode); +HtmlStartDoubleQuoteString: '\\'? '"' -> pushMode(HtmlDoubleQuoteStringMode); +HtmlHex: '#' HexDigit+ ; +HtmlDecimal: Digit+; +HtmlSpace: [ \t\r\n]+ -> channel(HIDDEN); +HtmlName: HtmlNameStartChar HtmlNameChar*; +ErrorInside: . -> channel(ErrorLexem); + +mode HtmlQuoteStringMode; + +PHPStartEchoInsideQuoteString: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartInsideQuoteString: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +HtmlEndQuoteString: '\'' '\''? -> popMode; +HtmlQuoteString: ~[<']+; +ErrorHtmlQuote: . -> channel(ErrorLexem); +mode HtmlDoubleQuoteStringMode; +PHPStartEchoDoubleQuoteString: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartDoubleQuoteString: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +HtmlEndDoubleQuoteString: '"' '"'? -> popMode; +HtmlDoubleQuoteString: ~[<"]+; +ErrorHtmlDoubleQuote: . -> channel(ErrorLexem); +// Parse JavaScript with https://github.com/antlr/grammars-v4/tree/master/javascript if necessary. +// Php blocks can exist inside Script blocks too. +mode SCRIPT; +ScriptText: ~'<'+; +// TODO: handle JS strings, but handle type(ScriptText); +//ScriptString2: '\'' (~'\'' | '\\' ('\r'? '\n' | .))* '\'' -> type(ScriptText); +HtmlScriptClose: '' -> popMode; +PHPStartInsideScriptEcho: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartInsideScript: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +ScriptText2: '<' -> type(ScriptText); +mode STYLE; +StyleBody: .*? '' -> popMode; +mode PHP; +PHPEnd: ('?' | '%' {this.HasAspTags()}?) 
'>' + | '' {this.HasPhpScriptTag()}?; +Whitespace: [ \t\r\n]+ -> channel(SkipChannel); +MultiLineComment: '/*' .*? '*/' -> channel(PhpComments); +SingleLineComment: '//' -> channel(SkipChannel), pushMode(SingleLineCommentMode); +ShellStyleComment: '#' -> channel(SkipChannel), pushMode(SingleLineCommentMode); +AttributeStart: '#['; +Abstract: 'abstract'; +Array: 'array'; +As: 'as'; +BinaryCast: 'binary'; +BoolType: 'bool' 'ean'?; +BooleanConstant: 'true' + | 'false'; +Break: 'break'; +Callable: 'callable'; +Case: 'case'; +Catch: 'catch'; +Class: 'class'; +Clone: 'clone'; +Const: 'const'; +Continue: 'continue'; +Declare: 'declare'; +Default: 'default'; +Do: 'do'; +DoubleCast: 'real'; +DoubleType: 'double'; +Echo: 'echo'; +Else: 'else'; +ElseIf: 'elseif'; +Empty: 'empty'; +EndDeclare: 'enddeclare'; +EndFor: 'endfor'; +EndForeach: 'endforeach'; +EndIf: 'endif'; +EndSwitch: 'endswitch'; +EndWhile: 'endwhile'; +Eval: 'eval'; +Exit: 'die'; +Extends: 'extends'; +Final: 'final'; +Finally: 'finally'; +FloatCast: 'float'; +For: 'for'; +Foreach: 'foreach'; +Function_: 'function'; +Global: 'global'; +Goto: 'goto'; +If: 'if'; +Implements: 'implements'; +Import: 'import'; +Include: 'include'; +IncludeOnce: 'include_once'; +InstanceOf: 'instanceof'; +InsteadOf: 'insteadof'; +Int8Cast: 'int8'; +Int16Cast: 'int16'; +Int64Type: 'int64'; +IntType: 'int' 'eger'?; +Interface: 'interface'; +IsSet: 'isset'; +List: 'list'; +LogicalAnd: 'and'; +LogicalOr: 'or'; +LogicalXor: 'xor'; +Match: 'match'; +Namespace: 'namespace'; +New: 'new'; +Null: 'null'; +ObjectType: 'object'; +Parent_: 'parent'; +Partial: 'partial'; +Print: 'print'; +Private: 'private'; +Protected: 'protected'; +Public: 'public'; +Require: 'require'; +RequireOnce: 'require_once'; +Resource: 'resource'; +Return: 'return'; +Static: 'static'; +StringType: 'string'; +Switch: 'switch'; +Throw: 'throw'; +Trait: 'trait'; +Try: 'try'; +Typeof: 'clrtypeof'; +UintCast: 'uint' ('8' | '16' | '64')?; +UnicodeCast: 'unicode'; +Unset: 
'unset'; +Use: 'use'; +Var: 'var'; +While: 'while'; +Yield: 'yield'; +From: 'from'; +LambdaFn: 'fn'; +Get: '__get'; +Set: '__set'; +Call: '__call'; +CallStatic: '__callstatic'; +Constructor: '__construct'; +Destruct: '__destruct'; +Wakeup: '__wakeup'; +Sleep: '__sleep'; +Autoload: '__autoload'; +IsSet__: '__isset'; +Unset__: '__unset'; +ToString__: '__tostring'; +Invoke: '__invoke'; +SetState: '__set_state'; +Clone__: '__clone'; +DebugInfo: '__debuginfo'; +Namespace__: '__namespace__'; +Class__: '__class__'; +Traic__: '__trait__'; +Function__: '__function__'; +Method__: '__method__'; +Line__: '__line__'; +File__: '__file__'; +Dir__: '__dir__'; +Spaceship: '<=>'; +Lgeneric: '<:'; +Rgeneric: ':>'; +DoubleArrow: '=>'; +Inc: '++'; +Dec: '--'; +IsIdentical: '==='; +IsNoidentical: '!=='; +IsEqual: '=='; +IsNotEq: '<>' + | '!='; +IsSmallerOrEqual: '<='; +IsGreaterOrEqual: '>='; +PlusEqual: '+='; +MinusEqual: '-='; +MulEqual: '*='; +Pow: '**'; +PowEqual: '**='; +DivEqual: '/='; +Concaequal: '.='; +ModEqual: '%='; +ShiftLeftEqual: '<<='; +ShiftRightEqual: '>>='; +AndEqual: '&='; +OrEqual: '|='; +XorEqual: '^='; +BooleanOr: '||'; +BooleanAnd: '&&'; +NullCoalescing: '??'; +NullCoalescingEqual:'??='; +ShiftLeft: '<<'; +ShiftRight: '>>'; +DoubleColon: '::'; +ObjectOperator: '->'; +NamespaceSeparator: '\\'; +Ellipsis: '...'; +Less: '<'; +Greater: '>'; +Ampersand: '&'; +Pipe: '|'; +Bang: '!'; +Caret: '^'; +Plus: '+'; +Minus: '-'; +Asterisk: '*'; +Percent: '%'; +Divide: '/'; +Tilde: '~'; +SuppressWarnings: '@'; +Dollar: '$'; +Dot: '.'; +QuestionMark: '?'; +OpenRoundBracket: '('; +CloseRoundBracket: ')'; +OpenSquareBracket: '['; +CloseSquareBracket: ']'; +OpenCurlyBracket: '{'; +CloseCurlyBracket: '}' +{ this.PopModeOnCurlyBracketClose(); }; +Comma: ','; +Colon: ':'; +SemiColon: ';'; +Eq: '='; +Quote: '\''; +BackQuote: '`'; +VarName: '$' NameString; +Label: [a-z_][a-z_0-9]*; +Octal: '0' [0-7]+; +Decimal: '0' | NonZeroDigit Digit*; +Real: (Digit+ '.' Digit* | '.' 
Digit+) ExponentPart? + | Digit+ ExponentPart; +Hex: '0x' HexDigit+; +Binary: '0b' [01_]+; +BackQuoteString: '`' ~'`'* '`'; +SingleQuoteString: '\'' (~('\'' | '\\') | '\\' . )* '\''; +DoubleQuote: '"' -> pushMode(InterpolationString); +StartNowDoc + : '<<<' [ \t]* '\'' NameString '\'' { this.ShouldPushHereDocMode(1) }? -> pushMode(HereDoc) + ; +StartHereDoc + : '<<<' [ \t]* NameString { this.ShouldPushHereDocMode(1) }? -> pushMode(HereDoc) + ; +ErrorPhp: . -> channel(ErrorLexem); + +mode InterpolationString; + +VarNameInInterpolation: '$' NameString -> type(VarName); // TODO: fix such cases: "$people->john" +DollarString: '$' -> type(StringPart); +CurlyDollar: '{' { this.IsCurlyDollar(1) }? { this.SetInsideString(); } -> channel(SkipChannel), pushMode(PHP); +CurlyString: '{' -> type(StringPart); +EscapedChar: '\\' . -> type(StringPart); +DoubleQuoteInInterpolation: '"' -> type(DoubleQuote), popMode; +UnicodeEscape: '\\u{' [a-zA-Z0-9][a-zA-Z0-9]+ '}'; +StringPart: ~[${\\"]+; +mode SingleLineCommentMode; +Comment: ~[\r\n?]+ -> channel(PhpComments); +PHPEndSingleLineComment: '?' '>'; +CommentQuestionMark: '?' -> type(Comment), channel(PhpComments); +CommentEnd: [\r\n] -> channel(SkipChannel), popMode; // exit from comment. +mode HereDoc; // TODO: interpolation for heredoc strings. +HereDocText: ~[\r\n]*? ('\r'? '\n' | '\r'); +// fragments. +// '' will be transformed to '' +fragment PhpStartEchoFragment: '<' ('?' '=' | { this.HasAspTags() }? '%' '='); +fragment PhpStartFragment: '<' ('?' 'php'? | { this.HasAspTags() }? '%'); +fragment NameString: [a-zA-Z_\u0080-\ufffe][a-zA-Z0-9_\u0080-\ufffe]*; +fragment HtmlNameChar + : HtmlNameStartChar + | '-' + | '_' + | '.' + | Digit + | '\u00B7' + | '\u0300'..'\u036F' + | '\u203F'..'\u2040' + ; +fragment HtmlNameStartChar + : [:a-z] + | '\u2070'..'\u218F' + | '\u2C00'..'\u2FEF' + | '\u3001'..'\uD7FF' + | '\uF900'..'\uFDCF' + | '\uFDF0'..'\uFFFD' + ; +fragment ExponentPart: 'e' [+-]? 
Digit+; +fragment NonZeroDigit: [1-9_]; +fragment Digit: [0-9_]; +fragment HexDigit: [a-f0-9_]; diff --git a/src/main/antlr/PhpParser.g4 b/src/main/antlr/PhpParser.g4 new file mode 100644 index 00000000..cc319905 --- /dev/null +++ b/src/main/antlr/PhpParser.g4 @@ -0,0 +1,925 @@ +/* +PHP grammar. +The MIT License (MIT). +Copyright (c) 2015-2020, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies. +Copyright (c) 2019-2020, Student Main for php7, php8 support. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +parser grammar PhpParser; + +options { tokenVocab=PhpLexer; } + +// HTML +// Also see here: https://github.com/antlr/grammars-v4/tree/master/html + +htmlDocument + : Shebang? 
(inlineHtml | phpBlock)* EOF + ; + +inlineHtml + : htmlElement+ + | scriptText + ; + +// TODO: split into html, css and xml elements +htmlElement + : HtmlDtd + | HtmlClose + | HtmlStyleOpen + | HtmlOpen + | HtmlName + | HtmlSlashClose + | HtmlSlash + | HtmlText + | HtmlEquals + | HtmlStartQuoteString + | HtmlEndQuoteString + | HtmlStartDoubleQuoteString + | HtmlEndDoubleQuoteString + | HtmlHex + | HtmlDecimal + | HtmlQuoteString + | HtmlDoubleQuoteString + + | StyleBody + + | HtmlScriptOpen + | HtmlScriptClose + + | XmlStart XmlText* XmlClose + ; + +// Script +// Parse JavaScript with https://github.com/antlr/grammars-v4/tree/master/javascript if necessary. + +scriptText + : ScriptText+ + ; + +// PHP + +phpBlock + : importStatement* topStatement+ + ; + +importStatement + : Import Namespace namespaceNameList SemiColon + ; + +topStatement + : statement + | useDeclaration + | namespaceDeclaration + | functionDeclaration + | classDeclaration + | globalConstantDeclaration + ; + +useDeclaration + : Use (Function_ | Const)? useDeclarationContentList SemiColon + ; + +useDeclarationContentList + : '\\'? useDeclarationContent (',' '\\'? useDeclarationContent)* + ; + +useDeclarationContent + : namespaceNameList + ; + +namespaceDeclaration + : Namespace (namespaceNameList? OpenCurlyBracket namespaceStatement* CloseCurlyBracket | namespaceNameList SemiColon) + ; + +namespaceStatement + : statement + | useDeclaration + | functionDeclaration + | classDeclaration + | globalConstantDeclaration + ; + +functionDeclaration + : attributes? Function_ '&'? identifier typeParameterListInBrackets? '(' formalParameterList ')' (':' QuestionMark? typeHint)? blockStatement + ; + +classDeclaration + : attributes? Private? modifier? Partial? ( + classEntryType identifier typeParameterListInBrackets? (Extends qualifiedStaticTypeRef)? (Implements interfaceList)? + | Interface identifier typeParameterListInBrackets? (Extends interfaceList)? 
) + OpenCurlyBracket classStatement* CloseCurlyBracket + ; + +classEntryType + : Class + | Trait + ; + +interfaceList + : qualifiedStaticTypeRef (',' qualifiedStaticTypeRef)* + ; + +typeParameterListInBrackets + : '<:' typeParameterList ':>' + | '<:' typeParameterWithDefaultsList ':>' + | '<:' typeParameterList ',' typeParameterWithDefaultsList ':>' + ; + +typeParameterList + : typeParameterDecl (',' typeParameterDecl)* + ; + +typeParameterWithDefaultsList + : typeParameterWithDefaultDecl (',' typeParameterWithDefaultDecl)* + ; + +typeParameterDecl + : attributes? identifier + ; + +typeParameterWithDefaultDecl + : attributes? identifier Eq (qualifiedStaticTypeRef | primitiveType) + ; + +genericDynamicArgs + : '<:' typeRef (',' typeRef)* ':>' + ; + +attributes + : attributeGroup+ + ; + +attributeGroup + : AttributeStart (identifier ':')? attribute (',' attribute)* ']' + ; + +attribute + : qualifiedNamespaceName arguments? + ; + +innerStatementList + : innerStatement* + ; + +innerStatement + : statement + | functionDeclaration + | classDeclaration + ; + +// Statements + +statement + : identifier ':' + | blockStatement + | ifStatement + | whileStatement + | doWhileStatement + | forStatement + | switchStatement + | breakStatement + | continueStatement + | returnStatement + | yieldExpression SemiColon + | globalStatement + | staticVariableStatement + | echoStatement + | expressionStatement + | unsetStatement + | foreachStatement + | tryCatchFinally + | throwStatement + | gotoStatement + | declareStatement + | emptyStatement + | inlineHtmlStatement + ; + +emptyStatement + : SemiColon + ; + +blockStatement + : OpenCurlyBracket innerStatementList CloseCurlyBracket + ; + +ifStatement + : If parentheses statement elseIfStatement* elseStatement? + | If parentheses ':' innerStatementList elseIfColonStatement* elseColonStatement? 
EndIf SemiColon + ; + +elseIfStatement + : ElseIf parentheses statement + ; + +elseIfColonStatement + : ElseIf parentheses ':' innerStatementList + ; + +elseStatement + : Else statement + ; + +elseColonStatement + : Else ':' innerStatementList + ; + +whileStatement + : While parentheses (statement | ':' innerStatementList EndWhile SemiColon) + ; + +doWhileStatement + : Do statement While parentheses SemiColon + ; + +forStatement + : For '(' forInit? SemiColon expressionList? SemiColon forUpdate? ')' (statement | ':' innerStatementList EndFor SemiColon ) + ; + +forInit + : expressionList + ; + +forUpdate + : expressionList + ; + +switchStatement + : Switch parentheses (OpenCurlyBracket SemiColon? switchBlock* CloseCurlyBracket | ':' SemiColon? switchBlock* EndSwitch SemiColon) + ; + +switchBlock + : ((Case expression | Default) (':' | SemiColon))+ innerStatementList + ; + +breakStatement + : Break expression? SemiColon + ; + +continueStatement + : Continue expression? SemiColon + ; + +returnStatement + : Return expression? SemiColon + ; + +expressionStatement + : expression SemiColon + ; + +unsetStatement + : Unset '(' chainList ')' SemiColon + ; + +foreachStatement + : Foreach + ( '(' chain As '&'? assignable ('=>' '&'? chain)? ')' + | '(' expression As assignable ('=>' '&'? chain)? ')' + | '(' chain As List '(' assignmentList ')' ')' ) + (statement | ':' innerStatementList EndForeach SemiColon) + ; + +tryCatchFinally + : Try blockStatement (catchClause+ finallyStatement? 
| catchClause* finallyStatement) + ; + +catchClause + : Catch '(' qualifiedStaticTypeRef ('|' qualifiedStaticTypeRef)* VarName ')' blockStatement + ; + +finallyStatement + : Finally blockStatement + ; + +throwStatement + : Throw expression SemiColon + ; + +gotoStatement + : Goto identifier SemiColon + ; + +declareStatement + : Declare '(' declareList ')' (statement | ':' innerStatementList EndDeclare SemiColon) + ; + +inlineHtmlStatement + : inlineHtml+ + ; + +declareList + : identifierInitializer (',' identifierInitializer)* + ; + +formalParameterList + : formalParameter? (',' formalParameter)* ','? + ; + +formalParameter + : attributes? memberModifier? QuestionMark? typeHint? '&'? '...'? variableInitializer + ; + +typeHint + : qualifiedStaticTypeRef + | Callable + | primitiveType + | typeHint '|' typeHint + ; + +globalStatement + : Global globalVar (',' globalVar)* SemiColon + ; + +globalVar + : VarName + | Dollar chain + | Dollar OpenCurlyBracket expression CloseCurlyBracket + ; + +echoStatement + : Echo expressionList SemiColon + ; + +staticVariableStatement + : Static variableInitializer (',' variableInitializer)* SemiColon + ; + +classStatement + : attributes? ( propertyModifiers typeHint? variableInitializer (',' variableInitializer)* SemiColon + | memberModifiers? ( Const typeHint? identifierInitializer (',' identifierInitializer)* SemiColon + | Function_ '&'? identifier typeParameterListInBrackets? '(' formalParameterList ')' + baseCtorCall? methodBody)) + | Use qualifiedNamespaceNameList traitAdaptations + ; + +traitAdaptations + : SemiColon + | OpenCurlyBracket traitAdaptationStatement* CloseCurlyBracket + ; + +traitAdaptationStatement + : traitPrecedence + | traitAlias + ; + +traitPrecedence + : qualifiedNamespaceName '::' identifier InsteadOf qualifiedNamespaceNameList SemiColon + ; + +traitAlias + : traitMethodReference As (memberModifier | memberModifier? identifier) SemiColon + ; + +traitMethodReference + : (qualifiedNamespaceName '::')? 
identifier + ; + +baseCtorCall + : ':' identifier arguments? + ; + +methodBody + : SemiColon + | blockStatement + ; + +propertyModifiers + : memberModifiers + | Var + ; + +memberModifiers + : memberModifier+ + ; + +variableInitializer + : VarName (Eq constantInitializer)? + ; + +identifierInitializer + : identifier Eq constantInitializer + ; + +globalConstantDeclaration + : attributes? Const identifierInitializer (',' identifierInitializer)* SemiColon + ; + +expressionList + : expression (',' expression)* + ; + +parentheses + : '(' (expression | yieldExpression) ')' + ; + +// Expressions +// Grouped by priorities: http://php.net/manual/en/language.operators.precedence.php +expression + : Clone expression #CloneExpression + | newExpr #NewExpression + + | stringConstant '[' expression ']' #IndexerExpression + + | '(' castOperation ')' expression #CastExpression + | ('~' | '@') expression #UnaryOperatorExpression + + | ('!' | '+' | '-') expression #UnaryOperatorExpression + + | ('++' | '--') chain #PrefixIncDecExpression + | chain ('++' | '--') #PostfixIncDecExpression + + | Print expression #PrintExpression + + | chain #ChainExpression + | constant #ScalarExpression + | string #ScalarExpression + | Label #ScalarExpression + + | BackQuoteString #BackQuoteStringExpression + | parentheses #ParenthesisExpression + | arrayCreation #ArrayCreationExpression + + | Yield #SpecialWordExpression + | List '(' assignmentList ')' Eq expression #SpecialWordExpression + | IsSet '(' chainList ')' #SpecialWordExpression + | Empty '(' chain ')' #SpecialWordExpression + | Eval '(' expression ')' #SpecialWordExpression + | Exit ( '(' ')' | parentheses )? 
#SpecialWordExpression + | (Include | IncludeOnce) expression #SpecialWordExpression + | (Require | RequireOnce) expression #SpecialWordExpression + + | lambdaFunctionExpr #LambdaFunctionExpression + | matchExpr #MatchExpression + + | expression op='**' expression #ArithmeticExpression + | expression InstanceOf typeRef #InstanceOfExpression + | expression op=('*' | Divide | '%') expression #ArithmeticExpression + + | expression op=('+' | '-' | '.') expression #ArithmeticExpression + + | expression op=('<<' | '>>') expression #ComparisonExpression + | expression op=(Less | '<=' | Greater | '>=') expression #ComparisonExpression + | expression op=('===' | '!==' | '==' | IsNotEq) expression #ComparisonExpression + + | expression op='&' expression #BitwiseExpression + | expression op='^' expression #BitwiseExpression + | expression op='|' expression #BitwiseExpression + | expression op='&&' expression #BitwiseExpression + | expression op='||' expression #BitwiseExpression + + | expression op=QuestionMark expression? ':' expression #ConditionalExpression + | expression op='??' expression #NullCoalescingExpression + | expression op='<=>' expression #SpaceshipExpression + + | Throw expression #SpecialWordExpression + + | assignable assignmentOperator attributes? expression #AssignmentExpression + | assignable Eq attributes? '&' (chain | newExpr) #AssignmentExpression + + | expression op=LogicalAnd expression #LogicalExpression + | expression op=LogicalXor expression #LogicalExpression + | expression op=LogicalOr expression #LogicalExpression + ; + +assignable + : chain + | arrayCreation + ; + +arrayCreation + : (Array '(' arrayItemList? ')' | '[' arrayItemList? ']') ('[' expression ']')? + ; + +lambdaFunctionExpr + : Static? Function_ '&'? '(' formalParameterList ')' lambdaFunctionUseVars? (':' typeHint)? blockStatement + | LambdaFn '(' formalParameterList')' '=>' expression + ; + +matchExpr + : Match '(' expression ')' OpenCurlyBracket matchItem (',' matchItem)* ','? 
CloseCurlyBracket + ; + +matchItem + : expression (',' expression)* '=>' expression + ; + +newExpr + : New typeRef arguments? + ; + +assignmentOperator + : Eq + | '+=' + | '-=' + | '*=' + | '**=' + | '/=' + | '.=' + | '%=' + | '&=' + | '|=' + | '^=' + | '<<=' + | '>>=' + | '??=' + ; + +yieldExpression + : Yield (expression ('=>' expression)? | From expression) + ; + +arrayItemList + : arrayItem (',' arrayItem)* ','? + ; + +arrayItem + : expression ('=>' expression)? + | (expression '=>')? '&' chain + ; + +lambdaFunctionUseVars + : Use '(' lambdaFunctionUseVar (',' lambdaFunctionUseVar)* ')' + ; + +lambdaFunctionUseVar + : '&'? VarName + ; + +qualifiedStaticTypeRef + : qualifiedNamespaceName genericDynamicArgs? + | Static + ; + +typeRef + : (qualifiedNamespaceName | indirectTypeRef) genericDynamicArgs? + | primitiveType + | Static + | anonymousClass + ; + +anonymousClass + : attributes? Private? modifier? Partial? ( + classEntryType typeParameterListInBrackets? (Extends qualifiedStaticTypeRef)? (Implements interfaceList)? + | Interface identifier typeParameterListInBrackets? (Extends interfaceList)? ) + OpenCurlyBracket classStatement* CloseCurlyBracket + ; + +indirectTypeRef + : chainBase ('->' keyedFieldName)* + ; + +qualifiedNamespaceName + : Namespace? '\\'? namespaceNameList + ; + +namespaceNameList + : identifier + | identifier ('\\' identifier)* ('\\' namespaceNameTail)? + ; + +namespaceNameTail + : identifier (As identifier)? + | OpenCurlyBracket namespaceNameTail (','namespaceNameTail)* ','? CloseCurlyBracket + ; + +qualifiedNamespaceNameList + : qualifiedNamespaceName (',' qualifiedNamespaceName)* + ; + +arguments + : '(' ( actualArgument (',' actualArgument)* | yieldExpression)? ','? ')' + ; + +actualArgument + : argumentName? '...'? expression + | '&' chain + ; + +argumentName + : identifier ':' + ; + +constantInitializer + : constant + | string + | Array '(' (arrayItemList ','?)? ')' + | '[' (arrayItemList ','?)? 
']' + | ('+' | '-') constantInitializer + ; + +constant + : Null + | literalConstant + | magicConstant + | classConstant + | qualifiedNamespaceName + ; + +literalConstant + : Real + | BooleanConstant + | numericConstant + | stringConstant + ; + +numericConstant + : Octal + | Decimal + | Hex + | Binary + ; + +classConstant + : (Class | Parent_) '::' (identifier | Constructor | Get | Set) + | (qualifiedStaticTypeRef | keyedVariable | string) '::' (identifier | keyedVariable) // 'foo'::$bar works in php7 + ; + +stringConstant + : Label + ; + +string + : StartHereDoc HereDocText+ + | StartNowDoc HereDocText+ + | SingleQuoteString + | DoubleQuote interpolatedStringPart* DoubleQuote + ; + +interpolatedStringPart + : StringPart + | UnicodeEscape + | chain + ; + +chainList + : chain (',' chain)* + ; + +chain + : chainOrigin memberAccess* + //| arrayCreation // [$a,$b]=$c + ; + +chainOrigin + : chainBase + | functionCall + | '(' newExpr ')' + ; + +memberAccess + : '->' keyedFieldName actualArguments? + ; + +functionCall + : functionCallName actualArguments + ; + +functionCallName + : qualifiedNamespaceName + | classConstant + | chainBase + | parentheses + ; + +actualArguments + : genericDynamicArgs? arguments squareCurlyExpression* + ; + +chainBase + : keyedVariable ('::' keyedVariable)? + | qualifiedStaticTypeRef '::' keyedVariable + ; + +keyedFieldName + : keyedSimpleFieldName + | keyedVariable + ; + +keyedSimpleFieldName + : (identifier | OpenCurlyBracket expression CloseCurlyBracket) squareCurlyExpression* + ; + +keyedVariable + : Dollar* (VarName | Dollar OpenCurlyBracket expression CloseCurlyBracket) squareCurlyExpression* + ; + +squareCurlyExpression + : '[' expression? ']' + | OpenCurlyBracket expression CloseCurlyBracket + ; + +assignmentList + : assignmentListElement? 
(',' assignmentListElement?)* + ; + +assignmentListElement + : chain + | List '(' assignmentList ')' + | arrayItem + ; + +modifier + : Abstract + | Final + ; + +identifier + : Label + + | Abstract + | Array + | As + | BinaryCast + | BoolType + | BooleanConstant + | Break + | Callable + | Case + | Catch + | Class + | Clone + | Const + | Continue + | Declare + | Default + | Do + | DoubleCast + | DoubleType + | Echo + | Else + | ElseIf + | Empty + | EndDeclare + | EndFor + | EndForeach + | EndIf + | EndSwitch + | EndWhile + | Eval + | Exit + | Extends + | Final + | Finally + | FloatCast + | For + | Foreach + | Function_ + | Global + | Goto + | If + | Implements + | Import + | Include + | IncludeOnce + | InstanceOf + | InsteadOf + | Int16Cast + | Int64Type + | Int8Cast + | Interface + | IntType + | IsSet + | List + | LogicalAnd + | LogicalOr + | LogicalXor + | Namespace + | New + | Null + | ObjectType + | Parent_ + | Partial + | Print + | Private + | Protected + | Public + | Require + | RequireOnce + | Resource + | Return + | Static + | StringType + | Switch + | Throw + | Trait + | Try + | Typeof + | UintCast + | UnicodeCast + | Unset + | Use + | Var + | While + | Yield + | From + + | Get + | Set + | Call + | CallStatic + | Constructor + | Destruct + | Wakeup + | Sleep + | Autoload + | IsSet__ + | Unset__ + | ToString__ + | Invoke + | SetState + | Clone__ + | DebugInfo + | Namespace__ + | Class__ + | Traic__ + | Function__ + | Method__ + | Line__ + | File__ + | Dir__ + ; + +memberModifier + : Public + | Protected + | Private + | Static + | Abstract + | Final + ; + +magicConstant + : Namespace__ + | Class__ + | Traic__ + | Function__ + | Method__ + | Line__ + | File__ + | Dir__ + ; + +magicMethod + : Get + | Set + | Call + | CallStatic + | Constructor + | Destruct + | Wakeup + | Sleep + | Autoload + | IsSet__ + | Unset__ + | ToString__ + | Invoke + | SetState + | Clone__ + | DebugInfo + ; + +primitiveType + : BoolType + | IntType + | Int64Type + | DoubleType + | 
StringType + | Resource + | ObjectType + | Array + ; + +castOperation + : BoolType + | Int8Cast + | Int16Cast + | IntType + | Int64Type + | UintCast + | DoubleCast + | DoubleType + | FloatCast + | StringType + | BinaryCast + | UnicodeCast + | Array + | ObjectType + | Resource + | Unset + ; \ No newline at end of file From bbfe5dac4d66eea2821142576de7ef6ffb9736f8 Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 28 May 2021 22:54:57 +0300 Subject: [PATCH 207/308] php parser adapter added --- .../astminer/parse/antlr/php/PHPParser.kt | 28 +++++++++++++++++++ .../parse/antlr/php/ANTLRPHPParserText.kt | 17 +++++++++++ src/test/resources/examples/1.php | 28 +++++++++++++++++++ 3 files changed, 73 insertions(+) create mode 100644 src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt create mode 100644 src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPParserText.kt create mode 100644 src/test/resources/examples/1.php diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt new file mode 100644 index 00000000..bab8640b --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt @@ -0,0 +1,28 @@ +package astminer.parse.antlr.php + +import astminer.common.model.Parser +import astminer.parse.ParsingException +import astminer.parse.antlr.AntlrNode +import astminer.parse.antlr.convertAntlrTree +import me.vovak.antlr.parser.PhpLexer +import me.vovak.antlr.parser.PhpParser +import org.antlr.v4.runtime.CharStreams +import org.antlr.v4.runtime.CommonTokenStream +import java.io.InputStream + +class PHPParser: Parser { + override fun parseInputStream(content: InputStream): AntlrNode { + return try { + val lexer = PhpLexer(CharStreams.fromStream(content)) + lexer.removeErrorListeners() + val tokens = CommonTokenStream(lexer) + val parser = PhpParser(tokens) + parser.removeErrorListeners() + val context = parser.htmlDocument() + convertAntlrTree(context, PhpParser.ruleNames, PhpParser.VOCABULARY) + } catch 
(e: Exception) { + throw ParsingException("ANTLR", "PHP", e.message) + } + } + +} \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPParserText.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPParserText.kt new file mode 100644 index 00000000..fe42535b --- /dev/null +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPParserText.kt @@ -0,0 +1,17 @@ +package astminer.parse.antlr.php + +import org.junit.Test +import java.io.File +import java.io.FileInputStream +import kotlin.test.assertNotNull + +internal class ANTLRPHPParserText { + + @Test + fun testNodeIsNotNull() { + val parser = PHPParser() + val file = File("src/test/resources/examples/1.php") + val node = parser.parseInputStream(FileInputStream(file)) + assertNotNull(node) + } +} \ No newline at end of file diff --git a/src/test/resources/examples/1.php b/src/test/resources/examples/1.php new file mode 100644 index 00000000..28972163 --- /dev/null +++ b/src/test/resources/examples/1.php @@ -0,0 +1,28 @@ +_add($a, $b); + } + + public function sub($a, $b) { + return $a - $b; + } + + public function mul($a, $b) { + return $a * $b; + } + + /** + * Защищённый метод + * @param interge + * @return interge + */ + protected function _add($a, $b) { + return $a + $b; + } +} + +$server = new Yar_Server(new Operator()); +$server->handle(); +?> \ No newline at end of file From f942d7d0d1d633d5b114d8f43608e04f2d0ad9cf Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 28 May 2021 23:13:48 +0300 Subject: [PATCH 208/308] php keyword now being parsed correctly --- .../antlr/parser/CaseChangingCharStream.java | 82 +++++++++++++++++++ .../astminer/parse/antlr/php/PHPParser.kt | 4 +- 2 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 src/main/java/me/vovak/antlr/parser/CaseChangingCharStream.java diff --git a/src/main/java/me/vovak/antlr/parser/CaseChangingCharStream.java b/src/main/java/me/vovak/antlr/parser/CaseChangingCharStream.java new file mode 100644 index 
00000000..c91537b3 --- /dev/null +++ b/src/main/java/me/vovak/antlr/parser/CaseChangingCharStream.java @@ -0,0 +1,82 @@ +package me.vovak.antlr.parser; + +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.misc.Interval; + +/** + * This class supports case-insensitive lexing by wrapping an existing + * {@link CharStream} and forcing the lexer to see either upper or + * lowercase characters. Grammar literals should then be either upper or + * lower case such as 'BEGIN' or 'begin'. The text of the character + * stream is unaffected. Example: input 'BeGiN' would match lexer rule + * 'BEGIN' if constructor parameter upper=true but getText() would return + * 'BeGiN'. + */ +public class CaseChangingCharStream implements CharStream { + + final CharStream stream; + final boolean upper; + + /** + * Constructs a new CaseChangingCharStream wrapping the given {@link CharStream} forcing + * all characters to upper case or lower case. + * @param stream The stream to wrap. + * @param upper If true force each symbol to upper case, otherwise force to lower. 
+ */ + public CaseChangingCharStream(CharStream stream, boolean upper) { + this.stream = stream; + this.upper = upper; + } + + @Override + public String getText(Interval interval) { + return stream.getText(interval); + } + + @Override + public void consume() { + stream.consume(); + } + + @Override + public int LA(int i) { + int c = stream.LA(i); + if (c <= 0) { + return c; + } + if (upper) { + return Character.toUpperCase(c); + } + return Character.toLowerCase(c); + } + + @Override + public int mark() { + return stream.mark(); + } + + @Override + public void release(int marker) { + stream.release(marker); + } + + @Override + public int index() { + return stream.index(); + } + + @Override + public void seek(int index) { + stream.seek(index); + } + + @Override + public int size() { + return stream.size(); + } + + @Override + public String getSourceName() { + return stream.getSourceName(); + } +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt index bab8640b..b3970c16 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt @@ -4,6 +4,7 @@ import astminer.common.model.Parser import astminer.parse.ParsingException import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.convertAntlrTree +import me.vovak.antlr.parser.CaseChangingCharStream import me.vovak.antlr.parser.PhpLexer import me.vovak.antlr.parser.PhpParser import org.antlr.v4.runtime.CharStreams @@ -13,7 +14,8 @@ import java.io.InputStream class PHPParser: Parser { override fun parseInputStream(content: InputStream): AntlrNode { return try { - val lexer = PhpLexer(CharStreams.fromStream(content)) + val stream = CharStreams.fromStream(content) + val lexer = PhpLexer(CaseChangingCharStream(stream, false)) lexer.removeErrorListeners() val tokens = CommonTokenStream(lexer) val parser = PhpParser(tokens) From 
1372789609de084f7f7d48463773bceb5ea3c799 Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 29 May 2021 11:34:11 +0300 Subject: [PATCH 209/308] commentary added --- src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt index b3970c16..921b2c25 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt @@ -15,6 +15,8 @@ class PHPParser: Parser { override fun parseInputStream(content: InputStream): AntlrNode { return try { val stream = CharStreams.fromStream(content) + // Php keywords are case-insensitive, so case changing stream must be used + // Tokens won't be in lower case in resulting tree val lexer = PhpLexer(CaseChangingCharStream(stream, false)) lexer.removeErrorListeners() val tokens = CommonTokenStream(lexer) From a2a19469c7a81c030add0c853643000b9a9eacf0 Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 29 May 2021 14:05:48 +0300 Subject: [PATCH 210/308] raw splitter and function info added --- .../parse/antlr/php/ANTLRPHPFunctionInfo.kt | 4 ++ .../parse/antlr/php/PHPMethodSplitter.kt | 17 +++++++ .../antlr/php/ANTLRPHPMethodSplitterTest.kt | 33 ++++++++++++++ .../methodSplitting/testMethodSplitting.php | 44 +++++++++++++++++++ 4 files changed, 98 insertions(+) create mode 100644 src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt create mode 100644 src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt create mode 100644 src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt create mode 100644 src/test/resources/methodSplitting/testMethodSplitting.php diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt new file mode 100644 index 00000000..102ee6b2 --- /dev/null +++ 
b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -0,0 +1,4 @@ +package astminer.parse.antlr.php + +class ANTLRPHPFunctionInfo { +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt new file mode 100644 index 00000000..d374f54b --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt @@ -0,0 +1,17 @@ +package astminer.parse.antlr.php + +import astminer.common.model.FunctionInfo +import astminer.common.model.TreeFunctionSplitter +import astminer.parse.antlr.AntlrNode + +class PHPMethodSplitter: TreeFunctionSplitter { + companion object { + const val CLASS_MEMBER = "classStatement" + const val FUNCTION_TOKEN = "Function_" + + } + + override fun splitIntoFunctions(root: AntlrNode): Collection> { + TODO("implement") + } +} \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt new file mode 100644 index 00000000..8e741a7e --- /dev/null +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt @@ -0,0 +1,33 @@ +package astminer.parse.antlr.php + +import org.junit.Test +import kotlin.test.BeforeTest +import astminer.common.model.FunctionInfo +import astminer.parse.antlr.AntlrNode +import java.io.File +import kotlin.test.assertEquals +import kotlin.test.assertNotNull + +internal class ANTLRPHPMethodSplitterTest { + companion object { + const val N_METHODS = 4 + const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.php" + val functionSplitter = PHPMethodSplitter() + val parser = PHPParser() + } + + private var functionInfos: Collection> = listOf() + + @BeforeTest + fun parseTree() { + val testTree = parser.parseInputStream(File(testFilePath).inputStream()) + assertNotNull(testTree) + functionInfos = 
functionSplitter.splitIntoFunctions(testTree) + } + + @Test + fun testValidSplitting() { + assertEquals(N_METHODS, functionInfos.size, "Test file contains $N_METHODS methods") + } + +} \ No newline at end of file diff --git a/src/test/resources/methodSplitting/testMethodSplitting.php b/src/test/resources/methodSplitting/testMethodSplitting.php new file mode 100644 index 00000000..7e717de5 --- /dev/null +++ b/src/test/resources/methodSplitting/testMethodSplitting.php @@ -0,0 +1,44 @@ + \ No newline at end of file From 3bc1332e9adcc1e4348c5ffb85c7dc355a0a577e Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 29 May 2021 15:38:21 +0300 Subject: [PATCH 211/308] splitter and test for splitting added (no info being collected yet) --- .../parse/antlr/php/ANTLRPHPFunctionInfo.kt | 15 +++++- .../parse/antlr/php/PHPMethodSplitter.kt | 13 +++-- .../antlr/php/ANTLRPHPMethodSplitterTest.kt | 2 +- .../methodSplitting/testMethodSplitting.php | 51 +++++++++++++++++-- 4 files changed, 72 insertions(+), 9 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index 102ee6b2..29e5ab06 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -1,4 +1,17 @@ package astminer.parse.antlr.php -class ANTLRPHPFunctionInfo { +import astminer.common.model.FunctionInfo +import astminer.parse.antlr.AntlrNode + +abstract class ANTLRPHPFunctionInfo( + override val root: AntlrNode +) : FunctionInfo { +} + +class ArrowPhpFunctionInfo(root: AntlrNode) : ANTLRPHPFunctionInfo(root) { + +} + +class SimplePhpFunctionInfo(root: AntlrNode) : ANTLRPHPFunctionInfo(root) { + } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt index d374f54b..e1d27b38 100644 --- 
a/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt @@ -6,12 +6,19 @@ import astminer.parse.antlr.AntlrNode class PHPMethodSplitter: TreeFunctionSplitter { companion object { - const val CLASS_MEMBER = "classStatement" + const val LAMBDA_TOKEN = "LambdaFn" const val FUNCTION_TOKEN = "Function_" - } override fun splitIntoFunctions(root: AntlrNode): Collection> { - TODO("implement") + return root.preOrder().mapNotNull { node-> + node.parent?.let { statement -> + when (node.typeLabel) { + LAMBDA_TOKEN -> ArrowPhpFunctionInfo(statement) + FUNCTION_TOKEN -> SimplePhpFunctionInfo(statement) + else -> null + } + } + } } } \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt index 8e741a7e..527852c8 100644 --- a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt @@ -10,7 +10,7 @@ import kotlin.test.assertNotNull internal class ANTLRPHPMethodSplitterTest { companion object { - const val N_METHODS = 4 + const val N_METHODS = 18 const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.php" val functionSplitter = PHPMethodSplitter() val parser = PHPParser() diff --git a/src/test/resources/methodSplitting/testMethodSplitting.php b/src/test/resources/methodSplitting/testMethodSplitting.php index 7e717de5..586db132 100644 --- a/src/test/resources/methodSplitting/testMethodSplitting.php +++ b/src/test/resources/methodSplitting/testMethodSplitting.php @@ -2,7 +2,7 @@ ////////////////// FUNCTIONS ////////////////// -// #1 info : {name: fun, args : , enclosing element: null, return type: null} +// #1 info : {name: fun, args: , enclosing element: null, return type: null} function fun() { return 5; } @@ -29,16 +29,59 @@ function funWithDottedArg($a, ...$rest) 
{ ////////////////// VAR FUNCTIONS ////////////////// -// #6 info : {name: varFunc, args: $x, enclosing element: variable, return type: null} +// #6 info : {name: $varFunc, args: $x, enclosing element: variable, return type: null} $varFunc = function ($x) { return $x; }; $outerVar = 10; -// #7 info : {name: varFuncWithOuterVar, args: $x, enclosing element: variable, return type: null} +// #7 info : {name: $varFuncWithOuterVar, args: $x, enclosing element: variable, return type: null} $varFuncWithOuterVar = function ($x) use ($outerVar) { return $x * $outerVar; }; -/> \ No newline at end of file +////////////////// ARROW FUNCTIONS ////////////////// + +// #8 info : {name: $arrow1, args: $x, $y, enclosing element: variable, return type: null} +$arrow1 = fn($x, $y) => $x + $y; + +// #9 info : {name: $arrow2, args: $x, enclosing element: variable, return type: null} +// #10 info : {name: null, args: $y, enclosing element: function, return type: null} +$arrow2 = fn($x) => fn($y) => $x * $y; + +// #12 info : {name: null, args: $x, enclosing element: null, return type: null} +fn($x = 42) => $x; + +// #13 info : {name: null, args: &$x, enclosing element: null, return type: null} +fn(&$x) => $x; + +// #14 info : {name: null, args: $x, enclosing element: null, return type: null} +fn&($x) => $x; + +// #15 info : {name: null, args: $x, ...$rest, enclosing element: null, return type: null} +fn($x, ...$rest) => $rest; + +////////////////// METHOD FUNCTIONS ////////////////// + +class someClass { + // #16 info : {name: someFunc, args: , enclosing element: class, return type: null} + public function someFunc() { + return 42; + } + + // #17 info : {name: funcWithParams, args: , enclosing element: class, return type: null} + public function funcWithParams($a, $b) { + + // #18 info : {name: innerFunction, args: , enclosing element: method, return type: null} + function innerFunction() { + + // #19 info : {name: superInnerFunction, args: , enclosing element: function, return type: 
null} + function superInnerFunction() { + return 42; + } + return 42; + } + return 42; + } +} \ No newline at end of file From ced0e152e5b10abb3aa0e03a9ea395308df67aa9 Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 29 May 2021 15:42:37 +0300 Subject: [PATCH 212/308] php lexer base added --- .../me/vovak/antlr/parser/PhpLexerBase.java | 192 ++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 src/main/java/me/vovak/antlr/parser/PhpLexerBase.java diff --git a/src/main/java/me/vovak/antlr/parser/PhpLexerBase.java b/src/main/java/me/vovak/antlr/parser/PhpLexerBase.java new file mode 100644 index 00000000..92a8bed3 --- /dev/null +++ b/src/main/java/me/vovak/antlr/parser/PhpLexerBase.java @@ -0,0 +1,192 @@ +package me.vovak.antlr.parser; + +/* +PHP grammar. +The MIT License (MIT). +Copyright (c) 2015-2019, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies. +Copyright (c) 2019, Thierry Marianne (thierry.marianne@weaving-the-web.org) +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +import org.antlr.v4.runtime.*; + +import java.util.Stack; + +public abstract class PhpLexerBase extends Lexer +{ + protected boolean AspTags = true; + protected boolean _scriptTag; + protected boolean _styleTag; + protected String _heredocIdentifier; + protected int _prevTokenType; + protected String _htmlNameText; + protected boolean _phpScript; + protected boolean _insideString; + + public PhpLexerBase(CharStream input) { + super(input); + } + + @Override + public Token nextToken() { + CommonToken token = (CommonToken)super.nextToken(); + + if (token.getType() == PhpLexer.PHPEnd || token.getType() == PhpLexer.PHPEndSingleLineComment) + { + if (_mode == PhpLexer.SingleLineCommentMode) + { + // SingleLineCommentMode for such allowed syntax: + // + popMode(); // exit from SingleLineComment mode. + } + popMode(); // exit from PHP mode. + + if ("".equals(token.getText())) + { + _phpScript = false; + token.setType(PhpLexer.HtmlScriptClose); + } + else + { + // Add semicolon to the end of statement if it is absent. 
+ // For example: + if (_prevTokenType == PhpLexer.SemiColon || _prevTokenType == PhpLexer.Colon + || _prevTokenType == PhpLexer.OpenCurlyBracket || _prevTokenType == PhpLexer.CloseCurlyBracket) + { + token.setChannel(PhpLexer.SkipChannel); + } + else + { + token = new CommonToken(PhpLexer.SemiColon); + } + } + } + else if (token.getType() == PhpLexer.HtmlName) + { + _htmlNameText = token.getText(); + } + else if (token.getType() == PhpLexer.HtmlDoubleQuoteString) + { + if ("php".equals(token.getText()) && "language".equals(_htmlNameText)) + { + _phpScript = true; + } + } + else if (_mode == PhpLexer.HereDoc) + { + // Heredoc and Nowdoc syntax support: http://php.net/manual/en/language.types.string.php#language.types.string.syntax.heredoc + switch (token.getType()) + { + case PhpLexer.StartHereDoc: + case PhpLexer.StartNowDoc: + _heredocIdentifier = token.getText().substring(3).trim().replace("'",""); + break; + + case PhpLexer.HereDocText: + if (CheckHeredocEnd(token.getText())) + { + popMode(); + + String heredocIdentifier = GetHeredocIdentifier(token.getText()); + if (token.getText().trim().endsWith(";")) + { + token = new CommonToken(PhpLexer.SemiColon, heredocIdentifier + ";\n"); + } + else + { + token = (CommonToken)super.nextToken(); + token.setText(heredocIdentifier + "\n;"); + } + } + break; + } + } + else if (_mode == PhpLexer.PHP) + { + if (_channel != PhpLexer.HIDDEN) + { + _prevTokenType = token.getType(); + } + } + + return token; + } + + private String GetHeredocIdentifier(String text) { + String trimmedText = text.trim(); + boolean semi = (trimmedText.length() > 0) ? (trimmedText.charAt(trimmedText.length() - 1) == ';') : false; + return semi ? 
trimmedText.substring(0, trimmedText.length() - 1) : trimmedText; + } + + private boolean CheckHeredocEnd(String text) { + return GetHeredocIdentifier(text).equals(_heredocIdentifier); + } + + protected boolean IsNewLineOrStart(int pos) { + return this._input.LA(pos) <= 0 || this._input.LA(pos) == '\r' || this._input.LA(pos) == '\n'; + } + + protected void PushModeOnHtmlClose() { + popMode(); + if (_scriptTag) + { + if (!_phpScript) + { + pushMode(PhpLexer.SCRIPT); + } + else + { + pushMode(PhpLexer.PHP); + } + _scriptTag = false; + } + else if (_styleTag) + { + pushMode(PhpLexer.STYLE); + _styleTag = false; + } + } + + protected boolean HasAspTags() { + return this.AspTags; + } + + protected boolean HasPhpScriptTag() { + return this._phpScript; + } + + protected void PopModeOnCurlyBracketClose() { + if (_insideString) + { + _insideString = false; + setChannel(PhpLexer.SkipChannel); + popMode(); + } + } + + protected boolean ShouldPushHereDocMode(int pos) { + return _input.LA(pos) == '\r' || _input.LA(pos) == '\n'; + } + + protected boolean IsCurlyDollar(int pos) { + return _input.LA(pos) == '$'; + } + + protected void SetInsideString() { + _insideString = true; + } +} \ No newline at end of file From 9ef6b1b4bc82142e2a56b8a764897829460471db Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 29 May 2021 16:55:07 +0300 Subject: [PATCH 213/308] more tests added --- .../antlr/php/ANTLRPHPMethodSplitterTest.kt | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt index 527852c8..6af6be4b 100644 --- a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt @@ -1,9 +1,11 @@ package astminer.parse.antlr.php +import astminer.common.model.EnclosingElementType import org.junit.Test import kotlin.test.BeforeTest import 
astminer.common.model.FunctionInfo import astminer.parse.antlr.AntlrNode +import astminer.parse.antlr.javascript.JavaScriptFunctionSplitterTest import java.io.File import kotlin.test.assertEquals import kotlin.test.assertNotNull @@ -30,4 +32,31 @@ internal class ANTLRPHPMethodSplitterTest { assertEquals(N_METHODS, functionInfos.size, "Test file contains $N_METHODS methods") } + @Test + fun testValidMethodInfo() { + fun EnclosingElementType.getEnclosingElementType(): String { + return when (this) { + EnclosingElementType.Function -> "function" + EnclosingElementType.Class -> "class" + EnclosingElementType.Method -> "method" + EnclosingElementType.VariableDeclaration -> "variable" + else -> "" + } + } + + fun FunctionInfo.getJsonInfo(): String { + return "info : {" + + "name : ${name}, " + + "args : ${parameters.joinToString(", ") { "${it.type} ${it.name}" }}, " + + "enclosing element : ${enclosingElement?.type?.getEnclosingElementType()}, " + + "}" + } + + val actualJsonInfos = functionInfos.map { it.getJsonInfo() }.sorted() + + val text = File(JavaScriptFunctionSplitterTest.testFilePath).readText() + val expectedJsonInfos = Regex("info : \\{.*\\}").findAll(text).toList().map { it.value }.sorted() + + assertEquals(expectedJsonInfos, actualJsonInfos) + } } \ No newline at end of file From 9de47564ed9bdcc580b3ecc73e8250111af262e7 Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 29 May 2021 16:55:23 +0300 Subject: [PATCH 214/308] parameters extracting added --- .../parse/antlr/php/ANTLRPHPFunctionInfo.kt | 43 ++++++++++++++++++- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index 29e5ab06..6eaa654d 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -1,11 +1,50 @@ package astminer.parse.antlr.php +import 
astminer.common.DEFAULT_TOKEN import astminer.common.model.FunctionInfo -import astminer.parse.antlr.AntlrNode +import astminer.common.model.FunctionInfoParameter +import astminer.parse.antlr.* abstract class ANTLRPHPFunctionInfo( - override val root: AntlrNode + final override val root: AntlrNode ) : FunctionInfo { + override val parameters: List = collectParameters() + + override val returnType = getElementType(root) + + companion object { + const val PARAMETERS = "formalParameterList" + const val ONE_PARAMETER = "formalParameter" + const val TYPE = "typeHint" + const val PARAMETER_NAME = "VarName" + } + + private fun collectParameters(): List { + val parameterList = root.getChildOfType(PARAMETERS) ?: return emptyList() + return parameterList.getItOrChildrenOfType(ONE_PARAMETER).map { + assembleParameter(it) + } + } + + private fun assembleParameter(parameterNode: AntlrNode): FunctionInfoParameter { + return FunctionInfoParameter( + name = getParameterName(parameterNode), + type = getElementType(parameterNode) + ) + } + + private fun getParameterName(parameterNode: AntlrNode): String { + if (parameterNode.hasLastLabel(PARAMETER_NAME)) + return parameterNode.originalToken ?: return DEFAULT_TOKEN + return parameterNode.children + .filter { !it.hasFirstLabel(TYPE) } + .map { it.originalToken } + .joinToString("") + } + + private fun getElementType(element: AntlrNode): String? 
{ + return element.getChildOfType(TYPE)?.originalToken + } } class ArrowPhpFunctionInfo(root: AntlrNode) : ANTLRPHPFunctionInfo(root) { From 1f04816613efaaebd783d1da644a02385c7a731a Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 29 May 2021 17:07:19 +0300 Subject: [PATCH 215/308] simple parameter bug fixed --- .../astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index 6eaa654d..cf7865be 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -20,7 +20,16 @@ abstract class ANTLRPHPFunctionInfo( } private fun collectParameters(): List { + // No parameters val parameterList = root.getChildOfType(PARAMETERS) ?: return emptyList() + + // Checking if function have only one parameter without + // type or ellipsis + if (parameterList.hasLastLabel(PARAMETER_NAME)) { + return listOf(assembleParameter(parameterList)) + } + + // Otherwise find all parameters return parameterList.getItOrChildrenOfType(ONE_PARAMETER).map { assembleParameter(it) } @@ -36,6 +45,7 @@ abstract class ANTLRPHPFunctionInfo( private fun getParameterName(parameterNode: AntlrNode): String { if (parameterNode.hasLastLabel(PARAMETER_NAME)) return parameterNode.originalToken ?: return DEFAULT_TOKEN + return parameterNode.children .filter { !it.hasFirstLabel(TYPE) } .map { it.originalToken } @@ -45,6 +55,8 @@ abstract class ANTLRPHPFunctionInfo( private fun getElementType(element: AntlrNode): String? 
{ return element.getChildOfType(TYPE)?.originalToken } + + } class ArrowPhpFunctionInfo(root: AntlrNode) : ANTLRPHPFunctionInfo(root) { From 9ae83d62a3082f1e96f3ecf6f31006f8ec5e9fd0 Mon Sep 17 00:00:00 2001 From: illided Date: Sat, 29 May 2021 17:50:29 +0300 Subject: [PATCH 216/308] actual function info changed --- .../methodSplitting/testMethodSplitting.php | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/test/resources/methodSplitting/testMethodSplitting.php b/src/test/resources/methodSplitting/testMethodSplitting.php index 586db132..0d76fed4 100644 --- a/src/test/resources/methodSplitting/testMethodSplitting.php +++ b/src/test/resources/methodSplitting/testMethodSplitting.php @@ -2,81 +2,81 @@ ////////////////// FUNCTIONS ////////////////// -// #1 info : {name: fun, args: , enclosing element: null, return type: null} +// #1 info : {name: fun, args: ,enclosing element: null, enclosing element name: null, return type: null} function fun() { return 5; } -// #2 info : {name: fun2, args: $a, $b, enclosing element: null, return type: null} +// #2 info : {name: fun2, args: $a, $b, enclosing element: null, enclosing element name: null, return type: null} function fun2($a, $b) { return $a + $b; } -// #3 info : {name: funWithTypedParameter, args: int $a, enclosing element: null, return type: null} +// #3 info : {name: funWithTypedParameter, args: int $a, enclosing element: null, enclosing element name: null, return type: null} function funWithTypedParameter(int $a) { return $a; } -// #4 info : {name: funWithReturnType, args: $a, $b, enclosing element: null, return type: string} +// #4 info : {name: funWithReturnType, args: $a, $b, enclosing element: null, enclosing element name: null, return type: string} function funWithReturnType($a, $b) : string { return 'hello'; } -// #5 info : {name: funWithDottedArg, args: $a, ...$rest, enclosing element: null, return type: null} +// #5 info : {name: funWithDottedArg, args: $a, ...$rest, 
enclosing element: null, enclosing element name: null, return type: null} function funWithDottedArg($a, ...$rest) { return 'hello'; } ////////////////// VAR FUNCTIONS ////////////////// -// #6 info : {name: $varFunc, args: $x, enclosing element: variable, return type: null} +// #6 info : {name: null, args: $x, enclosing element: variable, enclosing element name: $varFunc, return type: null} $varFunc = function ($x) { return $x; }; $outerVar = 10; -// #7 info : {name: $varFuncWithOuterVar, args: $x, enclosing element: variable, return type: null} +// #7 info : {name: null, args: $x, enclosing element: variable, enclosing element name: $varFuncWithOuterVar, return type: null} $varFuncWithOuterVar = function ($x) use ($outerVar) { return $x * $outerVar; }; ////////////////// ARROW FUNCTIONS ////////////////// -// #8 info : {name: $arrow1, args: $x, $y, enclosing element: variable, return type: null} +// #8 info : {name: null, args: $x, $y, enclosing element: variable, enclosing element name: $arrow1, return type: null} $arrow1 = fn($x, $y) => $x + $y; -// #9 info : {name: $arrow2, args: $x, enclosing element: variable, return type: null} -// #10 info : {name: null, args: $y, enclosing element: function, return type: null} +// #9 info : {name: null, args: $x, enclosing element: variable, enclosing element name: $arrow2, return type: null} +// #10 info : {name: null, args: $y, enclosing element: function, enclosing element name: null, return type: null} $arrow2 = fn($x) => fn($y) => $x * $y; -// #12 info : {name: null, args: $x, enclosing element: null, return type: null} +// #12 info : {name: null, args: $x, enclosing element: null, enclosing element name: null, return type: null} fn($x = 42) => $x; -// #13 info : {name: null, args: &$x, enclosing element: null, return type: null} +// #13 info : {name: null, args: &$x, enclosing element: null, enclosing element name: null, return type: null} fn(&$x) => $x; -// #14 info : {name: null, args: $x, enclosing element: null, 
return type: null} +// #14 info : {name: null, args: $x, enclosing element: null, enclosing element name: null, return type: null} fn&($x) => $x; -// #15 info : {name: null, args: $x, ...$rest, enclosing element: null, return type: null} +// #15 info : {name: null, args: $x, ...$rest, enclosing element: null, enclosing element name: null, return type: null} fn($x, ...$rest) => $rest; ////////////////// METHOD FUNCTIONS ////////////////// class someClass { - // #16 info : {name: someFunc, args: , enclosing element: class, return type: null} + // #16 info : {name: someFunc, args: , enclosing element: class, enclosing element name: someClass, return type: null} public function someFunc() { return 42; } - // #17 info : {name: funcWithParams, args: , enclosing element: class, return type: null} + // #17 info : {name: funcWithParams, args: , enclosing element: class, enclosing element name: someClass, return type: null} public function funcWithParams($a, $b) { - // #18 info : {name: innerFunction, args: , enclosing element: method, return type: null} + // #18 info : {name: innerFunction, args: , enclosing element: method, enclosing element name: funcWithParams, return type: null} function innerFunction() { - // #19 info : {name: superInnerFunction, args: , enclosing element: function, return type: null} + // #19 info : {name: superInnerFunction, args: , enclosing element: function, enclosing element name: innerFunction, return type: null} function superInnerFunction() { return 42; } From cdc28c82a832139b8798a6cc3e991ce69aba9517 Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 30 May 2021 12:37:58 +0300 Subject: [PATCH 217/308] parameter extraction bug possible fix --- .../parse/antlr/php/ANTLRPHPFunctionInfo.kt | 71 +++++++++++-------- .../parse/antlr/php/PHPMethodSplitter.kt | 13 +--- .../methodSplitting/testMethodSplitting.php | 16 ++--- 3 files changed, 53 insertions(+), 47 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt 
b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index cf7865be..7f2993ed 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -1,37 +1,51 @@ package astminer.parse.antlr.php -import astminer.common.DEFAULT_TOKEN +import astminer.common.model.EnclosingElement import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter import astminer.parse.antlr.* -abstract class ANTLRPHPFunctionInfo( - final override val root: AntlrNode -) : FunctionInfo { - override val parameters: List = collectParameters() - +class ANTLRPHPFunctionInfo(override val root: AntlrNode) : FunctionInfo { override val returnType = getElementType(root) + override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION) + + override val parameters: List = collectParameters() + override val enclosingElement: EnclosingElement? = collectEnclosingElement() companion object { - const val PARAMETERS = "formalParameterList" - const val ONE_PARAMETER = "formalParameter" + const val PARAMETERS_LIST = "formalParameterList" + const val PARAMETER = "formalParameter" const val TYPE = "typeHint" const val PARAMETER_NAME = "VarName" + const val CLASS_MEMBER = "classStatement" + const val FUNCTION = "functionDeclaration" + const val FUNCTION_NAME = "Identifier" + const val LAMBDA_DECLARATION = "lambdaFunctionExpr" + const val CLASS_DECLARATION = "classDeclaration" + const val VAR_DECLARATION = "variableInitializer" + const val ELLIPSIS = "Ellipsis" } private fun collectParameters(): List { + // Parameters in this grammar have following structure: + //formal parameter list -> formal parameter -> type hint + // | -> ellipsis + // | -> var init -> var name + // | -> equal + // | -> default value + // No parameters - val parameterList = root.getChildOfType(PARAMETERS) ?: return emptyList() + val parameterList = root.getChildOfType(PARAMETERS_LIST) ?: return emptyList() - // 
Checking if function have only one parameter without - // type or ellipsis - if (parameterList.hasLastLabel(PARAMETER_NAME)) { + // Checking if function have only one parameter + // without ellipsis, type hint or default value + if (parameterList.hasLastLabel(PARAMETER_NAME) || parameterList.hasLastLabel(VAR_DECLARATION)) { return listOf(assembleParameter(parameterList)) } // Otherwise find all parameters - return parameterList.getItOrChildrenOfType(ONE_PARAMETER).map { - assembleParameter(it) + return parameterList.getItOrChildrenOfType(PARAMETER).mapNotNull { + try { assembleParameter(it) } catch (e: IllegalStateException) { return@mapNotNull null } } } @@ -43,26 +57,25 @@ abstract class ANTLRPHPFunctionInfo( } private fun getParameterName(parameterNode: AntlrNode): String { - if (parameterNode.hasLastLabel(PARAMETER_NAME)) - return parameterNode.originalToken ?: return DEFAULT_TOKEN + // "..." in php equivalent to *args in python + val isSplattedArg = parameterNode.getChildOfType(ELLIPSIS) != null + + if (parameterNode.hasLastLabel(PARAMETER_NAME)) return parameterNode.originalToken + ?: throw IllegalStateException("No name was found for a parameter") + + val varInit = parameterNode.getItOrChildrenOfType(VAR_DECLARATION).first() - return parameterNode.children - .filter { !it.hasFirstLabel(TYPE) } - .map { it.originalToken } - .joinToString("") + val name = varInit.getItOrChildrenOfType(PARAMETER_NAME).first().originalToken + ?: throw IllegalStateException("No name was found for a parameter") + + return (if (isSplattedArg) "..." else "") + name } private fun getElementType(element: AntlrNode): String? { return element.getChildOfType(TYPE)?.originalToken } - -} - -class ArrowPhpFunctionInfo(root: AntlrNode) : ANTLRPHPFunctionInfo(root) { - -} - -class SimplePhpFunctionInfo(root: AntlrNode) : ANTLRPHPFunctionInfo(root) { - + private fun collectEnclosingElement(): EnclosingElement? 
{ + return null + } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt index e1d27b38..d830e938 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt @@ -4,21 +4,14 @@ import astminer.common.model.FunctionInfo import astminer.common.model.TreeFunctionSplitter import astminer.parse.antlr.AntlrNode -class PHPMethodSplitter: TreeFunctionSplitter { +class PHPMethodSplitter : TreeFunctionSplitter { companion object { const val LAMBDA_TOKEN = "LambdaFn" const val FUNCTION_TOKEN = "Function_" } override fun splitIntoFunctions(root: AntlrNode): Collection> { - return root.preOrder().mapNotNull { node-> - node.parent?.let { statement -> - when (node.typeLabel) { - LAMBDA_TOKEN -> ArrowPhpFunctionInfo(statement) - FUNCTION_TOKEN -> SimplePhpFunctionInfo(statement) - else -> null - } - } - } + return root.preOrder().filter { it.typeLabel == LAMBDA_TOKEN || it.typeLabel == FUNCTION_TOKEN } + .mapNotNull { node -> node.parent?.let {statement -> ANTLRPHPFunctionInfo(statement) } } } } \ No newline at end of file diff --git a/src/test/resources/methodSplitting/testMethodSplitting.php b/src/test/resources/methodSplitting/testMethodSplitting.php index 0d76fed4..da4df762 100644 --- a/src/test/resources/methodSplitting/testMethodSplitting.php +++ b/src/test/resources/methodSplitting/testMethodSplitting.php @@ -50,33 +50,33 @@ function funWithDottedArg($a, ...$rest) { // #10 info : {name: null, args: $y, enclosing element: function, enclosing element name: null, return type: null} $arrow2 = fn($x) => fn($y) => $x * $y; -// #12 info : {name: null, args: $x, enclosing element: null, enclosing element name: null, return type: null} +// #11 info : {name: null, args: $x, enclosing element: null, enclosing element name: null, return type: null} fn($x = 42) => $x; -// #13 info : {name: 
null, args: &$x, enclosing element: null, enclosing element name: null, return type: null} +// #12 info : {name: null, args: &$x, enclosing element: null, enclosing element name: null, return type: null} fn(&$x) => $x; -// #14 info : {name: null, args: $x, enclosing element: null, enclosing element name: null, return type: null} +// #13 info : {name: null, args: $x, enclosing element: null, enclosing element name: null, return type: null} fn&($x) => $x; -// #15 info : {name: null, args: $x, ...$rest, enclosing element: null, enclosing element name: null, return type: null} +// #14 info : {name: null, args: $x, ...$rest, enclosing element: null, enclosing element name: null, return type: null} fn($x, ...$rest) => $rest; ////////////////// METHOD FUNCTIONS ////////////////// class someClass { - // #16 info : {name: someFunc, args: , enclosing element: class, enclosing element name: someClass, return type: null} + // #15 info : {name: someFunc, args: , enclosing element: class, enclosing element name: someClass, return type: null} public function someFunc() { return 42; } - // #17 info : {name: funcWithParams, args: , enclosing element: class, enclosing element name: someClass, return type: null} + // #16 info : {name: funcWithParams, args: , enclosing element: class, enclosing element name: someClass, return type: null} public function funcWithParams($a, $b) { - // #18 info : {name: innerFunction, args: , enclosing element: method, enclosing element name: funcWithParams, return type: null} + // #17 info : {name: innerFunction, args: , enclosing element: method, enclosing element name: funcWithParams, return type: null} function innerFunction() { - // #19 info : {name: superInnerFunction, args: , enclosing element: function, enclosing element name: innerFunction, return type: null} + // #18 info : {name: superInnerFunction, args: , enclosing element: function, enclosing element name: innerFunction, return type: null} function superInnerFunction() { return 42; } From 
e75d3393424a0b25bd25691668d2b98fce673cde Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 30 May 2021 15:16:54 +0300 Subject: [PATCH 218/308] raw algorithm for enclosing extraction added --- .../parse/antlr/php/ANTLRPHPFunctionInfo.kt | 53 +++++++++++++++++-- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index 7f2993ed..0f9cfe2f 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -1,13 +1,15 @@ package astminer.parse.antlr.php import astminer.common.model.EnclosingElement +import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter import astminer.parse.antlr.* +import astminer.parse.findEnclosingElementBy class ANTLRPHPFunctionInfo(override val root: AntlrNode) : FunctionInfo { override val returnType = getElementType(root) - override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION) + override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION_NAME) override val parameters: List = collectParameters() override val enclosingElement: EnclosingElement? = collectEnclosingElement() @@ -19,11 +21,16 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode) : FunctionInfo { @@ -57,7 +64,7 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode) : FunctionInfo? 
{ - return null + val enclosing = root.findEnclosingElementBy { it.isPossibleEnclosing() } ?: return null + return try { + EnclosingElement( + root = enclosing, + name = getEnclosingElementName(enclosing), + type = getEnclosingType(enclosing) + ) + } catch (e: IllegalStateException) { + null + } + } + + private fun getEnclosingType(enclosing: AntlrNode): EnclosingElementType { + return when { + enclosing.isMethod() -> EnclosingElementType.Method + enclosing.isFunction() -> EnclosingElementType.Function + enclosing.isClass() -> EnclosingElementType.Class + enclosing.isAssignExpression() -> EnclosingElementType.VariableDeclaration + else -> throw IllegalStateException("No type can be associated") + } + } + + private fun getEnclosingElementName(enclosing: AntlrNode) : String?{ + return enclosing.getChildOfType(FUNCTION_NAME)?.originalToken +// return when { +// enclosing.isMethod() || enclosing.isFunction() -> enclosing.getChildOfType(FUNCTION_NAME)?.originalToken +// else -> throw IllegalStateException("No type can be associated") +// } } + + // No check for method because method is a function + private fun AntlrNode.isPossibleEnclosing() = isFunction() || isClass() || isAssignExpression() + + private fun AntlrNode.isMethod() = isFunction() && (hasFirstLabel(CLASS_MEMBER)) + + private fun AntlrNode.isFunction() = getChildOfType(LAMBDA_TOKEN) != null || getChildOfType(FUNCTION_TOKEN) != null + + private fun AntlrNode.isAssignExpression() = hasFirstLabel(EXPRESSION) && (getChildOfType(ASSIGN_OP) != null) + + private fun AntlrNode.isClass(): Boolean = hasLastLabel(CLASS_DECLARATION) } \ No newline at end of file From 0b7efea5167d772925fe33a8643091fcd2f04f1c Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 30 May 2021 15:17:07 +0300 Subject: [PATCH 219/308] tests fixed --- .../antlr/php/ANTLRPHPMethodSplitterTest.kt | 18 +++++++++++------- .../methodSplitting/testMethodSplitting.php | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git 
a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt index 6af6be4b..6c599e32 100644 --- a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt @@ -45,17 +45,21 @@ internal class ANTLRPHPMethodSplitterTest { } fun FunctionInfo.getJsonInfo(): String { - return "info : {" + - "name : ${name}, " + - "args : ${parameters.joinToString(", ") { "${it.type} ${it.name}" }}, " + - "enclosing element : ${enclosingElement?.type?.getEnclosingElementType()}, " + + return listOf( + "info : {", + "name: ${name}, ", + "args: ${parameters.joinToString(", ") { "${it.type} ${it.name}" }}, ", + "enclosing element: ${enclosingElement?.type?.getEnclosingElementType()}, ", + "enclosing element name: ${enclosingElement?.name}, ", + "return type: $returnType, ", "}" + ).joinToString("") } - val actualJsonInfos = functionInfos.map { it.getJsonInfo() }.sorted() + val actualJsonInfos = functionInfos.map { it.getJsonInfo() + '\n' }.sorted() - val text = File(JavaScriptFunctionSplitterTest.testFilePath).readText() - val expectedJsonInfos = Regex("info : \\{.*\\}").findAll(text).toList().map { it.value }.sorted() + val text = File(testFilePath).readText() + val expectedJsonInfos = Regex("info : \\{.*\\}").findAll(text).toList().map { it.value + '\n' }.sorted() assertEquals(expectedJsonInfos, actualJsonInfos) } diff --git a/src/test/resources/methodSplitting/testMethodSplitting.php b/src/test/resources/methodSplitting/testMethodSplitting.php index da4df762..1ba6f53a 100644 --- a/src/test/resources/methodSplitting/testMethodSplitting.php +++ b/src/test/resources/methodSplitting/testMethodSplitting.php @@ -2,7 +2,7 @@ ////////////////// FUNCTIONS ////////////////// -// #1 info : {name: fun, args: ,enclosing element: null, enclosing element name: null, return type: null} +// #1 info : {name: fun, args: , enclosing 
element: null, enclosing element name: null, return type: null} function fun() { return 5; } From 337309a777a8deb93b546e412bdd439f0fee8065 Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 30 May 2021 15:36:58 +0300 Subject: [PATCH 220/308] proper type generating in actual info json --- .../astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt index 6c599e32..006a9786 100644 --- a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt @@ -48,7 +48,7 @@ internal class ANTLRPHPMethodSplitterTest { return listOf( "info : {", "name: ${name}, ", - "args: ${parameters.joinToString(", ") { "${it.type} ${it.name}" }}, ", + "args: ${parameters.joinToString(", ") { "${it.type ?: ""} ${it.name}" }}, ", "enclosing element: ${enclosingElement?.type?.getEnclosingElementType()}, ", "enclosing element name: ${enclosingElement?.name}, ", "return type: $returnType, ", From dfe2bac3b388082d3637211b36babe4da579826d Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 30 May 2021 16:13:53 +0300 Subject: [PATCH 221/308] bug fixes (proper enclosing extraction and ampersand parameter extraction) --- .../parse/antlr/php/ANTLRPHPFunctionInfo.kt | 23 ++++++++++--------- .../antlr/php/ANTLRPHPMethodSplitterTest.kt | 14 +++++------ .../methodSplitting/testMethodSplitting.php | 2 +- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index 0f9cfe2f..94f304e9 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -20,9 +20,7 @@ class ANTLRPHPFunctionInfo(override 
val root: AntlrNode) : FunctionInfo { - // Parameters in this grammar have following structure: - //formal parameter list -> formal parameter -> type hint + // Parameters in this grammar have following structure (children order may be wrong): + //formal parameter list -> formal parameter -> Ampersand + // | -> type hint // | -> ellipsis // | -> var init -> var name // | -> equal @@ -67,6 +66,8 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode) : FunctionInfo enclosing.getChildOfType(FUNCTION_NAME)?.originalToken -// else -> throw IllegalStateException("No type can be associated") -// } + return when { + enclosing.isFunction() || enclosing.isClass() -> enclosing.getChildOfType(FUNCTION_NAME)?.originalToken + enclosing.isAssignExpression() -> enclosing.children.find { it.hasLastLabel(PARAMETER_NAME) }?.originalToken + else -> throw IllegalStateException("No type can be associated") + } } // No check for method because method is a function diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt index 006a9786..e6e21aa5 100644 --- a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt @@ -46,13 +46,13 @@ internal class ANTLRPHPMethodSplitterTest { fun FunctionInfo.getJsonInfo(): String { return listOf( - "info : {", - "name: ${name}, ", - "args: ${parameters.joinToString(", ") { "${it.type ?: ""} ${it.name}" }}, ", - "enclosing element: ${enclosingElement?.type?.getEnclosingElementType()}, ", - "enclosing element name: ${enclosingElement?.name}, ", - "return type: $returnType, ", - "}" + "info : {", + "name: ${name}, ", + "args: ${parameters.joinToString(", ") { listOfNotNull(it.type, it.name).joinToString(" ") }}, ", + "enclosing element: ${enclosingElement?.type?.getEnclosingElementType()}, ", + "enclosing element name: ${enclosingElement?.name}, ", + "return type: 
$returnType", + "}" ).joinToString("") } diff --git a/src/test/resources/methodSplitting/testMethodSplitting.php b/src/test/resources/methodSplitting/testMethodSplitting.php index 1ba6f53a..0201ecca 100644 --- a/src/test/resources/methodSplitting/testMethodSplitting.php +++ b/src/test/resources/methodSplitting/testMethodSplitting.php @@ -70,7 +70,7 @@ public function someFunc() { return 42; } - // #16 info : {name: funcWithParams, args: , enclosing element: class, enclosing element name: someClass, return type: null} + // #16 info : {name: funcWithParams, args: $a, $b, enclosing element: class, enclosing element name: someClass, return type: null} public function funcWithParams($a, $b) { // #17 info : {name: innerFunction, args: , enclosing element: method, enclosing element name: funcWithParams, return type: null} From 69317251a5467b4a92179e7bcf6d175273af6445 Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 30 May 2021 16:16:37 +0300 Subject: [PATCH 222/308] renamed to function splitter --- .../php/{PHPMethodSplitter.kt => PHPFunctionSplitter.kt} | 2 +- ...MethodSplitterTest.kt => ANTLRPHPFunctionSplitterTest.kt} | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) rename src/main/kotlin/astminer/parse/antlr/php/{PHPMethodSplitter.kt => PHPFunctionSplitter.kt} (90%) rename src/test/kotlin/astminer/parse/antlr/php/{ANTLRPHPMethodSplitterTest.kt => ANTLRPHPFunctionSplitterTest.kt} (93%) diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt similarity index 90% rename from src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt rename to src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt index d830e938..43bfa666 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/PHPMethodSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt @@ -4,7 +4,7 @@ import astminer.common.model.FunctionInfo import 
astminer.common.model.TreeFunctionSplitter import astminer.parse.antlr.AntlrNode -class PHPMethodSplitter : TreeFunctionSplitter { +class PHPFunctionSplitter : TreeFunctionSplitter { companion object { const val LAMBDA_TOKEN = "LambdaFn" const val FUNCTION_TOKEN = "Function_" diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt similarity index 93% rename from src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt rename to src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt index e6e21aa5..f17a81fa 100644 --- a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt @@ -5,16 +5,15 @@ import org.junit.Test import kotlin.test.BeforeTest import astminer.common.model.FunctionInfo import astminer.parse.antlr.AntlrNode -import astminer.parse.antlr.javascript.JavaScriptFunctionSplitterTest import java.io.File import kotlin.test.assertEquals import kotlin.test.assertNotNull -internal class ANTLRPHPMethodSplitterTest { +internal class ANTLRPHPFunctionSplitterTest { companion object { const val N_METHODS = 18 const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.php" - val functionSplitter = PHPMethodSplitter() + val functionSplitter = PHPFunctionSplitter() val parser = PHPParser() } From 1ac1ed68be1c3c3073e85e1992cde13559c08d72 Mon Sep 17 00:00:00 2001 From: illided Date: Sun, 30 May 2021 16:18:05 +0300 Subject: [PATCH 223/308] factory added --- .../kotlin/astminer/parse/antlr/AntlrHandler.kt | 15 ++++++++++++--- src/main/kotlin/astminer/parse/factory.kt | 2 ++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt index 2a53e548..d3c270fe 100644 --- 
a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt @@ -1,12 +1,12 @@ package astminer.parse.antlr -import astminer.common.model.ParseResult -import astminer.common.model.HandlerFactory -import astminer.common.model.LanguageHandler +import astminer.common.model.* import astminer.parse.antlr.java.JavaFunctionSplitter import astminer.parse.antlr.java.JavaParser import astminer.parse.antlr.javascript.JavaScriptFunctionSplitter import astminer.parse.antlr.javascript.JavaScriptParser +import astminer.parse.antlr.php.PHPFunctionSplitter +import astminer.parse.antlr.php.PHPParser import astminer.parse.antlr.python.PythonFunctionSplitter import astminer.parse.antlr.python.PythonParser import java.io.File @@ -36,4 +36,13 @@ object AntlrJavascriptHandlerFactory : HandlerFactory { override val parseResult: ParseResult = JavaScriptParser().parseFile(file) override val splitter = JavaScriptFunctionSplitter() } +} + +object AntlrPHPHandlerFactory: HandlerFactory { + override fun createHandler(file: File): LanguageHandler = AntlrPHPHandler(file) + + class AntlrPHPHandler(file: File): LanguageHandler() { + override val parseResult: ParseResult = PHPParser().parseFile(file) + override val splitter: TreeFunctionSplitter = PHPFunctionSplitter() + } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt index 1c9a6613..ea9ba441 100644 --- a/src/main/kotlin/astminer/parse/factory.kt +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -3,6 +3,7 @@ package astminer.parse import astminer.common.model.HandlerFactory import astminer.parse.antlr.AntlrJavaHandlerFactory import astminer.parse.antlr.AntlrJavascriptHandlerFactory +import astminer.parse.antlr.AntlrPHPHandlerFactory import astminer.parse.antlr.AntlrPythonHandlerFactory import astminer.parse.gumtree.GumtreeJavaHandlerFactory import astminer.parse.gumtree.GumtreePythonHandlerFactory @@ -29,6 +30,7 @@ 
private fun getAntlrHandlerFactory(extension: String): HandlerFactory { "java" -> AntlrJavaHandlerFactory "javascript" -> AntlrJavascriptHandlerFactory "python" -> AntlrPythonHandlerFactory + "php" -> AntlrPHPHandlerFactory else -> throw UnsupportedOperationException() } } From dc673ac956dcd2d943c9fffd293e7717b2dc1e00 Mon Sep 17 00:00:00 2001 From: furetur Date: Sun, 30 May 2021 21:08:27 +0500 Subject: [PATCH 224/308] replaces all whens in pipeline with polymorphic object creation --- ...les-csv.yml => file-asts-csv-storage.yaml} | 10 +- ...iles.yml => file-path-representation.yaml} | 8 +- ...-name-prediction-path-representation.yaml} | 11 +- src/main/kotlin/astminer/Main.kt | 26 +++- .../astminer/common/model/HandlerModel.kt | 14 +++ .../astminer/common/model/ParsingModel.kt | 8 +- .../kotlin/astminer/config/FilterConfigs.kt | 20 ++++ .../kotlin/astminer/config/ParserConfig.kt | 4 +- .../kotlin/astminer/config/ProblemConfigs.kt | 39 +++--- .../kotlin/astminer/config/StorageConfigs.kt | 22 +++- .../kotlin/astminer/examples/AllCppFiles.kt | 2 +- .../kotlin/astminer/examples/AllJavaAst.kt | 4 +- .../astminer/examples/AllJavaFilesGumTree.kt | 2 +- .../astminer/examples/AllJavaScriptFiles.kt | 2 +- .../astminer/examples/AllPythonFiles.kt | 2 +- .../astminer/examples/Code2VecJavaMethods.kt | 2 +- .../kotlin/astminer/filters/CommonFilters.kt | 3 +- .../kotlin/astminer/filters/FileFilters.kt | 2 +- src/main/kotlin/astminer/filters/Filter.kt | 3 + .../astminer/filters/FunctionFilters.kt | 2 +- src/main/kotlin/astminer/pipeline/Pipeline.kt | 33 ++---- .../astminer/pipeline/PipelineBranch.kt | 111 ------------------ .../astminer/pipeline/branch/Exceptions.kt | 16 +++ .../pipeline/branch/FilePipelineBranch.kt | 36 ++++++ .../pipeline/branch/FunctionPipelineBranch.kt | 33 ++++++ .../pipeline/branch/PipelineBranch.kt | 18 +++ .../astminer/problem/FileLevelProblems.kt | 5 +- .../astminer/problem/FunctionLevelProblems.kt | 7 +- src/main/kotlin/astminer/problem/Problem.kt | 14 +++ 
src/test/kotlin/astminer/Utils.kt | 6 +- src/test/kotlin/astminer/common/TestUtils.kt | 8 +- .../astminer/filters/FileFiltersTest.kt | 1 - src/test/kotlin/astminer/parse/Utils.kt | 7 -- .../parse/antlr/java/ANTLRJavaParserTest.kt | 2 +- .../antlr/python/ANTLRPythonParserTest.kt | 2 +- .../astminer/parse/cpp/FuzzyCppParserTest.kt | 2 +- .../gumtree/java/GumTreeJavaParserTest.kt | 2 +- .../pipeline/Code2VecExtractionTest.kt | 3 +- .../astminer/storage/ast/CsvAstStorageTest.kt | 2 - 39 files changed, 284 insertions(+), 210 deletions(-) rename configs/{parse-files-csv.yml => file-asts-csv-storage.yaml} (64%) rename configs/{paths-from-files.yml => file-path-representation.yaml} (69%) rename configs/{function-name-prediction-ast.yml => function-name-prediction-path-representation.yaml} (57%) create mode 100644 src/main/kotlin/astminer/filters/Filter.kt delete mode 100644 src/main/kotlin/astminer/pipeline/PipelineBranch.kt create mode 100644 src/main/kotlin/astminer/pipeline/branch/Exceptions.kt create mode 100644 src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt create mode 100644 src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt create mode 100644 src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt create mode 100644 src/main/kotlin/astminer/problem/Problem.kt delete mode 100644 src/test/kotlin/astminer/parse/Utils.kt diff --git a/configs/parse-files-csv.yml b/configs/file-asts-csv-storage.yaml similarity index 64% rename from configs/parse-files-csv.yml rename to configs/file-asts-csv-storage.yaml index f63f733a..1d1c2856 100644 --- a/configs/parse-files-csv.yml +++ b/configs/file-asts-csv-storage.yaml @@ -2,18 +2,18 @@ inputDir: 'src/test/resources/methodSplitting/' outputDir: 'output' parser: - type: 'antlr' + name: 'antlr' extensions: ['java', 'js'] filters: - - type: 'by tree size' + - name: 'by tree size' maxTreeSize: 1000 - - type: 'by words number' + - name: 'by words number' maxTokenWordsNumber: 1000 problem: - type: 'file 
name' + name: 'file name' storage: - type: 'CsvAST' + name: 'CsvAST' diff --git a/configs/paths-from-files.yml b/configs/file-path-representation.yaml similarity index 69% rename from configs/paths-from-files.yml rename to configs/file-path-representation.yaml index cadc8192..b662bbd0 100644 --- a/configs/paths-from-files.yml +++ b/configs/file-path-representation.yaml @@ -2,17 +2,17 @@ inputDir: 'src/test/resources/methodSplitting/' outputDir: 'output' parser: - type: 'antlr' + name: 'antlr' extensions: ['java', 'js'] filters: - - type: 'by tree size' + - name: 'by tree size' maxTreeSize: 1000 problem: - type: 'file name' + name: 'file name' storage: - type: 'code2vec paths' + name: 'code2vec paths' maxPathLength: 5 maxPathWidth: 5 diff --git a/configs/function-name-prediction-ast.yml b/configs/function-name-prediction-path-representation.yaml similarity index 57% rename from configs/function-name-prediction-ast.yml rename to configs/function-name-prediction-path-representation.yaml index d1da0dd7..858daedd 100644 --- a/configs/function-name-prediction-ast.yml +++ b/configs/function-name-prediction-path-representation.yaml @@ -2,18 +2,17 @@ inputDir: 'src/test/resources/methodSplitting/' outputDir: 'output' parser: - type: 'antlr' + name: 'antlr' extensions: ['java'] filters: - - type: 'by function name length' + - name: 'by function name length' maxWordsNumber: 10 - - type: 'by words number' + - name: 'by words number' maxTokenWordsNumber: 100 problem: - type: 'function name' + name: 'function name' storage: - type: 'CsvAST' - format: 'csv' + name: 'CsvAST' diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index ba69e332..ec510a9c 100644 --- a/src/main/kotlin/astminer/Main.kt +++ b/src/main/kotlin/astminer/Main.kt @@ -3,7 +3,8 @@ package astminer import astminer.common.model.FunctionInfoPropertyNotImplementedException import astminer.config.PipelineConfig import astminer.pipeline.Pipeline -import 
astminer.pipeline.ProblemDefinitionException +import astminer.pipeline.branch.IllegalFilterException +import astminer.pipeline.branch.ProblemDefinitionException import com.charleskorn.kaml.PolymorphismStyle import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.arguments.argument @@ -24,19 +25,22 @@ class PipelineRunner : CliktCommand(name = "") { readable = true ) - private val yaml = Yaml(configuration = YamlConfiguration(polymorphismStyle = PolymorphismStyle.Property)) - override fun run() { try { val config = yaml.decodeFromString(config.readText()) Pipeline(config).run() } catch (e: SerializationException) { report("Could not read config", e) - println("\nBe sure to check types of filters and problems for misprints!") } catch (e: ProblemDefinitionException) { report("Problem is defined incorrectly", e) + } catch (e: IllegalFilterException) { + report("The chosen filter is not implemented for the chosen granularity", e) } catch (e: FunctionInfoPropertyNotImplementedException) { - report("Currently astminer cannot fulfill your request", e) + report( + "The chosen parser does not implement the required properties. 
" + + "Consider implementing them or change the parser", + e + ) } } @@ -44,6 +48,18 @@ class PipelineRunner : CliktCommand(name = "") { logger.error(e) { message } println("$message:\n$e") } + + companion object { + private const val POLYMORPHISM_PROPERTY_NAME = "name" + + private val yaml = Yaml( + configuration = YamlConfiguration( + polymorphismStyle = PolymorphismStyle.Property, + polymorphismPropertyName = POLYMORPHISM_PROPERTY_NAME + ) + ) + } + } fun main(args: Array) = PipelineRunner().main(args) diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index 159a8fed..d9a319b0 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -1,9 +1,23 @@ package astminer.common.model +import astminer.parse.ParsingException +import mu.KotlinLogging import java.io.File +private val logger = KotlinLogging.logger("HandlerFactory") + interface HandlerFactory { fun createHandler(file: File): LanguageHandler + + fun createHandlers(files: List, handleResult: (LanguageHandler) -> Any?) 
{ + for (file in files) { + try { + handleResult(createHandler(file)) + } catch (parsingException: ParsingException) { + logger.error(parsingException) { "Failed to parse file ${file.path}" } + } + } + } } abstract class LanguageHandler { diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 798f6c32..eb4c6bdf 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -3,6 +3,8 @@ package astminer.common.model import astminer.problem.LabeledResult import astminer.common.DEFAULT_TOKEN import astminer.common.splitToSubtokens +import astminer.parse.ParsingException +import mu.KotlinLogging import java.io.File import java.io.InputStream import java.util.* @@ -18,7 +20,7 @@ abstract class Node { originalToken?.let { val subtokens = splitToSubtokens(it) if (subtokens.isEmpty()) null - else subtokens.joinToString("|") + else subtokens.joinToString(TOKEN_DELIMITER) } } var technicalToken: String? = null @@ -46,6 +48,10 @@ abstract class Node { fun postOrderIterator(): Iterator = PostOrderIterator(this) open fun postOrder(): List = PostOrderIterator(this).asSequence().toList() + + companion object { + const val TOKEN_DELIMITER = "|" + } } class PreOrderIterator(root: Node): Iterator { diff --git a/src/main/kotlin/astminer/config/FilterConfigs.kt b/src/main/kotlin/astminer/config/FilterConfigs.kt index e1d027e2..4ff8376c 100644 --- a/src/main/kotlin/astminer/config/FilterConfigs.kt +++ b/src/main/kotlin/astminer/config/FilterConfigs.kt @@ -3,6 +3,7 @@ package astminer.config import astminer.filters.* import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable +import kotlinx.serialization.Transient /** * Base class for all filter configs. 
See below @@ -10,6 +11,7 @@ import kotlinx.serialization.Serializable @Serializable sealed class FilterConfig { abstract val serialName: String + abstract val filterImplementation: Filter } /** @@ -19,6 +21,9 @@ sealed class FilterConfig { @SerialName("by tree size") data class TreeSizeFilterConfig(val maxTreeSize: Int) : FilterConfig() { override val serialName = "by tree size" + + @Transient + override val filterImplementation = TreeSizeFilter(maxTreeSize) } /** @@ -28,6 +33,9 @@ data class TreeSizeFilterConfig(val maxTreeSize: Int) : FilterConfig() { @SerialName("by modifiers") data class ModifierFilterConfig(val modifiers: List) : FilterConfig() { override val serialName = "by modifiers" + + @Transient + override val filterImplementation = ModifierFilter(modifiers) } /** @@ -37,6 +45,9 @@ data class ModifierFilterConfig(val modifiers: List) : FilterConfig() { @SerialName("by annotations") data class AnnotationFilterConfig(val annotations: List) : FilterConfig() { override val serialName = "by annotations" + + @Transient + override val filterImplementation = AnnotationFilter(annotations) } /** @@ -46,6 +57,9 @@ data class AnnotationFilterConfig(val annotations: List) : FilterConfig( @SerialName("no constructors") object ConstructorFilterConfig : FilterConfig() { override val serialName = "no constructors" + + @Transient + override val filterImplementation = ConstructorFilter } /** @@ -55,6 +69,9 @@ object ConstructorFilterConfig : FilterConfig() { @SerialName("by function name length") data class FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : FilterConfig() { override val serialName = "by function name length" + + @Transient + override val filterImplementation = FunctionNameWordsNumberFilter(maxWordsNumber) } /** @@ -64,4 +81,7 @@ data class FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : Filter @SerialName("by words number") data class WordsNumberFilterConfig(val maxTokenWordsNumber: Int) : FilterConfig() { override val serialName 
= "by words number" + + @Transient + override val filterImplementation = WordsNumberFilter(maxTokenWordsNumber) } diff --git a/src/main/kotlin/astminer/config/ParserConfig.kt b/src/main/kotlin/astminer/config/ParserConfig.kt index 28363890..2a716bb3 100644 --- a/src/main/kotlin/astminer/config/ParserConfig.kt +++ b/src/main/kotlin/astminer/config/ParserConfig.kt @@ -7,12 +7,12 @@ import kotlinx.serialization.Serializable * This config is used to select the parsers that should be used * If given type = "antlr" and extensions = ["py", "java"] * then 2 ANTLR parsers will be used (java antler parser and python antlr parser) - * @param type Type of the parser + * @param name Type of the parser * @param extensions File extensions that should be parsed */ @Serializable data class ParserConfig( - val type: ParserType, + val name: ParserType, val extensions: List ) diff --git a/src/main/kotlin/astminer/config/ProblemConfigs.kt b/src/main/kotlin/astminer/config/ProblemConfigs.kt index 2c1b24d1..8cc26cc3 100644 --- a/src/main/kotlin/astminer/config/ProblemConfigs.kt +++ b/src/main/kotlin/astminer/config/ProblemConfigs.kt @@ -3,19 +3,16 @@ package astminer.config import astminer.problem.* import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable - -/** - * Problems that have [File] granularity process and extract labels from *whole files*. - * Problems that have [Function] granularity process and extract labels from *functions* (that are collected from files). 
- */ -enum class Granularity { - File, - Function -} +import kotlinx.serialization.Transient @Serializable sealed class ProblemConfig { - abstract val granularity: Granularity + abstract val problemImplementation: Problem + + val granularity: Granularity + get() = problemImplementation.granularity + + abstract val serialName: String } /** @@ -23,8 +20,11 @@ sealed class ProblemConfig { */ @Serializable @SerialName("file name") -object FileNameExtractorConfig : ProblemConfig() { - override val granularity = Granularity.File +class FileNameExtractorConfig : ProblemConfig() { + @Transient + override val problemImplementation = FileNameExtractor + @Transient + override val serialName = "file name" } /** @@ -32,8 +32,11 @@ object FileNameExtractorConfig : ProblemConfig() { */ @Serializable @SerialName("folder name") -object FolderNameExtractorConfig : ProblemConfig() { - override val granularity = Granularity.File +class FolderNameExtractorConfig : ProblemConfig() { + @Transient + override val problemImplementation = FolderNameExtractor + @Transient + override val serialName = "folder name" } /** @@ -41,6 +44,10 @@ object FolderNameExtractorConfig : ProblemConfig() { */ @Serializable @SerialName("function name") -object FunctionNameProblemConfig : ProblemConfig() { - override val granularity = Granularity.Function +class FunctionNameProblemConfig : ProblemConfig() { + @Transient + override val problemImplementation = FunctionNameProblem + + @Transient + override val serialName = "function name" } diff --git a/src/main/kotlin/astminer/config/StorageConfigs.kt b/src/main/kotlin/astminer/config/StorageConfigs.kt index 3d51a358..2096f2ef 100644 --- a/src/main/kotlin/astminer/config/StorageConfigs.kt +++ b/src/main/kotlin/astminer/config/StorageConfigs.kt @@ -1,29 +1,40 @@ package astminer.config +import astminer.storage.Storage +import astminer.storage.ast.CsvAstStorage +import astminer.storage.ast.DotAstStorage +import astminer.storage.path.Code2VecPathStorage import 
astminer.storage.path.PathBasedStorageConfig import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable import kotlinx.serialization.Transient +import java.io.File /** * Config for storage that saved the results on the disk */ @Serializable -sealed class StorageConfig +sealed class StorageConfig { + abstract fun createStorage(outputDirectoryPath: String): Storage +} /** * @see astminer.storage.ast.CsvAstStorage */ @Serializable @SerialName("CsvAST") -object CsvAstStorageConfig : StorageConfig() +class CsvAstStorageConfig : StorageConfig() { + override fun createStorage(outputDirectoryPath: String) = CsvAstStorage(outputDirectoryPath) +} /** * @see astminer.storage.ast.DotAstStorage */ @Serializable @SerialName("DotAST") -object DotAstStorageConfig : StorageConfig() +class DotAstStorageConfig : StorageConfig() { + override fun createStorage(outputDirectoryPath: String) = DotAstStorage(outputDirectoryPath) +} /** * Config for [astminer.storage.path.Code2VecPathStorage] @@ -38,6 +49,9 @@ data class Code2VecPathStorageConfig( val maxPathContextsPerEntity: Int? 
= null, ) : StorageConfig() { @Transient - val pathBasedStorageConfig = + private val pathBasedStorageConfig = PathBasedStorageConfig(maxPathLength, maxPathWidth, maxTokens, maxPaths, maxPathContextsPerEntity) + + override fun createStorage(outputDirectoryPath: String) = + Code2VecPathStorage(outputDirectoryPath, pathBasedStorageConfig) } diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index 5a0aa345..d16de9e5 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -19,7 +19,7 @@ fun allCppFiles() { inputDir = preprocessedDir.path, outputDir = "out_examples/allCppFiles", parser = ParserConfig(ParserType.Fuzzy, listOf(FileExtension.Cpp)), - problem = FileNameExtractorConfig, + problem = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(5, 5) ) diff --git a/src/main/kotlin/astminer/examples/AllJavaAst.kt b/src/main/kotlin/astminer/examples/AllJavaAst.kt index 6b8c5d29..51c1d21b 100644 --- a/src/main/kotlin/astminer/examples/AllJavaAst.kt +++ b/src/main/kotlin/astminer/examples/AllJavaAst.kt @@ -13,8 +13,8 @@ fun allJavaAsts() { inputDir = "src/test/resources/examples/", outputDir = "out_examples/allJavaAstsAntlr", parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Java)), - problem = FileNameExtractorConfig, - storage = CsvAstStorageConfig, + problem = FileNameExtractorConfig(), + storage = CsvAstStorageConfig(), ) Pipeline(config).run() diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index bc305344..f01f640a 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -14,7 +14,7 @@ fun allJavaFilesGumTree() { inputDir = "src/test/resources/gumTreeMethodSplitter/", outputDir = "out_examples/allJavaFilesGumTree", parser = ParserConfig(ParserType.GumTree, 
listOf(FileExtension.Java)), - problem = FileNameExtractorConfig, + problem = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(5, 5) ) Pipeline(config).run() diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index cfe2ea0e..f20f33b6 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -8,7 +8,7 @@ fun allJavaScriptFiles() { inputDir = "src/test/resources/examples", outputDir = "out_examples/allJavaScriptFilesAntlr", parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.JavaScript)), - problem = FileNameExtractorConfig, + problem = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(5, 5) ) diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index e1a9576b..e035887d 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -14,7 +14,7 @@ fun allPythonFiles() { inputDir = "src/test/resources/examples", outputDir = "out_examples/allPythonFiles", parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Python)), - problem = FileNameExtractorConfig, + problem = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(5, 5) ) diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index db47a803..58ba4502 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -14,7 +14,7 @@ fun code2vecJavaMethods() { inputDir = folder, outputDir = outputDir, parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Java)), - problem = FunctionNameProblemConfig, + problem = FunctionNameProblemConfig(), storage = Code2VecPathStorageConfig( maxPathLength = 5, maxPathWidth = 5 diff --git 
a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt index fbbb67ef..03dbfee0 100644 --- a/src/main/kotlin/astminer/filters/CommonFilters.kt +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -26,9 +26,8 @@ class TreeSizeFilter(private val minSize: Int = 0, private val maxSize: Int? = n * Filter that excludes trees that have more words than [maxWordsNumber] in any token of their node. */ class WordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter, FileFilter { - // TODO: splitting the token here is not the best choice. For instance, if delimiter is changed or other internal logic then this will have to ve rewritten private fun validateTree(root: Node) = - !root.preOrder().any { node -> node.token.split("|").size > maxWordsNumber } + !root.preOrder().any { node -> node.token.split(Node.TOKEN_DELIMITER).size > maxWordsNumber } override fun validate(functionInfo: FunctionInfo) = validateTree(functionInfo.root) diff --git a/src/main/kotlin/astminer/filters/FileFilters.kt b/src/main/kotlin/astminer/filters/FileFilters.kt index 27e6be02..080060a6 100644 --- a/src/main/kotlin/astminer/filters/FileFilters.kt +++ b/src/main/kotlin/astminer/filters/FileFilters.kt @@ -3,6 +3,6 @@ package astminer.filters import astminer.common.model.Node import astminer.common.model.ParseResult -interface FileFilter { +interface FileFilter : Filter { fun validate(parseResult: ParseResult): Boolean } diff --git a/src/main/kotlin/astminer/filters/Filter.kt b/src/main/kotlin/astminer/filters/Filter.kt new file mode 100644 index 00000000..0845bd6a --- /dev/null +++ b/src/main/kotlin/astminer/filters/Filter.kt @@ -0,0 +1,3 @@ +package astminer.filters + +interface Filter diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index 5bf320c1..c9617b7a 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ 
b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -4,7 +4,7 @@ import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.splitToSubtokens -interface FunctionFilter { +interface FunctionFilter : Filter { fun validate(functionInfo: FunctionInfo): Boolean } diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 692ef102..19c288ed 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -2,17 +2,16 @@ package astminer.pipeline import astminer.common.getProjectFilesWithExtension import astminer.config.* -import astminer.parse.ParsingException import astminer.parse.getHandlerFactory +import astminer.pipeline.branch.FilePipelineBranch +import astminer.pipeline.branch.FunctionPipelineBranch +import astminer.problem.Granularity import astminer.storage.Storage import astminer.storage.ast.CsvAstStorage import astminer.storage.ast.DotAstStorage import astminer.storage.path.Code2VecPathStorage -import mu.KotlinLogging import java.io.File -private val logger = KotlinLogging.logger("Pipeline") - /** * Pipeline runs all the steps needed to parse, process and save data. 
* @param config The pipeline config that defines the pipeline @@ -32,13 +31,9 @@ class Pipeline(private val config: PipelineConfig) { return outputDirectoryForExtension } - private fun createStorage(extension: FileExtension): Storage = with(config.storage) { + private fun createStorage(extension: FileExtension): Storage { val storagePath = createStorageDirectory(extension).path - when (this) { - is CsvAstStorageConfig -> CsvAstStorage(storagePath) - is DotAstStorageConfig -> DotAstStorage(storagePath) - is Code2VecPathStorageConfig -> Code2VecPathStorage(storagePath, pathBasedStorageConfig) - } + return config.storage.createStorage(storagePath) } /** @@ -46,20 +41,16 @@ class Pipeline(private val config: PipelineConfig) { */ fun run() { for (extension in config.parser.extensions) { - val languageFactory = getHandlerFactory(extension, config.parser.type) + val languageFactory = getHandlerFactory(extension, config.parser.name) - val files = getProjectFilesWithExtension(inputDirectory, extension.fileExtension).asSequence() - val labeledResults = files.mapNotNull { file -> - try { - languageFactory.createHandler(file) - } catch (e: ParsingException) { - logger.error(e) { "Failed to parse file ${file.path}" } - null - } - }.flatMap { branch.process(it) } + val files = getProjectFilesWithExtension(inputDirectory, extension.fileExtension) createStorage(extension).use { storage -> - storage.store(labeledResults.asIterable()) + languageFactory.createHandlers(files) { languageHandler -> + for (labeledResult in branch.process(languageHandler)) { + storage.store(labeledResult) + } + } } } } diff --git a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/PipelineBranch.kt deleted file mode 100644 index 3cdfd5ba..00000000 --- a/src/main/kotlin/astminer/pipeline/PipelineBranch.kt +++ /dev/null @@ -1,111 +0,0 @@ -package astminer.pipeline - -import astminer.common.model.FunctionInfo -import astminer.common.model.LanguageHandler -import 
astminer.common.model.Node -import astminer.common.model.ParseResult -import astminer.config.* -import astminer.filters.* -import astminer.problem.* -import mu.KotlinLogging - -/** - * PipelineBranch is a part of the pipeline that can be completely different depending on the granularity (pipeline type) - * It accepts parsed files (LanguageHandler) and returns labeled results. - */ -interface PipelineBranch { - /** - * Extracts labeled results from LanguageHandler - * May mutate the AST. - * Should have no other side-effects - */ - fun process(languageHandler: LanguageHandler): Sequence> -} - -private val logger = KotlinLogging.logger("PipelineBranch") - -private fun filterNotSupported(filterConfig: FilterConfig) { - println("Filter `${filterConfig.serialName}` is not supported for this problem") - logger.info { "Filter `${filterConfig.serialName}` is not supported for this problem" } -} - -/** - * PipelineBranch for pipeline with file-level granularity (FilePipelineConfig). - * Works with files as a whole. Tests parsed files with filters and extracts a label from them. 
- */ -class FilePipelineBranch(config: PipelineConfig) : PipelineBranch { - private val filters: List = config.filters.mapNotNull { filterConfig -> - when (filterConfig) { - is TreeSizeFilterConfig -> TreeSizeFilter(filterConfig.maxTreeSize) - is WordsNumberFilterConfig -> WordsNumberFilter(filterConfig.maxTokenWordsNumber) - else -> { - filterNotSupported(filterConfig) - null - } - } - } - - private val problem: FileLevelProblem = when (config.problem) { - is FileNameExtractorConfig -> FileNameExtractor - is FolderNameExtractorConfig -> FolderNameExtractor - else -> throw ProblemDefinitionException(Granularity.File, "FilePipelineBranch") - } - - private fun passesThroughFilters(parseResult: ParseResult) = - filters.all { filter -> filter.validate(parseResult) } - - override fun process(languageHandler: LanguageHandler): Sequence> { - val parseResult = languageHandler.parseResult - return if (passesThroughFilters(parseResult)) { - val labeledResult = problem.process(parseResult) ?: return emptySequence() - sequenceOf(labeledResult) - } else { - emptySequence() - } - } -} - -/** - * PipelineBranch for pipeline with function-level granularity (FunctionPipelineConfig). - * Extracts functions from the parsed files. - * Then tests functions with filters, processes them and extracts labels from each function. 
- */ -class FunctionPipelineBranch(config: PipelineConfig) : PipelineBranch { - private val filters: List = config.filters.mapNotNull { filterConfig -> - when (filterConfig) { - is TreeSizeFilterConfig -> TreeSizeFilter(filterConfig.maxTreeSize) - is WordsNumberFilterConfig -> WordsNumberFilter(filterConfig.maxTokenWordsNumber) - is ModifierFilterConfig -> ModifierFilter(filterConfig.modifiers) - is AnnotationFilterConfig -> AnnotationFilter(filterConfig.annotations) - is ConstructorFilterConfig -> ConstructorFilter - is FunctionNameWordsNumberFilterConfig -> FunctionNameWordsNumberFilter(filterConfig.maxWordsNumber) - else -> { - filterNotSupported(filterConfig) - null - } - } - } - - private val problem: FunctionLevelProblem = when (config.problem) { - is FunctionNameProblemConfig -> FunctionNameProblem - else -> throw ProblemDefinitionException(Granularity.Function, "FunctionPipelineBranch") - } - - private fun passesThroughFilters(functionInfo: FunctionInfo) = - filters.all { filter -> filter.validate(functionInfo) } - - override fun process(languageHandler: LanguageHandler): Sequence> = - languageHandler.splitIntoFunctions().asSequence() - .filter { functionInfo -> passesThroughFilters(functionInfo) } - .mapNotNull { functionInfo -> problem.process(functionInfo) } -} - -/** - * This exception is thrown when problem granularity is implemented incorrectly or the problem is not specified - * inside the correct pipeline branch. - */ -class ProblemDefinitionException(granularity: Granularity, branchName: String) : - IllegalStateException( - "The specified problem with granularity $granularity is not implemented inside of branch $branchName. " + - "This should never happen!" 
- ) \ No newline at end of file diff --git a/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt new file mode 100644 index 00000000..5d944af9 --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt @@ -0,0 +1,16 @@ +package astminer.pipeline.branch + +import astminer.problem.Granularity + +/** + * This exception is thrown when problem granularity is implemented incorrectly or the problem is not specified + * inside the correct pipeline branch. + */ +class ProblemDefinitionException(granularity: Granularity, problemName: String) : + IllegalStateException("Problem `$problemName` has invalid granularity $granularity") + +/** + * This exception is thrown when the given filter is not implemented for the given granularity + */ +class IllegalFilterException(granularity: Granularity, filterName: String): + IllegalStateException("Unknown filter `$filterName` for granularity $granularity") \ No newline at end of file diff --git a/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt new file mode 100644 index 00000000..eaadcb27 --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt @@ -0,0 +1,36 @@ +package astminer.pipeline.branch + +import astminer.common.model.LanguageHandler +import astminer.common.model.Node +import astminer.common.model.ParseResult +import astminer.config.* +import astminer.filters.FileFilter +import astminer.filters.Filter +import astminer.problem.* + +/** + * PipelineBranch for pipeline with file-level granularity (FilePipelineConfig). + * Works with files as a whole. Tests parsed files with filters and extracts a label from them. + */ +class FilePipelineBranch(config: PipelineConfig) : PipelineBranch { + private val filters: List = config.filters.map { filterConfig -> + filterConfig.filterImplementation as? 
FileFilter + ?: throw IllegalFilterException(Granularity.File, filterConfig.serialName) + } + + private val problem: FileLevelProblem = config.problem.problemImplementation as? FileLevelProblem + ?: throw ProblemDefinitionException(Granularity.File, config.problem.serialName) + + private fun passesThroughFilters(parseResult: ParseResult) = + filters.all { filter -> filter.validate(parseResult) } + + override fun process(languageHandler: LanguageHandler): Sequence> { + val parseResult = languageHandler.parseResult + return if (passesThroughFilters(parseResult)) { + val labeledResult = problem.process(parseResult) ?: return emptySequence() + sequenceOf(labeledResult) + } else { + emptySequence() + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt new file mode 100644 index 00000000..0534ce79 --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt @@ -0,0 +1,33 @@ +package astminer.pipeline.branch + +import astminer.common.model.FunctionInfo +import astminer.common.model.LanguageHandler +import astminer.common.model.Node +import astminer.config.* +import astminer.filters.* +import astminer.problem.* + + +/** + * PipelineBranch for pipeline with function-level granularity (FunctionPipelineConfig). + * Extracts functions from the parsed files. + * Then tests functions with filters, processes them and extracts labels from each function. + */ +class FunctionPipelineBranch(config: PipelineConfig) : PipelineBranch { + private val filters: List = config.filters.map { filterConfig -> + filterConfig.filterImplementation as? FunctionFilter + ?: throw IllegalFilterException(Granularity.Function, filterConfig.serialName) + } + + private val problem: FunctionLevelProblem = config.problem.problemImplementation as? 
FunctionLevelProblem + ?: throw ProblemDefinitionException(Granularity.Function, config.problem.serialName) + + private fun passesThroughFilters(functionInfo: FunctionInfo) = + filters.all { filter -> filter.validate(functionInfo) } + + override fun process(languageHandler: LanguageHandler): Sequence> = + languageHandler.splitIntoFunctions().asSequence() + .filter { functionInfo -> passesThroughFilters(functionInfo) } + .mapNotNull { functionInfo -> problem.process(functionInfo) } +} + diff --git a/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt new file mode 100644 index 00000000..0635998f --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt @@ -0,0 +1,18 @@ +package astminer.pipeline.branch + +import astminer.common.model.LanguageHandler +import astminer.common.model.Node +import astminer.problem.LabeledResult + +/** + * PipelineBranch is a part of the pipeline that can be completely different depending on the granularity (pipeline type) + * It accepts parsed files (LanguageHandler) and returns labeled results. + */ +interface PipelineBranch { + /** + * Extracts labeled results from LanguageHandler + * May mutate the AST. + * Should have no other side-effects + */ + fun process(languageHandler: LanguageHandler): Sequence> +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/problem/FileLevelProblems.kt b/src/main/kotlin/astminer/problem/FileLevelProblems.kt index d8faa229..95fa92ac 100644 --- a/src/main/kotlin/astminer/problem/FileLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FileLevelProblems.kt @@ -4,7 +4,10 @@ import astminer.common.model.Node import astminer.common.model.ParseResult import java.io.File -interface FileLevelProblem { +interface FileLevelProblem : Problem { + override val granularity: Granularity + get() = Granularity.File + fun process(parseResult: ParseResult): LabeledResult? 
} diff --git a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt index b22e8496..286f0ef4 100644 --- a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt @@ -3,7 +3,10 @@ package astminer.problem import astminer.common.model.FunctionInfo import astminer.common.model.Node -interface FunctionLevelProblem { +interface FunctionLevelProblem : Problem { + override val granularity: Granularity + get() = Granularity.Function + fun process(functionInfo: FunctionInfo): LabeledResult? } @@ -18,7 +21,7 @@ object FunctionNameProblem : FunctionLevelProblem { override fun process(functionInfo: FunctionInfo): LabeledResult? { val normalizedName = functionInfo.nameNode?.normalizedToken ?: return null functionInfo.root.preOrder().forEach { node -> - if (node.originalToken == functionInfo.name) { + if (node.originalToken == functionInfo.nameNode?.originalToken) { node.technicalToken = RECURSIVE_CALL_TOKEN } } diff --git a/src/main/kotlin/astminer/problem/Problem.kt b/src/main/kotlin/astminer/problem/Problem.kt new file mode 100644 index 00000000..43a4c3a3 --- /dev/null +++ b/src/main/kotlin/astminer/problem/Problem.kt @@ -0,0 +1,14 @@ +package astminer.problem + +/** + * Problems that have [File] granularity process and extract labels from *files*. + * Problems that have [Function] granularity process and extract labels from *functions* (that are collected from files). 
+ */ +enum class Granularity { + File, + Function +} + +interface Problem { + val granularity: Granularity +} diff --git a/src/test/kotlin/astminer/Utils.kt b/src/test/kotlin/astminer/Utils.kt index 676ac076..d6deacd9 100644 --- a/src/test/kotlin/astminer/Utils.kt +++ b/src/test/kotlin/astminer/Utils.kt @@ -1,5 +1,7 @@ package astminer +import astminer.common.model.Node +import astminer.common.model.Parser import java.io.File fun checkExecutable(execName: String): Boolean { @@ -11,4 +13,6 @@ fun checkExecutable(execName: String): Boolean { } } return false -} \ No newline at end of file +} + +fun Parser.parseFiles(files: List) = files.map { parseFile(it).root } diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index 78d49863..20d8e26b 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -18,6 +18,10 @@ class DummyNode( override fun removeChildrenOfType(typeLabel: String) { children.removeIf { it.typeLabel == typeLabel } } + + fun toParseResult() = ParseResult(this, "") + + fun labeledWith(label: String) = LabeledResult(this, label, "") } /** @@ -78,7 +82,3 @@ fun createBamboo(size: Int): DummyNode { } return root } - -fun T.toParseResult() = ParseResult(this, "") - -fun T.labeledWith(label: String) = LabeledResult(this, label, "") diff --git a/src/test/kotlin/astminer/filters/FileFiltersTest.kt b/src/test/kotlin/astminer/filters/FileFiltersTest.kt index cb90db85..80557632 100644 --- a/src/test/kotlin/astminer/filters/FileFiltersTest.kt +++ b/src/test/kotlin/astminer/filters/FileFiltersTest.kt @@ -1,7 +1,6 @@ package astminer.filters import astminer.common.createBamboo -import astminer.common.toParseResult import org.junit.Test import kotlin.test.assertFalse import kotlin.test.assertTrue diff --git a/src/test/kotlin/astminer/parse/Utils.kt b/src/test/kotlin/astminer/parse/Utils.kt deleted file mode 100644 index 4ae7b169..00000000 --- 
a/src/test/kotlin/astminer/parse/Utils.kt +++ /dev/null @@ -1,7 +0,0 @@ -package astminer.parse - -import astminer.common.model.Node -import astminer.common.model.Parser -import java.io.File - -fun Parser.parseFiles(files: List) = files.map { parseFile(it).root } diff --git a/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt b/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt index a3b66f12..756fec53 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt @@ -1,7 +1,7 @@ package astminer.parse.antlr.java import astminer.common.getProjectFilesWithExtension -import astminer.parse.parseFiles +import astminer.parseFiles import org.junit.Assert import org.junit.Test import java.io.File diff --git a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt index d054fce9..d92c46f6 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt @@ -2,7 +2,7 @@ package astminer.parse.antlr.python import astminer.common.getProjectFilesWithExtension import astminer.common.model.Node -import astminer.parse.parseFiles +import astminer.parseFiles import org.junit.Assert import org.junit.Test import java.io.File diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 95df406a..06a7cc63 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -5,7 +5,7 @@ import astminer.common.getProjectFilesWithExtension import astminer.examples.forFilesWithSuffix import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.parse.fuzzy.cpp.FuzzyNode -import astminer.parse.parseFiles +import astminer.parseFiles import org.junit.Assert import 
org.junit.Assume import org.junit.Before diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt index 02c11511..b4225277 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt @@ -1,7 +1,7 @@ package astminer.parse.gumtree.java import astminer.common.getProjectFilesWithExtension -import astminer.parse.parseFiles +import astminer.parseFiles import org.junit.Assert import org.junit.Test import java.io.* diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt index 8c9ab256..b53c86cb 100644 --- a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt +++ b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt @@ -9,7 +9,6 @@ import java.nio.file.Files internal class Code2VecExtractionTest { private val testDataDir = File("src/test/resources") - // TODO: this test should probably be moved to Code2VecPathStorage @Test fun `test code2vec path extraction from files generates correct folders and files`() { val extractedDataDir = Files.createTempDirectory("extractedData") @@ -20,7 +19,7 @@ internal class Code2VecExtractionTest { inputDir = testDataDir.path, outputDir = extractedDataDir.toAbsolutePath().toString(), parser = ParserConfig(ParserType.Antlr, languages), - problem = FileNameExtractorConfig, + problem = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(8, 3) ) Pipeline(config).run() diff --git a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt index ddb0b49e..b2ec8eca 100644 --- a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt @@ -3,8 +3,6 @@ package astminer.storage.ast import astminer.common.createBamboo import 
astminer.common.createDummyTree import astminer.common.createSmallTree -import astminer.common.labeledWith -import astminer.storage.ast.CsvAstStorage import org.junit.Assert import org.junit.Test From 90b67e64cd19e887a7fa51c204c5921f1f9f707c Mon Sep 17 00:00:00 2001 From: furetur Date: Sun, 30 May 2021 21:21:18 +0500 Subject: [PATCH 225/308] renamed Problem to LabelExtractor --- configs/file-asts-csv-storage.yaml | 2 +- configs/file-path-representation.yaml | 4 ++-- ...n-name-prediction-path-representation.yaml | 2 +- .../kotlin/astminer/cli/FilterPredicates.kt | 0 .../kotlin/astminer/config/FilterConfigs.kt | 14 ++++++------- ...lemConfigs.kt => LabelExtractorConfigs.kt} | 20 +++++++++---------- .../kotlin/astminer/config/PipelineConfigs.kt | 2 +- .../kotlin/astminer/examples/AllCppFiles.kt | 2 +- .../kotlin/astminer/examples/AllJavaAst.kt | 6 +----- .../astminer/examples/AllJavaFilesGumTree.kt | 7 +------ .../astminer/examples/AllJavaScriptFiles.kt | 2 +- .../astminer/examples/AllPythonFiles.kt | 7 +------ .../astminer/examples/Code2VecJavaMethods.kt | 2 +- src/main/kotlin/astminer/pipeline/Pipeline.kt | 5 +---- .../pipeline/branch/FilePipelineBranch.kt | 7 +++---- .../pipeline/branch/FunctionPipelineBranch.kt | 6 +++--- ...evelProblems.kt => FileLabelExtractors.kt} | 7 +++---- ...Problems.kt => FunctionLabelExtractors.kt} | 4 ++-- .../kotlin/astminer/problem/LabelExtractor.kt | 15 ++++++++++++++ src/main/kotlin/astminer/problem/Problem.kt | 14 ------------- .../pipeline/Code2VecExtractionTest.kt | 2 +- ...t.kt => FunctionNameLabelExtractorTest.kt} | 10 +++++----- 22 files changed, 61 insertions(+), 79 deletions(-) delete mode 100644 src/main/kotlin/astminer/cli/FilterPredicates.kt rename src/main/kotlin/astminer/config/{ProblemConfigs.kt => LabelExtractorConfigs.kt} (56%) rename src/main/kotlin/astminer/problem/{FileLevelProblems.kt => FileLabelExtractors.kt} (83%) rename src/main/kotlin/astminer/problem/{FunctionLevelProblems.kt => FunctionLabelExtractors.kt} 
(90%) create mode 100644 src/main/kotlin/astminer/problem/LabelExtractor.kt delete mode 100644 src/main/kotlin/astminer/problem/Problem.kt rename src/test/kotlin/astminer/problem/{FunctionNameProblemTest.kt => FunctionNameLabelExtractorTest.kt} (85%) diff --git a/configs/file-asts-csv-storage.yaml b/configs/file-asts-csv-storage.yaml index 1d1c2856..638b0e50 100644 --- a/configs/file-asts-csv-storage.yaml +++ b/configs/file-asts-csv-storage.yaml @@ -12,7 +12,7 @@ filters: maxTokenWordsNumber: 1000 -problem: +labelExtractor: name: 'file name' storage: diff --git a/configs/file-path-representation.yaml b/configs/file-path-representation.yaml index b662bbd0..62c36d31 100644 --- a/configs/file-path-representation.yaml +++ b/configs/file-path-representation.yaml @@ -9,10 +9,10 @@ filters: - name: 'by tree size' maxTreeSize: 1000 -problem: +labelExtractor: name: 'file name' storage: - name: 'code2vec paths' + name: 'Code2vec' maxPathLength: 5 maxPathWidth: 5 diff --git a/configs/function-name-prediction-path-representation.yaml b/configs/function-name-prediction-path-representation.yaml index 858daedd..b2846b20 100644 --- a/configs/function-name-prediction-path-representation.yaml +++ b/configs/function-name-prediction-path-representation.yaml @@ -11,7 +11,7 @@ filters: - name: 'by words number' maxTokenWordsNumber: 100 -problem: +labelExtractor: name: 'function name' storage: diff --git a/src/main/kotlin/astminer/cli/FilterPredicates.kt b/src/main/kotlin/astminer/cli/FilterPredicates.kt deleted file mode 100644 index e69de29b..00000000 diff --git a/src/main/kotlin/astminer/config/FilterConfigs.kt b/src/main/kotlin/astminer/config/FilterConfigs.kt index 4ff8376c..8bb9ce36 100644 --- a/src/main/kotlin/astminer/config/FilterConfigs.kt +++ b/src/main/kotlin/astminer/config/FilterConfigs.kt @@ -11,7 +11,7 @@ import kotlinx.serialization.Transient @Serializable sealed class FilterConfig { abstract val serialName: String - abstract val filterImplementation: Filter + abstract 
val filterImpl: Filter } /** @@ -23,7 +23,7 @@ data class TreeSizeFilterConfig(val maxTreeSize: Int) : FilterConfig() { override val serialName = "by tree size" @Transient - override val filterImplementation = TreeSizeFilter(maxTreeSize) + override val filterImpl = TreeSizeFilter(maxTreeSize) } /** @@ -35,7 +35,7 @@ data class ModifierFilterConfig(val modifiers: List) : FilterConfig() { override val serialName = "by modifiers" @Transient - override val filterImplementation = ModifierFilter(modifiers) + override val filterImpl = ModifierFilter(modifiers) } /** @@ -47,7 +47,7 @@ data class AnnotationFilterConfig(val annotations: List) : FilterConfig( override val serialName = "by annotations" @Transient - override val filterImplementation = AnnotationFilter(annotations) + override val filterImpl = AnnotationFilter(annotations) } /** @@ -59,7 +59,7 @@ object ConstructorFilterConfig : FilterConfig() { override val serialName = "no constructors" @Transient - override val filterImplementation = ConstructorFilter + override val filterImpl = ConstructorFilter } /** @@ -71,7 +71,7 @@ data class FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : Filter override val serialName = "by function name length" @Transient - override val filterImplementation = FunctionNameWordsNumberFilter(maxWordsNumber) + override val filterImpl = FunctionNameWordsNumberFilter(maxWordsNumber) } /** @@ -83,5 +83,5 @@ data class WordsNumberFilterConfig(val maxTokenWordsNumber: Int) : FilterConfig( override val serialName = "by words number" @Transient - override val filterImplementation = WordsNumberFilter(maxTokenWordsNumber) + override val filterImpl = WordsNumberFilter(maxTokenWordsNumber) } diff --git a/src/main/kotlin/astminer/config/ProblemConfigs.kt b/src/main/kotlin/astminer/config/LabelExtractorConfigs.kt similarity index 56% rename from src/main/kotlin/astminer/config/ProblemConfigs.kt rename to src/main/kotlin/astminer/config/LabelExtractorConfigs.kt index 8cc26cc3..661c9e44 
100644 --- a/src/main/kotlin/astminer/config/ProblemConfigs.kt +++ b/src/main/kotlin/astminer/config/LabelExtractorConfigs.kt @@ -6,11 +6,11 @@ import kotlinx.serialization.Serializable import kotlinx.serialization.Transient @Serializable -sealed class ProblemConfig { - abstract val problemImplementation: Problem +sealed class LabelExtractorConfig { + abstract val labelExtractorImpl: LabelExtractor val granularity: Granularity - get() = problemImplementation.granularity + get() = labelExtractorImpl.granularity abstract val serialName: String } @@ -20,9 +20,9 @@ sealed class ProblemConfig { */ @Serializable @SerialName("file name") -class FileNameExtractorConfig : ProblemConfig() { +class FileNameExtractorConfig : LabelExtractorConfig() { @Transient - override val problemImplementation = FileNameExtractor + override val labelExtractorImpl = FileNameExtractor @Transient override val serialName = "file name" } @@ -32,21 +32,21 @@ class FileNameExtractorConfig : ProblemConfig() { */ @Serializable @SerialName("folder name") -class FolderNameExtractorConfig : ProblemConfig() { +class FolderNameExtractorConfig : LabelExtractorConfig() { @Transient - override val problemImplementation = FolderNameExtractor + override val labelExtractorImpl = FolderNameExtractor @Transient override val serialName = "folder name" } /** - * @see FunctionNameProblem + * @see FunctionNameLabelExtractor */ @Serializable @SerialName("function name") -class FunctionNameProblemConfig : ProblemConfig() { +class FunctionNameExtractorConfig : LabelExtractorConfig() { @Transient - override val problemImplementation = FunctionNameProblem + override val labelExtractorImpl = FunctionNameLabelExtractor @Transient override val serialName = "function name" diff --git a/src/main/kotlin/astminer/config/PipelineConfigs.kt b/src/main/kotlin/astminer/config/PipelineConfigs.kt index 81749e50..ea2425d9 100644 --- a/src/main/kotlin/astminer/config/PipelineConfigs.kt +++ 
b/src/main/kotlin/astminer/config/PipelineConfigs.kt @@ -12,6 +12,6 @@ data class PipelineConfig( val outputDir: String, val parser: ParserConfig, val filters: List = emptyList(), - val problem: ProblemConfig, + val labelExtractor: LabelExtractorConfig, val storage: StorageConfig ) diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt index d16de9e5..e7eb7135 100644 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ b/src/main/kotlin/astminer/examples/AllCppFiles.kt @@ -19,7 +19,7 @@ fun allCppFiles() { inputDir = preprocessedDir.path, outputDir = "out_examples/allCppFiles", parser = ParserConfig(ParserType.Fuzzy, listOf(FileExtension.Cpp)), - problem = FileNameExtractorConfig(), + labelExtractor = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(5, 5) ) diff --git a/src/main/kotlin/astminer/examples/AllJavaAst.kt b/src/main/kotlin/astminer/examples/AllJavaAst.kt index 51c1d21b..e783914c 100644 --- a/src/main/kotlin/astminer/examples/AllJavaAst.kt +++ b/src/main/kotlin/astminer/examples/AllJavaAst.kt @@ -1,11 +1,7 @@ package astminer.examples -import astminer.common.getProjectFilesWithExtension import astminer.config.* -import astminer.storage.ast.CsvAstStorage -import astminer.parse.antlr.java.JavaParser import astminer.pipeline.Pipeline -import java.io.File // Retrieve ASTs from Java files, using a generated parser. 
fun allJavaAsts() { @@ -13,7 +9,7 @@ fun allJavaAsts() { inputDir = "src/test/resources/examples/", outputDir = "out_examples/allJavaAstsAntlr", parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Java)), - problem = FileNameExtractorConfig(), + labelExtractor = FileNameExtractorConfig(), storage = CsvAstStorageConfig(), ) diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index f01f640a..0e0b6691 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -1,12 +1,7 @@ package astminer.examples -import astminer.common.getProjectFilesWithExtension import astminer.config.* -import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.pipeline.Pipeline -import astminer.storage.path.Code2VecPathStorage -import astminer.storage.path.PathBasedStorageConfig -import java.io.File //Retrieve paths from Java files, using a GumTree parser. 
fun allJavaFilesGumTree() { @@ -14,7 +9,7 @@ fun allJavaFilesGumTree() { inputDir = "src/test/resources/gumTreeMethodSplitter/", outputDir = "out_examples/allJavaFilesGumTree", parser = ParserConfig(ParserType.GumTree, listOf(FileExtension.Java)), - problem = FileNameExtractorConfig(), + labelExtractor = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(5, 5) ) Pipeline(config).run() diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt index f20f33b6..f6952851 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt @@ -8,7 +8,7 @@ fun allJavaScriptFiles() { inputDir = "src/test/resources/examples", outputDir = "out_examples/allJavaScriptFilesAntlr", parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.JavaScript)), - problem = FileNameExtractorConfig(), + labelExtractor = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(5, 5) ) diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index e035887d..e759973f 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -1,12 +1,7 @@ package astminer.examples -import astminer.common.getProjectFilesWithExtension import astminer.config.* -import astminer.parse.antlr.python.PythonParser import astminer.pipeline.Pipeline -import astminer.storage.path.PathBasedStorageConfig -import astminer.storage.path.Code2VecPathStorage -import java.io.File fun allPythonFiles() { @@ -14,7 +9,7 @@ fun allPythonFiles() { inputDir = "src/test/resources/examples", outputDir = "out_examples/allPythonFiles", parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Python)), - problem = FileNameExtractorConfig(), + labelExtractor = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(5, 5) ) diff --git 
a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 58ba4502..70d1a011 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -14,7 +14,7 @@ fun code2vecJavaMethods() { inputDir = folder, outputDir = outputDir, parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Java)), - problem = FunctionNameProblemConfig(), + labelExtractor = FunctionNameExtractorConfig(), storage = Code2VecPathStorageConfig( maxPathLength = 5, maxPathWidth = 5 diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 19c288ed..bbc1f0c0 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -7,9 +7,6 @@ import astminer.pipeline.branch.FilePipelineBranch import astminer.pipeline.branch.FunctionPipelineBranch import astminer.problem.Granularity import astminer.storage.Storage -import astminer.storage.ast.CsvAstStorage -import astminer.storage.ast.DotAstStorage -import astminer.storage.path.Code2VecPathStorage import java.io.File /** @@ -20,7 +17,7 @@ class Pipeline(private val config: PipelineConfig) { private val inputDirectory = File(config.inputDir) private val outputDirectory = File(config.outputDir) - private val branch = when (config.problem.granularity) { + private val branch = when (config.labelExtractor.granularity) { Granularity.File -> FilePipelineBranch(config) Granularity.Function -> FunctionPipelineBranch(config) } diff --git a/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt index eaadcb27..c1bb7ccb 100644 --- a/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt @@ -5,7 +5,6 @@ import astminer.common.model.Node import astminer.common.model.ParseResult 
import astminer.config.* import astminer.filters.FileFilter -import astminer.filters.Filter import astminer.problem.* /** @@ -14,12 +13,12 @@ import astminer.problem.* */ class FilePipelineBranch(config: PipelineConfig) : PipelineBranch { private val filters: List = config.filters.map { filterConfig -> - filterConfig.filterImplementation as? FileFilter + filterConfig.filterImpl as? FileFilter ?: throw IllegalFilterException(Granularity.File, filterConfig.serialName) } - private val problem: FileLevelProblem = config.problem.problemImplementation as? FileLevelProblem - ?: throw ProblemDefinitionException(Granularity.File, config.problem.serialName) + private val problem: FileLabelExtractor = config.labelExtractor.labelExtractorImpl as? FileLabelExtractor + ?: throw ProblemDefinitionException(Granularity.File, config.labelExtractor.serialName) private fun passesThroughFilters(parseResult: ParseResult) = filters.all { filter -> filter.validate(parseResult) } diff --git a/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt index 0534ce79..a526c078 100644 --- a/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt @@ -15,12 +15,12 @@ import astminer.problem.* */ class FunctionPipelineBranch(config: PipelineConfig) : PipelineBranch { private val filters: List = config.filters.map { filterConfig -> - filterConfig.filterImplementation as? FunctionFilter + filterConfig.filterImpl as? FunctionFilter ?: throw IllegalFilterException(Granularity.Function, filterConfig.serialName) } - private val problem: FunctionLevelProblem = config.problem.problemImplementation as? FunctionLevelProblem - ?: throw ProblemDefinitionException(Granularity.Function, config.problem.serialName) + private val problem: FunctionLabelExtractor = config.labelExtractor.labelExtractorImpl as? 
FunctionLabelExtractor + ?: throw ProblemDefinitionException(Granularity.Function, config.labelExtractor.serialName) private fun passesThroughFilters(functionInfo: FunctionInfo) = filters.all { filter -> filter.validate(functionInfo) } diff --git a/src/main/kotlin/astminer/problem/FileLevelProblems.kt b/src/main/kotlin/astminer/problem/FileLabelExtractors.kt similarity index 83% rename from src/main/kotlin/astminer/problem/FileLevelProblems.kt rename to src/main/kotlin/astminer/problem/FileLabelExtractors.kt index 95fa92ac..781003b5 100644 --- a/src/main/kotlin/astminer/problem/FileLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FileLabelExtractors.kt @@ -4,7 +4,7 @@ import astminer.common.model.Node import astminer.common.model.ParseResult import java.io.File -interface FileLevelProblem : Problem { +interface FileLabelExtractor : LabelExtractor { override val granularity: Granularity get() = Granularity.File @@ -14,16 +14,15 @@ interface FileLevelProblem : Problem { /** * Labels files with folder names */ -object FileNameExtractor : FileLevelProblem { +object FileNameExtractor : FileLabelExtractor { override fun process(parseResult: ParseResult): LabeledResult = parseResult.labeledWith(File(parseResult.filePath).name) - } /** * Labels files with folder names */ -object FolderNameExtractor : FileLevelProblem { +object FolderNameExtractor : FileLabelExtractor { override fun process(parseResult: ParseResult): LabeledResult? 
{ val folderName = File(parseResult.filePath).parentFile?.name ?: return null return parseResult.labeledWith(folderName) diff --git a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt b/src/main/kotlin/astminer/problem/FunctionLabelExtractors.kt similarity index 90% rename from src/main/kotlin/astminer/problem/FunctionLevelProblems.kt rename to src/main/kotlin/astminer/problem/FunctionLabelExtractors.kt index 286f0ef4..8e71d31e 100644 --- a/src/main/kotlin/astminer/problem/FunctionLevelProblems.kt +++ b/src/main/kotlin/astminer/problem/FunctionLabelExtractors.kt @@ -3,7 +3,7 @@ package astminer.problem import astminer.common.model.FunctionInfo import astminer.common.model.Node -interface FunctionLevelProblem : Problem { +interface FunctionLabelExtractor : LabelExtractor { override val granularity: Granularity get() = Granularity.Function @@ -14,7 +14,7 @@ interface FunctionLevelProblem : Problem { * Labels functions with their names. * Hides the name of the function in the subtree and also all in the recursive calls. */ -object FunctionNameProblem : FunctionLevelProblem { +object FunctionNameLabelExtractor : FunctionLabelExtractor { const val HIDDEN_METHOD_NAME_TOKEN = "METHOD_NAME" const val RECURSIVE_CALL_TOKEN = "SELF" diff --git a/src/main/kotlin/astminer/problem/LabelExtractor.kt b/src/main/kotlin/astminer/problem/LabelExtractor.kt new file mode 100644 index 00000000..fc9eb0f3 --- /dev/null +++ b/src/main/kotlin/astminer/problem/LabelExtractor.kt @@ -0,0 +1,15 @@ +package astminer.problem + +/** + * Label extractors that have [File] granularity process and extract labels from *files*. + * Label extractors that have [Function] granularity process and extract labels from *functions* + * (that are collected from files). 
+ */ +enum class Granularity { + File, + Function +} + +interface LabelExtractor { + val granularity: Granularity +} diff --git a/src/main/kotlin/astminer/problem/Problem.kt b/src/main/kotlin/astminer/problem/Problem.kt deleted file mode 100644 index 43a4c3a3..00000000 --- a/src/main/kotlin/astminer/problem/Problem.kt +++ /dev/null @@ -1,14 +0,0 @@ -package astminer.problem - -/** - * Problems that have [File] granularity process and extract labels from *files*. - * Problems that have [Function] granularity process and extract labels from *functions* (that are collected from files). - */ -enum class Granularity { - File, - Function -} - -interface Problem { - val granularity: Granularity -} diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt index b53c86cb..d59b9bb9 100644 --- a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt +++ b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt @@ -19,7 +19,7 @@ internal class Code2VecExtractionTest { inputDir = testDataDir.path, outputDir = extractedDataDir.toAbsolutePath().toString(), parser = ParserConfig(ParserType.Antlr, languages), - problem = FileNameExtractorConfig(), + labelExtractor = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(8, 3) ) Pipeline(config).run() diff --git a/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt b/src/test/kotlin/astminer/problem/FunctionNameLabelExtractorTest.kt similarity index 85% rename from src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt rename to src/test/kotlin/astminer/problem/FunctionNameLabelExtractorTest.kt index e30c3792..90c19683 100644 --- a/src/test/kotlin/astminer/problem/FunctionNameProblemTest.kt +++ b/src/test/kotlin/astminer/problem/FunctionNameLabelExtractorTest.kt @@ -7,7 +7,7 @@ import org.junit.Before import org.junit.Test import kotlin.test.assertEquals -class FunctionNameProblemTest { +class FunctionNameLabelExtractorTest { 
companion object { private const val PATH = "random/folder/file.txt" private const val FUNCTION_NAME = "method" @@ -31,25 +31,25 @@ class FunctionNameProblemTest { @Test fun `test FunctionNameProblem extracts correct method name`() { - val labeledResult = FunctionNameProblem.process(functionInfo) + val labeledResult = FunctionNameLabelExtractor.process(functionInfo) assertEquals(LabeledResult(functionRoot, FUNCTION_NAME, PATH), labeledResult) } @Test fun `test FunctionNameProblem hides function name node token with METHOD_NAME`() { - FunctionNameProblem.process(functionInfo) + FunctionNameLabelExtractor.process(functionInfo) assertEquals("METHOD_NAME", functionInfo.nameNode?.token) } @Test fun `test FunctionNameProblem hides function root token with METHOD_NAME if it is the name node`() { - FunctionNameProblem.process(functionInfo) + FunctionNameLabelExtractor.process(functionInfo) assertEquals("METHOD_NAME", functionInfo.root.token) } @Test fun `test function name problem should hide recursive call tokens with SELF`() { - FunctionNameProblem.process(functionInfo) + FunctionNameLabelExtractor.process(functionInfo) val recursiveCallNode = functionInfo.root.children.firstOrNull()?.children?.firstOrNull() assertEquals("SELF", recursiveCallNode?.token) } From 649a63a34de91231389cbbeced46e5915f41e0de Mon Sep 17 00:00:00 2001 From: furetur Date: Sun, 30 May 2021 21:32:28 +0500 Subject: [PATCH 226/308] fixed TreeSizeFilterConfig --- src/main/kotlin/astminer/config/FilterConfigs.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/astminer/config/FilterConfigs.kt b/src/main/kotlin/astminer/config/FilterConfigs.kt index 8bb9ce36..05eb325b 100644 --- a/src/main/kotlin/astminer/config/FilterConfigs.kt +++ b/src/main/kotlin/astminer/config/FilterConfigs.kt @@ -19,11 +19,11 @@ sealed class FilterConfig { */ @Serializable @SerialName("by tree size") -data class TreeSizeFilterConfig(val maxTreeSize: Int) : FilterConfig() { +data class 
TreeSizeFilterConfig(val minTreeSize: Int = 0, val maxTreeSize: Int? = null) : FilterConfig() { override val serialName = "by tree size" @Transient - override val filterImpl = TreeSizeFilter(maxTreeSize) + override val filterImpl = TreeSizeFilter(minTreeSize, maxTreeSize) } /** From 6718cc910d2e8ee97922d769c45f1c0ed955263f Mon Sep 17 00:00:00 2001 From: furetur Date: Sun, 30 May 2021 21:39:38 +0500 Subject: [PATCH 227/308] fixed docs --- src/main/kotlin/astminer/Main.kt | 2 +- src/main/kotlin/astminer/pipeline/branch/Exceptions.kt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index ec510a9c..826d90b5 100644 --- a/src/main/kotlin/astminer/Main.kt +++ b/src/main/kotlin/astminer/Main.kt @@ -30,7 +30,7 @@ class PipelineRunner : CliktCommand(name = "") { val config = yaml.decodeFromString(config.readText()) Pipeline(config).run() } catch (e: SerializationException) { - report("Could not read config", e) + report("There was a problem in the config", e) } catch (e: ProblemDefinitionException) { report("Problem is defined incorrectly", e) } catch (e: IllegalFilterException) { diff --git a/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt index 5d944af9..e371bebf 100644 --- a/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt +++ b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt @@ -3,8 +3,7 @@ package astminer.pipeline.branch import astminer.problem.Granularity /** - * This exception is thrown when problem granularity is implemented incorrectly or the problem is not specified - * inside the correct pipeline branch. + * This exception is thrown when label extractor's granularity is implemented incorrectly. 
*/ class ProblemDefinitionException(granularity: Granularity, problemName: String) : IllegalStateException("Problem `$problemName` has invalid granularity $granularity") From 6bc35b78a69f5c98a2a62308d7cebb87c920b745 Mon Sep 17 00:00:00 2001 From: furetur Date: Mon, 31 May 2021 19:04:40 +0500 Subject: [PATCH 228/308] fixed initialization of pipeline branches + small fixes --- .../java/astminer/examples/AllJavaFiles.java | 3 +- src/main/kotlin/astminer/Main.kt | 6 ++-- .../astminer/common/model/HandlerModel.kt | 4 +-- .../astminer/common/model/ParsingModel.kt | 4 +-- .../astminer/common/model/PipelineModel.kt | 5 ++++ .../kotlin/astminer/config/FilterConfigs.kt | 14 +--------- .../astminer/config/LabelExtractorConfigs.kt | 15 ++-------- .../kotlin/astminer/examples/AllJavaFiles.kt | 2 +- .../astminer/examples/AllJavaMethods.kt | 3 +- .../astminer/examples/AllPythonMethods.kt | 2 +- .../kotlin/astminer/filters/FileFilters.kt | 1 + src/main/kotlin/astminer/filters/Filter.kt | 3 -- .../astminer/filters/FunctionFilters.kt | 1 + .../FileLabelExtractors.kt | 6 ++-- .../FunctionLabelExtractors.kt | 6 ++-- .../LabeledResult.kt | 2 +- src/main/kotlin/astminer/pipeline/Pipeline.kt | 14 +++++++--- .../astminer/pipeline/branch/Exceptions.kt | 10 +++---- .../pipeline/branch/FilePipelineBranch.kt | 28 ++++++++++--------- .../pipeline/branch/FunctionPipelineBranch.kt | 25 +++++++++-------- .../pipeline/branch/PipelineBranch.kt | 4 +-- .../kotlin/astminer/problem/LabelExtractor.kt | 15 ---------- src/main/kotlin/astminer/storage/Storage.kt | 2 +- .../astminer/storage/ast/CsvAstStorage.kt | 2 +- .../astminer/storage/ast/DotAstStorage.kt | 2 +- .../astminer/storage/path/PathBasedStorage.kt | 2 +- src/test/kotlin/astminer/common/TestUtils.kt | 2 +- .../FileNameExtractorTest.kt | 2 +- .../FolderNameExtractorTest.kt | 2 +- .../FunctionNameLabelExtractorTest.kt | 2 +- 30 files changed, 77 insertions(+), 112 deletions(-) create mode 100644 
src/main/kotlin/astminer/common/model/PipelineModel.kt delete mode 100644 src/main/kotlin/astminer/filters/Filter.kt rename src/main/kotlin/astminer/{problem => labelextractor}/FileLabelExtractors.kt (88%) rename src/main/kotlin/astminer/{problem => labelextractor}/FunctionLabelExtractors.kt (91%) rename src/main/kotlin/astminer/{problem => labelextractor}/LabeledResult.kt (91%) delete mode 100644 src/main/kotlin/astminer/problem/LabelExtractor.kt rename src/test/kotlin/astminer/{problem => labelextractor}/FileNameExtractorTest.kt (95%) rename src/test/kotlin/astminer/{problem => labelextractor}/FolderNameExtractorTest.kt (96%) rename src/test/kotlin/astminer/{problem => labelextractor}/FunctionNameLabelExtractorTest.kt (98%) diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index e423252a..5591f864 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -1,9 +1,8 @@ package astminer.examples; -import astminer.problem.LabeledResult; +import astminer.labelextractor.LabeledResult; import astminer.common.model.*; import astminer.parse.gumtree.java.GumTreeJavaParser; -import astminer.storage.*; import astminer.storage.path.Code2VecPathStorage; import astminer.storage.path.PathBasedStorage; import astminer.storage.path.PathBasedStorageConfig; diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index 826d90b5..f31b1cdb 100644 --- a/src/main/kotlin/astminer/Main.kt +++ b/src/main/kotlin/astminer/Main.kt @@ -4,7 +4,7 @@ import astminer.common.model.FunctionInfoPropertyNotImplementedException import astminer.config.PipelineConfig import astminer.pipeline.Pipeline import astminer.pipeline.branch.IllegalFilterException -import astminer.pipeline.branch.ProblemDefinitionException +import astminer.pipeline.branch.IllegalLabelExtractorException import com.charleskorn.kaml.PolymorphismStyle import 
com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.arguments.argument @@ -31,8 +31,8 @@ class PipelineRunner : CliktCommand(name = "") { Pipeline(config).run() } catch (e: SerializationException) { report("There was a problem in the config", e) - } catch (e: ProblemDefinitionException) { - report("Problem is defined incorrectly", e) + } catch (e: IllegalLabelExtractorException) { + report("PipelineBranch for given label extractor not found", e) } catch (e: IllegalFilterException) { report("The chosen filter is not implemented for the chosen granularity", e) } catch (e: FunctionInfoPropertyNotImplementedException) { diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index d9a319b0..8ec1878e 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -9,10 +9,10 @@ private val logger = KotlinLogging.logger("HandlerFactory") interface HandlerFactory { fun createHandler(file: File): LanguageHandler - fun createHandlers(files: List, handleResult: (LanguageHandler) -> Any?) 
{ + fun createHandlers(files: List, action: (LanguageHandler) -> Unit) { for (file in files) { try { - handleResult(createHandler(file)) + action(createHandler(file)) } catch (parsingException: ParsingException) { logger.error(parsingException) { "Failed to parse file ${file.path}" } } diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index eb4c6bdf..bbfa310b 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -1,10 +1,8 @@ package astminer.common.model -import astminer.problem.LabeledResult +import astminer.labelextractor.LabeledResult import astminer.common.DEFAULT_TOKEN import astminer.common.splitToSubtokens -import astminer.parse.ParsingException -import mu.KotlinLogging import java.io.File import java.io.InputStream import java.util.* diff --git a/src/main/kotlin/astminer/common/model/PipelineModel.kt b/src/main/kotlin/astminer/common/model/PipelineModel.kt new file mode 100644 index 00000000..25dc6dc4 --- /dev/null +++ b/src/main/kotlin/astminer/common/model/PipelineModel.kt @@ -0,0 +1,5 @@ +package astminer.common.model + +interface Filter + +interface LabelExtractor diff --git a/src/main/kotlin/astminer/config/FilterConfigs.kt b/src/main/kotlin/astminer/config/FilterConfigs.kt index 05eb325b..0d0760bd 100644 --- a/src/main/kotlin/astminer/config/FilterConfigs.kt +++ b/src/main/kotlin/astminer/config/FilterConfigs.kt @@ -1,5 +1,6 @@ package astminer.config +import astminer.common.model.Filter import astminer.filters.* import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable @@ -10,7 +11,6 @@ import kotlinx.serialization.Transient */ @Serializable sealed class FilterConfig { - abstract val serialName: String abstract val filterImpl: Filter } @@ -20,8 +20,6 @@ sealed class FilterConfig { @Serializable @SerialName("by tree size") data class TreeSizeFilterConfig(val minTreeSize: Int = 0, 
val maxTreeSize: Int? = null) : FilterConfig() { - override val serialName = "by tree size" - @Transient override val filterImpl = TreeSizeFilter(minTreeSize, maxTreeSize) } @@ -32,8 +30,6 @@ data class TreeSizeFilterConfig(val minTreeSize: Int = 0, val maxTreeSize: Int? @Serializable @SerialName("by modifiers") data class ModifierFilterConfig(val modifiers: List) : FilterConfig() { - override val serialName = "by modifiers" - @Transient override val filterImpl = ModifierFilter(modifiers) } @@ -44,8 +40,6 @@ data class ModifierFilterConfig(val modifiers: List) : FilterConfig() { @Serializable @SerialName("by annotations") data class AnnotationFilterConfig(val annotations: List) : FilterConfig() { - override val serialName = "by annotations" - @Transient override val filterImpl = AnnotationFilter(annotations) } @@ -56,8 +50,6 @@ data class AnnotationFilterConfig(val annotations: List) : FilterConfig( @Serializable @SerialName("no constructors") object ConstructorFilterConfig : FilterConfig() { - override val serialName = "no constructors" - @Transient override val filterImpl = ConstructorFilter } @@ -68,8 +60,6 @@ object ConstructorFilterConfig : FilterConfig() { @Serializable @SerialName("by function name length") data class FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : FilterConfig() { - override val serialName = "by function name length" - @Transient override val filterImpl = FunctionNameWordsNumberFilter(maxWordsNumber) } @@ -80,8 +70,6 @@ data class FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : Filter @Serializable @SerialName("by words number") data class WordsNumberFilterConfig(val maxTokenWordsNumber: Int) : FilterConfig() { - override val serialName = "by words number" - @Transient override val filterImpl = WordsNumberFilter(maxTokenWordsNumber) } diff --git a/src/main/kotlin/astminer/config/LabelExtractorConfigs.kt b/src/main/kotlin/astminer/config/LabelExtractorConfigs.kt index 661c9e44..679b46a9 100644 --- 
a/src/main/kotlin/astminer/config/LabelExtractorConfigs.kt +++ b/src/main/kotlin/astminer/config/LabelExtractorConfigs.kt @@ -1,6 +1,7 @@ package astminer.config -import astminer.problem.* +import astminer.common.model.LabelExtractor +import astminer.labelextractor.* import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable import kotlinx.serialization.Transient @@ -8,11 +9,6 @@ import kotlinx.serialization.Transient @Serializable sealed class LabelExtractorConfig { abstract val labelExtractorImpl: LabelExtractor - - val granularity: Granularity - get() = labelExtractorImpl.granularity - - abstract val serialName: String } /** @@ -23,8 +19,6 @@ sealed class LabelExtractorConfig { class FileNameExtractorConfig : LabelExtractorConfig() { @Transient override val labelExtractorImpl = FileNameExtractor - @Transient - override val serialName = "file name" } /** @@ -35,8 +29,6 @@ class FileNameExtractorConfig : LabelExtractorConfig() { class FolderNameExtractorConfig : LabelExtractorConfig() { @Transient override val labelExtractorImpl = FolderNameExtractor - @Transient - override val serialName = "folder name" } /** @@ -47,7 +39,4 @@ class FolderNameExtractorConfig : LabelExtractorConfig() { class FunctionNameExtractorConfig : LabelExtractorConfig() { @Transient override val labelExtractorImpl = FunctionNameLabelExtractor - - @Transient - override val serialName = "function name" } diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index d52da288..eef3b957 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -1,6 +1,6 @@ package astminer.examples -import astminer.problem.LabeledResult +import astminer.labelextractor.LabeledResult import astminer.parse.antlr.java.JavaFunctionSplitter import astminer.parse.antlr.java.JavaParser import astminer.storage.path.Code2VecPathStorage diff --git 
a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 447e3b63..8831196b 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -1,11 +1,10 @@ package astminer.examples -import astminer.problem.LabeledResult +import astminer.labelextractor.LabeledResult import astminer.common.model.FunctionInfo import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.parse.gumtree.java.GumTreeJavaFunctionSplitter -import astminer.storage.* import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index f674f3dd..e828a539 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -1,6 +1,6 @@ package astminer.examples -import astminer.problem.LabeledResult +import astminer.labelextractor.LabeledResult import astminer.common.model.FunctionInfo import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter import astminer.parse.gumtree.GumTreeNode diff --git a/src/main/kotlin/astminer/filters/FileFilters.kt b/src/main/kotlin/astminer/filters/FileFilters.kt index 080060a6..fd3dfe3e 100644 --- a/src/main/kotlin/astminer/filters/FileFilters.kt +++ b/src/main/kotlin/astminer/filters/FileFilters.kt @@ -1,5 +1,6 @@ package astminer.filters +import astminer.common.model.Filter import astminer.common.model.Node import astminer.common.model.ParseResult diff --git a/src/main/kotlin/astminer/filters/Filter.kt b/src/main/kotlin/astminer/filters/Filter.kt deleted file mode 100644 index 0845bd6a..00000000 --- a/src/main/kotlin/astminer/filters/Filter.kt +++ /dev/null @@ -1,3 +0,0 @@ -package astminer.filters - -interface Filter diff --git 
a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index c9617b7a..a47942a3 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -1,5 +1,6 @@ package astminer.filters +import astminer.common.model.Filter import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.splitToSubtokens diff --git a/src/main/kotlin/astminer/problem/FileLabelExtractors.kt b/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt similarity index 88% rename from src/main/kotlin/astminer/problem/FileLabelExtractors.kt rename to src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt index 781003b5..c020f977 100644 --- a/src/main/kotlin/astminer/problem/FileLabelExtractors.kt +++ b/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt @@ -1,13 +1,11 @@ -package astminer.problem +package astminer.labelextractor +import astminer.common.model.LabelExtractor import astminer.common.model.Node import astminer.common.model.ParseResult import java.io.File interface FileLabelExtractor : LabelExtractor { - override val granularity: Granularity - get() = Granularity.File - fun process(parseResult: ParseResult): LabeledResult? 
} diff --git a/src/main/kotlin/astminer/problem/FunctionLabelExtractors.kt b/src/main/kotlin/astminer/labelextractor/FunctionLabelExtractors.kt similarity index 91% rename from src/main/kotlin/astminer/problem/FunctionLabelExtractors.kt rename to src/main/kotlin/astminer/labelextractor/FunctionLabelExtractors.kt index 8e71d31e..db892167 100644 --- a/src/main/kotlin/astminer/problem/FunctionLabelExtractors.kt +++ b/src/main/kotlin/astminer/labelextractor/FunctionLabelExtractors.kt @@ -1,12 +1,10 @@ -package astminer.problem +package astminer.labelextractor import astminer.common.model.FunctionInfo +import astminer.common.model.LabelExtractor import astminer.common.model.Node interface FunctionLabelExtractor : LabelExtractor { - override val granularity: Granularity - get() = Granularity.Function - fun process(functionInfo: FunctionInfo): LabeledResult? } diff --git a/src/main/kotlin/astminer/problem/LabeledResult.kt b/src/main/kotlin/astminer/labelextractor/LabeledResult.kt similarity index 91% rename from src/main/kotlin/astminer/problem/LabeledResult.kt rename to src/main/kotlin/astminer/labelextractor/LabeledResult.kt index 1b4b9b8c..0031cbfe 100644 --- a/src/main/kotlin/astminer/problem/LabeledResult.kt +++ b/src/main/kotlin/astminer/labelextractor/LabeledResult.kt @@ -1,4 +1,4 @@ -package astminer.problem +package astminer.labelextractor import astminer.common.model.Node diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index bbc1f0c0..4e00f9a0 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -5,7 +5,9 @@ import astminer.config.* import astminer.parse.getHandlerFactory import astminer.pipeline.branch.FilePipelineBranch import astminer.pipeline.branch.FunctionPipelineBranch -import astminer.problem.Granularity +import astminer.pipeline.branch.IllegalLabelExtractorException +import astminer.labelextractor.FileLabelExtractor +import 
astminer.labelextractor.FunctionLabelExtractor import astminer.storage.Storage import java.io.File @@ -17,9 +19,13 @@ class Pipeline(private val config: PipelineConfig) { private val inputDirectory = File(config.inputDir) private val outputDirectory = File(config.outputDir) - private val branch = when (config.labelExtractor.granularity) { - Granularity.File -> FilePipelineBranch(config) - Granularity.Function -> FunctionPipelineBranch(config) + private val filters = config.filters.map { it.filterImpl } + private val labelExtractor = config.labelExtractor.labelExtractorImpl + + private val branch = when (labelExtractor) { + is FileLabelExtractor -> FilePipelineBranch(filters, labelExtractor) + is FunctionLabelExtractor -> FunctionPipelineBranch(filters, labelExtractor) + else -> throw IllegalLabelExtractorException(labelExtractor::class.simpleName) } private fun createStorageDirectory(extension: FileExtension): File { diff --git a/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt index e371bebf..e1b73c62 100644 --- a/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt +++ b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt @@ -1,15 +1,13 @@ package astminer.pipeline.branch -import astminer.problem.Granularity - /** * This exception is thrown when label extractor's granularity is implemented incorrectly. */ -class ProblemDefinitionException(granularity: Granularity, problemName: String) : - IllegalStateException("Problem `$problemName` has invalid granularity $granularity") +class IllegalLabelExtractorException(problemName: String?) 
: + IllegalStateException("Unknown label extractor `${problemName ?: "anonymous"}`") /** * This exception is thrown when the given filter is not implemented for the given granularity */ -class IllegalFilterException(granularity: Granularity, filterName: String): - IllegalStateException("Unknown filter `$filterName` for granularity $granularity") \ No newline at end of file +class IllegalFilterException(granularity: String, filterName: String?): + IllegalStateException("Unknown filter `${filterName ?: "anonymous"}` for $granularity granularity") \ No newline at end of file diff --git a/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt index c1bb7ccb..5b099285 100644 --- a/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt @@ -3,33 +3,35 @@ package astminer.pipeline.branch import astminer.common.model.LanguageHandler import astminer.common.model.Node import astminer.common.model.ParseResult -import astminer.config.* import astminer.filters.FileFilter -import astminer.problem.* +import astminer.common.model.Filter +import astminer.labelextractor.* /** * PipelineBranch for pipeline with file-level granularity (FilePipelineConfig). * Works with files as a whole. Tests parsed files with filters and extracts a label from them. */ -class FilePipelineBranch(config: PipelineConfig) : PipelineBranch { - private val filters: List = config.filters.map { filterConfig -> - filterConfig.filterImpl as? FileFilter - ?: throw IllegalFilterException(Granularity.File, filterConfig.serialName) - } +class FilePipelineBranch( + filters: List, + private val labelExtractor: FileLabelExtractor +) : PipelineBranch { - private val problem: FileLabelExtractor = config.labelExtractor.labelExtractorImpl as? 
FileLabelExtractor - ?: throw ProblemDefinitionException(Granularity.File, config.labelExtractor.serialName) + private val filters: List = filters.map { filter -> + filter as? FileFilter + ?: throw IllegalFilterException("file", filter::class.simpleName) + } private fun passesThroughFilters(parseResult: ParseResult) = filters.all { filter -> filter.validate(parseResult) } - override fun process(languageHandler: LanguageHandler): Sequence> { + override fun process(languageHandler: LanguageHandler): List> { val parseResult = languageHandler.parseResult return if (passesThroughFilters(parseResult)) { - val labeledResult = problem.process(parseResult) ?: return emptySequence() - sequenceOf(labeledResult) + val labeledResult = labelExtractor.process(parseResult) ?: return emptyList() + listOf(labeledResult) } else { - emptySequence() + emptyList() } } + } \ No newline at end of file diff --git a/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt index a526c078..0201d144 100644 --- a/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt @@ -1,11 +1,11 @@ package astminer.pipeline.branch +import astminer.common.model.Filter import astminer.common.model.FunctionInfo import astminer.common.model.LanguageHandler import astminer.common.model.Node -import astminer.config.* import astminer.filters.* -import astminer.problem.* +import astminer.labelextractor.* /** @@ -13,21 +13,22 @@ import astminer.problem.* * Extracts functions from the parsed files. * Then tests functions with filters, processes them and extracts labels from each function. */ -class FunctionPipelineBranch(config: PipelineConfig) : PipelineBranch { - private val filters: List = config.filters.map { filterConfig -> - filterConfig.filterImpl as? 
FunctionFilter - ?: throw IllegalFilterException(Granularity.Function, filterConfig.serialName) - } +class FunctionPipelineBranch( + filters: List, + private val labelExtractor: FunctionLabelExtractor +) : PipelineBranch { - private val problem: FunctionLabelExtractor = config.labelExtractor.labelExtractorImpl as? FunctionLabelExtractor - ?: throw ProblemDefinitionException(Granularity.Function, config.labelExtractor.serialName) + private val filters: List = filters.map { filter -> + filter as? FunctionFilter + ?: throw IllegalFilterException("function", filter::class.simpleName) + } private fun passesThroughFilters(functionInfo: FunctionInfo) = filters.all { filter -> filter.validate(functionInfo) } - override fun process(languageHandler: LanguageHandler): Sequence> = - languageHandler.splitIntoFunctions().asSequence() + override fun process(languageHandler: LanguageHandler): List> = + languageHandler.splitIntoFunctions() .filter { functionInfo -> passesThroughFilters(functionInfo) } - .mapNotNull { functionInfo -> problem.process(functionInfo) } + .mapNotNull { functionInfo -> labelExtractor.process(functionInfo) } } diff --git a/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt index 0635998f..65caa010 100644 --- a/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt @@ -2,7 +2,7 @@ package astminer.pipeline.branch import astminer.common.model.LanguageHandler import astminer.common.model.Node -import astminer.problem.LabeledResult +import astminer.labelextractor.LabeledResult /** * PipelineBranch is a part of the pipeline that can be completely different depending on the granularity (pipeline type) @@ -14,5 +14,5 @@ interface PipelineBranch { * May mutate the AST. 
* Should have no other side-effects */ - fun process(languageHandler: LanguageHandler): Sequence> + fun process(languageHandler: LanguageHandler): List> } \ No newline at end of file diff --git a/src/main/kotlin/astminer/problem/LabelExtractor.kt b/src/main/kotlin/astminer/problem/LabelExtractor.kt deleted file mode 100644 index fc9eb0f3..00000000 --- a/src/main/kotlin/astminer/problem/LabelExtractor.kt +++ /dev/null @@ -1,15 +0,0 @@ -package astminer.problem - -/** - * Label extractors that have [File] granularity process and extract labels from *files*. - * Label extractors that have [Function] granularity process and extract labels from *functions* - * (that are collected from files). - */ -enum class Granularity { - File, - Function -} - -interface LabelExtractor { - val granularity: Granularity -} diff --git a/src/main/kotlin/astminer/storage/Storage.kt b/src/main/kotlin/astminer/storage/Storage.kt index 703cd37f..b04d5834 100644 --- a/src/main/kotlin/astminer/storage/Storage.kt +++ b/src/main/kotlin/astminer/storage/Storage.kt @@ -1,6 +1,6 @@ package astminer.storage -import astminer.problem.LabeledResult +import astminer.labelextractor.LabeledResult import astminer.common.model.Node import java.io.Closeable diff --git a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt index f0900081..b1446b70 100644 --- a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -1,6 +1,6 @@ package astminer.storage.ast -import astminer.problem.LabeledResult +import astminer.labelextractor.LabeledResult import astminer.common.model.Node import astminer.common.storage.RankedIncrementalIdStorage import astminer.common.storage.dumpIdStorageToCsv diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index 5ec82326..9f663716 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ 
b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -1,6 +1,6 @@ package astminer.storage.ast -import astminer.problem.LabeledResult +import astminer.labelextractor.LabeledResult import astminer.common.model.Node import astminer.common.storage.RankedIncrementalIdStorage import astminer.storage.Storage diff --git a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index 81c1b05c..64c7d0f6 100644 --- a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -1,6 +1,6 @@ package astminer.storage.path -import astminer.problem.LabeledResult +import astminer.labelextractor.LabeledResult import astminer.common.model.* import astminer.common.storage.* import astminer.paths.PathMiner diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index 20d8e26b..4a233837 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -1,6 +1,6 @@ package astminer.common -import astminer.problem.LabeledResult +import astminer.labelextractor.LabeledResult import astminer.common.model.Node import astminer.common.model.ParseResult diff --git a/src/test/kotlin/astminer/problem/FileNameExtractorTest.kt b/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt similarity index 95% rename from src/test/kotlin/astminer/problem/FileNameExtractorTest.kt rename to src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt index a6c0f7a8..bf7690f4 100644 --- a/src/test/kotlin/astminer/problem/FileNameExtractorTest.kt +++ b/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt @@ -1,4 +1,4 @@ -package astminer.problem +package astminer.labelextractor import astminer.common.model.ParseResult import astminer.parse.antlr.AntlrNode diff --git a/src/test/kotlin/astminer/problem/FolderNameExtractorTest.kt 
b/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt similarity index 96% rename from src/test/kotlin/astminer/problem/FolderNameExtractorTest.kt rename to src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt index 482cbead..f1a9687b 100644 --- a/src/test/kotlin/astminer/problem/FolderNameExtractorTest.kt +++ b/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt @@ -1,4 +1,4 @@ -package astminer.problem +package astminer.labelextractor import astminer.common.model.ParseResult import astminer.parse.antlr.AntlrNode diff --git a/src/test/kotlin/astminer/problem/FunctionNameLabelExtractorTest.kt b/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt similarity index 98% rename from src/test/kotlin/astminer/problem/FunctionNameLabelExtractorTest.kt rename to src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt index 90c19683..c9563ac8 100644 --- a/src/test/kotlin/astminer/problem/FunctionNameLabelExtractorTest.kt +++ b/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt @@ -1,4 +1,4 @@ -package astminer.problem +package astminer.labelextractor import astminer.common.DummyNode import astminer.common.model.FunctionInfo From 6dcac0c8826b71cefea793f0ef1b58fec8c97e5b Mon Sep 17 00:00:00 2001 From: illided Date: Mon, 31 May 2021 18:46:11 +0300 Subject: [PATCH 229/308] warning about parser added --- src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt index 921b2c25..e206cada 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt @@ -11,6 +11,11 @@ import org.antlr.v4.runtime.CharStreams import org.antlr.v4.runtime.CommonTokenStream import java.io.InputStream +// Be aware that this parser can have some troubles with +// parsing function 
modifiers and string concatenation via dot +// (AST just falls apart when class field contain dot concatenation) +// More issues you can find here: +// https://github.com/antlr/grammars-v4/issues class PHPParser: Parser { override fun parseInputStream(content: InputStream): AntlrNode { return try { From 16da8e2872c5f573d29416dc815c163dea6eeb68 Mon Sep 17 00:00:00 2001 From: illided Date: Mon, 31 May 2021 19:40:38 +0300 Subject: [PATCH 230/308] url corrected --- src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt index e206cada..86808926 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt @@ -14,8 +14,8 @@ import java.io.InputStream // Be aware that this parser can have some troubles with // parsing function modifiers and string concatenation via dot // (AST just falls apart when class field contain dot concatenation) -// More issues you can find here: -// https://github.com/antlr/grammars-v4/issues +// More details can be found in corresponding issues +// https://github.com/antlr/grammars-v4/issues/1991 class PHPParser: Parser { override fun parseInputStream(content: InputStream): AntlrNode { return try { From 1ecc66320d8e71470c93be8261c4c25dbb079752 Mon Sep 17 00:00:00 2001 From: furetur Date: Tue, 1 Jun 2021 17:27:51 +0500 Subject: [PATCH 231/308] moved all pipeline interfaces to PipelineModel.kt --- .../java/astminer/examples/AllJavaFiles.java | 2 +- .../astminer/common/model/ParsingModel.kt | 2 +- .../astminer/common/model/PipelineModel.kt | 44 +++++++++++++++++++ .../kotlin/astminer/config/StorageConfigs.kt | 2 +- .../kotlin/astminer/examples/AllJavaFiles.kt | 2 +- .../astminer/examples/AllJavaMethods.kt | 2 +- .../astminer/examples/AllPythonMethods.kt | 2 +- .../kotlin/astminer/filters/CommonFilters.kt | 5 
+-- .../kotlin/astminer/filters/FileFilters.kt | 9 ---- .../astminer/filters/FunctionFilters.kt | 7 +-- .../labelextractor/FileLabelExtractors.kt | 7 +-- .../labelextractor/FunctionLabelExtractors.kt | 7 +-- .../astminer/labelextractor/LabeledResult.kt | 11 ----- src/main/kotlin/astminer/pipeline/Pipeline.kt | 6 +-- .../astminer/pipeline/branch/Exceptions.kt | 2 +- .../pipeline/branch/FilePipelineBranch.kt | 10 ++--- .../pipeline/branch/FunctionPipelineBranch.kt | 7 +-- .../pipeline/branch/PipelineBranch.kt | 4 +- src/main/kotlin/astminer/storage/Storage.kt | 22 ---------- .../astminer/storage/ast/CsvAstStorage.kt | 4 +- .../astminer/storage/ast/DotAstStorage.kt | 4 +- .../astminer/storage/path/PathBasedStorage.kt | 4 +- src/test/kotlin/astminer/common/TestUtils.kt | 2 +- .../labelextractor/FileNameExtractorTest.kt | 1 + .../labelextractor/FolderNameExtractorTest.kt | 1 + .../FunctionNameLabelExtractorTest.kt | 1 + 26 files changed, 76 insertions(+), 94 deletions(-) delete mode 100644 src/main/kotlin/astminer/filters/FileFilters.kt delete mode 100644 src/main/kotlin/astminer/labelextractor/LabeledResult.kt delete mode 100644 src/main/kotlin/astminer/storage/Storage.kt diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java index 5591f864..46bfc39a 100644 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ b/src/main/java/astminer/examples/AllJavaFiles.java @@ -1,6 +1,6 @@ package astminer.examples; -import astminer.labelextractor.LabeledResult; +import astminer.common.model.LabeledResult; import astminer.common.model.*; import astminer.parse.gumtree.java.GumTreeJavaParser; import astminer.storage.path.Code2VecPathStorage; diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index bbfa310b..08ccb3ec 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -1,6 
+1,6 @@ package astminer.common.model -import astminer.labelextractor.LabeledResult +import astminer.common.model.LabeledResult import astminer.common.DEFAULT_TOKEN import astminer.common.splitToSubtokens import java.io.File diff --git a/src/main/kotlin/astminer/common/model/PipelineModel.kt b/src/main/kotlin/astminer/common/model/PipelineModel.kt index 25dc6dc4..dd3f6420 100644 --- a/src/main/kotlin/astminer/common/model/PipelineModel.kt +++ b/src/main/kotlin/astminer/common/model/PipelineModel.kt @@ -1,5 +1,49 @@ package astminer.common.model +import java.io.Closeable + + interface Filter interface LabelExtractor + +interface FileFilter : Filter { + fun validate(parseResult: ParseResult): Boolean +} + +interface FunctionFilter : Filter { + fun validate(functionInfo: FunctionInfo): Boolean +} + +interface FileLabelExtractor : LabelExtractor { + fun process(parseResult: ParseResult): LabeledResult? +} + +interface FunctionLabelExtractor : LabelExtractor { + fun process(functionInfo: FunctionInfo): LabeledResult? +} + +/** + * An AST subtree with a label and the path of the source file. + * @property root The root of the AST subtree. + * @property label Any label for this subtree. + * @property filePath The path to the source file where the AST is from. + */ +data class LabeledResult(val root: T, val label: String, val filePath: String) + +/** + * Storage saved labeled results to disk in a specified format. + * Storage might extract any data from labeled result. 
+ * For instance, it might extract paths from trees + */ +interface Storage : Closeable { + val outputDirectoryPath: String + + fun store(labeledResult: LabeledResult) + + fun store(labeledResults: Iterable>) { + for (labeledResult in labeledResults) { + store(labeledResult) + } + } +} diff --git a/src/main/kotlin/astminer/config/StorageConfigs.kt b/src/main/kotlin/astminer/config/StorageConfigs.kt index 2096f2ef..a87d0bcf 100644 --- a/src/main/kotlin/astminer/config/StorageConfigs.kt +++ b/src/main/kotlin/astminer/config/StorageConfigs.kt @@ -1,6 +1,6 @@ package astminer.config -import astminer.storage.Storage +import astminer.common.model.Storage import astminer.storage.ast.CsvAstStorage import astminer.storage.ast.DotAstStorage import astminer.storage.path.Code2VecPathStorage diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index eef3b957..c35b7f6a 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -1,6 +1,6 @@ package astminer.examples -import astminer.labelextractor.LabeledResult +import astminer.common.model.LabeledResult import astminer.parse.antlr.java.JavaFunctionSplitter import astminer.parse.antlr.java.JavaParser import astminer.storage.path.Code2VecPathStorage diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 8831196b..4b450e2a 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -1,6 +1,6 @@ package astminer.examples -import astminer.labelextractor.LabeledResult +import astminer.common.model.LabeledResult import astminer.common.model.FunctionInfo import astminer.parse.gumtree.GumTreeNode import astminer.parse.gumtree.java.GumTreeJavaParser diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 
e828a539..57a0db2e 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -1,6 +1,6 @@ package astminer.examples -import astminer.labelextractor.LabeledResult +import astminer.common.model.LabeledResult import astminer.common.model.FunctionInfo import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter import astminer.parse.gumtree.GumTreeNode diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt index 03dbfee0..49c32534 100644 --- a/src/main/kotlin/astminer/filters/CommonFilters.kt +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -1,9 +1,6 @@ package astminer.filters -import astminer.common.model.FunctionInfo -import astminer.common.model.Node -import astminer.common.model.ParseResult -import astminer.common.splitToSubtokens +import astminer.common.model.* import astminer.featureextraction.NumberOfNodes /** diff --git a/src/main/kotlin/astminer/filters/FileFilters.kt b/src/main/kotlin/astminer/filters/FileFilters.kt deleted file mode 100644 index fd3dfe3e..00000000 --- a/src/main/kotlin/astminer/filters/FileFilters.kt +++ /dev/null @@ -1,9 +0,0 @@ -package astminer.filters - -import astminer.common.model.Filter -import astminer.common.model.Node -import astminer.common.model.ParseResult - -interface FileFilter : Filter { - fun validate(parseResult: ParseResult): Boolean -} diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index a47942a3..6b7b9c88 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -1,14 +1,10 @@ package astminer.filters -import astminer.common.model.Filter +import astminer.common.model.FunctionFilter import astminer.common.model.FunctionInfo import astminer.common.model.Node import astminer.common.splitToSubtokens -interface FunctionFilter : Filter { - fun 
validate(functionInfo: FunctionInfo): Boolean -} - /** * Filter that excludes functions that have at least one of modifiers from the [excludeModifiers] list. */ @@ -41,4 +37,3 @@ class FunctionNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionF return name != null && splitToSubtokens(name).size <= maxWordsNumber } } - diff --git a/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt b/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt index c020f977..4182839f 100644 --- a/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt +++ b/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt @@ -1,14 +1,11 @@ package astminer.labelextractor -import astminer.common.model.LabelExtractor +import astminer.common.model.FileLabelExtractor +import astminer.common.model.LabeledResult import astminer.common.model.Node import astminer.common.model.ParseResult import java.io.File -interface FileLabelExtractor : LabelExtractor { - fun process(parseResult: ParseResult): LabeledResult? -} - /** * Labels files with folder names */ diff --git a/src/main/kotlin/astminer/labelextractor/FunctionLabelExtractors.kt b/src/main/kotlin/astminer/labelextractor/FunctionLabelExtractors.kt index db892167..9fc1b058 100644 --- a/src/main/kotlin/astminer/labelextractor/FunctionLabelExtractors.kt +++ b/src/main/kotlin/astminer/labelextractor/FunctionLabelExtractors.kt @@ -1,13 +1,10 @@ package astminer.labelextractor import astminer.common.model.FunctionInfo -import astminer.common.model.LabelExtractor +import astminer.common.model.FunctionLabelExtractor +import astminer.common.model.LabeledResult import astminer.common.model.Node -interface FunctionLabelExtractor : LabelExtractor { - fun process(functionInfo: FunctionInfo): LabeledResult? -} - /** * Labels functions with their names. * Hides the name of the function in the subtree and also all in the recursive calls. 
diff --git a/src/main/kotlin/astminer/labelextractor/LabeledResult.kt b/src/main/kotlin/astminer/labelextractor/LabeledResult.kt deleted file mode 100644 index 0031cbfe..00000000 --- a/src/main/kotlin/astminer/labelextractor/LabeledResult.kt +++ /dev/null @@ -1,11 +0,0 @@ -package astminer.labelextractor - -import astminer.common.model.Node - -/** - * An AST subtree with a label and the path of the source file. - * @property root The root of the AST subtree. - * @property label Any label for this subtree. - * @property filePath The path to the source file where the AST is from. - */ -data class LabeledResult(val root: T, val label: String, val filePath: String) diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 4e00f9a0..9c187214 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -6,9 +6,9 @@ import astminer.parse.getHandlerFactory import astminer.pipeline.branch.FilePipelineBranch import astminer.pipeline.branch.FunctionPipelineBranch import astminer.pipeline.branch.IllegalLabelExtractorException -import astminer.labelextractor.FileLabelExtractor -import astminer.labelextractor.FunctionLabelExtractor -import astminer.storage.Storage +import astminer.common.model.FileLabelExtractor +import astminer.common.model.FunctionLabelExtractor +import astminer.common.model.Storage import java.io.File /** diff --git a/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt index e1b73c62..f2c9c6a3 100644 --- a/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt +++ b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt @@ -10,4 +10,4 @@ class IllegalLabelExtractorException(problemName: String?) 
: * This exception is thrown when the given filter is not implemented for the given granularity */ class IllegalFilterException(granularity: String, filterName: String?): - IllegalStateException("Unknown filter `${filterName ?: "anonymous"}` for $granularity granularity") \ No newline at end of file + IllegalStateException("Unknown filter `${filterName ?: "anonymous"}` for $granularity granularity") diff --git a/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt index 5b099285..e31b28b4 100644 --- a/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt @@ -1,10 +1,7 @@ package astminer.pipeline.branch -import astminer.common.model.LanguageHandler -import astminer.common.model.Node -import astminer.common.model.ParseResult -import astminer.filters.FileFilter -import astminer.common.model.Filter +import astminer.common.model.* +import astminer.common.model.FileFilter import astminer.labelextractor.* /** @@ -33,5 +30,4 @@ class FilePipelineBranch( emptyList() } } - -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt index 0201d144..f7201b96 100644 --- a/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt @@ -1,10 +1,6 @@ package astminer.pipeline.branch -import astminer.common.model.Filter -import astminer.common.model.FunctionInfo -import astminer.common.model.LanguageHandler -import astminer.common.model.Node -import astminer.filters.* +import astminer.common.model.* import astminer.labelextractor.* @@ -31,4 +27,3 @@ class FunctionPipelineBranch( .filter { functionInfo -> passesThroughFilters(functionInfo) } .mapNotNull { functionInfo -> labelExtractor.process(functionInfo) } } - diff --git 
a/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt index 65caa010..f417247d 100644 --- a/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt @@ -2,7 +2,7 @@ package astminer.pipeline.branch import astminer.common.model.LanguageHandler import astminer.common.model.Node -import astminer.labelextractor.LabeledResult +import astminer.common.model.LabeledResult /** * PipelineBranch is a part of the pipeline that can be completely different depending on the granularity (pipeline type) @@ -15,4 +15,4 @@ interface PipelineBranch { * Should have no other side-effects */ fun process(languageHandler: LanguageHandler): List> -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/storage/Storage.kt b/src/main/kotlin/astminer/storage/Storage.kt deleted file mode 100644 index b04d5834..00000000 --- a/src/main/kotlin/astminer/storage/Storage.kt +++ /dev/null @@ -1,22 +0,0 @@ -package astminer.storage - -import astminer.labelextractor.LabeledResult -import astminer.common.model.Node -import java.io.Closeable - -/** - * Storage saved labeled results to disk in a specified format. - * Storage might extract any data from labeled result. 
- * For instance, it might extract paths from trees - */ -interface Storage : Closeable { - val outputDirectoryPath: String - - fun store(labeledResult: LabeledResult) - - fun store(labeledResults: Iterable>) { - for (labeledResult in labeledResults) { - store(labeledResult) - } - } -} diff --git a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt index b1446b70..42777aef 100644 --- a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -1,12 +1,12 @@ package astminer.storage.ast -import astminer.labelextractor.LabeledResult +import astminer.common.model.LabeledResult import astminer.common.model.Node import astminer.common.storage.RankedIncrementalIdStorage import astminer.common.storage.dumpIdStorageToCsv import astminer.common.storage.nodeTypeToCsvString import astminer.common.storage.tokenToCsvString -import astminer.storage.Storage +import astminer.common.model.Storage import java.io.File import java.io.PrintWriter diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index 9f663716..3e3748a1 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -1,9 +1,9 @@ package astminer.storage.ast -import astminer.labelextractor.LabeledResult +import astminer.common.model.LabeledResult import astminer.common.model.Node import astminer.common.storage.RankedIncrementalIdStorage -import astminer.storage.Storage +import astminer.common.model.Storage import java.io.File import java.io.PrintWriter diff --git a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index 64c7d0f6..aa814206 100644 --- a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -1,12 +1,12 @@ package 
astminer.storage.path -import astminer.labelextractor.LabeledResult +import astminer.common.model.LabeledResult import astminer.common.model.* import astminer.common.storage.* import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import astminer.paths.toPathContext -import astminer.storage.Storage +import astminer.common.model.Storage import java.io.File import java.io.PrintWriter diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/TestUtils.kt index 4a233837..1a2bd93a 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/TestUtils.kt @@ -1,6 +1,6 @@ package astminer.common -import astminer.labelextractor.LabeledResult +import astminer.common.model.LabeledResult import astminer.common.model.Node import astminer.common.model.ParseResult diff --git a/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt b/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt index bf7690f4..ec582b1f 100644 --- a/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt +++ b/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt @@ -1,5 +1,6 @@ package astminer.labelextractor +import astminer.common.model.LabeledResult import astminer.common.model.ParseResult import astminer.parse.antlr.AntlrNode import org.junit.Test diff --git a/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt b/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt index f1a9687b..e6f18d0f 100644 --- a/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt +++ b/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt @@ -1,5 +1,6 @@ package astminer.labelextractor +import astminer.common.model.LabeledResult import astminer.common.model.ParseResult import astminer.parse.antlr.AntlrNode import org.junit.Test diff --git a/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt 
b/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt index c9563ac8..56c720d8 100644 --- a/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt +++ b/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt @@ -2,6 +2,7 @@ package astminer.labelextractor import astminer.common.DummyNode import astminer.common.model.FunctionInfo +import astminer.common.model.LabeledResult import astminer.common.model.Node import org.junit.Before import org.junit.Test From 4e68a4fc959e7348ab70dc5af565fe99771da83a Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Tue, 1 Jun 2021 18:31:03 +0300 Subject: [PATCH 232/308] Update astminer dependencies --- build.gradle.kts | 20 +++++++++----------- src/main/kotlin/astminer/common/TreeUtil.kt | 12 ++++-------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 7eeabe66..4fbd3043 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -5,11 +5,10 @@ version = "0.6.4" plugins { id("java") - kotlin("jvm") version "1.4.32" apply true + kotlin("jvm") version "1.5.10" apply true id("antlr") - id("idea") id("application") - id("org.jetbrains.dokka") version "0.9.18" + id("org.jetbrains.dokka") version "1.4.32" id("me.champeau.gradle.jmh") version "0.5.0" id("maven-publish") id("tanvd.kosogor") version "1.0.10" apply true @@ -31,23 +30,23 @@ dependencies { api("com.github.gumtreediff", "gen.python", "2.1.2") // https://mvnrepository.com/artifact/io.shiftleft/fuzzyc2cpg - api("io.shiftleft", "fuzzyc2cpg_2.13", "1.2.9") + api("io.shiftleft", "fuzzyc2cpg_2.13", "1.2.30") // ===== Main ===== implementation(kotlin("stdlib")) - implementation("com.github.ajalt", "clikt", "2.1.0") + implementation("com.github.ajalt.clikt", "clikt", "3.2.0") // ===== Logging ===== + // https://mvnrepository.com/artifact/org.slf4j/slf4j-simple implementation("org.slf4j", "slf4j-simple", "1.7.30") implementation("io.github.microutils:kotlin-logging:1.5.9") // 
===== Test ===== - // https://mvnrepository.com/artifact/org.slf4j/slf4j-simple - testImplementation("junit:junit:4.11") + testImplementation("junit:junit:4.13.2") testImplementation(kotlin("test-junit")) // ===== JMH ===== - jmhImplementation("org.jetbrains.kotlin:kotlin-reflect:1.4.32") + jmhImplementation("org.jetbrains.kotlin:kotlin-reflect:1.5.0") jmhImplementation("org.openjdk.jmh:jmh-core:1.21") jmhImplementation("org.openjdk.jmh:jmh-generator-annprocess:1.21") } @@ -88,9 +87,8 @@ tasks.compileJava { sourceCompatibility = "1.8" } -tasks.dokka { - outputFormat = "html" - outputDirectory = "$buildDir/javadoc" +tasks.dokkaHtml.configure { + outputDirectory.set(buildDir.resolve("javadoc")) } jmh { diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index 0999f7b1..82c02d47 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -7,23 +7,19 @@ const val DEFAULT_TOKEN = "EMPTY" * https://github.com/tech-srl/code2vec/blob/master/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java */ fun normalizeToken(token: String, defaultToken: String): String { - val cleanToken = token.toLowerCase() + val cleanToken = token.lowercase() .replace("\\\\n".toRegex(), "") // escaped new line .replace("//s+".toRegex(), "") // whitespaces - .replace("[\"',]".toRegex(), "") // quotes, apostrophies, commas + .replace("[\"',]".toRegex(), "") // quotes, apostrophes, commas .replace("\\P{Print}".toRegex(), "") // unicode weird characters val stripped = cleanToken.replace("[^A-Za-z]".toRegex(), "") - return if (stripped.isEmpty()) { + return stripped.ifEmpty { val carefulStripped = cleanToken.replace(" ", "_") - if (carefulStripped.isEmpty()) { + carefulStripped.ifEmpty { defaultToken - } else { - carefulStripped } - } else { - stripped } } From 3d99167de1b62c74d193e453dd2ff3353a1cdda4 Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 4 Jun 2021 15:16:00 +0500 Subject: 
[PATCH 233/308] added JsonAstStorage.kt with tests, documentation and a config example --- configs/file-asts-json-storage.yaml | 19 ++++ .../kotlin/astminer/config/StorageConfigs.kt | 10 +++ .../astminer/storage/ast/JsonAstStorage.kt | 78 +++++++++++++++++ .../storage/ast/TreeEnumeratorTest.kt | 86 +++++++++++++++++++ 4 files changed, 193 insertions(+) create mode 100644 configs/file-asts-json-storage.yaml create mode 100644 src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt create mode 100644 src/test/kotlin/astminer/storage/ast/TreeEnumeratorTest.kt diff --git a/configs/file-asts-json-storage.yaml b/configs/file-asts-json-storage.yaml new file mode 100644 index 00000000..fc0be07b --- /dev/null +++ b/configs/file-asts-json-storage.yaml @@ -0,0 +1,19 @@ +inputDir: 'src/test/resources/methodSplitting/' +outputDir: 'output' + +parser: + name: 'antlr' + extensions: ['java', 'js'] + +filters: + - name: 'by tree size' + maxTreeSize: 1000 + - name: 'by words number' + maxTokenWordsNumber: 1000 + + +labelExtractor: + name: 'file name' + +storage: + name: 'JsonAST' diff --git a/src/main/kotlin/astminer/config/StorageConfigs.kt b/src/main/kotlin/astminer/config/StorageConfigs.kt index a87d0bcf..6185a32c 100644 --- a/src/main/kotlin/astminer/config/StorageConfigs.kt +++ b/src/main/kotlin/astminer/config/StorageConfigs.kt @@ -3,6 +3,7 @@ package astminer.config import astminer.common.model.Storage import astminer.storage.ast.CsvAstStorage import astminer.storage.ast.DotAstStorage +import astminer.storage.ast.JsonAstStorage import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import kotlinx.serialization.SerialName @@ -36,6 +37,15 @@ class DotAstStorageConfig : StorageConfig() { override fun createStorage(outputDirectoryPath: String) = DotAstStorage(outputDirectoryPath) } +/** + * @see JsonAstStorage + */ +@Serializable +@SerialName("JsonAST") +class JsonAstStorageConfig : StorageConfig() { + override fun 
createStorage(outputDirectoryPath: String) = JsonAstStorage(outputDirectoryPath) +} + /** * Config for [astminer.storage.path.Code2VecPathStorage] */ diff --git a/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt b/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt new file mode 100644 index 00000000..72905218 --- /dev/null +++ b/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt @@ -0,0 +1,78 @@ +package astminer.storage.ast + +import astminer.common.model.LabeledResult +import astminer.common.model.Node +import astminer.common.model.Storage +import kotlinx.serialization.Serializable +import kotlinx.serialization.encodeToString +import kotlinx.serialization.json.Json +import java.io.File +import java.io.PrintWriter + +private typealias Id = Int + +/** + * Formats the output in the json format by flattening the trees. + * Each line in the output file is a single json object that corresponds to one of the labeled trees. + * Each tree is flattened and represented as a list of nodes. 
+ */ +class JsonAstStorage(override val outputDirectoryPath: String) : Storage { + private val treeEnumerator = TreeEnumerator() + + private val writer: PrintWriter + + init { + val outputDirectory = File(outputDirectoryPath) + outputDirectory.mkdirs() + val file = outputDirectory.resolve("asts.jsonl") + file.createNewFile() + writer = file.printWriter() + } + + @Serializable + private data class LabeledAst(val label: String, val ast: List) + + @Serializable + private data class OutputNode(val token: String, val typeLabel: String, val children: List) + + private fun TreeEnumerator.EnumeratedNode.toOutputNode() = + OutputNode(node.token, node.typeLabel, children.map { it.id }) + + override fun store(labeledResult: LabeledResult) { + val outputNodes = treeEnumerator.enumerate(labeledResult.root).map { it.toOutputNode() } + val labeledAst = LabeledAst(labeledResult.label, outputNodes) + writer.println(Json.encodeToString(labeledAst)) + } + + override fun close() { + writer.close() + } +} + +/** + * Gives ids to all nodes in the tree + */ +class TreeEnumerator { + private val result = mutableListOf() + + /** + * Node that has been given an Id. + * Also all his children have been given ids. + */ + data class EnumeratedNode(val id: Id, val node: Node, val children: List) + + private fun enumerateSubtree(node: Node): EnumeratedNode { + val children = node.children.map { enumerateSubtree(it) } + return EnumeratedNode(result.size, node, children).also { result.add(it) } + } + + /** + * Enumerates the given tree and returns the flattened tree. 
+ * Enumerated node's id must be equal to its index in the returned list + */ + fun enumerate(node: Node): List { + result.clear() + enumerateSubtree(node) + return result + } +} diff --git a/src/test/kotlin/astminer/storage/ast/TreeEnumeratorTest.kt b/src/test/kotlin/astminer/storage/ast/TreeEnumeratorTest.kt new file mode 100644 index 00000000..0596b947 --- /dev/null +++ b/src/test/kotlin/astminer/storage/ast/TreeEnumeratorTest.kt @@ -0,0 +1,86 @@ +package astminer.storage.ast + +import astminer.common.DummyNode +import astminer.common.createBamboo +import astminer.common.createDummyTree +import astminer.common.createSmallTree +import org.junit.Before +import org.junit.Test +import kotlin.test.assertEquals + +class TreeEnumeratorTest { + lateinit var treeEnumerator: TreeEnumerator + + @Before + fun init() { + treeEnumerator = TreeEnumerator() + } + + private data class EnumeratedResult(val id: Int, val typeLabel: String, val children: List = emptyList()) + + private fun enumerate(node: DummyNode): List { + val enumeratedNodes = treeEnumerator.enumerate(node) + return enumeratedNodes.map { EnumeratedResult(it.id, it.node.typeLabel, it.children.map { child -> child.id }) } + } + + @Test + fun `test for 1 node`() { + val node = DummyNode("a") + val expected = listOf(EnumeratedResult(0, "a")) + assertEquals(expected, enumerate(node)) + } + + @Test + fun `test should be reusable`() { + treeEnumerator.enumerate(DummyNode("something previous")) + val node = DummyNode("a") + val expected = listOf(EnumeratedResult(0, "a")) + assertEquals(expected, enumerate(node)) + } + + @Test + fun `test on small bamboo`() { + val bamboo = createBamboo(10) + val expected = List(10) { i -> + EnumeratedResult(i, (10 - i).toString(), if (i == 0) emptyList() else listOf(i - 1)) + } + assertEquals(expected, enumerate(bamboo)) + } + + @Test + fun `test on big bamboo`() { + val bamboo = createBamboo(1000) + val expected = List(1000) { i -> + EnumeratedResult(i, (1000 - i).toString(), if (i == 
0) emptyList() else listOf(i - 1)) + } + assertEquals(expected, enumerate(bamboo)) + } + + @Test + fun `test on very small dummy tree`() { + val node = createSmallTree() + val expected = listOf( + EnumeratedResult(0, "2"), + EnumeratedResult(1, "4"), + EnumeratedResult(2, "3", listOf(1)), + EnumeratedResult(3, "1", listOf(0, 2)) + ) + assertEquals(expected, enumerate(node)) + } + + @Test + fun `test on small dummy tree`() { + val node = createDummyTree() + val expected = listOf( + EnumeratedResult(0, "4"), + EnumeratedResult(1, "5"), + EnumeratedResult(2, "6"), + EnumeratedResult(3, "2", listOf(0, 1, 2)), + EnumeratedResult(4, "7"), + EnumeratedResult(5, "8"), + EnumeratedResult(6, "3", listOf(4, 5)), + EnumeratedResult(7, "1", listOf(3, 6)) + ) + assertEquals(expected, enumerate(node)) + } +} From 2ed8c579e9be3ca569d1020954b555139ea5aa7a Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 4 Jun 2021 16:17:36 +0500 Subject: [PATCH 234/308] added filePath to all function infos --- .../kotlin/astminer/common/model/FunctionInfoModel.kt | 2 +- src/main/kotlin/astminer/common/model/HandlerModel.kt | 2 +- src/main/kotlin/astminer/examples/AllJavaFiles.kt | 2 +- src/main/kotlin/astminer/examples/AllJavaMethods.kt | 2 +- src/main/kotlin/astminer/examples/AllPythonMethods.kt | 2 +- .../parse/antlr/java/AntlrJavaFunctionInfo.kt | 2 +- .../astminer/parse/antlr/java/JavaFunctionSplitter.kt | 5 ++--- .../antlr/javascript/AntlrJavaScriptElementInfo.kt | 11 ++++++----- .../antlr/javascript/JavaScriptFunctionSplitter.kt | 8 ++++---- .../parse/antlr/python/AntlrPythonFunctionInfo.kt | 2 +- .../parse/antlr/python/PythonFunctionSplitter.kt | 5 ++--- .../astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt | 2 +- .../astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt | 5 ++--- .../parse/gumtree/java/GumTreeJavaFunctionInfo.kt | 3 ++- .../parse/gumtree/java/GumTreeJavaFunctionSplitter.kt | 4 ++-- .../parse/gumtree/python/GumTreePythonFunctionInfo.kt | 2 +- 
.../gumtree/python/GumTreePythonFunctionSplitter.kt | 4 ++-- .../parse/antlr/java/JavaFunctionSplitterTest.kt | 5 +++-- .../javascript/JavaScriptFunctionSplitterTest.kt | 2 +- .../parse/antlr/python/PythonFunctionSplitterTest.kt | 5 +++-- .../astminer/parse/cpp/FuzzyMethodSplitterTest.kt | 3 ++- .../gumtree/java/GumTreeJavaFunctionSplitterTest.kt | 2 +- .../python/GumTreePythonFunctionSplitterTest.kt | 2 +- 23 files changed, 42 insertions(+), 40 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt index 0bb1d6ec..e417f116 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt @@ -1,7 +1,7 @@ package astminer.common.model interface TreeFunctionSplitter { - fun splitIntoFunctions(root: T): Collection> + fun splitIntoFunctions(root: T, filePath: String): Collection> } class FunctionInfoPropertyNotImplementedException(propertyName: String) : diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index 8ec1878e..7f312df6 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -25,6 +25,6 @@ abstract class LanguageHandler { protected abstract val splitter: TreeFunctionSplitter fun splitIntoFunctions(): Collection> { - return splitter.splitIntoFunctions(parseResult.root) + return splitter.splitIntoFunctions(parseResult.root, parseResult.filePath) } } diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index c35b7f6a..fa9320fe 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -17,7 +17,7 @@ fun allJavaFiles() { File(inputDir).forFilesWithSuffix("11.java") { file -> val node = JavaParser().parseInputStream(file.inputStream()) 
node.prettyPrint() - JavaFunctionSplitter().splitIntoFunctions(node).forEach { + JavaFunctionSplitter().splitIntoFunctions(node, file.path).forEach { println(it.name) println(it.returnType) println(it.enclosingElement?.name) diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 4b450e2a..73ba198c 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -31,7 +31,7 @@ fun allJavaMethods() { val fileNode = GumTreeJavaParser().parseInputStream(file.inputStream()) //extract method nodes - val methodNodes = GumTreeJavaFunctionSplitter().splitIntoFunctions(fileNode) + val methodNodes = GumTreeJavaFunctionSplitter().splitIntoFunctions(fileNode, file.path) methodNodes.forEach { methodInfo -> //Retrieve a method identifier diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 57a0db2e..bc54721a 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -27,7 +27,7 @@ fun allPythonMethods() { val fileNode = GumTreePythonParser().parseInputStream(file.inputStream()) // extract method nodes - val methodNodes = GumTreePythonFunctionSplitter().splitIntoFunctions(fileNode) + val methodNodes = GumTreePythonFunctionSplitter().splitIntoFunctions(fileNode, file.path) methodNodes.forEach { methodInfo -> // Retrieve a method identifier diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt index 7003724f..a60450c3 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -4,7 +4,7 @@ import astminer.common.model.* import astminer.parse.antlr.* import astminer.parse.findEnclosingElementBy -class 
AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo { +class AntlrJavaFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { override val nameNode: AntlrNode? = collectNameNode() override val parameters: List = collectParameters() override val returnType: String? = collectReturnType() diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt index fbf7bf00..1955f3b5 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt @@ -1,6 +1,5 @@ package astminer.parse.antlr.java -import astminer.common.* import astminer.common.model.* import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.hasLastLabel @@ -8,10 +7,10 @@ import astminer.parse.antlr.hasLastLabel class JavaFunctionSplitter : TreeFunctionSplitter { private val methodNodeType = "methodDeclaration" - override fun splitIntoFunctions(root: AntlrNode): Collection> { + override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { (it).hasLastLabel(methodNodeType) } - return methodRoots.map { AntlrJavaFunctionInfo(it) } + return methodRoots.map { AntlrJavaFunctionInfo(it, filePath) } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt index ea342e56..ceeda5b1 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt @@ -8,7 +8,8 @@ import astminer.parse.findEnclosingElementBy /** Base class for describing JavaScript methods, functions or arrow functions. 
*/ -abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode) : FunctionInfo { +abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode, override val filePath: String) : + FunctionInfo { companion object { private val ENCLOSING_ELEMENT_NODES = listOf("functionDeclaration", "variableDeclaration", "classDeclaration", "methodDefinition") @@ -62,7 +63,7 @@ abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode) : Functi else -> parametersRoot .getItOrChildrenOfType(SINGLE_PARAMETER_NODE) .map { it.getChildOfType(PARAMETER_NAME_NODE) ?: it } - } + } return parameterNameNodes.map { val parameterName = it.originalToken ?: throw IllegalStateException("Parameter name wasn't found") FunctionInfoParameter(name = parameterName, type = null) @@ -72,7 +73,7 @@ abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode) : Functi abstract fun getParametersRoot(): AntlrNode? } -class JavaScriptArrowInfo(override val root: AntlrNode) : AntlrJavaScriptElementInfo(root) { +class JavaScriptArrowInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { companion object { private const val ARROW_NAME_NODE = "Identifier" private const val ARROW_PARAMETER_NODE = "arrowFunctionParameters" @@ -89,7 +90,7 @@ class JavaScriptArrowInfo(override val root: AntlrNode) : AntlrJavaScriptElement } } -class JavaScriptMethodInfo(override val root: AntlrNode) : AntlrJavaScriptElementInfo(root) { +class JavaScriptMethodInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { companion object { private val METHOD_GETTERS_SETTERS = listOf("getter", "setter") private const val METHOD_NAME_NODE = "identifierName" @@ -113,7 +114,7 @@ class JavaScriptMethodInfo(override val root: AntlrNode) : AntlrJavaScriptElemen override fun getParametersRoot(): AntlrNode? 
= root.getChildOfType(METHOD_PARAMETER_NODE) } -class JavaScriptFunctionInfo(override val root: AntlrNode) : AntlrJavaScriptElementInfo(root) { +class JavaScriptFunctionInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { companion object { private const val FUNCTION_NAME_NODE = "Identifier" private const val FUNCTION_PARAMETER_NODE = "formalParameterList" diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt index fb14d649..8d9929e8 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt @@ -15,12 +15,12 @@ class JavaScriptFunctionSplitter : TreeFunctionSplitter { private const val FUNCTION_NODE = "Function" } - override fun splitIntoFunctions(root: AntlrNode): Collection> { + override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { return root.preOrder().mapNotNull { node -> when { - node.isArrowElement() -> JavaScriptArrowInfo(node) - node.isFunctionElement() -> JavaScriptFunctionInfo(node) - node.isMethodElement() -> JavaScriptMethodInfo(node) + node.isArrowElement() -> JavaScriptArrowInfo(node, filePath) + node.isFunctionElement() -> JavaScriptFunctionInfo(node, filePath) + node.isMethodElement() -> JavaScriptMethodInfo(node, filePath) else -> null } } diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt index ee98b2be..01bde266 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -4,7 +4,7 @@ import astminer.common.model.* import astminer.parse.antlr.* import astminer.parse.findEnclosingElementBy -class AntlrPythonFunctionInfo(override val root: AntlrNode) : 
FunctionInfo { +class AntlrPythonFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { override val nameNode: AntlrNode? = collectNameNode() override val parameters: List = collectParameters() override val enclosingElement: EnclosingElement? = collectEnclosingElement() diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt index 1c467e55..f3e987f4 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt @@ -1,6 +1,5 @@ package astminer.parse.antlr.python -import astminer.common.* import astminer.common.model.* import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.hasLastLabel @@ -9,10 +8,10 @@ import astminer.parse.antlr.hasLastLabel class PythonFunctionSplitter : TreeFunctionSplitter { private val methodNode = "funcdef" - override fun splitIntoFunctions(root: AntlrNode): Collection> { + override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { (it).hasLastLabel(methodNode) } - return methodRoots.map { AntlrPythonFunctionInfo(it) } + return methodRoots.map { AntlrPythonFunctionInfo(it, filePath) } } } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt index 9806112e..93e898cf 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt @@ -6,7 +6,7 @@ import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter import astminer.parse.findEnclosingElementBy -class FuzzyCppFunctionInfo(override val root: FuzzyNode): FunctionInfo { +class FuzzyCppFunctionInfo(override val root: FuzzyNode, override val filePath: String): FunctionInfo { companion 
object { private const val METHOD_NAME_NODE = "NAME" private const val METHOD_RETURN_NODE = "METHOD_RETURN" diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt index 7d84b0c6..39ca7ab7 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt @@ -1,13 +1,12 @@ package astminer.parse.fuzzy.cpp -import astminer.common.* import astminer.common.model.* class FuzzyFunctionSplitter : TreeFunctionSplitter { private val methodNode = "METHOD" - override fun splitIntoFunctions(root: FuzzyNode): Collection> { + override fun splitIntoFunctions(root: FuzzyNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { it.typeLabel == methodNode } - return methodRoots.map { FuzzyCppFunctionInfo(it) } + return methodRoots.map { FuzzyCppFunctionInfo(it, filePath) } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt index c9cae9e2..6a0586a7 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -6,7 +6,8 @@ import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter import astminer.parse.gumtree.GumTreeNode -class GumTreeJavaFunctionInfo(override val root: GumTreeNode) : FunctionInfo { +class GumTreeJavaFunctionInfo(override val root: GumTreeNode, override val filePath: String) : + FunctionInfo { companion object { private object TypeLabels { const val simpleName = "SimpleName" diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt index 381ffa64..fd9287e1 100644 --- 
a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt @@ -6,8 +6,8 @@ import astminer.parse.gumtree.GumTreeNode class GumTreeJavaFunctionSplitter : TreeFunctionSplitter { private val methodDeclaration = "MethodDeclaration" - override fun splitIntoFunctions(root: GumTreeNode): Collection> { + override fun splitIntoFunctions(root: GumTreeNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { it.typeLabel == methodDeclaration } - return methodRoots.map { GumTreeJavaFunctionInfo(it) } + return methodRoots.map { GumTreeJavaFunctionInfo(it, filePath) } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt index 180ffab2..90e68a4c 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -7,7 +7,7 @@ import astminer.common.model.FunctionInfoParameter import astminer.parse.findEnclosingElementBy import astminer.parse.gumtree.GumTreeNode -class GumTreePythonFunctionInfo(override val root: GumTreeNode) : FunctionInfo { +class GumTreePythonFunctionInfo(override val root: GumTreeNode, override val filePath: String) : FunctionInfo { companion object { private object TypeLabels { const val classDefinition = "ClassDef" diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt index 1f006bb7..04cd363a 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt @@ -12,8 +12,8 @@ class GumTreePythonFunctionSplitter : TreeFunctionSplitter { } } - override fun 
splitIntoFunctions(root: GumTreeNode): Collection> { + override fun splitIntoFunctions(root: GumTreeNode, filePath: String): Collection> { val functionRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.typeLabel) } - return functionRoots.map { GumTreePythonFunctionInfo(it) } + return functionRoots.map { GumTreePythonFunctionInfo(it, filePath) } } } diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt index 702e9576..d6ce9886 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt @@ -10,6 +10,7 @@ import kotlin.test.assertNotNull class JavaFunctionSplitterTest { companion object { + const val FILE_PATH = "src/test/resources/methodSplitting/testMethodSplitting.java" const val N_FUNCTIONS = 9 val functionSplitter = JavaFunctionSplitter() val parser = JavaParser() @@ -19,9 +20,9 @@ class JavaFunctionSplitterTest { @BeforeTest fun parseTree() { - val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.java").inputStream()) + val testTree = parser.parseInputStream(File(FILE_PATH).inputStream()) assertNotNull(testTree) - functionInfos = functionSplitter.splitIntoFunctions(testTree) + functionInfos = functionSplitter.splitIntoFunctions(testTree, FILE_PATH) } @Test diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt index bf559f25..83b44db2 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt @@ -24,7 +24,7 @@ class JavaScriptFunctionSplitterTest { fun parseTree() { val testTree = parser.parseInputStream(File(testFilePath).inputStream()) 
assertNotNull(testTree) - functionInfos = functionSplitter.splitIntoFunctions(testTree) + functionInfos = functionSplitter.splitIntoFunctions(testTree, testFilePath) } @Test diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt index 6605d9fa..008af33f 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt @@ -12,6 +12,7 @@ import kotlin.test.assertNull class PythonFunctionSplitterTest { companion object { + const val FILE_PATH = "src/test/resources/methodSplitting/testMethodSplitting.py" const val N_FUNCTIONS = 17 val functionSplitter = PythonFunctionSplitter() val parser = PythonParser() @@ -21,9 +22,9 @@ class PythonFunctionSplitterTest { @BeforeTest fun parseTree() { - val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.py").inputStream()) + val testTree = parser.parseInputStream(File(FILE_PATH).inputStream()) assertNotNull(testTree) - functionInfos = functionSplitter.splitIntoFunctions(testTree) + functionInfos = functionSplitter.splitIntoFunctions(testTree, FILE_PATH) } @Test diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index 1f7ff46f..86d965a4 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -16,6 +16,7 @@ import kotlin.test.assertNull class FuzzyMethodSplitterTest { companion object { + const val FILE_PATH = "src/test/resources/methodSplitting/testMethodSplitting.cpp" const val N_FUNCTIONS = 10 val methodSplitter = FuzzyFunctionSplitter() val parser = FuzzyCppParser() @@ -28,7 +29,7 @@ class FuzzyMethodSplitterTest { Assume.assumeTrue(checkExecutable("g++")) val testTree = 
parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.cpp").inputStream()) assertNotNull(testTree) - methodInfos = methodSplitter.splitIntoFunctions(testTree) + methodInfos = methodSplitter.splitIntoFunctions(testTree, FILE_PATH) } @Test diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt index 9f77e250..48b2d893 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt @@ -10,7 +10,7 @@ private fun createTree(filename: String): GumTreeNode = GumTreeJavaParser().parseInputStream(File(filename).inputStream()) private fun createAndSplitTree(filename: String): Collection> = - GumTreeJavaFunctionSplitter().splitIntoFunctions(createTree(filename)) + GumTreeJavaFunctionSplitter().splitIntoFunctions(createTree(filename), filename) class GumTreeJavaFunctionSplitterTest { @Test diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt index dd3fcb7e..435ba5cb 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt @@ -15,7 +15,7 @@ class GumTreePythonFunctionSplitterTest { GumTreePythonParser().parseInputStream(File(filename).inputStream()) private fun splitFunctions(filename: String): Collection> = - GumTreePythonFunctionSplitter().splitIntoFunctions(parse(filename)) + GumTreePythonFunctionSplitter().splitIntoFunctions(parse(filename), filename) private fun createPath(file: String) = "src/test/resources/gumTreeMethodSplitter/$file" From b9ba0aa6d5c54b447287258d695097b04482648f Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 4 Jun 2021 18:35:57 +0500 Subject: 
[PATCH 235/308] changed postorder to preorder --- .../astminer/storage/ast/JsonAstStorage.kt | 51 +++++++++++-------- .../storage/ast/TreeEnumeratorTest.kt | 34 ++++++------- 2 files changed, 46 insertions(+), 39 deletions(-) diff --git a/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt b/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt index 72905218..132af4cc 100644 --- a/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt @@ -47,32 +47,39 @@ class JsonAstStorage(override val outputDirectoryPath: String) : Storage { override fun close() { writer.close() } -} - -/** - * Gives ids to all nodes in the tree - */ -class TreeEnumerator { - private val result = mutableListOf() /** - * Node that has been given an Id. - * Also all his children have been given ids. + * Gives ids to all nodes in the tree */ - data class EnumeratedNode(val id: Id, val node: Node, val children: List) + internal class TreeEnumerator { + /** + * Node that has been given an Id. + * Also all his children have been given ids. + */ + data class EnumeratedNode(val id: Id, val node: Node, val children: List, val treeSize: Int) - private fun enumerateSubtree(node: Node): EnumeratedNode { - val children = node.children.map { enumerateSubtree(it) } - return EnumeratedNode(result.size, node, children).also { result.add(it) } - } + private fun enumerateTree(node: Node, currentId: Id = 0): EnumeratedNode { + var nChildren = 0 + val children = node.children.map { child -> + val enumeratedChild = enumerateTree(child, currentId + nChildren + 1) + nChildren += enumeratedChild.treeSize + enumeratedChild + } + return EnumeratedNode(currentId, node, children, nChildren + 1) + } - /** - * Enumerates the given tree and returns the flattened tree. 
- * Enumerated node's id must be equal to its index in the returned list - */ - fun enumerate(node: Node): List { - result.clear() - enumerateSubtree(node) - return result + private fun flattenTree(enumeratedNode: EnumeratedNode): List { + val result = mutableListOf(enumeratedNode) + for (child in enumeratedNode.children) { + result.addAll(flattenTree(child)) + } + return result + } + + /** + * Enumerates the given tree and returns the flattened tree. + * Enumerated node's id must be equal to its index in the returned list + */ + fun enumerate(node: Node): List = flattenTree(enumerateTree(node)) } } diff --git a/src/test/kotlin/astminer/storage/ast/TreeEnumeratorTest.kt b/src/test/kotlin/astminer/storage/ast/TreeEnumeratorTest.kt index 0596b947..8b4194d3 100644 --- a/src/test/kotlin/astminer/storage/ast/TreeEnumeratorTest.kt +++ b/src/test/kotlin/astminer/storage/ast/TreeEnumeratorTest.kt @@ -8,12 +8,12 @@ import org.junit.Before import org.junit.Test import kotlin.test.assertEquals -class TreeEnumeratorTest { - lateinit var treeEnumerator: TreeEnumerator +internal class TreeEnumeratorTest { + private lateinit var treeEnumerator: JsonAstStorage.TreeEnumerator @Before fun init() { - treeEnumerator = TreeEnumerator() + treeEnumerator = JsonAstStorage.TreeEnumerator() } private data class EnumeratedResult(val id: Int, val typeLabel: String, val children: List = emptyList()) @@ -42,7 +42,7 @@ class TreeEnumeratorTest { fun `test on small bamboo`() { val bamboo = createBamboo(10) val expected = List(10) { i -> - EnumeratedResult(i, (10 - i).toString(), if (i == 0) emptyList() else listOf(i - 1)) + EnumeratedResult(i, (i + 1).toString(), if (i == 9) emptyList() else listOf(i + 1)) } assertEquals(expected, enumerate(bamboo)) } @@ -51,7 +51,7 @@ class TreeEnumeratorTest { fun `test on big bamboo`() { val bamboo = createBamboo(1000) val expected = List(1000) { i -> - EnumeratedResult(i, (1000 - i).toString(), if (i == 0) emptyList() else listOf(i - 1)) + EnumeratedResult(i, 
(i + 1).toString(), if (i == 999) emptyList() else listOf(i + 1)) } assertEquals(expected, enumerate(bamboo)) } @@ -60,10 +60,10 @@ class TreeEnumeratorTest { fun `test on very small dummy tree`() { val node = createSmallTree() val expected = listOf( - EnumeratedResult(0, "2"), - EnumeratedResult(1, "4"), - EnumeratedResult(2, "3", listOf(1)), - EnumeratedResult(3, "1", listOf(0, 2)) + EnumeratedResult(0, "1", listOf(1, 2)), + EnumeratedResult(1, "2"), + EnumeratedResult(2, "3", listOf(3)), + EnumeratedResult(3, "4") ) assertEquals(expected, enumerate(node)) } @@ -72,14 +72,14 @@ class TreeEnumeratorTest { fun `test on small dummy tree`() { val node = createDummyTree() val expected = listOf( - EnumeratedResult(0, "4"), - EnumeratedResult(1, "5"), - EnumeratedResult(2, "6"), - EnumeratedResult(3, "2", listOf(0, 1, 2)), - EnumeratedResult(4, "7"), - EnumeratedResult(5, "8"), - EnumeratedResult(6, "3", listOf(4, 5)), - EnumeratedResult(7, "1", listOf(3, 6)) + EnumeratedResult(0, "1", listOf(1, 5)), + EnumeratedResult(1, "2", listOf(2, 3, 4)), + EnumeratedResult(2, "4"), + EnumeratedResult(3, "5"), + EnumeratedResult(4, "6"), + EnumeratedResult(5, "3", listOf(6, 7)), + EnumeratedResult(6, "7"), + EnumeratedResult(7, "8") ) assertEquals(expected, enumerate(node)) } From 936909a277ef9cb331a15e26be7276103b04ee50 Mon Sep 17 00:00:00 2001 From: furetur Date: Fri, 4 Jun 2021 18:43:00 +0500 Subject: [PATCH 236/308] renamed splitIntoFunctions to collectFunctionInfo --- src/main/kotlin/astminer/common/model/FunctionInfoModel.kt | 2 +- src/main/kotlin/astminer/common/model/HandlerModel.kt | 2 +- src/main/kotlin/astminer/examples/AllJavaFiles.kt | 2 +- src/main/kotlin/astminer/examples/AllJavaMethods.kt | 2 +- src/main/kotlin/astminer/examples/AllPythonMethods.kt | 2 +- .../kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt | 2 +- .../parse/antlr/javascript/JavaScriptFunctionSplitter.kt | 2 +- .../astminer/parse/antlr/python/PythonFunctionSplitter.kt | 2 +- 
.../kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt | 2 +- .../astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt | 2 +- .../parse/gumtree/python/GumTreePythonFunctionSplitter.kt | 2 +- .../astminer/parse/antlr/java/JavaFunctionSplitterTest.kt | 2 +- .../parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt | 2 +- .../astminer/parse/antlr/python/PythonFunctionSplitterTest.kt | 2 +- src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt | 2 +- .../parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt | 2 +- .../parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt index e417f116..b0bf80c4 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt @@ -1,7 +1,7 @@ package astminer.common.model interface TreeFunctionSplitter { - fun splitIntoFunctions(root: T, filePath: String): Collection> + fun collectFunctionInfo(root: T, filePath: String): Collection> } class FunctionInfoPropertyNotImplementedException(propertyName: String) : diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index 7f312df6..579b9d4e 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -25,6 +25,6 @@ abstract class LanguageHandler { protected abstract val splitter: TreeFunctionSplitter fun splitIntoFunctions(): Collection> { - return splitter.splitIntoFunctions(parseResult.root, parseResult.filePath) + return splitter.collectFunctionInfo(parseResult.root, parseResult.filePath) } } diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index fa9320fe..3429a603 100644 --- 
a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -17,7 +17,7 @@ fun allJavaFiles() { File(inputDir).forFilesWithSuffix("11.java") { file -> val node = JavaParser().parseInputStream(file.inputStream()) node.prettyPrint() - JavaFunctionSplitter().splitIntoFunctions(node, file.path).forEach { + JavaFunctionSplitter().collectFunctionInfo(node, file.path).forEach { println(it.name) println(it.returnType) println(it.enclosingElement?.name) diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 73ba198c..e3d5e079 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -31,7 +31,7 @@ fun allJavaMethods() { val fileNode = GumTreeJavaParser().parseInputStream(file.inputStream()) //extract method nodes - val methodNodes = GumTreeJavaFunctionSplitter().splitIntoFunctions(fileNode, file.path) + val methodNodes = GumTreeJavaFunctionSplitter().collectFunctionInfo(fileNode, file.path) methodNodes.forEach { methodInfo -> //Retrieve a method identifier diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index bc54721a..8f2f1f25 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -27,7 +27,7 @@ fun allPythonMethods() { val fileNode = GumTreePythonParser().parseInputStream(file.inputStream()) // extract method nodes - val methodNodes = GumTreePythonFunctionSplitter().splitIntoFunctions(fileNode, file.path) + val methodNodes = GumTreePythonFunctionSplitter().collectFunctionInfo(fileNode, file.path) methodNodes.forEach { methodInfo -> // Retrieve a method identifier diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt index 1955f3b5..e70cdccd 100644 
--- a/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt @@ -7,7 +7,7 @@ import astminer.parse.antlr.hasLastLabel class JavaFunctionSplitter : TreeFunctionSplitter { private val methodNodeType = "methodDeclaration" - override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { + override fun collectFunctionInfo(root: AntlrNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { (it).hasLastLabel(methodNodeType) } diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt index 8d9929e8..5e9bbd2a 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt @@ -15,7 +15,7 @@ class JavaScriptFunctionSplitter : TreeFunctionSplitter { private const val FUNCTION_NODE = "Function" } - override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { + override fun collectFunctionInfo(root: AntlrNode, filePath: String): Collection> { return root.preOrder().mapNotNull { node -> when { node.isArrowElement() -> JavaScriptArrowInfo(node, filePath) diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt index f3e987f4..99ceb40a 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt @@ -8,7 +8,7 @@ import astminer.parse.antlr.hasLastLabel class PythonFunctionSplitter : TreeFunctionSplitter { private val methodNode = "funcdef" - override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { + override fun collectFunctionInfo(root: AntlrNode, filePath: String): Collection> { val methodRoots = 
root.preOrder().filter { (it).hasLastLabel(methodNode) } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt index 39ca7ab7..faf343bb 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt @@ -5,7 +5,7 @@ import astminer.common.model.* class FuzzyFunctionSplitter : TreeFunctionSplitter { private val methodNode = "METHOD" - override fun splitIntoFunctions(root: FuzzyNode, filePath: String): Collection> { + override fun collectFunctionInfo(root: FuzzyNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { it.typeLabel == methodNode } return methodRoots.map { FuzzyCppFunctionInfo(it, filePath) } } diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt index fd9287e1..bad0f3a9 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt @@ -6,7 +6,7 @@ import astminer.parse.gumtree.GumTreeNode class GumTreeJavaFunctionSplitter : TreeFunctionSplitter { private val methodDeclaration = "MethodDeclaration" - override fun splitIntoFunctions(root: GumTreeNode, filePath: String): Collection> { + override fun collectFunctionInfo(root: GumTreeNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { it.typeLabel == methodDeclaration } return methodRoots.map { GumTreeJavaFunctionInfo(it, filePath) } } diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt index 04cd363a..a6694564 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt +++ 
b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt @@ -12,7 +12,7 @@ class GumTreePythonFunctionSplitter : TreeFunctionSplitter { } } - override fun splitIntoFunctions(root: GumTreeNode, filePath: String): Collection> { + override fun collectFunctionInfo(root: GumTreeNode, filePath: String): Collection> { val functionRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.typeLabel) } return functionRoots.map { GumTreePythonFunctionInfo(it, filePath) } } diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt index d6ce9886..23d4f791 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt @@ -22,7 +22,7 @@ class JavaFunctionSplitterTest { fun parseTree() { val testTree = parser.parseInputStream(File(FILE_PATH).inputStream()) assertNotNull(testTree) - functionInfos = functionSplitter.splitIntoFunctions(testTree, FILE_PATH) + functionInfos = functionSplitter.collectFunctionInfo(testTree, FILE_PATH) } @Test diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt index 83b44db2..577afb50 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt @@ -24,7 +24,7 @@ class JavaScriptFunctionSplitterTest { fun parseTree() { val testTree = parser.parseInputStream(File(testFilePath).inputStream()) assertNotNull(testTree) - functionInfos = functionSplitter.splitIntoFunctions(testTree, testFilePath) + functionInfos = functionSplitter.collectFunctionInfo(testTree, testFilePath) } @Test diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt 
b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt index 008af33f..dc648448 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt @@ -24,7 +24,7 @@ class PythonFunctionSplitterTest { fun parseTree() { val testTree = parser.parseInputStream(File(FILE_PATH).inputStream()) assertNotNull(testTree) - functionInfos = functionSplitter.splitIntoFunctions(testTree, FILE_PATH) + functionInfos = functionSplitter.collectFunctionInfo(testTree, FILE_PATH) } @Test diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index 86d965a4..d2ba146a 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -29,7 +29,7 @@ class FuzzyMethodSplitterTest { Assume.assumeTrue(checkExecutable("g++")) val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.cpp").inputStream()) assertNotNull(testTree) - methodInfos = methodSplitter.splitIntoFunctions(testTree, FILE_PATH) + methodInfos = methodSplitter.collectFunctionInfo(testTree, FILE_PATH) } @Test diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt index 48b2d893..3d3d21cc 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt @@ -10,7 +10,7 @@ private fun createTree(filename: String): GumTreeNode = GumTreeJavaParser().parseInputStream(File(filename).inputStream()) private fun createAndSplitTree(filename: String): Collection> = - GumTreeJavaFunctionSplitter().splitIntoFunctions(createTree(filename), filename) + 
GumTreeJavaFunctionSplitter().collectFunctionInfo(createTree(filename), filename) class GumTreeJavaFunctionSplitterTest { @Test diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt index 435ba5cb..65ab16b4 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt @@ -15,7 +15,7 @@ class GumTreePythonFunctionSplitterTest { GumTreePythonParser().parseInputStream(File(filename).inputStream()) private fun splitFunctions(filename: String): Collection> = - GumTreePythonFunctionSplitter().splitIntoFunctions(parse(filename), filename) + GumTreePythonFunctionSplitter().collectFunctionInfo(parse(filename), filename) private fun createPath(file: String) = "src/test/resources/gumTreeMethodSplitter/$file" From 5b074201a8af644650785b914a743b734fc304a4 Mon Sep 17 00:00:00 2001 From: furetur Date: Sat, 5 Jun 2021 11:48:34 +0500 Subject: [PATCH 237/308] reimplemented TreeEnumerator --- .../astminer/storage/ast/JsonAstStorage.kt | 63 ++++++++++--------- ...EnumeratorTest.kt => TreeFlattenerTest.kt} | 10 +-- 2 files changed, 38 insertions(+), 35 deletions(-) rename src/test/kotlin/astminer/storage/ast/{TreeEnumeratorTest.kt => TreeFlattenerTest.kt} (89%) diff --git a/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt b/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt index 132af4cc..e0007951 100644 --- a/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt @@ -17,7 +17,7 @@ private typealias Id = Int * Each tree is flattened and represented as a list of nodes. 
*/ class JsonAstStorage(override val outputDirectoryPath: String) : Storage { - private val treeEnumerator = TreeEnumerator() + private val treeFlattener = TreeFlattener() private val writer: PrintWriter @@ -35,11 +35,11 @@ class JsonAstStorage(override val outputDirectoryPath: String) : Storage { @Serializable private data class OutputNode(val token: String, val typeLabel: String, val children: List) - private fun TreeEnumerator.EnumeratedNode.toOutputNode() = + private fun TreeFlattener.EnumeratedNode.toOutputNode() = OutputNode(node.token, node.typeLabel, children.map { it.id }) override fun store(labeledResult: LabeledResult) { - val outputNodes = treeEnumerator.enumerate(labeledResult.root).map { it.toOutputNode() } + val outputNodes = treeFlattener.flatten(labeledResult.root).map { it.toOutputNode() } val labeledAst = LabeledAst(labeledResult.label, outputNodes) writer.println(Json.encodeToString(labeledAst)) } @@ -47,39 +47,42 @@ class JsonAstStorage(override val outputDirectoryPath: String) : Storage { override fun close() { writer.close() } +} + +/** + * Gives ids to all nodes in the tree and flattens the tree + */ +class TreeFlattener { + private var currentId: Id = 0 /** - * Gives ids to all nodes in the tree + * Node that has been given an Id. + * Also all his children have been given ids. */ - internal class TreeEnumerator { - /** - * Node that has been given an Id. - * Also all his children have been given ids. 
- */ - data class EnumeratedNode(val id: Id, val node: Node, val children: List, val treeSize: Int) + data class EnumeratedNode(val id: Id, val node: Node, val children: List) - private fun enumerateTree(node: Node, currentId: Id = 0): EnumeratedNode { - var nChildren = 0 - val children = node.children.map { child -> - val enumeratedChild = enumerateTree(child, currentId + nChildren + 1) - nChildren += enumeratedChild.treeSize - enumeratedChild - } - return EnumeratedNode(currentId, node, children, nChildren + 1) - } + private fun enumerateTree(node: Node): EnumeratedNode { + val nodeId = currentId + currentId += 1 + return EnumeratedNode(nodeId, node, node.children.map { enumerateTree(it) }) + } - private fun flattenTree(enumeratedNode: EnumeratedNode): List { - val result = mutableListOf(enumeratedNode) - for (child in enumeratedNode.children) { - result.addAll(flattenTree(child)) - } - return result + private fun putFlattenedTree(enumeratedNode: EnumeratedNode, flattenedTree: MutableList) { + flattenedTree.add(enumeratedNode) + for (child in enumeratedNode.children) { + putFlattenedTree(child, flattenedTree) } + } - /** - * Enumerates the given tree and returns the flattened tree. - * Enumerated node's id must be equal to its index in the returned list - */ - fun enumerate(node: Node): List = flattenTree(enumerateTree(node)) + /** + * Enumerates the given tree and returns the flattened tree. 
+ * Enumerated node's id must be equal to its index in the returned list + */ + fun flatten(node: Node): List { + currentId = 0 + val enumeratedTree = enumerateTree(node) + val result = mutableListOf() + putFlattenedTree(enumeratedTree, result) + return result } } diff --git a/src/test/kotlin/astminer/storage/ast/TreeEnumeratorTest.kt b/src/test/kotlin/astminer/storage/ast/TreeFlattenerTest.kt similarity index 89% rename from src/test/kotlin/astminer/storage/ast/TreeEnumeratorTest.kt rename to src/test/kotlin/astminer/storage/ast/TreeFlattenerTest.kt index 8b4194d3..f1cae49c 100644 --- a/src/test/kotlin/astminer/storage/ast/TreeEnumeratorTest.kt +++ b/src/test/kotlin/astminer/storage/ast/TreeFlattenerTest.kt @@ -8,18 +8,18 @@ import org.junit.Before import org.junit.Test import kotlin.test.assertEquals -internal class TreeEnumeratorTest { - private lateinit var treeEnumerator: JsonAstStorage.TreeEnumerator +internal class TreeFlattenerTest { + private lateinit var treeFlattener: TreeFlattener @Before fun init() { - treeEnumerator = JsonAstStorage.TreeEnumerator() + treeFlattener = TreeFlattener() } private data class EnumeratedResult(val id: Int, val typeLabel: String, val children: List = emptyList()) private fun enumerate(node: DummyNode): List { - val enumeratedNodes = treeEnumerator.enumerate(node) + val enumeratedNodes = treeFlattener.flatten(node) return enumeratedNodes.map { EnumeratedResult(it.id, it.node.typeLabel, it.children.map { child -> child.id }) } } @@ -32,7 +32,7 @@ internal class TreeEnumeratorTest { @Test fun `test should be reusable`() { - treeEnumerator.enumerate(DummyNode("something previous")) + treeFlattener.flatten(DummyNode("something previous")) val node = DummyNode("a") val expected = listOf(EnumeratedResult(0, "a")) assertEquals(expected, enumerate(node)) From 22a5a3a0b319ef9e80356e8f5d69c0a6b1b297b9 Mon Sep 17 00:00:00 2001 From: furetur Date: Wed, 9 Jun 2021 15:11:35 +0500 Subject: [PATCH 238/308] Revert "renamed splitIntoFunctions 
to collectFunctionInfo" This reverts commit 936909a2 --- src/main/kotlin/astminer/common/model/FunctionInfoModel.kt | 2 +- src/main/kotlin/astminer/common/model/HandlerModel.kt | 2 +- src/main/kotlin/astminer/examples/AllJavaFiles.kt | 2 +- src/main/kotlin/astminer/examples/AllJavaMethods.kt | 2 +- src/main/kotlin/astminer/examples/AllPythonMethods.kt | 2 +- .../kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt | 2 +- .../parse/antlr/javascript/JavaScriptFunctionSplitter.kt | 2 +- .../astminer/parse/antlr/python/PythonFunctionSplitter.kt | 2 +- .../kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt | 2 +- .../astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt | 2 +- .../parse/gumtree/python/GumTreePythonFunctionSplitter.kt | 2 +- .../astminer/parse/antlr/java/JavaFunctionSplitterTest.kt | 2 +- .../parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt | 2 +- .../astminer/parse/antlr/python/PythonFunctionSplitterTest.kt | 2 +- src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt | 2 +- .../parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt | 2 +- .../parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt index b0bf80c4..e417f116 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt @@ -1,7 +1,7 @@ package astminer.common.model interface TreeFunctionSplitter { - fun collectFunctionInfo(root: T, filePath: String): Collection> + fun splitIntoFunctions(root: T, filePath: String): Collection> } class FunctionInfoPropertyNotImplementedException(propertyName: String) : diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index 579b9d4e..7f312df6 100644 --- 
a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -25,6 +25,6 @@ abstract class LanguageHandler { protected abstract val splitter: TreeFunctionSplitter fun splitIntoFunctions(): Collection> { - return splitter.collectFunctionInfo(parseResult.root, parseResult.filePath) + return splitter.splitIntoFunctions(parseResult.root, parseResult.filePath) } } diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index 3429a603..fa9320fe 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -17,7 +17,7 @@ fun allJavaFiles() { File(inputDir).forFilesWithSuffix("11.java") { file -> val node = JavaParser().parseInputStream(file.inputStream()) node.prettyPrint() - JavaFunctionSplitter().collectFunctionInfo(node, file.path).forEach { + JavaFunctionSplitter().splitIntoFunctions(node, file.path).forEach { println(it.name) println(it.returnType) println(it.enclosingElement?.name) diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index e3d5e079..73ba198c 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -31,7 +31,7 @@ fun allJavaMethods() { val fileNode = GumTreeJavaParser().parseInputStream(file.inputStream()) //extract method nodes - val methodNodes = GumTreeJavaFunctionSplitter().collectFunctionInfo(fileNode, file.path) + val methodNodes = GumTreeJavaFunctionSplitter().splitIntoFunctions(fileNode, file.path) methodNodes.forEach { methodInfo -> //Retrieve a method identifier diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index 8f2f1f25..bc54721a 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ 
-27,7 +27,7 @@ fun allPythonMethods() { val fileNode = GumTreePythonParser().parseInputStream(file.inputStream()) // extract method nodes - val methodNodes = GumTreePythonFunctionSplitter().collectFunctionInfo(fileNode, file.path) + val methodNodes = GumTreePythonFunctionSplitter().splitIntoFunctions(fileNode, file.path) methodNodes.forEach { methodInfo -> // Retrieve a method identifier diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt index e70cdccd..1955f3b5 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt @@ -7,7 +7,7 @@ import astminer.parse.antlr.hasLastLabel class JavaFunctionSplitter : TreeFunctionSplitter { private val methodNodeType = "methodDeclaration" - override fun collectFunctionInfo(root: AntlrNode, filePath: String): Collection> { + override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { (it).hasLastLabel(methodNodeType) } diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt index 5e9bbd2a..8d9929e8 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt @@ -15,7 +15,7 @@ class JavaScriptFunctionSplitter : TreeFunctionSplitter { private const val FUNCTION_NODE = "Function" } - override fun collectFunctionInfo(root: AntlrNode, filePath: String): Collection> { + override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { return root.preOrder().mapNotNull { node -> when { node.isArrowElement() -> JavaScriptArrowInfo(node, filePath) diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt 
b/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt index 99ceb40a..f3e987f4 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt @@ -8,7 +8,7 @@ import astminer.parse.antlr.hasLastLabel class PythonFunctionSplitter : TreeFunctionSplitter { private val methodNode = "funcdef" - override fun collectFunctionInfo(root: AntlrNode, filePath: String): Collection> { + override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { (it).hasLastLabel(methodNode) } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt index faf343bb..39ca7ab7 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt @@ -5,7 +5,7 @@ import astminer.common.model.* class FuzzyFunctionSplitter : TreeFunctionSplitter { private val methodNode = "METHOD" - override fun collectFunctionInfo(root: FuzzyNode, filePath: String): Collection> { + override fun splitIntoFunctions(root: FuzzyNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { it.typeLabel == methodNode } return methodRoots.map { FuzzyCppFunctionInfo(it, filePath) } } diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt index bad0f3a9..fd9287e1 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt @@ -6,7 +6,7 @@ import astminer.parse.gumtree.GumTreeNode class GumTreeJavaFunctionSplitter : TreeFunctionSplitter { private val methodDeclaration = "MethodDeclaration" - override fun collectFunctionInfo(root: GumTreeNode, filePath: 
String): Collection> { + override fun splitIntoFunctions(root: GumTreeNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { it.typeLabel == methodDeclaration } return methodRoots.map { GumTreeJavaFunctionInfo(it, filePath) } } diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt index a6694564..04cd363a 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt @@ -12,7 +12,7 @@ class GumTreePythonFunctionSplitter : TreeFunctionSplitter { } } - override fun collectFunctionInfo(root: GumTreeNode, filePath: String): Collection> { + override fun splitIntoFunctions(root: GumTreeNode, filePath: String): Collection> { val functionRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.typeLabel) } return functionRoots.map { GumTreePythonFunctionInfo(it, filePath) } } diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt index 23d4f791..d6ce9886 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt @@ -22,7 +22,7 @@ class JavaFunctionSplitterTest { fun parseTree() { val testTree = parser.parseInputStream(File(FILE_PATH).inputStream()) assertNotNull(testTree) - functionInfos = functionSplitter.collectFunctionInfo(testTree, FILE_PATH) + functionInfos = functionSplitter.splitIntoFunctions(testTree, FILE_PATH) } @Test diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt index 577afb50..83b44db2 100644 --- 
a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt @@ -24,7 +24,7 @@ class JavaScriptFunctionSplitterTest { fun parseTree() { val testTree = parser.parseInputStream(File(testFilePath).inputStream()) assertNotNull(testTree) - functionInfos = functionSplitter.collectFunctionInfo(testTree, testFilePath) + functionInfos = functionSplitter.splitIntoFunctions(testTree, testFilePath) } @Test diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt index dc648448..008af33f 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt @@ -24,7 +24,7 @@ class PythonFunctionSplitterTest { fun parseTree() { val testTree = parser.parseInputStream(File(FILE_PATH).inputStream()) assertNotNull(testTree) - functionInfos = functionSplitter.collectFunctionInfo(testTree, FILE_PATH) + functionInfos = functionSplitter.splitIntoFunctions(testTree, FILE_PATH) } @Test diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index d2ba146a..86d965a4 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -29,7 +29,7 @@ class FuzzyMethodSplitterTest { Assume.assumeTrue(checkExecutable("g++")) val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.cpp").inputStream()) assertNotNull(testTree) - methodInfos = methodSplitter.collectFunctionInfo(testTree, FILE_PATH) + methodInfos = methodSplitter.splitIntoFunctions(testTree, FILE_PATH) } @Test diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt 
b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt index 3d3d21cc..48b2d893 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt @@ -10,7 +10,7 @@ private fun createTree(filename: String): GumTreeNode = GumTreeJavaParser().parseInputStream(File(filename).inputStream()) private fun createAndSplitTree(filename: String): Collection> = - GumTreeJavaFunctionSplitter().collectFunctionInfo(createTree(filename), filename) + GumTreeJavaFunctionSplitter().splitIntoFunctions(createTree(filename), filename) class GumTreeJavaFunctionSplitterTest { @Test diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt index 65ab16b4..435ba5cb 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt @@ -15,7 +15,7 @@ class GumTreePythonFunctionSplitterTest { GumTreePythonParser().parseInputStream(File(filename).inputStream()) private fun splitFunctions(filename: String): Collection> = - GumTreePythonFunctionSplitter().collectFunctionInfo(parse(filename), filename) + GumTreePythonFunctionSplitter().splitIntoFunctions(parse(filename), filename) private fun createPath(file: String) = "src/test/resources/gumTreeMethodSplitter/$file" From 00deebceac77173c335ca5c30c51e7dc2165a445 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Tue, 13 Jul 2021 20:29:11 +0300 Subject: [PATCH 239/308] Update to Java 11 --- Dockerfile | 8 ++++---- build.gradle.kts | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 09a5733f..822f8ad5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,8 +2,8 @@ FROM ubuntu:20.04 LABEL desc="Docker container to run ASTMiner with all 
preinstalled requirements" -# Instal OpenJDK8 -RUN apt-get update && apt-get install -y openjdk-8-jdk +# Instal OpenJDK11 +RUN apt-get update && apt-get install -y openjdk-11-jdk # Install G++ (required for Fuzzy parser) RUN apt-get update && apt-get install -y g++ @@ -11,7 +11,7 @@ RUN apt-get update && apt-get install -y g++ # Install PythonParser for GumTree ARG PYTHONPARSER_REPO=https://raw.githubusercontent.com/JetBrains-Research/pythonparser/master RUN apt-get update && \ - apt-get install -y --no-install-recommends -y python3.8 python3-pip git wget && \ + apt-get install -y python3.8 python3-pip git wget && \ mkdir pythonparser && \ cd pythonparser && \ wget $PYTHONPARSER_REPO/requirements.txt && \ @@ -27,4 +27,4 @@ COPY . . # Prepare shadow jar RUN ./gradlew shadowJar -CMD ["java", "-jar", "build/shadow/astminer.jar"] +ENTRYPOINT ["java", "-jar", "build/shadow/astminer.jar"] diff --git a/build.gradle.kts b/build.gradle.kts index 4fbd3043..00b3eec0 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -79,12 +79,12 @@ tasks.clean { tasks.compileKotlin { dependsOn(tasks.generateGrammarSource) - kotlinOptions.jvmTarget = "1.8" + kotlinOptions.jvmTarget = "11" } tasks.compileJava { dependsOn(tasks.generateGrammarSource) - targetCompatibility = "1.8" - sourceCompatibility = "1.8" + targetCompatibility = "11" + sourceCompatibility = "11" } tasks.dokkaHtml.configure { From b636029746cc8358c86b98d90b69fb4ceb05a0fa Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 14 Jul 2021 13:15:05 +0300 Subject: [PATCH 240/308] added possible fix and test --- .../parse/antlr/java/AntlrJavaFunctionInfo.kt | 2 +- .../parse/antlr/java/JavaFunctionSplitterTest.kt | 12 +++++++++++- .../methodSplitting/testMethodSplitting.java | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt index 7003724f..db717405 100644 --- 
a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -59,7 +59,7 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode) : FunctionInfo Date: Wed, 14 Jul 2021 14:11:29 +0300 Subject: [PATCH 241/308] small merging fix --- src/main/kotlin/astminer/Main.kt | 6 +++--- src/main/kotlin/astminer/config/ParserConfig.kt | 3 ++- .../kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt | 2 +- .../kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index f31b1cdb..2457e6f0 100644 --- a/src/main/kotlin/astminer/Main.kt +++ b/src/main/kotlin/astminer/Main.kt @@ -20,9 +20,9 @@ private val logger = KotlinLogging.logger("Main") class PipelineRunner : CliktCommand(name = "") { val config: File by argument("config", help = "Path to config").file( - exists = true, - folderOkay = false, - readable = true + mustExist = true, + canBeDir = false, + mustBeReadable = true ) override fun run() { diff --git a/src/main/kotlin/astminer/config/ParserConfig.kt b/src/main/kotlin/astminer/config/ParserConfig.kt index 2a716bb3..c85fd1b0 100644 --- a/src/main/kotlin/astminer/config/ParserConfig.kt +++ b/src/main/kotlin/astminer/config/ParserConfig.kt @@ -29,5 +29,6 @@ enum class FileExtension(val fileExtension: String) { @SerialName("java") Java("java"), @SerialName("js") JavaScript("js"), @SerialName("c") C("c"), - @SerialName("cpp") Cpp("cpp") + @SerialName("cpp") Cpp("cpp"), + @SerialName("php") PHP("php") } diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index 94f304e9..9a553897 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -7,7 +7,7 @@ import 
astminer.common.model.FunctionInfoParameter import astminer.parse.antlr.* import astminer.parse.findEnclosingElementBy -class ANTLRPHPFunctionInfo(override val root: AntlrNode) : FunctionInfo { +class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { override val returnType = getElementType(root) override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION_NAME) diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt index 43bfa666..fbcc46db 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt @@ -10,8 +10,8 @@ class PHPFunctionSplitter : TreeFunctionSplitter { const val FUNCTION_TOKEN = "Function_" } - override fun splitIntoFunctions(root: AntlrNode): Collection> { + override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { return root.preOrder().filter { it.typeLabel == LAMBDA_TOKEN || it.typeLabel == FUNCTION_TOKEN } - .mapNotNull { node -> node.parent?.let {statement -> ANTLRPHPFunctionInfo(statement) } } + .mapNotNull { node -> node.parent?.let {statement -> ANTLRPHPFunctionInfo(statement, filePath) } } } } \ No newline at end of file From 3ebb9fa1ace677bc0b649d533d7eae87562b7b3d Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 14 Jul 2021 14:21:01 +0300 Subject: [PATCH 242/308] test fix --- .../astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt index f17a81fa..40ba4c7e 100644 --- a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt @@ -23,7 +23,7 @@ internal class 
ANTLRPHPFunctionSplitterTest { fun parseTree() { val testTree = parser.parseInputStream(File(testFilePath).inputStream()) assertNotNull(testTree) - functionInfos = functionSplitter.splitIntoFunctions(testTree) + functionInfos = functionSplitter.splitIntoFunctions(testTree, testFilePath) } @Test From e0f7a4c03ec1cec11a854ae3f0f1a532e226243b Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Wed, 21 Jul 2021 17:42:03 +0300 Subject: [PATCH 243/308] Update kotlin --- build.gradle.kts | 16 +++++++--------- src/main/kotlin/astminer/Main.kt | 5 ++--- src/main/kotlin/astminer/common/TreeUtil.kt | 2 +- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index e3924d25..ef2177ac 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -5,15 +5,15 @@ version = "0.6.4" plugins { id("java") - kotlin("jvm") version "1.4.32" apply true id("antlr") id("idea") id("application") + id("maven-publish") id("org.jetbrains.dokka") version "0.9.18" id("me.champeau.gradle.jmh") version "0.5.0" - id("maven-publish") - id("tanvd.kosogor") version "1.0.10" apply true - kotlin("plugin.serialization") version "1.4.32" + id("tanvd.kosogor") version "1.0.10" + kotlin("jvm") version "1.5.21" apply true + kotlin("plugin.serialization") version "1.5.21" } defaultTasks("run") @@ -36,16 +36,14 @@ dependencies { // ===== Main ===== implementation(kotlin("stdlib")) - implementation("com.github.ajalt", "clikt", "2.1.0") + implementation("com.github.ajalt.clikt:clikt:3.2.0") implementation("com.charleskorn.kaml:kaml:0.33.0") - implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.2.0") - + implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.2.2") // ===== Logging ===== implementation("org.slf4j", "slf4j-simple", "1.7.30") implementation("io.github.microutils:kotlin-logging:1.5.9") - // ===== Test ===== // https://mvnrepository.com/artifact/org.slf4j/slf4j-simple testImplementation("junit:junit:4.11") @@ -63,7 +61,7 @@ 
idea.module.generatedSourceDirs.add(file(generatedSourcesPath)) tasks.generateGrammarSource { // maxHeapSize = "64m" - arguments = arguments + listOf("-package", "me.vovak.antlr.parser") + arguments.addAll(listOf("-package", "me.vovak.antlr.parser")) // Keep a copy of generated sources doLast { println("Copying generated grammar lexer/parser files to main directory.") diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index f31b1cdb..2c5c88d2 100644 --- a/src/main/kotlin/astminer/Main.kt +++ b/src/main/kotlin/astminer/Main.kt @@ -20,9 +20,8 @@ private val logger = KotlinLogging.logger("Main") class PipelineRunner : CliktCommand(name = "") { val config: File by argument("config", help = "Path to config").file( - exists = true, - folderOkay = false, - readable = true + mustExist = true, + canBeDir = false ) override fun run() { diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index 0999f7b1..a84038c7 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -7,7 +7,7 @@ const val DEFAULT_TOKEN = "EMPTY" * https://github.com/tech-srl/code2vec/blob/master/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java */ fun normalizeToken(token: String, defaultToken: String): String { - val cleanToken = token.toLowerCase() + val cleanToken = token.lowercase() .replace("\\\\n".toRegex(), "") // escaped new line .replace("//s+".toRegex(), "") // whitespaces .replace("[\"',]".toRegex(), "") // quotes, apostrophies, commas From 35e46183aeb659b87d6ae31dc13fa86911af28c6 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Wed, 21 Jul 2021 17:54:22 +0300 Subject: [PATCH 244/308] Update antlr php to use file path --- src/main/kotlin/astminer/config/ParserConfig.kt | 3 ++- .../astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt | 2 +- .../kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt | 7 ++++--- 
src/main/kotlin/astminer/parse/factory.kt | 2 +- .../parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt | 2 +- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/main/kotlin/astminer/config/ParserConfig.kt b/src/main/kotlin/astminer/config/ParserConfig.kt index 2a716bb3..f0383d6b 100644 --- a/src/main/kotlin/astminer/config/ParserConfig.kt +++ b/src/main/kotlin/astminer/config/ParserConfig.kt @@ -29,5 +29,6 @@ enum class FileExtension(val fileExtension: String) { @SerialName("java") Java("java"), @SerialName("js") JavaScript("js"), @SerialName("c") C("c"), - @SerialName("cpp") Cpp("cpp") + @SerialName("cpp") Cpp("cpp"), + @SerialName("php") Php("php") } diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index 94f304e9..9a553897 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -7,7 +7,7 @@ import astminer.common.model.FunctionInfoParameter import astminer.parse.antlr.* import astminer.parse.findEnclosingElementBy -class ANTLRPHPFunctionInfo(override val root: AntlrNode) : FunctionInfo { +class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { override val returnType = getElementType(root) override val nameNode: AntlrNode? 
= root.getChildOfType(FUNCTION_NAME) diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt index 43bfa666..e85399ca 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt @@ -10,8 +10,9 @@ class PHPFunctionSplitter : TreeFunctionSplitter { const val FUNCTION_TOKEN = "Function_" } - override fun splitIntoFunctions(root: AntlrNode): Collection> { - return root.preOrder().filter { it.typeLabel == LAMBDA_TOKEN || it.typeLabel == FUNCTION_TOKEN } - .mapNotNull { node -> node.parent?.let {statement -> ANTLRPHPFunctionInfo(statement) } } + override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { + return root.preOrder() + .filter { it.typeLabel == LAMBDA_TOKEN || it.typeLabel == FUNCTION_TOKEN } + .mapNotNull { node -> node.parent?.let { statement -> ANTLRPHPFunctionInfo(statement, filePath) } } } } \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt index 01f02bbc..3374a500 100644 --- a/src/main/kotlin/astminer/parse/factory.kt +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -31,7 +31,7 @@ private fun getAntlrHandlerFactory(extension: FileExtension): HandlerFactory { FileExtension.Java -> AntlrJavaHandlerFactory FileExtension.JavaScript -> AntlrJavascriptHandlerFactory FileExtension.Python -> AntlrPythonHandlerFactory - "php" -> AntlrPHPHandlerFactory + FileExtension.Php -> AntlrPHPHandlerFactory else -> throw UnsupportedOperationException() } } diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt index f17a81fa..40ba4c7e 100644 --- a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt @@ 
-23,7 +23,7 @@ internal class ANTLRPHPFunctionSplitterTest { fun parseTree() { val testTree = parser.parseInputStream(File(testFilePath).inputStream()) assertNotNull(testTree) - functionInfos = functionSplitter.splitIntoFunctions(testTree) + functionInfos = functionSplitter.splitIntoFunctions(testTree, testFilePath) } @Test From 17ed0c018a5182c59b28f38740c6cec8a1a8b461 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Wed, 21 Jul 2021 22:15:25 +0300 Subject: [PATCH 245/308] Add detekt to astminer --- .github/workflows/build.yml | 85 +- build.gradle.kts | 38 +- detekt.yaml | 36 + src/jmh/README.md | 43 - src/jmh/benchmarks.csv | 103 - src/jmh/kotlin/cli/BenchmarkResultWorker.kt | 108 - src/jmh/kotlin/cli/BenchmarksSetup.kt | 50 - .../kotlin/cli/Code2VecExtractorBenchmarks.kt | 36 - src/jmh/kotlin/cli/DownloadException.kt | 5 - .../cli/PathContextsExtractorBenchmarks.kt | 36 - .../kotlin/cli/ProjectParserCsvBenchmarks.kt | 36 - .../kotlin/cli/ProjectParserDotBenchmarks.kt | 36 - src/jmh/resources/LongJavaFile.java | 5110 ----------------- src/jmh/results.md | 13 - src/main/kotlin/astminer/common/FileUtil.kt | 4 +- .../common/model/FunctionInfoModel.kt | 3 +- .../astminer/common/model/ParsingModel.kt | 12 +- .../storage/RankedIncrementalIdStorage.kt | 2 +- .../{PipelineConfigs.kt => PipelineConfig.kt} | 0 src/main/kotlin/astminer/examples/Common.kt | 2 +- .../astminer/examples/FeatureExtraction.kt | 6 +- .../astminer/featureextraction/TreeFeature.kt | 2 +- .../TreeFeatureValueStorage.kt | 13 +- .../kotlin/astminer/filters/CommonFilters.kt | 4 +- .../astminer/filters/FunctionFilters.kt | 2 +- ...ctors.kt => FunctionNameLabelExtractor.kt} | 4 +- .../kotlin/astminer/parse/ParsingException.kt | 4 +- .../astminer/parse/antlr/AntlrHandler.kt | 2 +- .../kotlin/astminer/parse/antlr/AntlrUtil.kt | 21 +- .../parse/antlr/java/JavaFunctionSplitter.kt | 2 +- .../astminer/parse/antlr/java/JavaParser.kt | 4 +- .../javascript/AntlrJavaScriptElementInfo.kt | 2 +- 
.../javascript/JavaScriptFunctionSplitter.kt | 2 +- .../antlr/javascript/JavaScriptParser.kt | 4 +- .../parse/antlr/php/ANTLRPHPFunctionInfo.kt | 55 +- .../parse/antlr/php/PHPFunctionSplitter.kt | 2 +- .../astminer/parse/antlr/php/PHPParser.kt | 5 +- .../antlr/python/AntlrPythonFunctionInfo.kt | 7 +- .../parse/antlr/python/PythonParser.kt | 4 +- src/main/kotlin/astminer/parse/factory.kt | 3 +- .../parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt | 4 +- .../parse/fuzzy/cpp/FuzzyCppParser.kt | 63 +- .../parse/fuzzy/cpp/FuzzyFunctionSplitter.kt | 2 +- .../astminer/parse/fuzzy/cpp/FuzzyHandler.kt | 10 +- .../kotlin/astminer/parse/fuzzy/cpp/utils.kt | 2 +- .../astminer/parse/gumtree/GumTreeNode.kt | 2 +- .../astminer/parse/gumtree/GumtreeHandler.kt | 2 +- .../java/GumTreeJavaFunctionSplitter.kt | 2 +- .../parse/gumtree/java/GumTreeJavaParser.kt | 2 +- .../python/GumTreePythonFunctionInfo.kt | 8 +- .../gumtree/python/GumTreePythonParser.kt | 2 +- src/main/kotlin/astminer/paths/PathMiner.kt | 2 +- src/main/kotlin/astminer/paths/PathWorker.kt | 2 +- src/main/kotlin/astminer/pipeline/Pipeline.kt | 5 +- .../astminer/pipeline/branch/Exceptions.kt | 2 +- .../pipeline/branch/PipelineBranch.kt | 2 +- .../astminer/storage/path/PathBasedStorage.kt | 4 +- .../common/{TestUtils.kt => DummyNode.kt} | 1 - .../astminer/common/FileParsingUtilTest.kt | 14 +- .../kotlin/astminer/common/TreeUtilTest.kt | 18 +- .../storage/RankedIncrementalIdStorageTest.kt | 4 +- .../{TreeFeatureTestUtil.kt => PrettyNode.kt} | 1 - .../featureextraction/TreeFeatureTest.kt | 3 +- .../TreeFeatureTestUtilTest.kt | 2 +- .../astminer/filters/FunctionFiltersTest.kt | 2 +- .../labelextractor/FileNameExtractorTest.kt | 14 +- .../labelextractor/FolderNameExtractorTest.kt | 11 +- .../FunctionNameLabelExtractorTest.kt | 11 +- .../astminer/parse/antlr/AntrlUtilTest.kt | 2 +- .../parse/antlr/java/ANTLRJavaParserTest.kt | 2 +- .../antlr/java/JavaFunctionSplitterTest.kt | 15 +- .../javascript/ANTLRJavaScriptParserTest.kt | 3 +- 
.../JavaScriptFunctionSplitterTest.kt | 16 +- .../antlr/php/ANTLRPHPFunctionSplitterTest.kt | 37 +- .../parse/antlr/php/ANTLRPHPParserText.kt | 2 +- .../antlr/python/ANTLRPythonParserTest.kt | 3 +- .../python/PythonFunctionSplitterTest.kt | 15 +- .../astminer/parse/cpp/FuzzyCppParserTest.kt | 2 +- .../parse/cpp/FuzzyMethodSplitterTest.kt | 20 +- .../java/GumTreeJavaFunctionSplitterTest.kt | 5 +- .../gumtree/java/GumTreeJavaParserTest.kt | 2 +- .../astminer/paths/PathWorkerTestBase.kt | 4 +- .../astminer/paths/PathWorkerTestUtil.kt | 12 +- .../paths/SampleTreePathWorkerTest.kt | 2 +- .../astminer/storage/ast/CsvAstStorageTest.kt | 3 +- .../astminer/storage/ast/DotAstStorageTest.kt | 4 +- 86 files changed, 398 insertions(+), 5854 deletions(-) create mode 100644 detekt.yaml delete mode 100644 src/jmh/README.md delete mode 100644 src/jmh/benchmarks.csv delete mode 100644 src/jmh/kotlin/cli/BenchmarkResultWorker.kt delete mode 100644 src/jmh/kotlin/cli/BenchmarksSetup.kt delete mode 100644 src/jmh/kotlin/cli/Code2VecExtractorBenchmarks.kt delete mode 100644 src/jmh/kotlin/cli/DownloadException.kt delete mode 100644 src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt delete mode 100644 src/jmh/kotlin/cli/ProjectParserCsvBenchmarks.kt delete mode 100644 src/jmh/kotlin/cli/ProjectParserDotBenchmarks.kt delete mode 100644 src/jmh/resources/LongJavaFile.java delete mode 100644 src/jmh/results.md rename src/main/kotlin/astminer/config/{PipelineConfigs.kt => PipelineConfig.kt} (100%) rename src/main/kotlin/astminer/labelextractor/{FunctionLabelExtractors.kt => FunctionNameLabelExtractor.kt} (89%) rename src/test/kotlin/astminer/common/{TestUtils.kt => DummyNode.kt} (99%) rename src/test/kotlin/astminer/featureextraction/{TreeFeatureTestUtil.kt => PrettyNode.kt} (99%) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d7da813a..b99ac140 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,15 +1,90 @@ -name: Gradle Build 
+name: Build with lint -on: [push, pull_request] +on: [push] jobs: + setup: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Setup Java + uses: actions/setup-java@v1 + with: + java-version: 11 + + - name: Cache Gradle dependencies + uses: actions/cache@v2 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} + restore-keys: | + ${{ runner.os }}-gradle- + build: + needs: setup + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: Setup Java + uses: actions/setup-java@v1 + with: + java-version: 11 + - name: Cache Gradle dependencies + uses: actions/cache@v2 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} + restore-keys: | + ${{ runner.os }}-gradle- + + - name: Build plugin + run: ./gradlew buildPlugin + + test: + needs: build runs-on: ubuntu-latest - container: voudy/astminer + steps: + - uses: actions/checkout@v2 + + - name: Setup Java + uses: actions/setup-java@v1 + with: + java-version: 11 + - name: Run JUnit tests + run: ./gradlew test + + - name: Upload Test Report + uses: actions/upload-artifact@v2 + if: ${{ always() }} + with: + name: test-report + path: build/reports/tests/**/* + + code-style: + needs: build + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Build with Gradle # Building with gradle already include running tests - run: ./gradlew build --console=plain + - name: Setup Java + uses: actions/setup-java@v1 + with: + java-version: 11 + + - name: Run detekt + run: ./gradlew detekt + + - name: Upload Report + uses: github/codeql-action/upload-sarif@v1 + if: ${{ always() }} + with: + sarif_file: build/detekt.sarif diff --git a/build.gradle.kts b/build.gradle.kts index 482274e5..a15a84af 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -9,8 +9,8 @@ plugins { id("application") 
id("maven-publish") id("org.jetbrains.dokka") version "1.4.32" - id("me.champeau.gradle.jmh") version "0.5.0" id("tanvd.kosogor") version "1.0.10" + id("io.gitlab.arturbosch.detekt") version "1.17.1" kotlin("jvm") version "1.5.21" apply true kotlin("plugin.serialization") version "1.5.21" } @@ -47,11 +47,6 @@ dependencies { // ===== Test ===== testImplementation("junit:junit:4.13.2") testImplementation(kotlin("test-junit")) - - // ===== JMH ===== - jmhImplementation("org.jetbrains.kotlin:kotlin-reflect:1.5.0") - jmhImplementation("org.openjdk.jmh:jmh-core:1.21") - jmhImplementation("org.openjdk.jmh:jmh-generator-annprocess:1.21") } val generatedSourcesPath = "src/main/generated" @@ -90,25 +85,6 @@ tasks.compileJava { sourceCompatibility = "11" } -tasks.dokkaHtml.configure { - outputDirectory.set(buildDir.resolve("javadoc")) -} - -jmh { - duplicateClassesStrategy = DuplicatesStrategy.WARN - profilers = listOf("gc") - resultFormat = "CSV" - isZip64 = true - failOnError = true - forceGC = true - warmupIterations = 1 - iterations = 4 - fork = 2 - jvmArgs = listOf("-Xmx32g") - benchmarkMode = listOf("AverageTime") - resultsFile = file("build/reports/benchmarks.csv") -} - publishing { publications { create("maven") { @@ -153,4 +129,14 @@ tasks.withType { } } }) -} \ No newline at end of file +} + +detekt { + allRules = true + buildUponDefaultConfig = true + config = files("detekt.yaml") +} + +tasks.dokkaHtml.configure { + outputDirectory.set(buildDir.resolve("javadoc")) +} diff --git a/detekt.yaml b/detekt.yaml new file mode 100644 index 00000000..b4a5d99d --- /dev/null +++ b/detekt.yaml @@ -0,0 +1,36 @@ +complexity: + TooManyFunctions: + thresholdInClasses: 15 + thresholdInFiles: 16 + ComplexInterface: + threshold: 15 + StringLiteralDuplication: + active: false + LabeledExpression: + active: false + +exceptions: + TooGenericExceptionCaught: + active: false + +style: + WildcardImport: + active: false + MagicNumber: + ignorePropertyDeclaration: true + ignoreAnnotation: 
true + ignoreEnums: true + ignoreNumbers: ['-1', '0', '1', '2', '60', '100', '1000'] + excludes: ['**/test/**', '**/*Test.kt', '**/*Spec.kt', '**/examples/**'] + ForbiddenComment: + allowedPatterns: 'TODO:' + ReturnCount: + max: 5 + +output-reports: + active: true + exclude: + - 'TxtOutputReport' + - 'XmlOutputReport' + - 'HtmlOutputReport' + diff --git a/src/jmh/README.md b/src/jmh/README.md deleted file mode 100644 index 5dee616f..00000000 --- a/src/jmh/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# JMH benchmarks - -This benchmark runs ASTMiner in several CLI modes. These arguments are used for each run: -- Code2Vec -```(bash) -code2vec --project --output --split-tokens --granularity method --lang java -``` -- PathContext -```(bash) -pathContexts --project --output --lang java -``` -- Parse (CSV storage) -```(bash) -parse --project --output --split-tokens --granularity method --lang java --storage csv -``` -- Parse (DOT storage) -```(bash) -parse --project --output --split-tokens --granularity method --lang java --storage dot -``` - -As data, we use 3 types of data: -1. Long file - long java file with ~5000 lines of code. It can be found in [resources](resources/LongJavaFile.java). -2. Small project - concrete version of [Gradle](https://github.com/gradle/gradle) project. We clone branch with tag `v6.3.0`. -3. Big project - concrete version of [Intellij Community](https://github.com/JetBrains/intellij-community) project. We clone branch with tag `idea/193.7288.8`. - -## Results - -Current results can be found in [results.md](results.md). - -These results were on achieved on EC2 instance `i3.8xlarge`. Parameters can be bound in [build.gradle.kts](../../build.gradle.kts) file. - -## How to run benchmarks - -Steps to runs benchmarks: -1. gradle daemons should be stopped before, so execute `./gradlew --stop` -2. jmh plugin is unable to compile code incrementally, so execute `./gradlew clean` -3. 
to run benchmarks execute `./gradlew jmh` - -After that you will found results in `build/reports/benchmarks.csv`. -You can convert these results into markdown table (like [results.md](results.md)) using [benchmark result worker](kotlin/cli/BenchmarkResultWorker.kt). -You should write needed paths in `main`, compile the file and run it. - - diff --git a/src/jmh/benchmarks.csv b/src/jmh/benchmarks.csv deleted file mode 100644 index 02a28510..00000000 --- a/src/jmh/benchmarks.csv +++ /dev/null @@ -1,103 +0,0 @@ -"Benchmark","Mode","Threads","Samples","Score","Score Error (99.9%)","Unit" -"cli.Code2VecExtractorBenchmarks.bigProject","avgt",1,4,257.449218,18.887113,"s/op" -"cli.Code2VecExtractorBenchmarks.bigProject:·gc.alloc.rate","avgt",1,4,1079.698991,78.832117,"MB/sec" -"cli.Code2VecExtractorBenchmarks.bigProject:·gc.alloc.rate.norm","avgt",1,4,292008937596.000000,759030387.259436,"B/op" -"cli.Code2VecExtractorBenchmarks.bigProject:·gc.churn.PS_Eden_Space","avgt",1,4,1075.100078,81.539687,"MB/sec" -"cli.Code2VecExtractorBenchmarks.bigProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,290766061568.000000,8037163724.485502,"B/op" -"cli.Code2VecExtractorBenchmarks.bigProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.520222,0.920481,"MB/sec" -"cli.Code2VecExtractorBenchmarks.bigProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,140908712.000000,256704718.451890,"B/op" -"cli.Code2VecExtractorBenchmarks.bigProject:·gc.count","avgt",1,4,389.000000,NaN,"counts" -"cli.Code2VecExtractorBenchmarks.bigProject:·gc.time","avgt",1,4,4898.000000,NaN,"ms" -"cli.Code2VecExtractorBenchmarks.longFileProject","avgt",1,4,0.435963,0.069128,"s/op" -"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.alloc.rate","avgt",1,4,1250.807002,188.215176,"MB/sec" -"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.alloc.rate.norm","avgt",1,4,599492595.652174,16717117.281462,"B/op" -"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.churn.PS_Eden_Space","avgt",1,4,1176.625268,540.269616,"MB/sec" 
-"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,564309363.014493,287025514.462391,"B/op" -"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.966032,6.388467,"MB/sec" -"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,461027.619565,3075766.819041,"B/op" -"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.count","avgt",1,4,21.000000,NaN,"counts" -"cli.Code2VecExtractorBenchmarks.longFileProject:·gc.time","avgt",1,4,76.000000,NaN,"ms" -"cli.Code2VecExtractorBenchmarks.simpleProject","avgt",1,4,27.153595,0.904767,"s/op" -"cli.Code2VecExtractorBenchmarks.simpleProject:·gc.alloc.rate","avgt",1,4,1106.676024,31.717964,"MB/sec" -"cli.Code2VecExtractorBenchmarks.simpleProject:·gc.alloc.rate.norm","avgt",1,4,32090342828.000000,372411452.120528,"B/op" -"cli.Code2VecExtractorBenchmarks.simpleProject:·gc.churn.PS_Eden_Space","avgt",1,4,1025.486225,135.965407,"MB/sec" -"cli.Code2VecExtractorBenchmarks.simpleProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,29738008576.000000,4616229609.315597,"B/op" -"cli.Code2VecExtractorBenchmarks.simpleProject:·gc.count","avgt",1,4,32.000000,NaN,"counts" -"cli.Code2VecExtractorBenchmarks.simpleProject:·gc.time","avgt",1,4,205.000000,NaN,"ms" -"cli.PathContextsExtractorBenchmarks.bigProject","avgt",1,4,223.764264,8.243985,"s/op" -"cli.PathContextsExtractorBenchmarks.bigProject:·gc.alloc.rate","avgt",1,4,1142.237142,47.491930,"MB/sec" -"cli.PathContextsExtractorBenchmarks.bigProject:·gc.alloc.rate.norm","avgt",1,4,268599702660.000000,1371364176.737936,"B/op" -"cli.PathContextsExtractorBenchmarks.bigProject:·gc.churn.PS_Eden_Space","avgt",1,4,1138.936796,42.426358,"MB/sec" -"cli.PathContextsExtractorBenchmarks.bigProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,267824529408.000000,2120467185.192489,"B/op" -"cli.PathContextsExtractorBenchmarks.bigProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.539942,0.562919,"MB/sec" 
-"cli.PathContextsExtractorBenchmarks.bigProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,126940612.000000,131024484.723262,"B/op" -"cli.PathContextsExtractorBenchmarks.bigProject:·gc.count","avgt",1,4,418.000000,NaN,"counts" -"cli.PathContextsExtractorBenchmarks.bigProject:·gc.time","avgt",1,4,4353.000000,NaN,"ms" -"cli.PathContextsExtractorBenchmarks.longFileProject","avgt",1,4,0.626323,0.033845,"s/op" -"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.alloc.rate","avgt",1,4,1185.950058,72.860863,"MB/sec" -"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.alloc.rate.norm","avgt",1,4,816574896.720588,69853.062348,"B/op" -"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.churn.PS_Eden_Space","avgt",1,4,1116.234803,385.671179,"MB/sec" -"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,768569584.941176,260446575.197948,"B/op" -"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.847134,7.053726,"MB/sec" -"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,585812.345588,4903625.828174,"B/op" -"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.count","avgt",1,4,20.000000,NaN,"counts" -"cli.PathContextsExtractorBenchmarks.longFileProject:·gc.time","avgt",1,4,100.000000,NaN,"ms" -"cli.PathContextsExtractorBenchmarks.simpleProject","avgt",1,4,26.544189,1.249203,"s/op" -"cli.PathContextsExtractorBenchmarks.simpleProject:·gc.alloc.rate","avgt",1,4,1120.104864,51.889739,"MB/sec" -"cli.PathContextsExtractorBenchmarks.simpleProject:·gc.alloc.rate.norm","avgt",1,4,31763229496.000000,1078062.624071,"B/op" -"cli.PathContextsExtractorBenchmarks.simpleProject:·gc.churn.PS_Eden_Space","avgt",1,4,1043.613110,302.543916,"MB/sec" -"cli.PathContextsExtractorBenchmarks.simpleProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,29602217984.000000,9913538750.714693,"B/op" 
-"cli.PathContextsExtractorBenchmarks.simpleProject:·gc.count","avgt",1,4,32.000000,NaN,"counts" -"cli.PathContextsExtractorBenchmarks.simpleProject:·gc.time","avgt",1,4,192.000000,NaN,"ms" -"cli.ProjectParserCsvBenchmarks.bigProject","avgt",1,4,180.371234,2.981116,"s/op" -"cli.ProjectParserCsvBenchmarks.bigProject:·gc.alloc.rate","avgt",1,4,1230.495669,20.164663,"MB/sec" -"cli.ProjectParserCsvBenchmarks.bigProject:·gc.alloc.rate.norm","avgt",1,4,233372029542.000000,132825856.530797,"B/op" -"cli.ProjectParserCsvBenchmarks.bigProject:·gc.churn.PS_Eden_Space","avgt",1,4,1199.017557,44.611256,"MB/sec" -"cli.ProjectParserCsvBenchmarks.bigProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,227400744960.000000,5135066231.488147,"B/op" -"cli.ProjectParserCsvBenchmarks.bigProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.345020,0.630478,"MB/sec" -"cli.ProjectParserCsvBenchmarks.bigProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,65404892.000000,118694148.932435,"B/op" -"cli.ProjectParserCsvBenchmarks.bigProject:·gc.count","avgt",1,4,90.000000,NaN,"counts" -"cli.ProjectParserCsvBenchmarks.bigProject:·gc.time","avgt",1,4,3546.000000,NaN,"ms" -"cli.ProjectParserCsvBenchmarks.longFileProject","avgt",1,4,0.334106,0.042951,"s/op" -"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.alloc.rate","avgt",1,4,1406.933177,180.288170,"MB/sec" -"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.alloc.rate.norm","avgt",1,4,516966578.664516,181451.741185,"B/op" -"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.churn.PS_Eden_Space","avgt",1,4,1297.327305,292.912975,"MB/sec" -"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,476606120.464516,57099756.867953,"B/op" -"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.503796,2.005817,"MB/sec" -"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,186834.683871,759795.352873,"B/op" 
-"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.count","avgt",1,4,22.000000,NaN,"counts" -"cli.ProjectParserCsvBenchmarks.longFileProject:·gc.time","avgt",1,4,76.000000,NaN,"ms" -"cli.ProjectParserCsvBenchmarks.simpleProject","avgt",1,4,20.403745,1.115891,"s/op" -"cli.ProjectParserCsvBenchmarks.simpleProject:·gc.alloc.rate","avgt",1,4,1214.270360,68.761970,"MB/sec" -"cli.ProjectParserCsvBenchmarks.simpleProject:·gc.alloc.rate.norm","avgt",1,4,26615112204.000000,153463185.255269,"B/op" -"cli.ProjectParserCsvBenchmarks.simpleProject:·gc.churn.PS_Eden_Space","avgt",1,4,1069.541462,303.093519,"MB/sec" -"cli.ProjectParserCsvBenchmarks.simpleProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,23446159360.000000,7149530979.860157,"B/op" -"cli.ProjectParserCsvBenchmarks.simpleProject:·gc.count","avgt",1,4,26.000000,NaN,"counts" -"cli.ProjectParserCsvBenchmarks.simpleProject:·gc.time","avgt",1,4,189.000000,NaN,"ms" -"cli.ProjectParserDotBenchmarks.bigProject","avgt",1,4,285.639391,3.040175,"s/op" -"cli.ProjectParserDotBenchmarks.bigProject:·gc.alloc.rate","avgt",1,4,1082.387084,16.588036,"MB/sec" -"cli.ProjectParserDotBenchmarks.bigProject:·gc.alloc.rate.norm","avgt",1,4,324758822476.000000,5787126931.438186,"B/op" -"cli.ProjectParserDotBenchmarks.bigProject:·gc.churn.PS_Eden_Space","avgt",1,4,1081.368891,19.864221,"MB/sec" -"cli.ProjectParserDotBenchmarks.bigProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,324453090758.000000,5981937765.644653,"B/op" -"cli.ProjectParserDotBenchmarks.bigProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.938662,0.426174,"MB/sec" -"cli.ProjectParserDotBenchmarks.bigProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,281651016.000000,129925532.920107,"B/op" -"cli.ProjectParserDotBenchmarks.bigProject:·gc.count","avgt",1,4,1920.000000,NaN,"counts" -"cli.ProjectParserDotBenchmarks.bigProject:·gc.time","avgt",1,4,6093.000000,NaN,"ms" -"cli.ProjectParserDotBenchmarks.longFileProject","avgt",1,4,0.426770,0.048574,"s/op" 
-"cli.ProjectParserDotBenchmarks.longFileProject:·gc.alloc.rate","avgt",1,4,1256.262364,136.309511,"MB/sec" -"cli.ProjectParserDotBenchmarks.longFileProject:·gc.alloc.rate.norm","avgt",1,4,589531214.750000,47400.704610,"B/op" -"cli.ProjectParserDotBenchmarks.longFileProject:·gc.churn.PS_Eden_Space","avgt",1,4,1083.105600,545.253452,"MB/sec" -"cli.ProjectParserDotBenchmarks.longFileProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,508482901.333333,275324175.683678,"B/op" -"cli.ProjectParserDotBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.798736,3.176831,"MB/sec" -"cli.ProjectParserDotBenchmarks.longFileProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,376249.500000,1507169.919826,"B/op" -"cli.ProjectParserDotBenchmarks.longFileProject:·gc.count","avgt",1,4,19.000000,NaN,"counts" -"cli.ProjectParserDotBenchmarks.longFileProject:·gc.time","avgt",1,4,82.000000,NaN,"ms" -"cli.ProjectParserDotBenchmarks.simpleProject","avgt",1,4,32.983427,2.512751,"s/op" -"cli.ProjectParserDotBenchmarks.simpleProject:·gc.alloc.rate","avgt",1,4,1049.552205,78.141154,"MB/sec" -"cli.ProjectParserDotBenchmarks.simpleProject:·gc.alloc.rate.norm","avgt",1,4,36846641168.000000,214127.207194,"B/op" -"cli.ProjectParserDotBenchmarks.simpleProject:·gc.churn.PS_Eden_Space","avgt",1,4,1029.500338,74.389215,"MB/sec" -"cli.ProjectParserDotBenchmarks.simpleProject:·gc.churn.PS_Eden_Space.norm","avgt",1,4,36147691520.000000,4435530434.048316,"B/op" -"cli.ProjectParserDotBenchmarks.simpleProject:·gc.churn.PS_Survivor_Space","avgt",1,4,0.048848,0.223101,"MB/sec" -"cli.ProjectParserDotBenchmarks.simpleProject:·gc.churn.PS_Survivor_Space.norm","avgt",1,4,1717580.000000,7824580.788872,"B/op" -"cli.ProjectParserDotBenchmarks.simpleProject:·gc.count","avgt",1,4,38.000000,NaN,"counts" -"cli.ProjectParserDotBenchmarks.simpleProject:·gc.time","avgt",1,4,306.000000,NaN,"ms" diff --git a/src/jmh/kotlin/cli/BenchmarkResultWorker.kt b/src/jmh/kotlin/cli/BenchmarkResultWorker.kt deleted file 
mode 100644 index 2c67bf56..00000000 --- a/src/jmh/kotlin/cli/BenchmarkResultWorker.kt +++ /dev/null @@ -1,108 +0,0 @@ -package cli - -import java.io.File - -data class BenchmarkResult(val taskName: String, val projectName: String) { - var totalTime: Float = 0f - var timeStd: Float = 0f - var allocatedMemoryRate: Float = 0f - var allocatedMemoryRateStd: Float = 0f -} - -enum class MemoryMeasure { - GB, - MB -} - -class BenchmarkResultWorker { - object TableFields { - val taskToCsvField = hashMapOf( - "Code2Vec" to "cli.Code2VecExtractorBenchmarks", - "PathContext" to "cli.PathContextsExtractorBenchmarks", - "ProjectParseCSV" to "cli.ProjectParserCsvBenchmarks", - "ProjectParseDOT" to "cli.ProjectParserDotBenchmarks" - ) - val projectToCsvField = hashMapOf( - "Long file" to "longFileProject", - "Small project (Gradle)" to "simpleProject", - "Big project (IntelliJ IDEA)" to "bigProject" - ) - } - - private val tasks = listOf("Code2Vec", "PathContext", "ProjectParseCSV", "ProjectParseDOT") - private val projects = listOf("Long file", "Small project (Gradle)", "Big project (IntelliJ IDEA)") - - private fun convertMegabytes(megabytes: Float): Pair { - if (megabytes < 1024) - return megabytes to MemoryMeasure.MB - return megabytes / 1024 to MemoryMeasure.GB - } - - fun parseCsvFile(pathToCsvFile: String): Map, BenchmarkResult> { - val taskToResult = hashMapOf, BenchmarkResult>() - tasks.forEach {task -> - projects.forEach { project -> - taskToResult[task to project] = BenchmarkResult(task, project) - } - } - - File(pathToCsvFile).forEachLine { line -> - val csvFields = line.split(',') - val taskName = csvFields[0].drop(1).dropLast(1) - val resultValue = csvFields[4].toFloatOrNull() ?: 0f - val resultStd = csvFields[5].toFloatOrNull() ?: 0f - TableFields.taskToCsvField.entries.forEach { task -> - TableFields.projectToCsvField.entries.forEach { project -> - val correctCsvField = "${task.value}.${project.value}" - if (taskName == correctCsvField) { - taskToResult[task.key 
to project.key]?.let { - it.totalTime = resultValue - it.timeStd = resultStd - } - } else if (taskName == "$correctCsvField:·gc.alloc.rate") { - taskToResult[task.key to project.key]?. let { - it.allocatedMemoryRate = resultValue - it.allocatedMemoryRateStd = resultStd - } - } - } - } - } - return taskToResult - } - - fun saveToMarkdown(results: Map, BenchmarkResult>, pathToMarkdownFile: String) { - val outputFileWriter = File(pathToMarkdownFile).printWriter() - outputFileWriter.println("| | ${projects.joinToString(" | ")} |") - outputFileWriter.println("| --- |${"--- | ".repeat(projects.size)}") - tasks.forEach { task -> - outputFileWriter.print("| $task (time) |") - projects.forEach { project -> - val totalTime = "%.2f".format(results[task to project]?.totalTime) - val timeStd = "%.2f".format(results[task to project]?.timeStd) - outputFileWriter.print(" $totalTime ± $timeStd sec |") - } - outputFileWriter.print("\n") - outputFileWriter.print("| $task (allocated memory per sec) |") - projects.forEach { project -> - val totalMemory = convertMegabytes(results[task to project]?.allocatedMemoryRate ?: 0f) - val memoryStd = convertMegabytes(results[task to project]?.allocatedMemoryRateStd ?: 0f) - outputFileWriter.print( - " ${"%.2f".format(totalMemory.first)} ${totalMemory.second.name.toLowerCase()} ± " + - "${"%.2f".format(memoryStd.first)} ${memoryStd.second.name.toLowerCase()} |" - ) - } - outputFileWriter.print("\n") - if (task != tasks.last()) - outputFileWriter.println("| | ${" | ".repeat(projects.size)}") - } - outputFileWriter.close() - } -} - - -fun main() { - val benchmarkResultWorker = BenchmarkResultWorker() - val results = benchmarkResultWorker.parseCsvFile("src/jmh/benchmarks.csv") - benchmarkResultWorker.saveToMarkdown(results, "src/jmh/results.md") -} \ No newline at end of file diff --git a/src/jmh/kotlin/cli/BenchmarksSetup.kt b/src/jmh/kotlin/cli/BenchmarksSetup.kt deleted file mode 100644 index adf98bfe..00000000 --- 
a/src/jmh/kotlin/cli/BenchmarksSetup.kt +++ /dev/null @@ -1,50 +0,0 @@ -package cli - -import java.io.File - - -// How to start benchmark: -// 1. gradle daemons should be stopped before, so execute ./gradlew --stop -// 2. jmh plugin is unable to compile code incrementally, so execute ./gradlew clean -// 3. to run benchmarks execute ./gradlew jmh - -open class BenchmarksSetup() { - - private val cliPath = BenchmarksSetup::class.java.protectionDomain.codeSource.location.path.split("/build")[0] - val simpleProjectPath: String = "$cliPath/src/jmh/resources/gradle" - val simpleProjectResultsPath: String = "$cliPath/build/results/simpleProject" - val longFilePath: String = "$cliPath/src/jmh/resources/LongJavaFile.java" - val longFileResultsPath: String = "$cliPath/build/results/LongJavaFile" - val bigProjectPath: String = "$cliPath/src/jmh/resources/intellij-community" - val bigProjectResultsPath: String = "$cliPath/build/results/bigProject" - - fun setup() { - val resourcesPath = "$cliPath/src/jmh/resources" - if (isDirectoryEmpty(simpleProjectPath)) { - println("Gradle project is downloading for benchmark...") - val exitCode = cloneGitProject("v6.3.0", "https://github.com/gradle/gradle", resourcesPath) - if (exitCode != 0) { - throw DownloadException("Error with downloading Gradle project!") - } - } - if (isDirectoryEmpty(bigProjectPath)) { - println("Intellij IDEA project is downloading for benchmark...") - val exitCode = cloneGitProject("idea/193.7288.8", "https://github.com/JetBrains/intellij-community", resourcesPath) - if (exitCode != 0) { - throw DownloadException("Error with downloading Intellij IDEA project!") - } - } - } - - private fun cloneGitProject(tag: String, projectLink: String, directory: String) : Int { - val processBuilder = ProcessBuilder() - processBuilder.command("git", "clone", "--depth", "1", "-b", tag, projectLink) - .directory(File(directory)) - return processBuilder.start().waitFor() - } - - private fun isDirectoryEmpty(path: String) : 
Boolean { - val directory = File(path) - return !directory.isDirectory || directory.list()?.isEmpty() ?: false - } -} \ No newline at end of file diff --git a/src/jmh/kotlin/cli/Code2VecExtractorBenchmarks.kt b/src/jmh/kotlin/cli/Code2VecExtractorBenchmarks.kt deleted file mode 100644 index 1c32406d..00000000 --- a/src/jmh/kotlin/cli/Code2VecExtractorBenchmarks.kt +++ /dev/null @@ -1,36 +0,0 @@ -package cli - -import org.openjdk.jmh.annotations.* -import astminer.cli.* - -@State(Scope.Benchmark) -open class Code2VecExtractorBenchmarks { - - private val defaultArgs = listOf("--split-tokens", "--granularity", "method", "--lang", "java") - - @Setup - fun pathsSetup() { - BenchmarksSetup().setup() - } - - @Benchmark - fun simpleProject() { - val args = listOf("--project", BenchmarksSetup().simpleProjectPath, - "--output", BenchmarksSetup().simpleProjectResultsPath) + defaultArgs - Code2VecExtractor().main(args) - } - - @Benchmark - fun longFileProject() { - val args = listOf("--project", BenchmarksSetup().longFilePath, - "--output", BenchmarksSetup().longFileResultsPath) + defaultArgs - Code2VecExtractor().main(args) - } - - @Benchmark - fun bigProject() { - val args = listOf("--project", BenchmarksSetup().bigProjectPath, - "--output", BenchmarksSetup().bigProjectResultsPath) + defaultArgs - Code2VecExtractor().main(args) - } -} \ No newline at end of file diff --git a/src/jmh/kotlin/cli/DownloadException.kt b/src/jmh/kotlin/cli/DownloadException.kt deleted file mode 100644 index 69251cca..00000000 --- a/src/jmh/kotlin/cli/DownloadException.kt +++ /dev/null @@ -1,5 +0,0 @@ -package cli - -import java.lang.RuntimeException - -class DownloadException(message: String): RuntimeException(message) {} \ No newline at end of file diff --git a/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt b/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt deleted file mode 100644 index 9a5c05a0..00000000 --- a/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt +++ /dev/null @@ 
-1,36 +0,0 @@ -package cli - -import org.openjdk.jmh.annotations.* -import astminer.cli.* - -@State(Scope.Benchmark) -open class PathContextsExtractorBenchmarks { - - private val defaultArgs = listOf("--lang", "java") - - @Setup - fun pathsSetup() { - BenchmarksSetup().setup() - } - - @Benchmark - fun simpleProject() { - val args = listOf("--project", BenchmarksSetup().simpleProjectPath, - "--output", BenchmarksSetup().simpleProjectResultsPath) + defaultArgs - PathContextsExtractor().main(args) - } - - @Benchmark - fun longFileProject() { - val args = listOf("--project", BenchmarksSetup().longFilePath, - "--output", BenchmarksSetup().longFileResultsPath) + defaultArgs - PathContextsExtractor().main(args) - } - - @Benchmark - fun bigProject() { - val args = listOf("--project", BenchmarksSetup().bigProjectPath, - "--output", BenchmarksSetup().bigProjectResultsPath) + defaultArgs - PathContextsExtractor().main(args) - } -} \ No newline at end of file diff --git a/src/jmh/kotlin/cli/ProjectParserCsvBenchmarks.kt b/src/jmh/kotlin/cli/ProjectParserCsvBenchmarks.kt deleted file mode 100644 index f5c24f14..00000000 --- a/src/jmh/kotlin/cli/ProjectParserCsvBenchmarks.kt +++ /dev/null @@ -1,36 +0,0 @@ -package cli - -import org.openjdk.jmh.annotations.* -import astminer.cli.* - -@State(Scope.Benchmark) -open class ProjectParserCsvBenchmarks { - - private val defaultArgs = listOf("--split-tokens", "--granularity", "method", "--lang", "java", "--storage", "csv") - - @Setup - fun pathsSetup() { - BenchmarksSetup().setup() - } - - @Benchmark - fun simpleProject() { - val args = listOf("--project", BenchmarksSetup().simpleProjectPath, - "--output", BenchmarksSetup().simpleProjectResultsPath) + defaultArgs - ProjectParser().main(args) - } - - @Benchmark - fun longFileProject() { - val args = listOf("--project", BenchmarksSetup().longFilePath, - "--output", BenchmarksSetup().longFileResultsPath) + defaultArgs - ProjectParser().main(args) - } - - @Benchmark - fun bigProject() { - 
val args = listOf("--project", BenchmarksSetup().bigProjectPath, - "--output", BenchmarksSetup().bigProjectResultsPath) + defaultArgs - ProjectParser().main(args) - } -} \ No newline at end of file diff --git a/src/jmh/kotlin/cli/ProjectParserDotBenchmarks.kt b/src/jmh/kotlin/cli/ProjectParserDotBenchmarks.kt deleted file mode 100644 index 0d832a50..00000000 --- a/src/jmh/kotlin/cli/ProjectParserDotBenchmarks.kt +++ /dev/null @@ -1,36 +0,0 @@ -package cli - -import org.openjdk.jmh.annotations.* -import astminer.cli.* - -@State(Scope.Benchmark) -open class ProjectParserDotBenchmarks { - - private val defaultArgs = listOf("--split-tokens", "--granularity", "method", "--lang", "java", "--storage", "dot") - - @Setup - fun pathsSetup() { - BenchmarksSetup().setup() - } - - @Benchmark - fun simpleProject() { - val args = listOf("--project", BenchmarksSetup().simpleProjectPath, - "--output", BenchmarksSetup().simpleProjectResultsPath) + defaultArgs - ProjectParser().main(args) - } - - @Benchmark - fun longFileProject() { - val args = listOf("--project", BenchmarksSetup().longFilePath, - "--output", BenchmarksSetup().longFileResultsPath) + defaultArgs - ProjectParser().main(args) - } - - @Benchmark - fun bigProject() { - val args = listOf("--project", BenchmarksSetup().bigProjectPath, - "--output", BenchmarksSetup().bigProjectResultsPath) + defaultArgs - ProjectParser().main(args) - } -} \ No newline at end of file diff --git a/src/jmh/resources/LongJavaFile.java b/src/jmh/resources/LongJavaFile.java deleted file mode 100644 index f5308edb..00000000 --- a/src/jmh/resources/LongJavaFile.java +++ /dev/null @@ -1,5110 +0,0 @@ -import java.lang.reflect.Array; -import java.math.BigInteger; -import java.util.*; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.IntStream; - -import static java.util.stream.Collectors.toList; -import static java.util.stream.Collectors.toMap; -/** - CODE FROM https://github.com/TheAlgorithms/Java - USES ONLY FOR BENCHMARK - 
*/ -public class LongJavaFileProject { - - static boolean swap(T[] array, int idx, int idy) { - T swap = array[idx]; - array[idx] = array[idy]; - array[idy] = swap; - return true; - } - - static > boolean less(T v, T w) { - return v.compareTo(w) < 0; - } - - static void print(List toPrint) { - toPrint.stream() - .map(Object::toString) - .map(str -> str + " ") - .forEach(System.out::print); - - System.out.println(); - } - - static void print(Object[] toPrint) { - System.out.println(Arrays.toString(toPrint)); - } - - static > void flip(T[] array, int left, int right) { - while (left <= right) { - swap(array, left++, right--); - } - } - private static final Random random = new Random(); - - - private static class BogoSort { - - private static final Random random = new Random(); - - - private static > boolean isSorted(T[] array) { - for (int i = 0; i < array.length - 1; i++) { - if (less(array[i + 1], array[i])) return false; - } - return true; - } - - // Randomly shuffles the array - private static void nextPermutation(T[] array) { - int length = array.length; - - for (int i = 0; i < array.length; i++) { - int randomIndex = i + random.nextInt(length - i); - swap(array, randomIndex, i); - } - } - - public > T[] sort(T[] array) { - while (!isSorted(array)) { - nextPermutation(array); - } - return array; - } - - // Driver Program - public static void main(String[] args) { - // Integer Input - Integer[] integers = {4, 23, 6, 78, 1, 54, 231, 9, 12}; - - BogoSort bogoSort = new BogoSort(); - - // print a sorted array - print(bogoSort.sort(integers)); - - // String Input - String[] strings = {"c", "a", "e", "b", "d"}; - - print(bogoSort.sort(strings)); - } - } - - class BubbleSort { - - public > T[] sort(T[] array) { - for (int i = 0, size = array.length; i < size - 1; ++i) { - boolean swapped = false; - for (int j = 0; j < size - 1 - i; ++j) { - if (less(array[j], array[j + 1])) { - swap(array, j, j + 1); - swapped = true; - } - } - if (!swapped) { - break; - } - } - return 
array; - } - } - - class CocktailShakerSort { - - public > T[] sort(T[] array) { - - int length = array.length; - int left = 0; - int right = length - 1; - int swappedLeft, swappedRight; - while (left < right) { - // front - swappedRight = 0; - for (int i = left; i < right; i++) { - if (less(array[i + 1], array[i])) { - swap(array, i, i + 1); - swappedRight = i; - } - } - // back - right = swappedRight; - swappedLeft = length - 1; - for (int j = right; j > left; j--) { - if (less(array[j], array[j - 1])) { - swap(array, j - 1, j); - swappedLeft = j; - } - } - left = swappedLeft; - } - return array; - - } - } - - class CombSort { - - // To find gap between elements - private int nextGap(int gap) { - // Shrink gap by Shrink factor - gap = (gap * 10) / 13; - return (gap < 1) ? 1 : gap; - } - - public > T[] sort(T[] arr) { - int size = arr.length; - - // initialize gap - int gap = size; - - // Initialize swapped as true to make sure that loop runs - boolean swapped = true; - - // Keep running while gap is more than 1 and last iteration caused a swap - while (gap != 1 || swapped) { - // Find next gap - gap = nextGap(gap); - - // Initialize swapped as false so that we can check if swap happened or not - swapped = false; - - // Compare all elements with current gap - for (int i = 0; i < size - gap; i++) { - if (less(arr[i + gap], arr[i])) { - // Swap arr[i] and arr[i+gap] - swapped = swap(arr, i, i + gap); - } - } - } - return arr; - } - } - - public > T[] sort(T[] unsorted) { - return sort(Arrays.asList(unsorted)).toArray(unsorted); - } - - public > List sort(List list) { - - Map frequency = new TreeMap<>(); - // The final output array - List sortedArray = new ArrayList<>(list.size()); - - // Counting the frequency of @param array elements - list.forEach(v -> frequency.put(v, frequency.getOrDefault(v, 0) + 1)); - - // Filling the sortedArray - for (Map.Entry element : frequency.entrySet()) { - for (int j = 0; j < element.getValue(); j++) { - 
sortedArray.add(element.getKey()); - } - } - - return sortedArray; - } - - private static > List streamSort(List list) { - return list.stream() - .collect(toMap(k -> k, v -> 1, (v1, v2) -> v1 + v2, TreeMap::new)) - .entrySet() - .stream() - .flatMap(entry -> IntStream.rangeClosed(1, entry.getValue()).mapToObj(t -> entry.getKey())) - .collect(toList()); - } - - class CycleSort { - - public > T[] sort(T[] arr) { - int n = arr.length; - - // traverse array elements - for (int j = 0; j <= n - 2; j++) { - // initialize item as starting point - T item = arr[j]; - - // Find position where we put the item. - int pos = j; - for (int i = j + 1; i < n; i++) - if (less(arr[i], item)) pos++; - - // If item is already in correct position - if (pos == j) continue; - - // ignore all duplicate elements - while (item.compareTo(arr[pos]) == 0) - pos += 1; - - // put the item to it's right position - if (pos != j) { - item = replace(arr, pos, item); - } - - // Rotate rest of the cycle - while (pos != j) { - pos = j; - - // Find position where we put the element - for (int i = j + 1; i < n; i++) - if (less(arr[i], item)) { - pos += 1; - } - - - // ignore all duplicate elements - while (item.compareTo(arr[pos]) == 0) - pos += 1; - - // put the item to it's right position - if (item != arr[pos]) { - item = replace(arr, pos, item); - } - } - } - - return arr; - } - - private > T replace(T[] arr, int pos, T item) { - T temp = item; - item = arr[pos]; - arr[pos] = temp; - return item; - } - } - - class GnomeSort { - - public > T[] sort(T[] arr) { - int i = 1; - int j = 2; - while (i < arr.length) { - if (less(arr[i - 1], arr[i])) i = j++; - else { - swap(arr, i - 1, i); - if (--i == 0) { - i = j++; - } - } - } - - return null; - } - } - - public class HeapSort { - - - private class Heap> { - - private T[] heap; - - public Heap(T[] heap) { - this.heap = heap; - } - - private void heapSubtree(int rootIndex, int lastChild) { - int leftIndex = rootIndex * 2 + 1; - int rightIndex = rootIndex * 2 
+ 2; - T root = heap[rootIndex]; - if (rightIndex <= lastChild) { // if has right and left children - T left = heap[leftIndex]; - T right = heap[rightIndex]; - if (less(left, right) && less(left, root)) { - swap(heap, leftIndex, rootIndex); - heapSubtree(leftIndex, lastChild); - } else if (less(right, root)) { - swap(heap, rightIndex, rootIndex); - heapSubtree(rightIndex, lastChild); - } - } else if (leftIndex <= lastChild) { // if no right child, but has left child - T left = heap[leftIndex]; - if (less(left, root)) { - swap(heap, leftIndex, rootIndex); - heapSubtree(leftIndex, lastChild); - } - } - } - - - private void makeMinHeap(int root) { - int leftIndex = root * 2 + 1; - int rightIndex = root * 2 + 2; - boolean hasLeftChild = leftIndex < heap.length; - boolean hasRightChild = rightIndex < heap.length; - if (hasRightChild) { //if has left and right - makeMinHeap(leftIndex); - makeMinHeap(rightIndex); - heapSubtree(root, heap.length - 1); - } else if (hasLeftChild) { - heapSubtree(root, heap.length - 1); - } - } - - private T getRoot(int size) { - swap(heap, 0, size); - heapSubtree(0, size - 1); - return heap[size]; // return old root - } - - - } - - public > T[] sort(T[] unsorted) { - return sort(Arrays.asList(unsorted)).toArray(unsorted); - } - - public > List sort(List unsorted) { - int size = unsorted.size(); - - @SuppressWarnings("unchecked") - Heap heap = new Heap<>(unsorted.toArray((T[]) new Comparable[unsorted.size()])); - - heap.makeMinHeap(0); // make min heap using index 0 as root. 
- List sorted = new ArrayList<>(size); - while (size > 0) { - T min = heap.getRoot(--size); - sorted.add(min); - } - - return sorted; - } - } - - class MergeSort { - - public > T[] sort(T[] unsorted) { - doSort(unsorted, 0, unsorted.length - 1); - return unsorted; - } - - private > void doSort(T[] arr, int left, int right) { - if (left < right) { - int mid = left + (right - left) / 2; - doSort(arr, left, mid); - doSort(arr, mid + 1, right); - merge(arr, left, mid, right); - } - - } - - private > void merge(T[] arr, int left, int mid, int right) { - int length = right - left + 1; - T[] temp = (T[]) new Comparable[length]; - int i = left; - int j = mid + 1; - int k = 0; - - while (i <= mid && j <= right) { - if (arr[i].compareTo(arr[j]) <= 0) { - temp[k++] = arr[i++]; - } else { - temp[k++] = arr[j++]; - } - } - - while (i <= mid) { - temp[k++] = arr[i++]; - } - - while (j <= right) { - temp[k++] = arr[j++]; - } - - System.arraycopy(temp, 0, arr, left, length); - } - } - - class QuickSort { - - public > T[] sort(T[] array) { - doSort(array, 0, array.length - 1); - return array; - } - - private > void doSort(T[] array, int left, int right) { - if (left < right) { - int pivot = randomPartition(array, left, right); - doSort(array, left, pivot - 1); - doSort(array, pivot, right); - } - } - - private > int randomPartition(T[] array, int left, int right) { - int randomIndex = left + (int) (Math.random() * (right - left + 1)); - swap(array, randomIndex, right); - return partition(array, left, right); - } - - private > int partition(T[] array, int left, int right) { - int mid = (left + right) / 2; - T pivot = array[mid]; - - while (left <= right) { - while (less(array[left], pivot)) { - ++left; - } - while (less(pivot, array[right])) { - --right; - } - if (left <= right) { - swap(array, left, right); - ++left; - --right; - } - } - return left; - } - } - - class RadixSort { - - private int getMax(int[] arr, int n) { - int mx = arr[0]; - for (int i = 1; i < n; i++) - if 
(arr[i] > mx) - mx = arr[i]; - return mx; - } - - private void countSort(int[] arr, int n, int exp) { - int[] output = new int[n]; - int i; - int[] count = new int[10]; - Arrays.fill(count, 0); - - for (i = 0; i < n; i++) - count[(arr[i] / exp) % 10]++; - - for (i = 1; i < 10; i++) - count[i] += count[i - 1]; - - for (i = n - 1; i >= 0; i--) { - output[count[(arr[i] / exp) % 10] - 1] = arr[i]; - count[(arr[i] / exp) % 10]--; - } - - for (i = 0; i < n; i++) - arr[i] = output[i]; - } - - private void radixsort(int[] arr, int n) { - - int m = getMax(arr, n); - - - for (int exp = 1; m / exp > 0; exp *= 10) - countSort(arr, n, exp); - } - - - void print(int[] arr, int n) { - for (int i = 0; i < n; i++) - System.out.print(arr[i] + " "); - } - } - - public class AnyBaseToAnyBase { - - static final int MINIMUM_BASE = 2; - static final int MAXIMUM_BASE = 36; - - public void main(String[] args) { - Scanner in = new Scanner(System.in); - String n; - int b1, b2; - while (true) { - try { - System.out.print("Enter number: "); - n = in.next(); - System.out.print("Enter beginning base (between " + MINIMUM_BASE + " and " + MAXIMUM_BASE + "): "); - b1 = in.nextInt(); - if (b1 > MAXIMUM_BASE || b1 < MINIMUM_BASE) { - System.out.println("Invalid base!"); - continue; - } - if (!validForBase(n, b1)) { - System.out.println("The number is invalid for this base!"); - continue; - } - System.out.print("Enter end base (between " + MINIMUM_BASE + " and " + MAXIMUM_BASE + "): "); - b2 = in.nextInt(); - if (b2 > MAXIMUM_BASE || b2 < MINIMUM_BASE) { - System.out.println("Invalid base!"); - continue; - } - break; - } catch (InputMismatchException e) { - System.out.println("Invalid input."); - in.next(); - } - } - System.out.println(base2base(n, b1, b2)); - in.close(); - } - - public boolean validForBase(String n, int base) { - char[] validDigits = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', - 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 
'U', 'V', - 'W', 'X', 'Y', 'Z'}; - // digitsForBase contains all the valid digits for the base given - char[] digitsForBase = Arrays.copyOfRange(validDigits, 0, base); - - // Convert character array into set for convenience of contains() method - HashSet digitsList = new HashSet<>(); - for (int i = 0; i < digitsForBase.length; i++) - digitsList.add(digitsForBase[i]); - - // Check that every digit in n is within the list of valid digits for that base. - for (char c : n.toCharArray()) - if (!digitsList.contains(c)) - return false; - - return true; - } - - public String base2base(String n, int b1, int b2) { - int decimalValue = 0, charB2; - char charB1; - String output = ""; - // Go through every character of n - for (int i = 0; i < n.length(); i++) { - // store the character in charB1 - charB1 = n.charAt(i); - // if it is a non-number, convert it to a decimal value >9 and store it in charB2 - if (charB1 >= 'A' && charB1 <= 'Z') - charB2 = 10 + (charB1 - 'A'); - // Else, store the integer value in charB2 - else - charB2 = charB1 - '0'; - // Convert the digit to decimal and add it to the - // decimalValue of n - decimalValue = decimalValue * b1 + charB2; - } - while (decimalValue != 0) { - // If the remainder is a digit < 10, simply add it to - // the left side of the new number. 
- if (decimalValue % b2 < 10) - output = Integer.toString(decimalValue % b2) + output; - else - output = (char) ((decimalValue % b2) + 55) + output; - // Divide by the new base again - decimalValue /= b2; - } - return output; - } - } - - public class RomanToInteger { - - private Map map = new HashMap() { - /** - * - */ - private final long serialVersionUID = 87605733047260530L; - - { - put('I', 1); - put('V', 5); - put('X', 10); - put('L', 50); - put('C', 100); - put('D', 500); - put('M', 1000); - } - }; - - public int romanToInt(String A) { - - char prev = ' '; - - int sum = 0; - - int newPrev = 0; - for (int i = A.length() - 1; i >= 0; i--) { - char c = A.charAt(i); - - if (prev != ' ') { - // checking current Number greater then previous or not - newPrev = map.get(prev) > newPrev ? map.get(prev) : newPrev; - } - - int currentNum = map.get(c); - - // if current number greater then prev max previous then add - if (currentNum >= newPrev) { - sum += currentNum; - } else { - // subtract upcoming number until upcoming number not greater then prev max - sum -= currentNum; - } - - prev = c; - } - - return sum; - } - } - - public class Bag implements Iterable { - - private Node firstElement; // first element of the bag - private int size; // size of bag - - private class Node { - private Element content; - private Node nextElement; - } - - /** - * Create an empty bag - */ - public Bag() { - firstElement = null; - size = 0; - } - - /** - * @return true if this bag is empty, false otherwise - */ - public boolean isEmpty() { - return firstElement == null; - } - - /** - * @return the number of elements - */ - public int size() { - return size; - } - - /** - * @param element - the element to add - */ - public void add(Element element) { - Node oldfirst = firstElement; - firstElement = new Node<>(); - firstElement.content = element; - firstElement.nextElement = oldfirst; - size++; - } - - /** - * Checks if the bag contains a specific element - * - * @param element which you 
want to look for - * @return true if bag contains element, otherwise false - */ - public boolean contains(Element element) { - Iterator iterator = this.iterator(); - while (iterator.hasNext()) { - if (iterator.next().equals(element)) { - return true; - } - } - return false; - } - - /** - * @return an iterator that iterates over the elements in this bag in arbitrary order - */ - public Iterator iterator() { - return new ListIterator<>(firstElement); - } - - @SuppressWarnings("hiding") - private class ListIterator implements Iterator { - private Node currentElement; - - public ListIterator(Node firstElement) { - currentElement = firstElement; - } - - public boolean hasNext() { - return currentElement != null; - } - - /** - * remove is not allowed in a bag - */ - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - - public Element next() { - if (!hasNext()) - throw new NoSuchElementException(); - Element element = currentElement.content; - currentElement = currentElement.nextElement; - return element; - } - } - } - - public class CircularBuffer { - private char[] _buffer; - public final int _buffer_size; - private int _write_index = 0; - private int _read_index = 0; - private AtomicInteger _readable_data = new AtomicInteger(0); - - public CircularBuffer(int buffer_size) { - if (!IsPowerOfTwo(buffer_size)) { - throw new IllegalArgumentException(); - } - this._buffer_size = buffer_size; - _buffer = new char[buffer_size]; - } - - private boolean IsPowerOfTwo(int i) { - return (i & (i - 1)) == 0; - } - - private int getTrueIndex(int i) { - return i % _buffer_size; - } - - - public Character readOutChar() { - Character result = null; - - - //if we have data to read - if (_readable_data.get() > 0) { - - result = Character.valueOf(_buffer[getTrueIndex(_read_index)]); - _readable_data.decrementAndGet(); - _read_index++; - } - - return result; - } - - public boolean writeToCharBuffer(char c) { - boolean result = false; - - //if we can write to 
the buffer - if (_readable_data.get() < _buffer_size) { - //write to buffer - _buffer[getTrueIndex(_write_index)] = c; - _readable_data.incrementAndGet(); - _write_index++; - result = true; - } - - return result; - } - - private class TestWriteWorker implements Runnable { - String _alphabet = "abcdefghijklmnopqrstuvwxyz0123456789"; - Random _random = new Random(); - CircularBuffer _buffer; - - public TestWriteWorker(CircularBuffer cb) { - this._buffer = cb; - } - - private char getRandomChar() { - return _alphabet.charAt(_random.nextInt(_alphabet.length())); - } - - public void run() { - while (!Thread.interrupted()) { - if (!_buffer.writeToCharBuffer(getRandomChar())) { - Thread.yield(); - try { - Thread.sleep(10); - } catch (InterruptedException e) { - return; - } - } - } - } - } - - private class TestReadWorker implements Runnable { - CircularBuffer _buffer; - - public TestReadWorker(CircularBuffer cb) { - this._buffer = cb; - } - - public void run() { - System.out.println("Printing Buffer:"); - while (!Thread.interrupted()) { - Character c = _buffer.readOutChar(); - if (c != null) { - System.out.print(c.charValue()); - } else { - Thread.yield(); - try { - Thread.sleep(10); - } catch (InterruptedException e) { - System.out.println(); - return; - } - } - } - } - } - } - - class BellmanFord { - int vertex,edge; - private Edge edges[]; - private int index=0; - BellmanFord(int v,int e) - { - vertex=v; - edge=e; - edges=new Edge[e]; - } - class Edge - { - int u,v; - int w; - Edge(int a,int b,int c) - { - u=a; - v=b; - w=c; - } - } - - void printPath(int p[],int i) - { - if(p[i]==-1)//Found the path back to parent - return; - printPath(p,p[i]); - System.out.print(i+" "); - } - public void main(String args[]) - { - BellmanFord obj=new BellmanFord(0,0);//Dummy object to call nonstatic variables - obj.go(); - } - public void go()//Interactive run for understanding the class first time. 
Assumes source vertex is 0 and shows distaance to all vertices - { - Scanner sc=new Scanner(System.in);//Grab scanner object for user input - int i,v,e,u,ve,w,j,neg=0; - System.out.println("Enter no. of vertices and edges please"); - v=sc.nextInt(); - e=sc.nextInt(); - Edge arr[]=new Edge[e];//Array of edges - System.out.println("Input edges"); - for(i=0;idist[arr[j].u]+arr[j].w) - { - dist[arr[j].v]=dist[arr[j].u]+arr[j].w;//Update - p[arr[j].v]=arr[j].u; - } - } - } - //Final cycle for negative checking - for(j=0;jdist[arr[j].u]+arr[j].w) - { - neg=1; - System.out.println("Negative cycle"); - break; - } - if(neg==0)//Go ahead and show results of computaion - { - System.out.println("Distances are: "); - for(i=0;idist[arr[j].u]+arr[j].w) - { - dist[arr[j].v]=dist[arr[j].u]+arr[j].w;//Update - p[arr[j].v]=arr[j].u; - } - } - } - //Final cycle for negative checking - for(j=0;jdist[arr[j].u]+arr[j].w) - { - neg=1; - System.out.println("Negative cycle"); - break; - } - if(neg==0)//Go ahead and show results of computaion - { - System.out.println("Distance is: "+dist[end]); - System.out.println("Path followed:"); - System.out.print(source+" "); - printPath(p,end); - System.out.println(); - } - } - /** - *@param x Source Vertex - * @param y End vertex - * @param z Weight - */ - public void addEdge(int x,int y,int z)//Adds unidirectionl Edge - { - edges[index++]=new Edge(x,y,z); - } - public Edge[] getEdgeArray() - { - return edges; - } - } - - class Graph> { - - class Node { - E name; - - public Node(E name) { - this.name = name; - } - } - - class Edge { - Node startNode, endNode; - - public Edge(Node startNode, Node endNode) { - this.startNode = startNode; - this.endNode = endNode; - } - } - - ArrayList edgeList; - ArrayList nodeList; - - public Graph() { - edgeList = new ArrayList(); - nodeList = new ArrayList(); - } - - public void addEdge(E startNode, E endNode) { - Node start = null, end = null; - for (Node node : nodeList) { - if (startNode.compareTo(node.name) == 
0) { - start = node; - } else if (endNode.compareTo(node.name) == 0) { - end = node; - } - } - if (start == null) { - start = new Node(startNode); - nodeList.add(start); - } - if (end == null) { - end = new Node(endNode); - nodeList.add(end); - } - - edgeList.add(new Edge(start, end)); - } - - public int countGraphs() { - int count = 0; - Set markedNodes = new HashSet(); - - for (Node n : nodeList) { - if (!markedNodes.contains(n)) { - markedNodes.add(n); - markedNodes.addAll(depthFirstSearch(n, new ArrayList())); - count++; - } - } - - return count; - } - - public ArrayList depthFirstSearch(Node n, ArrayList visited) { - visited.add(n); - for (Edge e : edgeList) { - if (e.startNode.equals(n) && !visited.contains(e.endNode)) { - depthFirstSearch(e.endNode, visited); - } - } - return visited; - } - } - - class Cycle { - - private int nodes, edges; - private int[][] adjacencyMatrix; - private boolean[] visited; - ArrayList> cycles = new ArrayList>(); - - - public Cycle() { - Scanner in = new Scanner(System.in); - System.out.print("Enter the no. of nodes: "); - nodes = in.nextInt(); - System.out.print("Enter the no. 
of Edges: "); - edges = in.nextInt(); - - adjacencyMatrix = new int[nodes][nodes]; - visited = new boolean[nodes]; - - for (int i = 0; i < nodes; i++) { - visited[i] = false; - } - - System.out.println("Enter the details of each edges "); - - for (int i = 0; i < edges; i++) { - int start, end; - start = in.nextInt(); - end = in.nextInt(); - adjacencyMatrix[start][end] = 1; - } - in.close(); - - } - - public void start() { - for (int i = 0; i < nodes; i++) { - ArrayList temp = new ArrayList<>(); - dfs(i, i, temp); - for (int j = 0; j < nodes; j++) { - adjacencyMatrix[i][j] = 0; - adjacencyMatrix[j][i] = 0; - } - } - } - - private void dfs(Integer start, Integer curr, ArrayList temp) { - temp.add(curr); - visited[curr] = true; - for (int i = 0; i < nodes; i++) { - if (adjacencyMatrix[curr][i] == 1) { - if (i == start) { - cycles.add(new ArrayList(temp)); - } else { - if (!visited[i]) { - dfs(start, i, temp); - } - } - } - } - - if (temp.size() > 0) { - temp.remove(temp.size() - 1); - } - visited[curr] = false; - } - - public void printAll() { - for (int i = 0; i < cycles.size(); i++) { - for (int j = 0; j < cycles.get(i).size(); j++) { - System.out.print(cycles.get(i).get(j) + " -> "); - } - System.out.println(cycles.get(i).get(0)); - System.out.println(); - } - - } - - } - - public class Cycles { - public void main(String[] args) { - Cycle c = new Cycle(); - c.start(); - c.printAll(); - } - } - - public class MatrixGraphs { - - public void main(String args[]) { - AdjacencyMatrixGraph graph = new AdjacencyMatrixGraph(10); - graph.addEdge(1, 2); - graph.addEdge(1, 5); - graph.addEdge(2, 5); - graph.addEdge(1, 2); - graph.addEdge(2, 3); - graph.addEdge(3, 4); - graph.addEdge(4, 1); - graph.addEdge(2, 3); - System.out.println(graph); - } - - } - - class AdjacencyMatrixGraph { - private int _numberOfVertices; - private int _numberOfEdges; - private int[][] _adjacency; - - static final int EDGE_EXIST = 1; - static final int EDGE_NONE = 0; - - public 
AdjacencyMatrixGraph(int givenNumberOfVertices) { - this.setNumberOfVertices(givenNumberOfVertices); - this.setNumberOfEdges(0); - this.setAdjacency(new int[givenNumberOfVertices][givenNumberOfVertices]); - for (int i = 0; i < givenNumberOfVertices; i++) { - for (int j = 0; j < givenNumberOfVertices; j++) { - this.adjacency()[i][j] = AdjacencyMatrixGraph.EDGE_NONE; - } - } - } - - private void setNumberOfVertices(int newNumberOfVertices) { - this._numberOfVertices = newNumberOfVertices; - } - - public int numberOfVertices() { - return this._numberOfVertices; - } - - private void setNumberOfEdges(int newNumberOfEdges) { - this._numberOfEdges = newNumberOfEdges; - } - - public int numberOfEdges() { - return this._numberOfEdges; - } - - private void setAdjacency(int[][] newAdjacency) { - this._adjacency = newAdjacency; - } - - private int[][] adjacency() { - return this._adjacency; - } - - private boolean adjacencyOfEdgeDoesExist(int from, int to) { - return (this.adjacency()[from][to] != AdjacencyMatrixGraph.EDGE_NONE); - } - - public boolean vertexDoesExist(int aVertex) { - if (aVertex >= 0 && aVertex < this.numberOfVertices()) { - return true; - } else { - return false; - } - } - - public boolean edgeDoesExist(int from, int to) { - if (this.vertexDoesExist(from) && this.vertexDoesExist(to)) { - return (this.adjacencyOfEdgeDoesExist(from, to)); - } - - return false; - } - - public boolean addEdge(int from, int to) { - if (this.vertexDoesExist(from) && this.vertexDoesExist(to)) { - if (!this.adjacencyOfEdgeDoesExist(from, to)) { - this.adjacency()[from][to] = AdjacencyMatrixGraph.EDGE_EXIST; - this.adjacency()[to][from] = AdjacencyMatrixGraph.EDGE_EXIST; - this.setNumberOfEdges(this.numberOfEdges() + 1); - return true; - } - } - - return false; - } - - public boolean removeEdge(int from, int to) { - if (!this.vertexDoesExist(from) || !this.vertexDoesExist(to)) { - if (this.adjacencyOfEdgeDoesExist(from, to)) { - this.adjacency()[from][to] = 
AdjacencyMatrixGraph.EDGE_NONE; - this.adjacency()[to][from] = AdjacencyMatrixGraph.EDGE_NONE; - this.setNumberOfEdges(this.numberOfEdges() - 1); - return true; - } - } - return false; - } - - public String toString() { - String s = new String(); - s = " "; - for (int i = 0; i < this.numberOfVertices(); i++) { - s = s + String.valueOf(i) + " "; - } - s = s + " \n"; - - for (int i = 0; i < this.numberOfVertices(); i++) { - s = s + String.valueOf(i) + " : "; - for (int j = 0; j < this.numberOfVertices(); j++) { - s = s + String.valueOf(this._adjacency[i][j]) + " "; - } - s = s + "\n"; - } - return s; - } - } - - public class FordFulkerson { - final static int INF = 987654321; - // edges - int V; - int[][] capacity, flow; - - public void main(String[] args) { - System.out.println("V : 6"); - V = 6; - capacity = new int[V][V]; - - capacity[0][1] = 12; - capacity[0][3] = 13; - capacity[1][2] = 10; - capacity[2][3] = 13; - capacity[2][4] = 3; - capacity[2][5] = 15; - capacity[3][2] = 7; - capacity[3][4] = 15; - capacity[4][5] = 17; - - System.out.println("Max capacity in networkFlow : " + networkFlow(0, 5)); - } - - private int networkFlow(int source, int sink) { - flow = new int[V][V]; - int totalFlow = 0; - while (true) { - Vector parent = new Vector<>(V); - for (int i = 0; i < V; i++) - parent.add(-1); - Queue q = new LinkedList<>(); - parent.set(source, source); - q.add(source); - while (!q.isEmpty() && parent.get(sink) == -1) { - int here = q.peek(); - q.poll(); - for (int there = 0; there < V; ++there) - if (capacity[here][there] - flow[here][there] > 0 && parent.get(there) == -1) { - q.add(there); - parent.set(there, here); - } - } - if (parent.get(sink) == -1) - break; - - int amount = INF; - String printer = "path : "; - StringBuilder sb = new StringBuilder(); - for (int p = sink; p != source; p = parent.get(p)) { - amount = Math.min(capacity[parent.get(p)][p] - flow[parent.get(p)][p], amount); - sb.append(p + "-"); - } - sb.append(source); - for (int p = sink; 
p != source; p = parent.get(p)) { - flow[parent.get(p)][p] += amount; - flow[p][parent.get(p)] -= amount; - } - totalFlow += amount; - printer += sb.reverse() + " / max flow : " + totalFlow; - System.out.println(printer); - } - - return totalFlow; - } - } - - class LongestCommonSubsequence { - - public String getLCS(String str1, String str2) { - - //At least one string is null - if (str1 == null || str2 == null) - return null; - - //At least one string is empty - if (str1.length() == 0 || str2.length() == 0) - return ""; - - String[] arr1 = str1.split(""); - String[] arr2 = str2.split(""); - - //lcsMatrix[i][j] = LCS of first i elements of arr1 and first j characters of arr2 - int[][] lcsMatrix = new int[arr1.length + 1][arr2.length + 1]; - - for (int i = 0; i < arr1.length + 1; i++) - lcsMatrix[i][0] = 0; - for (int j = 1; j < arr2.length + 1; j++) - lcsMatrix[0][j] = 0; - for (int i = 1; i < arr1.length + 1; i++) { - for (int j = 1; j < arr2.length + 1; j++) { - if (arr1[i - 1].equals(arr2[j - 1])) { - lcsMatrix[i][j] = lcsMatrix[i - 1][j - 1] + 1; - } else { - lcsMatrix[i][j] = lcsMatrix[i - 1][j] > lcsMatrix[i][j - 1] ? 
lcsMatrix[i - 1][j] : lcsMatrix[i][j - 1]; - } - } - } - return lcsString(str1, str2, lcsMatrix); - } - - public String lcsString(String str1, String str2, int[][] lcsMatrix) { - StringBuilder lcs = new StringBuilder(); - int i = str1.length(), - j = str2.length(); - while (i > 0 && j > 0) { - if (str1.charAt(i - 1) == str2.charAt(j - 1)) { - lcs.append(str1.charAt(i - 1)); - i--; - j--; - } else if (lcsMatrix[i - 1][j] > lcsMatrix[i][j - 1]) { - i--; - } else { - j--; - } - } - return lcs.reverse().toString(); - } - - public void main(String[] args) { - String str1 = "DSGSHSRGSRHTRD"; - String str2 = "DATRGAGTSHS"; - String lcs = getLCS(str1, str2); - - //Print LCS - if (lcs != null) { - System.out.println("String 1: " + str1); - System.out.println("String 2: " + str2); - System.out.println("LCS: " + lcs); - System.out.println("LCS length: " + lcs.length()); - } - } - } - - public final class ClosestPair { - - - /** - * Number of points - */ - int numberPoints = 0; - /** - * Input data, maximum 10000. - */ - private Location[] array; - /** - * Minimum point coordinate. - */ - Location point1 = null; - /** - * Minimum point coordinate. - */ - Location point2 = null; - /** - * Minimum point length. 
- */ - private double minNum = Double.MAX_VALUE; - - private int secondCount = 0; - - ClosestPair(int points) { - numberPoints = points; - array = new Location[numberPoints]; - } - - public class Location { - - double x = 0; - double y = 0; - - Location(final double xpar, final double ypar) { //Save x, y coordinates - this.x = xpar; - this.y = ypar; - } - - } - - public Location[] createLocation(int numberValues) { - return new Location[numberValues]; - - } - - public Location buildLocation(double x, double y) { - return new Location(x, y); - } - - public int xPartition( - final Location[] a, final int first, final int last) { - - Location pivot = a[last]; // pivot - int pIndex = last; - int i = first - 1; - Location temp; // Temporarily store value for position transformation - for (int j = first; j <= last - 1; j++) { - if (a[j].x <= pivot.x) { // Less than or less than pivot - i++; - temp = a[i]; // array[i] <-> array[j] - a[i] = a[j]; - a[j] = temp; - } - } - i++; - temp = a[i]; // array[pivot] <-> array[i] - a[i] = a[pIndex]; - a[pIndex] = temp; - return i; // pivot index - } - - public int yPartition( - final Location[] a, final int first, final int last) { - - Location pivot = a[last]; // pivot - int pIndex = last; - int i = first - 1; - Location temp; // Temporarily store value for position transformation - for (int j = first; j <= last - 1; j++) { - if (a[j].y <= pivot.y) { // Less than or less than pivot - i++; - temp = a[i]; // array[i] <-> array[j] - a[i] = a[j]; - a[j] = temp; - } - } - i++; - temp = a[i]; // array[pivot] <-> array[i] - a[i] = a[pIndex]; - a[pIndex] = temp; - return i; // pivot index - } - - public void xQuickSort( - final Location[] a, final int first, final int last) { - - if (first < last) { - int q = xPartition(a, first, last); // pivot - xQuickSort(a, first, q - 1); // Left - xQuickSort(a, q + 1, last); // Right - } - } - - public void yQuickSort( - final Location[] a, final int first, final int last) { - - if (first < last) { - 
int q = yPartition(a, first, last); // pivot - yQuickSort(a, first, q - 1); // Left - yQuickSort(a, q + 1, last); // Right - } - } - public double closestPair(final Location[] a, final int indexNum) { - - Location[] divideArray = new Location[indexNum]; - System.arraycopy(a, 0, divideArray, 0, indexNum); // Copy previous array - int totalNum = indexNum; // number of coordinates in the divideArray - int divideX = indexNum / 2; // Intermediate value for divide - Location[] leftArray = new Location[divideX]; //divide - left array - //divide-right array - Location[] rightArray = new Location[totalNum - divideX]; - if (indexNum <= 3) { // If the number of coordinates is 3 or less - return bruteForce(divideArray); - } - //divide-left array - System.arraycopy(divideArray, 0, leftArray, 0, divideX); - //divide-right array - System.arraycopy( - divideArray, divideX, rightArray, 0, totalNum - divideX); - - double minLeftArea = 0; //Minimum length of left array - double minRightArea = 0; //Minimum length of right array - double minValue = 0; //Minimum lengt - - minLeftArea = closestPair(leftArray, divideX); // recursive closestPair - minRightArea = closestPair(rightArray, totalNum - divideX); - // window size (= minimum length) - minValue = Math.min(minLeftArea, minRightArea); - - // Create window. 
Set the size for creating a window - // and creating a new array for the coordinates in the window - for (int i = 0; i < totalNum; i++) { - double xGap = Math.abs(divideArray[divideX].x - divideArray[i].x); - if (xGap < minValue) { - secondCount++; // size of the array - } else { - if (divideArray[i].x > divideArray[divideX].x) { - break; - } - } - } - // new array for coordinates in window - Location[] firstWindow = new Location[secondCount]; - int k = 0; - for (int i = 0; i < totalNum; i++) { - double xGap = Math.abs(divideArray[divideX].x - divideArray[i].x); - if (xGap < minValue) { // if it's inside a window - firstWindow[k] = divideArray[i]; // put in an array - k++; - } else { - if (divideArray[i].x > divideArray[divideX].x) { - break; - } - } - } - yQuickSort(firstWindow, 0, secondCount - 1); // Sort by y coordinates - /* Coordinates in Window */ - double length = 0; - // size comparison within window - for (int i = 0; i < secondCount - 1; i++) { - for (int j = (i + 1); j < secondCount; j++) { - double xGap = Math.abs(firstWindow[i].x - firstWindow[j].x); - double yGap = Math.abs(firstWindow[i].y - firstWindow[j].y); - if (yGap < minValue) { - length = Math.sqrt(Math.pow(xGap, 2) + Math.pow(yGap, 2)); - // If measured distance is less than current min distance - if (length < minValue) { - // Change minimum distance to current distance - minValue = length; - // Conditional for registering final coordinate - if (length < minNum) { - minNum = length; - point1 = firstWindow[i]; - point2 = firstWindow[j]; - } - } - } else { - break; - } - } - } - secondCount = 0; - return minValue; - } - - public double bruteForce(final Location[] arrayParam) { - - double minValue = Double.MAX_VALUE; // minimum distance - double length = 0; - double xGap = 0; // Difference between x coordinates - double yGap = 0; // Difference between y coordinates - double result = 0; - - if (arrayParam.length == 2) { - // Difference between x coordinates - xGap = (arrayParam[0].x - 
arrayParam[1].x); - // Difference between y coordinates - yGap = (arrayParam[0].y - arrayParam[1].y); - // distance between coordinates - length = Math.sqrt(Math.pow(xGap, 2) + Math.pow(yGap, 2)); - // Conditional statement for registering final coordinate - if (length < minNum) { - minNum = length; - - } - point1 = arrayParam[0]; - point2 = arrayParam[1]; - result = length; - } - if (arrayParam.length == 3) { - for (int i = 0; i < arrayParam.length - 1; i++) { - for (int j = (i + 1); j < arrayParam.length; j++) { - // Difference between x coordinates - xGap = (arrayParam[i].x - arrayParam[j].x); - // Difference between y coordinates - yGap = (arrayParam[i].y - arrayParam[j].y); - // distance between coordinates - length = - Math.sqrt(Math.pow(xGap, 2) + Math.pow(yGap, 2)); - // If measured distance is less than current min distance - if (length < minValue) { - // Change minimum distance to current distance - minValue = length; - if (length < minNum) { - // Registering final coordinate - minNum = length; - point1 = arrayParam[i]; - point2 = arrayParam[j]; - } - } - } - } - result = minValue; - - } - return result; // If only one point returns 0. 
- } - - public void main(final String[] args) { - - //Input data consists of one x-coordinate and one y-coordinate - - ClosestPair cp = new ClosestPair(12); - cp.array[0] = cp.buildLocation(2, 3); - cp.array[1] = cp.buildLocation(2, 16); - cp.array[2] = cp.buildLocation(3, 9); - cp.array[3] = cp.buildLocation(6, 3); - cp.array[4] = cp.buildLocation(7, 7); - cp.array[5] = cp.buildLocation(19, 4); - cp.array[6] = cp.buildLocation(10, 11); - cp.array[7] = cp.buildLocation(15, 2); - cp.array[8] = cp.buildLocation(15, 19); - cp.array[9] = cp.buildLocation(16, 11); - cp.array[10] = cp.buildLocation(17, 13); - cp.array[11] = cp.buildLocation(9, 12); - - System.out.println("Input data"); - System.out.println("Number of points: " + cp.array.length); - for (int i = 0; i < cp.array.length; i++) { - System.out.println("x: " + cp.array[i].x + ", y: " + cp.array[i].y); - } - - cp.xQuickSort(cp.array, 0, cp.array.length - 1); // Sorting by x value - - double result; // minimum distance - - result = cp.closestPair(cp.array, cp.array.length); - System.out.println("Output Data"); - System.out.println("(" + cp.point1.x + ", " + cp.point1.y + ")"); - System.out.println("(" + cp.point2.x + ", " + cp.point2.y + ")"); - System.out.println("Minimum Distance : " + result); - - } - } - - public class SkylineAlgorithm { - private ArrayList points; - public SkylineAlgorithm() { - points = new ArrayList<>(); - } - - public ArrayList getPoints() { - return points; - } - - public ArrayList produceSubSkyLines(ArrayList list) { - - // part where function exits flashback - int size = list.size(); - if (size == 1) { - return list; - } else if (size == 2) { - if (list.get(0).dominates(list.get(1))) { - list.remove(1); - } else { - if (list.get(1).dominates(list.get(0))) { - list.remove(0); - } - } - return list; - } - - // recursive part of the function - ArrayList leftHalf = new ArrayList<>(); - ArrayList rightHalf = new ArrayList<>(); - for (int i = 0; i < list.size(); i++) { - if (i < list.size() 
/ 2) { - leftHalf.add(list.get(i)); - } else { - rightHalf.add(list.get(i)); - } - } - ArrayList leftSubSkyLine = produceSubSkyLines(leftHalf); - ArrayList rightSubSkyLine = produceSubSkyLines(rightHalf); - - // skyline is produced - return produceFinalSkyLine(leftSubSkyLine, rightSubSkyLine); - } - - public ArrayList produceFinalSkyLine(ArrayList left, ArrayList right) { - - // dominated points of ArrayList left are removed - for (int i = 0; i < left.size() - 1; i++) { - if (left.get(i).x == left.get(i + 1).x && left.get(i).y > left.get(i + 1).y) { - left.remove(i); - i--; - } - } - - // minimum y-value is found - int min = left.get(0).y; - for (int i = 1; i < left.size(); i++) { - if (min > left.get(i).y) { - min = left.get(i).y; - if (min == 1) { - i = left.size(); - } - } - } - - // dominated points of ArrayList right are removed - for (int i = 0; i < right.size(); i++) { - if (right.get(i).y >= min) { - right.remove(i); - i--; - } - } - - // final skyline found and returned - left.addAll(right); - return left; - } - - - public class Point { - private int x; - private int y; - - public Point(int x, int y) { - this.x = x; - this.y = y; - } - - /** - * @return x, the x-value - */ - public int getX() { - return x; - } - - /** - * @return y, the y-value - */ - public int getY() { - return y; - } - - public boolean dominates(Point p1) { - // checks if p1 is dominated - return (this.x < p1.x && this.y <= p1.y) || (this.x <= p1.x && this.y < p1.y); - } - } - - class XComparator implements Comparator { - @Override - public int compare(Point a, Point b) { - return Integer.compare(a.x, b.x); - } - } - } - - public class AES { - - private final int[] RCON = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, - 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, - 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, - 0xe8, 0xcb, 0x8d, 
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, - 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, - 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, - 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, - 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, - 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, - 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, - 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, - 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, - 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, - 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, - 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d }; - - private final int[] SBOX = { 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, - 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, - 0xC0, 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, 0x04, - 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, - 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED, 0x20, - 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, - 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 
0x9D, 0x38, 0xF5, 0xBC, - 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, - 0x3D, 0x64, 0x5D, 0x19, 0x73, 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, - 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, - 0x79, 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, 0xBA, - 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, - 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11, 0x69, - 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, - 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 }; - - private final int[] INVERSE_SBOX = { 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, - 0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, - 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E, - 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, - 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50, - 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84, 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, - 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, - 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, - 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73, 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, - 0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, - 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 
0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4, - 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, - 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D, - 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, - 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D }; - - private final int[] MULT2 = { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, - 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, - 0x3e, 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, 0x60, - 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, - 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, - 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, - 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, - 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, - 0x0d, 0x03, 0x01, 0x07, 0x05, 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, - 0x21, 0x27, 0x25, 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, - 0x45, 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, 0x9b, - 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, 0xbb, 0xb9, 0xbf, - 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, - 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 
0xf7, - 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 }; - - private final int[] MULT3 = { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, - 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, - 0x21, 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, 0x50, - 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, 0xc0, 0xc3, 0xc6, - 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, - 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, - 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, - 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, - 0x86, 0x8f, 0x8c, 0x89, 0x8a, 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, - 0xbc, 0xb9, 0xba, 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, - 0xea, 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, 0x5b, - 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, 0x6b, 0x68, 0x6d, - 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, 0x3b, 0x38, 0x3d, 0x3e, 0x37, - 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, - 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a }; - - private final int[] MULT9 = { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, - 0x65, 0x7e, 0x77, 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, - 0xe7, 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, 0xab, - 
0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, 0x76, 0x7f, 0x64, - 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, 0xe6, 0xef, 0xf4, 0xfd, 0xc2, - 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, - 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, - 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, - 0xbf, 0x80, 0x89, 0x92, 0x9b, 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, - 0x19, 0x02, 0x0b, 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, - 0xa0, 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, 0x9a, - 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, 0x0a, 0x03, 0x18, - 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, 0xa1, 0xa8, 0xb3, 0xba, 0x85, - 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, - 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46 }; - - private final int[] MULT11 = { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, - 0x7f, 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, - 0xd9, 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, 0xcb, - 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, 0xf6, 0xfd, 0xe0, - 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, 0x46, 0x4d, 0x50, 0x5b, 0x6a, - 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, - 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, 0x3d, 0x36, 
0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, - 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, - 0xb2, 0x83, 0x88, 0x95, 0x9e, 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, - 0x38, 0x25, 0x2e, 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, - 0xe5, 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, 0x01, - 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, 0xb1, 0xba, 0xa7, - 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, 0x7a, 0x71, 0x6c, 0x67, 0x56, - 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, - 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3 }; - - private final int[] MULT13 = { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, - 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, - 0x9b, 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, 0x6b, - 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, 0x6d, 0x60, 0x77, - 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, 0xbd, 0xb0, 0xa7, 0xaa, 0x89, - 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, - 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, - 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, - 0xa5, 0x86, 0x8b, 0x9c, 0x91, 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, - 0x5b, 0x4c, 0x41, 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, - 0x2a, 0xb1, 0xbc, 0xab, 
0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, 0xb7, - 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, 0x67, 0x6a, 0x7d, - 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0x0c, 0x01, 0x16, 0x1b, 0x38, - 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, - 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97 }; - - private final int[] MULT14 = { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, - 0x46, 0x54, 0x5a, 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, - 0xba, 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, 0x3b, - 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, 0xad, 0xa3, 0xb1, - 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, 0x4d, 0x43, 0x51, 0x5f, 0x75, - 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, - 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, - 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, - 0x23, 0x09, 0x07, 0x15, 0x1b, 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, - 0xe7, 0xf5, 0xfb, 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, - 0xc0, 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, 0xec, - 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, 0x0c, 0x02, 0x10, - 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, 0x37, 0x39, 0x2b, 0x25, 0x0f, - 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, 0xd7, 0xd9, 0xcb, 0xc5, 
0xef, 0xe1, 0xf3, - 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d }; - - public BigInteger scheduleCore(BigInteger t, int rconCounter) { - String rBytes = t.toString(16); - - // Add zero padding - while (rBytes.length() < 8) { - rBytes = "0" + rBytes; - } - - // rotate the first 16 bits to the back - String rotatingBytes = rBytes.substring(0, 2); - String fixedBytes = rBytes.substring(2); - - rBytes = fixedBytes + rotatingBytes; - - // apply S-Box to all 8-Bit Substrings - for (int i = 0; i < 4; i++) { - String currentByteBits = rBytes.substring(i * 2, (i + 1) * 2); - - int currentByte = Integer.parseInt(currentByteBits, 16); - currentByte = SBOX[currentByte]; - - // add the current RCON value to the first byte - if (i == 0) { - currentByte = currentByte ^ RCON[rconCounter]; - } - - currentByteBits = Integer.toHexString(currentByte); - - // Add zero padding - - while (currentByteBits.length() < 2) { - currentByteBits = '0' + currentByteBits; - } - - // replace bytes in original string - rBytes = rBytes.substring(0, i * 2) + currentByteBits + rBytes.substring((i + 1) * 2); - } - - // t = new BigInteger(rBytes, 16); - // return t; - return new BigInteger(rBytes, 16); - } - - public BigInteger[] keyExpansion(BigInteger initialKey) { - BigInteger[] roundKeys = { initialKey, new BigInteger("0"), new BigInteger("0"), new BigInteger("0"), - new BigInteger("0"), new BigInteger("0"), new BigInteger("0"), new BigInteger("0"), new BigInteger("0"), - new BigInteger("0"), new BigInteger("0"), }; - - // initialize rcon iteration - int rconCounter = 1; - - for (int i = 1; i < 11; i++) { - - // get the previous 32 bits the key - BigInteger t = roundKeys[i - 1].remainder(new BigInteger("100000000", 16)); - - // split previous key into 8-bit segments - BigInteger[] prevKey = { roundKeys[i - 1].remainder(new BigInteger("100000000", 16)), - roundKeys[i - 1].remainder(new BigInteger("10000000000000000", 16)) - .divide(new BigInteger("100000000", 16)), - roundKeys[i - 
1].remainder(new BigInteger("1000000000000000000000000", 16)) - .divide(new BigInteger("10000000000000000", 16)), - roundKeys[i - 1].divide(new BigInteger("1000000000000000000000000", 16)), }; - - // run schedule core - t = scheduleCore(t, rconCounter); - rconCounter += 1; - - // Calculate partial round key - BigInteger t0 = t.xor(prevKey[3]); - BigInteger t1 = t0.xor(prevKey[2]); - BigInteger t2 = t1.xor(prevKey[1]); - BigInteger t3 = t2.xor(prevKey[0]); - - // Join round key segments - t2 = t2.multiply(new BigInteger("100000000", 16)); - t1 = t1.multiply(new BigInteger("10000000000000000", 16)); - t0 = t0.multiply(new BigInteger("1000000000000000000000000", 16)); - roundKeys[i] = t0.add(t1).add(t2).add(t3); - - } - return roundKeys; - } - - public int[] splitBlockIntoCells(BigInteger block) { - - int[] cells = new int[16]; - String blockBits = block.toString(2); - - // Append leading 0 for full "128-bit" string - while (blockBits.length() < 128) { - blockBits = '0' + blockBits; - } - - // split 128 to 8 bit cells - for (int i = 0; i < cells.length; i++) { - String cellBits = blockBits.substring(8 * i, 8 * (i + 1)); - cells[i] = Integer.parseInt(cellBits, 2); - } - - return cells; - } - - public BigInteger mergeCellsIntoBlock(int[] cells) { - - String blockBits = ""; - for (int i = 0; i < 16; i++) { - String cellBits = Integer.toBinaryString(cells[i]); - - // Append leading 0 for full "8-bit" strings - while (cellBits.length() < 8) { - cellBits = '0' + cellBits; - } - - blockBits += cellBits; - } - - return new BigInteger(blockBits, 2); - } - - public BigInteger addRoundKey(BigInteger ciphertext, BigInteger key) { - return ciphertext.xor(key); - } - - public BigInteger subBytes(BigInteger ciphertext) { - - int[] cells = splitBlockIntoCells(ciphertext); - - for (int i = 0; i < 16; i++) { - cells[i] = SBOX[cells[i]]; - } - - return mergeCellsIntoBlock(cells); - } - - public BigInteger subBytesDec(BigInteger ciphertext) { - - int[] cells = 
splitBlockIntoCells(ciphertext); - - for (int i = 0; i < 16; i++) { - cells[i] = INVERSE_SBOX[cells[i]]; - } - - return mergeCellsIntoBlock(cells); - } - - public BigInteger shiftRows(BigInteger ciphertext) { - int[] cells = splitBlockIntoCells(ciphertext); - int[] output = new int[16]; - - // do nothing in the first row - output[0] = cells[0]; - output[4] = cells[4]; - output[8] = cells[8]; - output[12] = cells[12]; - - // shift the second row backwards by one cell - output[1] = cells[5]; - output[5] = cells[9]; - output[9] = cells[13]; - output[13] = cells[1]; - - // shift the third row backwards by two cell - output[2] = cells[10]; - output[6] = cells[14]; - output[10] = cells[2]; - output[14] = cells[6]; - - // shift the forth row backwards by tree cell - output[3] = cells[15]; - output[7] = cells[3]; - output[11] = cells[7]; - output[15] = cells[11]; - - return mergeCellsIntoBlock(output); - } - - public BigInteger shiftRowsDec(BigInteger ciphertext) { - int[] cells = splitBlockIntoCells(ciphertext); - int[] output = new int[16]; - - // do nothing in the first row - output[0] = cells[0]; - output[4] = cells[4]; - output[8] = cells[8]; - output[12] = cells[12]; - - // shift the second row forwards by one cell - output[1] = cells[13]; - output[5] = cells[1]; - output[9] = cells[5]; - output[13] = cells[9]; - - // shift the third row forwards by two cell - output[2] = cells[10]; - output[6] = cells[14]; - output[10] = cells[2]; - output[14] = cells[6]; - - // shift the forth row forwards by tree cell - output[3] = cells[7]; - output[7] = cells[11]; - output[11] = cells[15]; - output[15] = cells[3]; - - return mergeCellsIntoBlock(output); - } - - public BigInteger mixColumns(BigInteger ciphertext) { - - int[] cells = splitBlockIntoCells(ciphertext); - int[] outputCells = new int[16]; - - for (int i = 0; i < 4; i++) { - int[] row = { cells[i * 4], cells[i * 4 + 1], cells[i * 4 + 2], cells[i * 4 + 3] }; - - outputCells[i * 4] = MULT2[row[0]] ^ MULT3[row[1]] ^ row[2] 
^ row[3]; - outputCells[i * 4 + 1] = row[0] ^ MULT2[row[1]] ^ MULT3[row[2]] ^ row[3]; - outputCells[i * 4 + 2] = row[0] ^ row[1] ^ MULT2[row[2]] ^ MULT3[row[3]]; - outputCells[i * 4 + 3] = MULT3[row[0]] ^ row[1] ^ row[2] ^ MULT2[row[3]]; - } - return mergeCellsIntoBlock(outputCells); - } - - public BigInteger mixColumnsDec(BigInteger ciphertext) { - - int[] cells = splitBlockIntoCells(ciphertext); - int[] outputCells = new int[16]; - - for (int i = 0; i < 4; i++) { - int[] row = { cells[i * 4], cells[i * 4 + 1], cells[i * 4 + 2], cells[i * 4 + 3] }; - - outputCells[i * 4] = MULT14[row[0]] ^ MULT11[row[1]] ^ MULT13[row[2]] ^ MULT9[row[3]]; - outputCells[i * 4 + 1] = MULT9[row[0]] ^ MULT14[row[1]] ^ MULT11[row[2]] ^ MULT13[row[3]]; - outputCells[i * 4 + 2] = MULT13[row[0]] ^ MULT9[row[1]] ^ MULT14[row[2]] ^ MULT11[row[3]]; - outputCells[i * 4 + 3] = MULT11[row[0]] ^ MULT13[row[1]] ^ MULT9[row[2]] ^ MULT14[row[3]]; - } - return mergeCellsIntoBlock(outputCells); - } - - public BigInteger encrypt(BigInteger plainText, BigInteger key) { - BigInteger[] roundKeys = keyExpansion(key); - - // Initial round - plainText = addRoundKey(plainText, roundKeys[0]); - - // Main rounds - for (int i = 1; i < 10; i++) { - plainText = subBytes(plainText); - plainText = shiftRows(plainText); - plainText = mixColumns(plainText); - plainText = addRoundKey(plainText, roundKeys[i]); - } - - // Final round - plainText = subBytes(plainText); - plainText = shiftRows(plainText); - plainText = addRoundKey(plainText, roundKeys[10]); - - return plainText; - } - - public BigInteger decrypt(BigInteger cipherText, BigInteger key) { - - BigInteger[] roundKeys = keyExpansion(key); - - // Invert final round - cipherText = addRoundKey(cipherText, roundKeys[10]); - cipherText = shiftRowsDec(cipherText); - cipherText = subBytesDec(cipherText); - - // Invert main rounds - for (int i = 9; i > 0; i--) { - cipherText = addRoundKey(cipherText, roundKeys[i]); - cipherText = mixColumnsDec(cipherText); - cipherText 
= shiftRowsDec(cipherText); - cipherText = subBytesDec(cipherText); - } - - // Invert initial round - cipherText = addRoundKey(cipherText, roundKeys[0]); - - return cipherText; - } - - public void main(String[] args) { - - try (Scanner input = new Scanner(System.in)) { - System.out.println("Enter (e) letter for encrpyt or (d) letter for decrypt :"); - char choice = input.nextLine().charAt(0); - String in; - switch (choice) { - case 'E': - case 'e': - System.out.println("Choose a plaintext block (128-Bit Integer in base 16):"); - in = input.nextLine(); - BigInteger plaintext = new BigInteger(in, 16); - System.out.println("Choose a Key (128-Bit Integer in base 16):"); - in = input.nextLine(); - BigInteger encryptionKey = new BigInteger(in, 16); - System.out.println("The encrypted message is: \n" + encrypt(plaintext, encryptionKey).toString(16)); - break; - case 'D': - case 'd': - System.out.println("Enter your ciphertext block (128-Bit Integer in base 16):"); - in = input.nextLine(); - BigInteger ciphertext = new BigInteger(in, 16); - System.out.println("Choose a Key (128-Bit Integer in base 16):"); - in = input.nextLine(); - BigInteger decryptionKey = new BigInteger(in, 16); - System.out.println("The deciphered message is:\n" + decrypt(ciphertext, decryptionKey).toString(16)); - break; - default: - System.out.println("** End **"); - } - } - } - } - - public static class ColumnarTranspositionCipher { - - private static String keyword; - private static Object[][] table; - private static String abecedarium; - public static final String ABECEDARIUM = "abcdefghijklmnopqrstuvwxyzABCDEFG" - + "HIJKLMNOPQRSTUVWXYZ0123456789,.;:-@"; - private static final String ENCRYPTION_FIELD = "≈"; - private static final char ENCRYPTION_FIELD_CHAR = '≈'; - - public static String encrpyter(String word, String keyword) { - ColumnarTranspositionCipher.keyword = keyword; - abecedariumBuilder(500); - table = tableBuilder(word); - Object[][] sortedTable = sortTable(table); - String 
wordEncrypted = ""; - for (int i = 0; i < sortedTable[i].length; i++) { - for (int j = 1; j < sortedTable.length; j++) { - wordEncrypted += sortedTable[j][i]; - } - } - return wordEncrypted; - } - - public static String encrpyter(String word, String keyword, String abecedarium) { - ColumnarTranspositionCipher.keyword = keyword; - if (abecedarium != null) { - ColumnarTranspositionCipher.abecedarium = abecedarium; - } else { - ColumnarTranspositionCipher.abecedarium = ABECEDARIUM; - } - table = tableBuilder(word); - Object[][] sortedTable = sortTable(table); - String wordEncrypted = ""; - for (int i = 0; i < sortedTable[0].length; i++) { - for (int j = 1; j < sortedTable.length; j++) { - wordEncrypted += sortedTable[j][i]; - } - } - return wordEncrypted; - } - - public static String decrypter() { - String wordDecrypted = ""; - for (int i = 1; i < table.length; i++) { - for (Object item : table[i]) { - wordDecrypted += item; - } - } - return wordDecrypted.replaceAll(ENCRYPTION_FIELD, ""); - } - - private static Object[][] tableBuilder(String word) { - Object[][] table = new Object[numberOfRows(word) + 1][keyword.length()]; - char[] wordInChards = word.toCharArray(); - //Fils in the respective numbers - table[0] = findElements(); - int charElement = 0; - for (int i = 1; i < table.length; i++) { - for (int j = 0; j < table[i].length; j++) { - if (charElement < wordInChards.length) { - table[i][j] = wordInChards[charElement]; - charElement++; - } else { - table[i][j] = ENCRYPTION_FIELD_CHAR; - } - } - } - return table; - } - - private static int numberOfRows(String word) { - if ((double) word.length() / keyword.length() > word.length() / keyword.length()) { - return (word.length() / keyword.length()) + 1; - } else { - return word.length() / keyword.length(); - } - } - - private static Object[] findElements() { - Object[] charValues = new Object[keyword.length()]; - for (int i = 0; i < charValues.length; i++) { - int charValueIndex = 
abecedarium.indexOf(keyword.charAt(i)); - charValues[i] = charValueIndex > -1 ? charValueIndex : null; - } - return charValues; - } - - private static Object[][] sortTable(Object[][] table) { - Object[][] tableSorted = new Object[table.length][table[0].length]; - for (int i = 0; i < tableSorted.length; i++) { - System.arraycopy(table[i], 0, tableSorted[i], 0, tableSorted[i].length); - } - for (int i = 0; i < tableSorted[0].length; i++) { - for (int j = i + 1; j < tableSorted[0].length; j++) { - if ((int) tableSorted[0][i] > (int) table[0][j]) { - Object[] column = getColumn(tableSorted, tableSorted.length, i); - switchColumns(tableSorted, j, i, column); - } - } - } - return tableSorted; - } - - private static Object[] getColumn(Object[][] table, int rows, int column) { - Object[] columnArray = new Object[rows]; - for (int i = 0; i < rows; i++) { - columnArray[i] = table[i][column]; - } - return columnArray; - } - - private static void switchColumns(Object[][] table, int firstColumnIndex, - int secondColumnIndex, Object[] columnToSwitch) { - for (int i = 0; i < table.length; i++) { - table[i][secondColumnIndex] = table[i][firstColumnIndex]; - table[i][firstColumnIndex] = columnToSwitch[i]; - } - } - - private static void abecedariumBuilder(int value) { - abecedarium = ""; - for (int i = 0; i < value; i++) { - abecedarium += (char) i; - } - } - - private static void showTable() { - for (Object[] table1 : table) { - for (Object item : table1) { - System.out.print(item + " "); - } - System.out.println(); - } - } - - public void main(String[] args) { - String keywordForExample = "asd215"; - String wordBeingEncrypted = "This is a test of the Columnar Transposition Cipher"; - System.out.println("### Example of Columnar Transposition Cipher ###\n"); - System.out.println("Word being encryped ->>> " + wordBeingEncrypted); - System.out.println("Word encrypted ->>> " + ColumnarTranspositionCipher - .encrpyter(wordBeingEncrypted, keywordForExample)); - System.out.println("Word 
decryped ->>> " + ColumnarTranspositionCipher - .decrypter()); - System.out.println("\n### Encrypted Table ###"); - showTable(); - } - } - - public final class ClosestPairs { - - - /** - * Number of points - */ - int numberPoints = 0; - /** - * Input data, maximum 10000. - */ - private Location[] array; - - Location point1 = null; - - Location point2 = null; - - private double minNum = Double.MAX_VALUE; - - private int secondCount = 0; - - ClosestPairs(int points) { - numberPoints = points; - array = new Location[numberPoints]; - } - - - public class Location { - - double x = 0; - double y = 0; - - - Location(final double xpar, final double ypar) { //Save x, y coordinates - this.x = xpar; - this.y = ypar; - } - - } - - public Location[] createLocation(int numberValues) { - return new Location[numberValues]; - - } - - public Location buildLocation(double x, double y) { - return new Location(x, y); - } - - public int xPartition( - final Location[] a, final int first, final int last) { - - Location pivot = a[last]; // pivot - int pIndex = last; - int i = first - 1; - Location temp; // Temporarily store value for position transformation - for (int j = first; j <= last - 1; j++) { - if (a[j].x <= pivot.x) { // Less than or less than pivot - i++; - temp = a[i]; // array[i] <-> array[j] - a[i] = a[j]; - a[j] = temp; - } - } - i++; - temp = a[i]; // array[pivot] <-> array[i] - a[i] = a[pIndex]; - a[pIndex] = temp; - return i; // pivot index - } - - public int yPartition( - final Location[] a, final int first, final int last) { - - Location pivot = a[last]; // pivot - int pIndex = last; - int i = first - 1; - Location temp; // Temporarily store value for position transformation - for (int j = first; j <= last - 1; j++) { - if (a[j].y <= pivot.y) { // Less than or less than pivot - i++; - temp = a[i]; // array[i] <-> array[j] - a[i] = a[j]; - a[j] = temp; - } - } - i++; - temp = a[i]; // array[pivot] <-> array[i] - a[i] = a[pIndex]; - a[pIndex] = temp; - return i; // pivot 
index - } - - public void xQuickSort( - final Location[] a, final int first, final int last) { - - if (first < last) { - int q = xPartition(a, first, last); // pivot - xQuickSort(a, first, q - 1); // Left - xQuickSort(a, q + 1, last); // Right - } - } - - public void yQuickSort( - final Location[] a, final int first, final int last) { - - if (first < last) { - int q = yPartition(a, first, last); // pivot - yQuickSort(a, first, q - 1); // Left - yQuickSort(a, q + 1, last); // Right - } - } - - public double closestPair(final Location[] a, final int indexNum) { - - Location[] divideArray = new Location[indexNum]; - System.arraycopy(a, 0, divideArray, 0, indexNum); // Copy previous array - int totalNum = indexNum; // number of coordinates in the divideArray - int divideX = indexNum / 2; // Intermediate value for divide - Location[] leftArray = new Location[divideX]; //divide - left array - //divide-right array - Location[] rightArray = new Location[totalNum - divideX]; - if (indexNum <= 3) { // If the number of coordinates is 3 or less - return bruteForce(divideArray); - } - //divide-left array - System.arraycopy(divideArray, 0, leftArray, 0, divideX); - //divide-right array - System.arraycopy( - divideArray, divideX, rightArray, 0, totalNum - divideX); - - double minLeftArea = 0; //Minimum length of left array - double minRightArea = 0; //Minimum length of right array - double minValue = 0; //Minimum lengt - - minLeftArea = closestPair(leftArray, divideX); // recursive closestPair - minRightArea = closestPair(rightArray, totalNum - divideX); - // window size (= minimum length) - minValue = Math.min(minLeftArea, minRightArea); - - // Create window. 
Set the size for creating a window - // and creating a new array for the coordinates in the window - for (int i = 0; i < totalNum; i++) { - double xGap = Math.abs(divideArray[divideX].x - divideArray[i].x); - if (xGap < minValue) { - secondCount++; // size of the array - } else { - if (divideArray[i].x > divideArray[divideX].x) { - break; - } - } - } - // new array for coordinates in window - Location[] firstWindow = new Location[secondCount]; - int k = 0; - for (int i = 0; i < totalNum; i++) { - double xGap = Math.abs(divideArray[divideX].x - divideArray[i].x); - if (xGap < minValue) { // if it's inside a window - firstWindow[k] = divideArray[i]; // put in an array - k++; - } else { - if (divideArray[i].x > divideArray[divideX].x) { - break; - } - } - } - yQuickSort(firstWindow, 0, secondCount - 1); // Sort by y coordinates - /* Coordinates in Window */ - double length = 0; - // size comparison within window - for (int i = 0; i < secondCount - 1; i++) { - for (int j = (i + 1); j < secondCount; j++) { - double xGap = Math.abs(firstWindow[i].x - firstWindow[j].x); - double yGap = Math.abs(firstWindow[i].y - firstWindow[j].y); - if (yGap < minValue) { - length = Math.sqrt(Math.pow(xGap, 2) + Math.pow(yGap, 2)); - // If measured distance is less than current min distance - if (length < minValue) { - // Change minimum distance to current distance - minValue = length; - // Conditional for registering final coordinate - if (length < minNum) { - minNum = length; - point1 = firstWindow[i]; - point2 = firstWindow[j]; - } - } - } else { - break; - } - } - } - secondCount = 0; - return minValue; - } - - public double bruteForce(final Location[] arrayParam) { - - double minValue = Double.MAX_VALUE; // minimum distance - double length = 0; - double xGap = 0; // Difference between x coordinates - double yGap = 0; // Difference between y coordinates - double result = 0; - - if (arrayParam.length == 2) { - // Difference between x coordinates - xGap = (arrayParam[0].x - 
arrayParam[1].x); - // Difference between y coordinates - yGap = (arrayParam[0].y - arrayParam[1].y); - // distance between coordinates - length = Math.sqrt(Math.pow(xGap, 2) + Math.pow(yGap, 2)); - // Conditional statement for registering final coordinate - if (length < minNum) { - minNum = length; - - } - point1 = arrayParam[0]; - point2 = arrayParam[1]; - result = length; - } - if (arrayParam.length == 3) { - for (int i = 0; i < arrayParam.length - 1; i++) { - for (int j = (i + 1); j < arrayParam.length; j++) { - // Difference between x coordinates - xGap = (arrayParam[i].x - arrayParam[j].x); - // Difference between y coordinates - yGap = (arrayParam[i].y - arrayParam[j].y); - // distance between coordinates - length = - Math.sqrt(Math.pow(xGap, 2) + Math.pow(yGap, 2)); - // If measured distance is less than current min distance - if (length < minValue) { - // Change minimum distance to current distance - minValue = length; - if (length < minNum) { - // Registering final coordinate - minNum = length; - point1 = arrayParam[i]; - point2 = arrayParam[j]; - } - } - } - } - result = minValue; - - } - return result; // If only one point returns 0. 
- } - - public void main(final String[] args) { - - //Input data consists of one x-coordinate and one y-coordinate - - ClosestPair cp = new ClosestPair(12); - cp.array[0] = cp.buildLocation(2, 3); - cp.array[1] = cp.buildLocation(2, 16); - cp.array[2] = cp.buildLocation(3, 9); - cp.array[3] = cp.buildLocation(6, 3); - cp.array[4] = cp.buildLocation(7, 7); - cp.array[5] = cp.buildLocation(19, 4); - cp.array[6] = cp.buildLocation(10, 11); - cp.array[7] = cp.buildLocation(15, 2); - cp.array[8] = cp.buildLocation(15, 19); - cp.array[9] = cp.buildLocation(16, 11); - cp.array[10] = cp.buildLocation(17, 13); - cp.array[11] = cp.buildLocation(9, 12); - - System.out.println("Input data"); - System.out.println("Number of points: " + cp.array.length); - for (int i = 0; i < cp.array.length; i++) { - System.out.println("x: " + cp.array[i].x + ", y: " + cp.array[i].y); - } - - cp.xQuickSort(cp.array, 0, cp.array.length - 1); // Sorting by x value - - double result; // minimum distance - - result = cp.closestPair(cp.array, cp.array.length); - // ClosestPair start - // minimum distance coordinates and distance output - System.out.println("Output Data"); - System.out.println("(" + cp.point1.x + ", " + cp.point1.y + ")"); - System.out.println("(" + cp.point2.x + ", " + cp.point2.y + ")"); - System.out.println("Minimum Distance : " + result); - - } - } - - public class ColumnarTranspositionCiphers { - - private String keyword; - private Object[][] table; - private String abecedarium; - public static final String ABECEDARIUM = "abcdefghijklmnopqrstuvwxyzABCDEFG" - + "HIJKLMNOPQRSTUVWXYZ0123456789,.;:-@"; - private static final String ENCRYPTION_FIELD = "≈"; - private static final char ENCRYPTION_FIELD_CHAR = '≈'; - - public String encrpyter(String word, String keyword) { - ColumnarTranspositionCipher.keyword = keyword; - abecedariumBuilder(500); - table = tableBuilder(word); - Object[][] sortedTable = sortTable(table); - String wordEncrypted = ""; - for (int i = 0; i < 
sortedTable[i].length; i++) { - for (int j = 1; j < sortedTable.length; j++) { - wordEncrypted += sortedTable[j][i]; - } - } - return wordEncrypted; - } - - public String encrpyter(String word, String keyword, - String abecedarium) { - ColumnarTranspositionCipher.keyword = keyword; - if (abecedarium != null) { - ColumnarTranspositionCipher.abecedarium = abecedarium; - } else { - ColumnarTranspositionCipher.abecedarium = ABECEDARIUM; - } - table = tableBuilder(word); - Object[][] sortedTable = sortTable(table); - String wordEncrypted = ""; - for (int i = 0; i < sortedTable[0].length; i++) { - for (int j = 1; j < sortedTable.length; j++) { - wordEncrypted += sortedTable[j][i]; - } - } - return wordEncrypted; - } - - public String decrypter() { - String wordDecrypted = ""; - for (int i = 1; i < table.length; i++) { - for (Object item : table[i]) { - wordDecrypted += item; - } - } - return wordDecrypted.replaceAll(ENCRYPTION_FIELD, ""); - } - - private Object[][] tableBuilder(String word) { - Object[][] table = new Object[numberOfRows(word) + 1][keyword.length()]; - char[] wordInChards = word.toCharArray(); - //Fils in the respective numbers - table[0] = findElements(); - int charElement = 0; - for (int i = 1; i < table.length; i++) { - for (int j = 0; j < table[i].length; j++) { - if (charElement < wordInChards.length) { - table[i][j] = wordInChards[charElement]; - charElement++; - } else { - table[i][j] = ENCRYPTION_FIELD_CHAR; - } - } - } - return table; - } - - private int numberOfRows(String word) { - if ((double) word.length() / keyword.length() > word.length() / keyword.length()) { - return (word.length() / keyword.length()) + 1; - } else { - return word.length() / keyword.length(); - } - } - - /** - * - * @return charValues - */ - private Object[] findElements() { - Object[] charValues = new Object[keyword.length()]; - for (int i = 0; i < charValues.length; i++) { - int charValueIndex = abecedarium.indexOf(keyword.charAt(i)); - charValues[i] = charValueIndex > 
-1 ? charValueIndex : null; - } - return charValues; - } - - private Object[][] sortTable(Object[][] table) { - Object[][] tableSorted = new Object[table.length][table[0].length]; - for (int i = 0; i < tableSorted.length; i++) { - System.arraycopy(table[i], 0, tableSorted[i], 0, tableSorted[i].length); - } - for (int i = 0; i < tableSorted[0].length; i++) { - for (int j = i + 1; j < tableSorted[0].length; j++) { - if ((int) tableSorted[0][i] > (int) table[0][j]) { - Object[] column = getColumn(tableSorted, tableSorted.length, i); - switchColumns(tableSorted, j, i, column); - } - } - } - return tableSorted; - } - - private Object[] getColumn(Object[][] table, int rows, int column) { - Object[] columnArray = new Object[rows]; - for (int i = 0; i < rows; i++) { - columnArray[i] = table[i][column]; - } - return columnArray; - } - - private void switchColumns(Object[][] table, int firstColumnIndex, - int secondColumnIndex, Object[] columnToSwitch) { - for (int i = 0; i < table.length; i++) { - table[i][secondColumnIndex] = table[i][firstColumnIndex]; - table[i][firstColumnIndex] = columnToSwitch[i]; - } - } - - private void abecedariumBuilder(int value) { - abecedarium = ""; - for (int i = 0; i < value; i++) { - abecedarium += (char) i; - } - } - - private void showTable() { - for (Object[] table1 : table) { - for (Object item : table1) { - System.out.print(item + " "); - } - System.out.println(); - } - } - - public void main(String[] args) { - String keywordForExample = "asd215"; - String wordBeingEncrypted = "This is a test of the Columnar Transposition Cipher"; - System.out.println("### Example of Columnar Transposition Cipher ###\n"); - System.out.println("Word being encryped ->>> " + wordBeingEncrypted); - System.out.println("Word encrypted ->>> " + ColumnarTranspositionCipher - .encrpyter(wordBeingEncrypted, keywordForExample)); - System.out.println("Word decryped ->>> " + ColumnarTranspositionCipher - .decrypter()); - System.out.println("\n### Encrypted Table 
###"); - showTable(); - } - } - - public class BinarySearchTree> implements Collection, Iterable { - private Node root = null; - private int size = 0; - - /** - * @return number of elements in the collection - */ - @Override - public int size() { - return this.size; - } - - /** - * @return if collection is empty - */ - @Override - public boolean isEmpty() { - return size == 0; - } - - /** - * @param element to check for contains - * @return if the object contains in collection - */ - @Override - @SuppressWarnings("unchecked") - public boolean contains(Object element) { - return !isEmpty() && root.contains((T) element); - } - - /** - * @return tree iterator - */ - @Override - public Iterator iterator() { - return new BSTIterator(); - } - - /** - * Method adding elements from the collection to array - * @param array to add elements - * @param parameter of array elements - * @return array with elements from the collection - */ - @Override - @SuppressWarnings("unchecked") - public T1[] toArray(T1[] array) { - ArrayList result = new ArrayList<>(); - for (T tmp : this) { - result.add((T1) tmp); - } - return result.toArray(array); - } - - /** - * @return elements of collection as array of Objects in increasing order - */ - @Override - public Object[] toArray() { - return toArray(new Object[size]); - } - - /** - * Method adding element to the collection - * @param value element to add - * @return if the element will be added - */ - @Override - public boolean add(T value) { - if (root == null) { - root = new Node(value); - ++size; - return true; - } - return root.add(value); - } - - /** - * Method removing element from the collection - * @param value to remove from collection - * @return if the element will be removed - */ - @Override - @SuppressWarnings("unchecked") - public boolean remove(Object value) { - return !isEmpty() && root.remove((T) value); - } - - /** - * @param collection to check for contains - * @return if Tree contains every element from the collection - */ 
- @Override - public boolean containsAll(Collection collection) { - boolean result = true; - for (Object tmp : collection) { - result &= contains(tmp); - } - return result; - } - - /** - * @param collection with elements to add - * @return if all elements will be added - */ - @Override - public boolean addAll(Collection collection) { - boolean result = true; - for (T tmp : collection) { - result &= add(tmp); - } - return result; - } - - /** - * @param collection with elements to remove - * @return if all elements will be removed - */ - @Override - public boolean removeAll(Collection collection) { - boolean result = true; - for (Object tmp : collection) { - result &= remove(tmp); - } - return result; - } - - /** - * @param collection with elements to retain - * @return if this collection will be changed - */ - @Override - public boolean retainAll(Collection collection) { - boolean result = false; - for (Object tmp : collection) { - if (contains(tmp)) { - remove(tmp); - result = true; - } - } - return result; - } - - /** - * Clear the collection - */ - @Override - public void clear() { - root = null; - size = 0; - } - - /** - * @return String representation of the collection - */ - @Override - public String toString() { - return isEmpty() ? 
"null" : root.toString(); - } - - /** - * Methods gets all Nodes from current subtree to the List - * @param node subtree to get Nodes - * @param elements List to add elements - * @return List with elements - */ - private ArrayList getAll(Node node, ArrayList elements) { - if (node.left != null) { - getAll(node.left, elements); - } - elements.add(node); - if (node.right != null) { - getAll(node.right, elements); - } - return elements; - } - - /** Class realizing the Binary Search Tree Iterator */ - private class BSTIterator implements Iterator { - private ArrayList elements = new ArrayList<>(); - - private BSTIterator() { - if (!isEmpty()) { - BinarySearchTree.this.getAll(root, elements); - } - } - - /** - * @return if the next element exist - */ - @Override - public boolean hasNext() { - return !elements.isEmpty() && treeContainsAtLeastOneElement(); - } - - /** - * @return if the List contains at least one element from the tree - */ - private boolean treeContainsAtLeastOneElement() { - for (Node tmp : elements) { - if (BinarySearchTree.this.contains(tmp.value)) { - return true; - } - } - return false; - } - - /** - * @return value of next element, null if it does not exist - */ - @Override - public T next() { - if (elements.isEmpty()) { - return null; - } - if (!root.contains(elements.get(0).value)) { - elements.remove(0); - return next(); - } - return elements.remove(0).value; - } - } - - /** - * Class describes Node of the Binary Tree - */ - private class Node { - private T value; - private Node parent; - private Node left = null; - private Node right = null; - - private Node(T value) { - this.value = value; - this.parent = null; - } - - private Node(T value, Node parent) { - this.value = value; - this.parent = parent; - } - - /** - * Method adds an element with a specified value - * @param value of new element - */ - private boolean add(T value) { - if (value.compareTo(this.value) < 0) { - if (left == null) { - left = new Node(value, this); - ++size; - return 
true; - } - left.add(value); - } else if (value.compareTo(this.value) > 0) { - if (right == null) { - right = new Node(value, this); - ++size; - return true; - } - right.add(value); - } - return false; - } - - /** - * Method removes an element with a specified value - * @param value of element to remove - */ - private boolean remove(T value) { - boolean result = false; - if (value.compareTo(this.value) < 0) { - result = left != null && left.remove(value); - } else if (value.compareTo(this.value) > 0) { - result = right != null && right.remove(value); - } else { - --size; - this.remove(); - } - return result; - } - - /** - * Method removing current Node - */ - private void remove() { - if (left != null && right != null) { - Node newNode = this.findMinimalInRightSubtree(); - value = newNode.value; - changeNode(newNode); - } else if (left != null) { - changeNode(left); - } else if (right != null) { - changeNode(right); - } else { - changeNode(null); - } - } - - /** - * @return Node with minimum value in the subtree - */ - private Node findMinimalInRightSubtree() { - Node current = this.right; - while (current.left != null) { - current = current.left; - } - return current; - } - - private void changeNode(Node newNode) { - if (newNode == null) { - if (parent == null) { - root = null; - } else { - if (equals(parent.left)) { - parent.left = null; - } else { - parent.right = null; - } - } - return; - } - value = newNode.value; - if (newNode.equals(newNode.parent.left)) { - newNode.parent.left = newNode.left; - } else { - newNode.parent.right = newNode.right; - } - } - - /** - * @param element to check for contains - * @return if the object contains in the subtree - */ - private boolean contains(T element) { - if (value.equals(element)) { - return true; - } - if (value.compareTo(element) > 0) { - return left != null && left.contains(element); - } - return right != null && right.contains(element); - } - - /** - * @return String representation of the subtree - */ - @Override 
- public String toString() { - StringBuilder result = new StringBuilder(); - result.append("(").append(value.toString()).append(" "); - result.append(left == null ? "null" : left.toString()).append(" "); - result.append(right == null ? "null" : right.toString()).append(")"); - return result.toString(); - } - - } - } - - public final class Point2D implements Comparable { - - /** - * Compares two points by x-coordinate. - */ - public final Comparator X_ORDER = new XOrder(); - - /** - * Compares two points by y-coordinate. - */ - public final Comparator Y_ORDER = new YOrder(); - - /** - * Compares two points by polar radius. - */ - public final Comparator R_ORDER = new ROrder(); - - private final double x; // x coordinate - private final double y; // y coordinate - - /** - * Initializes a new point (x, y). - * @param x the x-coordinate - * @param y the y-coordinate - * @throws IllegalArgumentException if either {@code x} or {@code y} - * is {@code Double.NaN}, {@code Double.POSITIVE_INFINITY} or - * {@code Double.NEGATIVE_INFINITY} - */ - public Point2D(double x, double y) { - if (Double.isInfinite(x) || Double.isInfinite(y)) - throw new IllegalArgumentException("Coordinates must be finite"); - if (Double.isNaN(x) || Double.isNaN(y)) - throw new IllegalArgumentException("Coordinates cannot be NaN"); - if (x == 0.0) this.x = 0.0; // convert -0.0 to +0.0 - else this.x = x; - - if (y == 0.0) this.y = 0.0; // convert -0.0 to +0.0 - else this.y = y; - } - - /** - * Returns the x-coordinate. 
- * @return the x-coordinate - */ - public double x() { - return x; - } - - public double y() { - return y; - } - - public double r() { - return Math.sqrt(x*x + y*y); - } - - public double theta() { - return Math.atan2(y, x); - } - - private double angleTo(Point2D that) { - double dx = that.x - this.x; - double dy = that.y - this.y; - return Math.atan2(dy, dx); - } - - public int ccw(Point2D a, Point2D b, Point2D c) { - double area2 = (b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x); - if (area2 < 0) return -1; - else if (area2 > 0) return +1; - else return 0; - } - - public double area2(Point2D a, Point2D b, Point2D c) { - return (b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x); - } - - public double distanceTo(Point2D that) { - double dx = this.x - that.x; - double dy = this.y - that.y; - return Math.sqrt(dx*dx + dy*dy); - } - - public double distanceSquaredTo(Point2D that) { - double dx = this.x - that.x; - double dy = this.y - that.y; - return dx*dx + dy*dy; - } - - public int compareTo(Point2D that) { - if (this.y < that.y) return -1; - if (this.y > that.y) return +1; - if (this.x < that.x) return -1; - if (this.x > that.x) return +1; - return 0; - } - - public Comparator polarOrder() { - return new PolarOrder(); - } - - public Comparator atan2Order() { - return new Atan2Order(); - } - - public Comparator distanceToOrder() { - return new DistanceToOrder(); - } - - // compare points according to their x-coordinate - private class XOrder implements Comparator { - public int compare(Point2D p, Point2D q) { - if (p.x < q.x) return -1; - if (p.x > q.x) return +1; - return 0; - } - } - - // compare points according to their y-coordinate - private class YOrder implements Comparator { - public int compare(Point2D p, Point2D q) { - if (p.y < q.y) return -1; - if (p.y > q.y) return +1; - return 0; - } - } - - // compare points according to their polar radius - private class ROrder implements Comparator { - public int compare(Point2D p, Point2D q) { - double delta = (p.x*p.x + 
p.y*p.y) - (q.x*q.x + q.y*q.y); - if (delta < 0) return -1; - if (delta > 0) return +1; - return 0; - } - } - - // compare other points relative to atan2 angle (bewteen -pi/2 and pi/2) they make with this Point - private class Atan2Order implements Comparator { - public int compare(Point2D q1, Point2D q2) { - double angle1 = angleTo(q1); - double angle2 = angleTo(q2); - if (angle1 < angle2) return -1; - else if (angle1 > angle2) return +1; - else return 0; - } - } - - // compare other points relative to polar angle (between 0 and 2pi) they make with this Point - private class PolarOrder implements Comparator { - public int compare(Point2D q1, Point2D q2) { - double dx1 = q1.x - x; - double dy1 = q1.y - y; - double dx2 = q2.x - x; - double dy2 = q2.y - y; - - if (dy1 >= 0 && dy2 < 0) return -1; // q1 above; q2 below - else if (dy2 >= 0 && dy1 < 0) return +1; // q1 below; q2 above - else if (dy1 == 0 && dy2 == 0) { // 3-collinear and horizontal - if (dx1 >= 0 && dx2 < 0) return -1; - else if (dx2 >= 0 && dx1 < 0) return +1; - else return 0; - } - else return -ccw(Point2D.this, q1, q2); // both above or below - - // Note: ccw() recomputes dx1, dy1, dx2, and dy2 - } - } - - // compare points according to their distance to this point - private class DistanceToOrder implements Comparator { - public int compare(Point2D p, Point2D q) { - double dist1 = distanceSquaredTo(p); - double dist2 = distanceSquaredTo(q); - if (dist1 < dist2) return -1; - else if (dist1 > dist2) return +1; - else return 0; - } - } - - @Override - public boolean equals(Object other) { - if (other == this) return true; - if (other == null) return false; - if (other.getClass() != this.getClass()) return false; - Point2D that = (Point2D) other; - return this.x == that.x && this.y == that.y; - } - - @Override - public String toString() { - return "(" + x + ", " + y + ")"; - } - - @Override - public int hashCode() { - int hashX = ((Double) x).hashCode(); - int hashY = ((Double) y).hashCode(); - return 
31*hashX + hashY; - } - } - - public class RedBlackBST, Value> { - - private static final boolean RED = true; - private static final boolean BLACK = false; - - private Node root; // root of the BST - - // BST helper node data type - private class Node { - private Key key; // key - private Value val; // associated data - private Node left, right; // links to left and right subtrees - private boolean color; // color of parent link - private int size; // subtree count - - public Node(Key key, Value val, boolean color, int size) { - this.key = key; - this.val = val; - this.color = color; - this.size = size; - } - } - - /** - * Initializes an empty symbol table. - */ - public RedBlackBST() { - } - - /*************************************************************************** - * Node helper methods. - ***************************************************************************/ - // is node x red; false if x is null ? - private boolean isRed(Node x) { - if (x == null) return false; - return x.color == RED; - } - - // number of node in subtree rooted at x; 0 if x is null - private int size(Node x) { - if (x == null) return 0; - return x.size; - } - - - /** - * Returns the number of key-value pairs in this symbol table. - * @return the number of key-value pairs in this symbol table - */ - public int size() { - return size(root); - } - - /** - * Is this symbol table empty? - * @return {@code true} if this symbol table is empty and {@code false} otherwise - */ - public boolean isEmpty() { - return root == null; - } - - - /*************************************************************************** - * Standard BST search. - ***************************************************************************/ - - /** - * Returns the value associated with the given key. 
- * @param key the key - * @return the value associated with the given key if the key is in the symbol table - * and {@code null} if the key is not in the symbol table - * @throws IllegalArgumentException if {@code key} is {@code null} - */ - public Value get(Key key) { - if (key == null) throw new IllegalArgumentException("argument to get() is null"); - return get(root, key); - } - - // value associated with the given key in subtree rooted at x; null if no such key - private Value get(Node x, Key key) { - while (x != null) { - int cmp = key.compareTo(x.key); - if (cmp < 0) x = x.left; - else if (cmp > 0) x = x.right; - else return x.val; - } - return null; - } - - /** - * Does this symbol table contain the given key? - * @param key the key - * @return {@code true} if this symbol table contains {@code key} and - * {@code false} otherwise - * @throws IllegalArgumentException if {@code key} is {@code null} - */ - public boolean contains(Key key) { - return get(key) != null; - } - - /*************************************************************************** - * Red-black tree insertion. - ***************************************************************************/ - - /** - * Inserts the specified key-value pair into the symbol table, overwriting the old - * value with the new value if the symbol table already contains the specified key. - * Deletes the specified key (and its associated value) from this symbol table - * if the specified value is {@code null}. 
- * - * @param key the key - * @param val the value - * @throws IllegalArgumentException if {@code key} is {@code null} - */ - public void put(Key key, Value val) { - if (key == null) throw new IllegalArgumentException("first argument to put() is null"); - if (val == null) { - delete(key); - return; - } - - root = put(root, key, val); - root.color = BLACK; - // assert check(); - } - - // insert the key-value pair in the subtree rooted at h - private Node put(Node h, Key key, Value val) { - if (h == null) return new Node(key, val, RED, 1); - - int cmp = key.compareTo(h.key); - if (cmp < 0) h.left = put(h.left, key, val); - else if (cmp > 0) h.right = put(h.right, key, val); - else h.val = val; - - // fix-up any right-leaning links - if (isRed(h.right) && !isRed(h.left)) h = rotateLeft(h); - if (isRed(h.left) && isRed(h.left.left)) h = rotateRight(h); - if (isRed(h.left) && isRed(h.right)) flipColors(h); - h.size = size(h.left) + size(h.right) + 1; - - return h; - } - - /*************************************************************************** - * Red-black tree deletion. - ***************************************************************************/ - - /** - * Removes the smallest key and associated value from the symbol table. - * @throws NoSuchElementException if the symbol table is empty - */ - public void deleteMin() { - if (isEmpty()) throw new NoSuchElementException("BST underflow"); - - // if both children of root are black, set root to red - if (!isRed(root.left) && !isRed(root.right)) - root.color = RED; - - root = deleteMin(root); - if (!isEmpty()) root.color = BLACK; - // assert check(); - } - - // delete the key-value pair with the minimum key rooted at h - private Node deleteMin(Node h) { - if (h.left == null) - return null; - - if (!isRed(h.left) && !isRed(h.left.left)) - h = moveRedLeft(h); - - h.left = deleteMin(h.left); - return balance(h); - } - - - /** - * Removes the largest key and associated value from the symbol table. 
- * @throws NoSuchElementException if the symbol table is empty - */ - public void deleteMax() { - if (isEmpty()) throw new NoSuchElementException("BST underflow"); - - // if both children of root are black, set root to red - if (!isRed(root.left) && !isRed(root.right)) - root.color = RED; - - root = deleteMax(root); - if (!isEmpty()) root.color = BLACK; - // assert check(); - } - - // delete the key-value pair with the maximum key rooted at h - private Node deleteMax(Node h) { - if (isRed(h.left)) - h = rotateRight(h); - - if (h.right == null) - return null; - - if (!isRed(h.right) && !isRed(h.right.left)) - h = moveRedRight(h); - - h.right = deleteMax(h.right); - - return balance(h); - } - - /** - * Removes the specified key and its associated value from this symbol table - * (if the key is in this symbol table). - * - * @param key the key - * @throws IllegalArgumentException if {@code key} is {@code null} - */ - public void delete(Key key) { - if (key == null) throw new IllegalArgumentException("argument to delete() is null"); - if (!contains(key)) return; - - // if both children of root are black, set root to red - if (!isRed(root.left) && !isRed(root.right)) - root.color = RED; - - root = delete(root, key); - if (!isEmpty()) root.color = BLACK; - // assert check(); - } - - // delete the key-value pair with the given key rooted at h - private Node delete(Node h, Key key) { - // assert get(h, key) != null; - - if (key.compareTo(h.key) < 0) { - if (!isRed(h.left) && !isRed(h.left.left)) - h = moveRedLeft(h); - h.left = delete(h.left, key); - } - else { - if (isRed(h.left)) - h = rotateRight(h); - if (key.compareTo(h.key) == 0 && (h.right == null)) - return null; - if (!isRed(h.right) && !isRed(h.right.left)) - h = moveRedRight(h); - if (key.compareTo(h.key) == 0) { - Node x = min(h.right); - h.key = x.key; - h.val = x.val; - // h.val = get(h.right, min(h.right).key); - // h.key = min(h.right).key; - h.right = deleteMin(h.right); - } - else h.right = 
delete(h.right, key); - } - return balance(h); - } - - private Node rotateRight(Node h) { - // assert (h != null) && isRed(h.left); - Node x = h.left; - h.left = x.right; - x.right = h; - x.color = x.right.color; - x.right.color = RED; - x.size = h.size; - h.size = size(h.left) + size(h.right) + 1; - return x; - } - - // make a right-leaning link lean to the left - private Node rotateLeft(Node h) { - // assert (h != null) && isRed(h.right); - Node x = h.right; - h.right = x.left; - x.left = h; - x.color = x.left.color; - x.left.color = RED; - x.size = h.size; - h.size = size(h.left) + size(h.right) + 1; - return x; - } - - // flip the colors of a node and its two children - private void flipColors(Node h) { - h.color = !h.color; - h.left.color = !h.left.color; - h.right.color = !h.right.color; - } - - // Assuming that h is red and both h.left and h.left.left - // are black, make h.left or one of its children red. - private Node moveRedLeft(Node h) { - // assert (h != null); - // assert isRed(h) && !isRed(h.left) && !isRed(h.left.left); - - flipColors(h); - if (isRed(h.right.left)) { - h.right = rotateRight(h.right); - h = rotateLeft(h); - flipColors(h); - } - return h; - } - - // Assuming that h is red and both h.right and h.right.left - // are black, make h.right or one of its children red. 
- private Node moveRedRight(Node h) { - // assert (h != null); - // assert isRed(h) && !isRed(h.right) && !isRed(h.right.left); - flipColors(h); - if (isRed(h.left.left)) { - h = rotateRight(h); - flipColors(h); - } - return h; - } - - // restore red-black tree invariant - private Node balance(Node h) { - // assert (h != null); - - if (isRed(h.right)) h = rotateLeft(h); - if (isRed(h.left) && isRed(h.left.left)) h = rotateRight(h); - if (isRed(h.left) && isRed(h.right)) flipColors(h); - - h.size = size(h.left) + size(h.right) + 1; - return h; - } - - public int height() { - return height(root); - } - private int height(Node x) { - if (x == null) return -1; - return 1 + Math.max(height(x.left), height(x.right)); - } - - public Key min() { - if (isEmpty()) throw new NoSuchElementException("calls min() with empty symbol table"); - return min(root).key; - } - - // the smallest key in subtree rooted at x; null if no such key - private Node min(Node x) { - // assert x != null; - if (x.left == null) return x; - else return min(x.left); - } - - public Key max() { - if (isEmpty()) throw new NoSuchElementException("calls max() with empty symbol table"); - return max(root).key; - } - - // the largest key in the subtree rooted at x; null if no such key - private Node max(Node x) { - // assert x != null; - if (x.right == null) return x; - else return max(x.right); - } - - public Key floor(Key key) { - if (key == null) throw new IllegalArgumentException("argument to floor() is null"); - if (isEmpty()) throw new NoSuchElementException("calls floor() with empty symbol table"); - Node x = floor(root, key); - if (x == null) throw new NoSuchElementException("argument to floor() is too small"); - else return x.key; - } - - // the largest key in the subtree rooted at x less than or equal to the given key - private Node floor(Node x, Key key) { - if (x == null) return null; - int cmp = key.compareTo(x.key); - if (cmp == 0) return x; - if (cmp < 0) return floor(x.left, key); - Node t = 
floor(x.right, key); - if (t != null) return t; - else return x; - } - - public Key ceiling(Key key) { - if (key == null) throw new IllegalArgumentException("argument to ceiling() is null"); - if (isEmpty()) throw new NoSuchElementException("calls ceiling() with empty symbol table"); - Node x = ceiling(root, key); - if (x == null) throw new NoSuchElementException("argument to ceiling() is too small"); - else return x.key; - } - - // the smallest key in the subtree rooted at x greater than or equal to the given key - private Node ceiling(Node x, Key key) { - if (x == null) return null; - int cmp = key.compareTo(x.key); - if (cmp == 0) return x; - if (cmp > 0) return ceiling(x.right, key); - Node t = ceiling(x.left, key); - if (t != null) return t; - else return x; - } - - public Key select(int rank) { - if (rank < 0 || rank >= size()) { - throw new IllegalArgumentException("argument to select() is invalid: " + rank); - } - return select(root, rank); - } - - private Key select(Node x, int rank) { - if (x == null) return null; - int leftSize = size(x.left); - if (leftSize > rank) return select(x.left, rank); - else if (leftSize < rank) return select(x.right, rank - leftSize - 1); - else return x.key; - } - - public int rank(Key key) { - if (key == null) throw new IllegalArgumentException("argument to rank() is null"); - return rank(key, root); - } - - // number of keys less than key in the subtree rooted at x - private int rank(Key key, Node x) { - if (x == null) return 0; - int cmp = key.compareTo(x.key); - if (cmp < 0) return rank(key, x.left); - else if (cmp > 0) return 1 + size(x.left) + rank(key, x.right); - else return size(x.left); - } - - public Iterable keys() { - if (isEmpty()) return new PriorityQueue(); - return keys(min(), max()); - } - - public Iterable keys(Key lo, Key hi) { - if (lo == null) throw new IllegalArgumentException("first argument to keys() is null"); - if (hi == null) throw new IllegalArgumentException("second argument to keys() is null"); 
- - Queue queue = new PriorityQueue(); - // if (isEmpty() || lo.compareTo(hi) > 0) return queue; - keys(root, queue, lo, hi); - return queue; - } - - private void keys(Node x, Queue queue, Key lo, Key hi) { - if (x == null) return; - int cmplo = lo.compareTo(x.key); - int cmphi = hi.compareTo(x.key); - if (cmplo < 0) keys(x.left, queue, lo, hi); - if (cmphi > 0) keys(x.right, queue, lo, hi); - } - - public int size(Key lo, Key hi) { - if (lo == null) throw new IllegalArgumentException("first argument to size() is null"); - if (hi == null) throw new IllegalArgumentException("second argument to size() is null"); - - if (lo.compareTo(hi) > 0) return 0; - if (contains(hi)) return rank(hi) - rank(lo) + 1; - else return rank(hi) - rank(lo); - } - - - private boolean check() { - return isBST() && isSizeConsistent() && isRankConsistent() && is23() && isBalanced(); - } - - private boolean isBST() { - return isBST(root, null, null); - } - - private boolean isBST(Node x, Key min, Key max) { - if (x == null) return true; - if (min != null && x.key.compareTo(min) <= 0) return false; - if (max != null && x.key.compareTo(max) >= 0) return false; - return isBST(x.left, min, x.key) && isBST(x.right, x.key, max); - } - - // are the size fields correct? - private boolean isSizeConsistent() { return isSizeConsistent(root); } - private boolean isSizeConsistent(Node x) { - if (x == null) return true; - if (x.size != size(x.left) + size(x.right) + 1) return false; - return isSizeConsistent(x.left) && isSizeConsistent(x.right); - } - - // check that ranks are consistent - private boolean isRankConsistent() { - for (int i = 0; i < size(); i++) - if (i != rank(select(i))) return false; - for (Key key : keys()) - if (key.compareTo(select(rank(key))) != 0) return false; - return true; - } - - // Does the tree have no red right links, and at most one (left) - // red links in a row on any path? 
- private boolean is23() { return is23(root); } - private boolean is23(Node x) { - if (x == null) return true; - if (isRed(x.right)) return false; - if (x != root && isRed(x) && isRed(x.left)) - return false; - return is23(x.left) && is23(x.right); - } - - // do all paths from root to leaf have same number of black edges? - private boolean isBalanced() { - int black = 0; // number of black links on path from root to min - Node x = root; - while (x != null) { - if (!isRed(x)) black++; - x = x.left; - } - return isBalanced(root, black); - } - - // does every path from the root to a leaf have the given number of black links? - private boolean isBalanced(Node x, int black) { - if (x == null) return black == 0; - if (!isRed(x)) black--; - return isBalanced(x.left, black) && isBalanced(x.right, black); - } - } - - public class BinomialMinPQ implements Iterable { - private Node head; //head of the list of roots - private final Comparator comp; //Comparator over the keys - - //Represents a Node of a Binomial Tree - private class Node { - Key key; //Key contained by the Node - int order; //The order of the Binomial Tree rooted by this Node - Node child, sibling; //child and sibling of this Node - } - - public BinomialMinPQ() { - comp = new MyComparator(); - } - - public BinomialMinPQ(Comparator C) { - comp = C; - } - - public BinomialMinPQ(Key[] a) { - comp = new MyComparator(); - for (Key k : a) insert(k); - } - - public BinomialMinPQ(Comparator C, Key[] a) { - comp = C; - for (Key k : a) insert(k); - } - - public boolean isEmpty() { - return head == null; - } - - public int size() { - int result = 0, tmp; - for (Node node = head; node != null; node = node.sibling) { - if (node.order > 30) { throw new ArithmeticException("The number of elements cannot be evaluated, but the priority queue is still valid."); } - tmp = 1 << node.order; - result |= tmp; - } - return result; - } - - public void insert(Key key) { - Node x = new Node(); - x.key = key; - x.order = 0; - 
BinomialMinPQ H = new BinomialMinPQ(); //The Comparator oh the H heap is not used - H.head = x; - this.head = this.union(H).head; - } - - public Key minKey() { - if (isEmpty()) throw new NoSuchElementException("Priority queue is empty"); - Node min = head; - Node current = head; - while (current.sibling != null) { - min = (greater(min.key, current.sibling.key)) ? current : min; - current = current.sibling; - } - return min.key; - } - - public Key delMin() { - if(isEmpty()) throw new NoSuchElementException("Priority queue is empty"); - Node min = eraseMin(); - Node x = (min.child == null) ? min : min.child; - if (min.child != null) { - min.child = null; - Node prevx = null, nextx = x.sibling; - while (nextx != null) { - x.sibling = prevx; - prevx = x; - x = nextx;nextx = nextx.sibling; - } - x.sibling = prevx; - BinomialMinPQ H = new BinomialMinPQ(); - H.head = x; - head = union(H).head; - } - return min.key; - } - - public BinomialMinPQ union(BinomialMinPQ heap) { - if (heap == null) throw new IllegalArgumentException("Cannot merge a Binomial Heap with null"); - this.head = merge(new Node(), this.head, heap.head).sibling; - Node x = this.head; - Node prevx = null, nextx = x.sibling; - while (nextx != null) { - if (x.order < nextx.order || - (nextx.sibling != null && nextx.sibling.order == x.order)) { - prevx = x; x = nextx; - } else if (greater(nextx.key, x.key)) { - x.sibling = nextx.sibling; - link(nextx, x); - } else { - if (prevx == null) { this.head = nextx; } - else { prevx.sibling = nextx; } - link(x, nextx); - x = nextx; - } - nextx = x.sibling; - } - return this; - } - - private boolean greater(Key n, Key m) { - if (n == null) return false; - if (m == null) return true; - return comp.compare(n, m) > 0; - } - - //Assuming root1 holds a greater key than root2, root2 becomes the new root - private void link(Node root1, Node root2) { - root1.sibling = root2.child; - root2.child = root1; - root2.order++; - } - - //Deletes and return the node containing the 
minimum key - private Node eraseMin() { - Node min = head; - Node previous = null; - Node current = head; - while (current.sibling != null) { - if (greater(min.key, current.sibling.key)) { - previous = current; - min = current.sibling; - } - current = current.sibling; - } - previous.sibling = min.sibling; - if (min == head) head = min.sibling; - return min; - } - - private Node merge(Node h, Node x, Node y) { - if (x == null && y == null) return h; - else if (x == null) h.sibling = merge(y, null, y.sibling); - else if (y == null) h.sibling = merge(x, x.sibling, null); - else if (x.order < y.order) h.sibling = merge(x, x.sibling, y); - else h.sibling = merge(y, x, y.sibling); - return h; - } - - public Iterator iterator() { - return new MyIterator(); - } - - private class MyIterator implements Iterator { - BinomialMinPQ data; - - //Constructor clones recursively the elements in the queue - //It takes linear time - public MyIterator() { - data = new BinomialMinPQ(comp); - data.head = clone(head, null); - } - - private Node clone(Node x, Node parent) { - if (x == null) return null; - Node node = new Node(); - node.key = x.key; - node.sibling = clone(x.sibling, parent); - node.child = clone(x.child, node); - return node; - } - - public boolean hasNext() { - return !data.isEmpty(); - } - - public Key next() { - if (!hasNext()) throw new NoSuchElementException(); - return data.delMin(); - } - - public void remove() { - throw new UnsupportedOperationException(); - } - } - - private class MyComparator implements Comparator { - @Override - public int compare(Key key1, Key key2) { - return ((Comparable) key1).compareTo(key2); - } - } - } - - public class SegmentTree { - - private Node[] heap; - private int[] array; - private int size; - - /** - * Time-Complexity: O(n*log(n)) - * - * @param array the Initialization array - */ - public SegmentTree(int[] array) { - this.array = Arrays.copyOf(array, array.length); - //The max size of this array is about 2 * 2 ^ log2(n) + 1 - 
size = (int) (2 * Math.pow(2.0, Math.floor((Math.log((double) array.length) / Math.log(2.0)) + 1))); - heap = new Node[size]; - build(1, 0, array.length); - } - - - public int size() { - return array.length; - } - - //Initialize the Nodes of the Segment tree - private void build(int v, int from, int size) { - heap[v] = new Node(); - heap[v].from = from; - heap[v].to = from + size - 1; - - if (size == 1) { - heap[v].sum = array[from]; - heap[v].min = array[from]; - } else { - //Build childs - build(2 * v, from, size / 2); - build(2 * v + 1, from + size / 2, size - size / 2); - - heap[v].sum = heap[2 * v].sum + heap[2 * v + 1].sum; - //min = min of the children - heap[v].min = Math.min(heap[2 * v].min, heap[2 * v + 1].min); - } - } - - /** - * Range Sum Query - * - * Time-Complexity: O(log(n)) - * - * @param from from index - * @param to to index - * @return sum - */ - public int rsq(int from, int to) { - return rsq(1, from, to); - } - - private int rsq(int v, int from, int to) { - Node n = heap[v]; - - //If you did a range update that contained this node, you can infer the Sum without going down the tree - if (n.pendingVal != null && contains(n.from, n.to, from, to)) { - return (to - from + 1) * n.pendingVal; - } - - if (contains(from, to, n.from, n.to)) { - return heap[v].sum; - } - - if (intersects(from, to, n.from, n.to)) { - propagate(v); - int leftSum = rsq(2 * v, from, to); - int rightSum = rsq(2 * v + 1, from, to); - - return leftSum + rightSum; - } - - return 0; - } - - /** - * Range Min Query - * - * Time-Complexity: O(log(n)) - * - * @param from from index - * @param to to index - * @return min - */ - public int rMinQ(int from, int to) { - return rMinQ(1, from, to); - } - - private int rMinQ(int v, int from, int to) { - Node n = heap[v]; - - - //If you did a range update that contained this node, you can infer the Min value without going down the tree - if (n.pendingVal != null && contains(n.from, n.to, from, to)) { - return n.pendingVal; - } - - if 
(contains(from, to, n.from, n.to)) { - return heap[v].min; - } - - if (intersects(from, to, n.from, n.to)) { - propagate(v); - int leftMin = rMinQ(2 * v, from, to); - int rightMin = rMinQ(2 * v + 1, from, to); - - return Math.min(leftMin, rightMin); - } - - return Integer.MAX_VALUE; - } - - - /** - * Range Update Operation. - * With this operation you can update either one position or a range of positions with a given number. - * The update operations will update the less it can to update the whole range (Lazy Propagation). - * The values will be propagated lazily from top to bottom of the segment tree. - * This behavior is really useful for updates on portions of the array - *

- * Time-Complexity: O(log(n)) - * - * @param from from index - * @param to to index - * @param value value - */ - public void update(int from, int to, int value) { - update(1, from, to, value); - } - - private void update(int v, int from, int to, int value) { - - //The Node of the heap tree represents a range of the array with bounds: [n.from, n.to] - Node n = heap[v]; - - /** - * If the updating-range contains the portion of the current Node We lazily update it. - * This means We do NOT update each position of the vector, but update only some temporal - * values into the Node; such values into the Node will be propagated down to its children only when they need to. - */ - if (contains(from, to, n.from, n.to)) { - change(n, value); - } - - if (n.size() == 1) return; - - if (intersects(from, to, n.from, n.to)) { - /** - * Before keeping going down to the tree We need to propagate the - * the values that have been temporally/lazily saved into this Node to its children - * So that when We visit them the values are properly updated - */ - propagate(v); - - update(2 * v, from, to, value); - update(2 * v + 1, from, to, value); - - n.sum = heap[2 * v].sum + heap[2 * v + 1].sum; - n.min = Math.min(heap[2 * v].min, heap[2 * v + 1].min); - } - } - - //Propagate temporal values to children - private void propagate(int v) { - Node n = heap[v]; - - if (n.pendingVal != null) { - change(heap[2 * v], n.pendingVal); - change(heap[2 * v + 1], n.pendingVal); - n.pendingVal = null; //unset the pending propagation value - } - } - - //Save the temporal values that will be propagated lazily - private void change(Node n, int value) { - n.pendingVal = value; - n.sum = n.size() * value; - n.min = value; - array[n.from] = value; - - } - - //Test if the range1 contains range2 - private boolean contains(int from1, int to1, int from2, int to2) { - return from2 >= from1 && to2 <= to1; - } - - //check inclusive intersection, test if range1[from1, to1] intersects range2[from2, to2] - private 
boolean intersects(int from1, int to1, int from2, int to2) { - return from1 <= from2 && to1 >= from2 // (.[..)..] or (.[...]..) - || from1 >= from2 && from1 <= to2; // [.(..]..) or [..(..).. - } - - //The Node class represents a partition range of the array. - class Node { - int sum; - int min; - //Here We store the value that will be propagated lazily - Integer pendingVal = null; - int from; - int to; - - int size() { - return to - from + 1; - } - - } - - public void main(String[] args) { - - - SegmentTree st = null; - - String cmd = "cmp"; - while (true) { - String[] line = new String[0]; - - if (line[0].equals("exit")) break; - - int arg1 = 0, arg2 = 0, arg3 = 0; - - if (line.length > 1) { - arg1 = Integer.parseInt(line[1]); - } - if (line.length > 2) { - arg2 = Integer.parseInt(line[2]); - } - if (line.length > 3) { - arg3 = Integer.parseInt(line[3]); - } - - if ((!line[0].equals("set") && !line[0].equals("init")) && st == null) { - continue; - } - int array[]; - if (line[0].equals("set")) { - array = new int[line.length - 1]; - for (int i = 0; i < line.length - 1; i++) { - array[i] = Integer.parseInt(line[i + 1]); - } - st = new SegmentTree(array); - } - else if (line[0].equals("init")) { - array = new int[arg1]; - Arrays.fill(array, arg2); - st = new SegmentTree(array); - - for (int i = 0; i < st.size(); i++) { - - } - } - - else if (line[0].equals("up")) { - st.update(arg1, arg2, arg3); - for (int i = 0; i < st.size(); i++) { - - } - - } - else if (line[0].equals("rsq")) { - - } - else if (line[0].equals("rmq")) { - - } - else { - - } - - } - } - } - - public class GaussJordanElimination { - private static final double EPSILON = 1e-8; - - private final int n; // n-by-n system - private double[][] a; // n-by-(n+1) augmented matrix - - // Gauss-Jordan elimination with partial pivoting - /** - * Solves the linear system of equations Ax = b, - * where A is an n-by-n matrix and b - * is a length n vector. 
- * - * @param A the n-by-n constraint matrix - * @param b the length n right-hand-side vector - */ - public GaussJordanElimination(double[][] A, double[] b) { - n = b.length; - - // build augmented matrix - a = new double[n][n+n+1]; - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - a[i][j] = A[i][j]; - - // only needed if you want to find certificate of infeasibility (or compute inverse) - for (int i = 0; i < n; i++) - a[i][n+i] = 1.0; - - for (int i = 0; i < n; i++) - a[i][n+n] = b[i]; - - solve(); - - assert certifySolution(A, b); - } - - private void solve() { - - // Gauss-Jordan elimination - for (int p = 0; p < n; p++) { - // show(); - - // find pivot row using partial pivoting - int max = p; - for (int i = p+1; i < n; i++) { - if (Math.abs(a[i][p]) > Math.abs(a[max][p])) { - max = i; - } - } - - // exchange row p with row max - swap(p, max); - - // singular or nearly singular - if (Math.abs(a[p][p]) <= EPSILON) { - continue; - // throw new ArithmeticException("Matrix is singular or nearly singular"); - } - - // pivot - pivot(p, p); - } - // show(); - } - - // swap row1 and row2 - private void swap(int row1, int row2) { - double[] temp = a[row1]; - a[row1] = a[row2]; - a[row2] = temp; - } - - - // pivot on entry (p, q) using Gauss-Jordan elimination - private void pivot(int p, int q) { - - // everything but row p and column q - for (int i = 0; i < n; i++) { - double alpha = a[i][q] / a[p][q]; - for (int j = 0; j <= n+n; j++) { - if (i != p && j != q) a[i][j] -= alpha * a[p][j]; - } - } - - // zero out column q - for (int i = 0; i < n; i++) - if (i != p) a[i][q] = 0.0; - - // scale row p (ok to go from q+1 to n, but do this for consistency with simplex pivot) - for (int j = 0; j <= n+n; j++) - if (j != q) a[p][j] /= a[p][q]; - a[p][q] = 1.0; - } - - public double[] primal() { - double[] x = new double[n]; - for (int i = 0; i < n; i++) { - if (Math.abs(a[i][i]) > EPSILON) - x[i] = a[i][n+n] / a[i][i]; - else if (Math.abs(a[i][n+n]) > EPSILON) - 
return null; - } - return x; - } - - public double[] dual() { - double[] y = new double[n]; - for (int i = 0; i < n; i++) { - if ((Math.abs(a[i][i]) <= EPSILON) && (Math.abs(a[i][n+n]) > EPSILON)) { - for (int j = 0; j < n; j++) - y[j] = a[i][n+j]; - return y; - } - } - return null; - } - - public boolean isFeasible() { - return primal() != null; - } - - // print the tableaux - private void show() { - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - } - for (int j = n; j < n+n; j++) { - } - } - - } - - - // check that Ax = b or yA = 0, yb != 0 - private boolean certifySolution(double[][] A, double[] b) { - - // check that Ax = b - if (isFeasible()) { - double[] x = primal(); - for (int i = 0; i < n; i++) { - double sum = 0.0; - for (int j = 0; j < n; j++) { - sum += A[i][j] * x[j]; - } - if (Math.abs(sum - b[i]) > EPSILON) { - return false; - } - } - return true; - } - - // or that yA = 0, yb != 0 - else { - double[] y = dual(); - for (int j = 0; j < n; j++) { - double sum = 0.0; - for (int i = 0; i < n; i++) { - sum += A[i][j] * y[i]; - } - if (Math.abs(sum) > EPSILON) { - return false; - } - } - double sum = 0.0; - for (int i = 0; i < n; i++) { - sum += y[i] * b[i]; - } - if (Math.abs(sum) < EPSILON) { - - return false; - } - return true; - } - } - - - private void test(String name, double[][] A, double[] b) { - - GaussJordanElimination gaussian = new GaussJordanElimination(A, b); - if (gaussian.isFeasible()) { - double[] x = gaussian.primal(); - for (int i = 0; i < x.length; i++) { - } - } - else { - double[] y = gaussian.dual(); - for (int j = 0; j < y.length; j++) { - - } - } - } - - - // 3-by-3 nonsingular system - private void test1() { - double[][] A = { - { 0, 1, 1 }, - { 2, 4, -2 }, - { 0, 3, 15 } - }; - double[] b = { 4, 2, 36 }; - test("test 1", A, b); - } - - private void test2() { - double[][] A = { - { 1, -3, 1 }, - { 2, -8, 8 }, - { -6, 3, -15 } - }; - double[] b = { 4, -2, 9 }; - test("test 2", A, b); - } - - private void test3() { - 
double[][] A = { - { 2, -3, -1, 2, 3 }, - { 4, -4, -1, 4, 11 }, - { 2, -5, -2, 2, -1 }, - { 0, 2, 1, 0, 4 }, - { -4, 6, 0, 0, 7 }, - }; - double[] b = { 4, 4, 9, -6, 5 }; - test("test 3", A, b); - } - - // 5-by-5 singluar: infinitely many solutions - private void test4() { - double[][] A = { - { 2, -3, -1, 2, 3 }, - { 4, -4, -1, 4, 11 }, - { 2, -5, -2, 2, -1 }, - { 0, 2, 1, 0, 4 }, - { -4, 6, 0, 0, 7 }, - }; - double[] b = { 4, 4, 9, -5, 5 }; - test("test 4", A, b); - } - - // 3-by-3 singular: no solutions - // y = [ 1, 0, 1/3 ] - private void test5() { - double[][] A = { - { 2, -1, 1 }, - { 3, 2, -4 }, - { -6, 3, -3 }, - }; - double[] b = { 1, 4, 2 }; - test("test 5", A, b); - } - - // 3-by-3 singular: infinitely many solutions - private void test6() { - double[][] A = { - { 1, -1, 2 }, - { 4, 4, -2 }, - { -2, 2, -4 }, - }; - double[] b = { -3, 1, 6 }; - test("test 6 (infinitely many solutions)", A, b); - } - - public void main(String[] args) { - - test1(); - test2(); - test3(); - test4(); - test5(); - test6(); - - // n-by-n random system (likely full rank) - int n = Integer.parseInt(args[0]); - double[][] A = new double[n][n]; - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++){} - - double[] b = new double[n]; - for (int i = 0; i < n; i++){} - - test("random " + n + "-by-" + n + " (likely full rank)", A, b); - - A = new double[n][n]; - for (int i = 0; i < n-1; i++) - for (int j = 0; j < n; j++){} - - for (int i = 0; i < n-1; i++) { - double alpha = - 5.0; - for (int j = 0; j < n; j++) { - A[n-1][j] += alpha * A[i][j]; - } - } - b = new double[n]; - for (int i = 0; i < n; i++) - - test("random " + n + "-by-" + n + " (likely infeasible)", A, b); - } - } - - public class PatriciaST { - private Node head; - private int count; - - private class Node { - private Node left, right; - private String key; - private Value val; - private int b; - - public Node(String key, Value val, int b) { - this.key = key; - this.val = val; - this.b = b; - } - }; - - public 
PatriciaST() { - head = new Node("", null, 0); - head.left = head; - head.right = head; - count = 0; - } - - public void put(String key, Value val) { - if (key == null) throw new IllegalArgumentException("called put(null)"); - if (key.length() == 0) throw new IllegalArgumentException("invalid key"); - if (val == null) delete(key); - Node p; - Node x = head; - do { - p = x; - if (safeBitTest(key, x.b)) x = x.right; - else x = x.left; - } while (p.b < x.b); - if (!x.key.equals(key)) { - int b = firstDifferingBit(x.key, key); - x = head; - do { - p = x; - if (safeBitTest(key, x.b)) x = x.right; - else x = x.left; - } while (p.b < x.b && x.b < b); - Node t = new Node(key, val, b); - if (safeBitTest(key, b)) { - t.left = x; - t.right = t; - } - else { - t.left = t; - t.right = x; - } - if (safeBitTest(key, p.b)) p.right = t; - else p.left = t; - count++; - } - else x.val = val; - } - - public Value get(String key) { - if (key == null) throw new IllegalArgumentException("called get(null)"); - if (key.length() == 0) throw new IllegalArgumentException("invalid key"); - Node p; - Node x = head; - do { - p = x; - if (safeBitTest(key, x.b)) x = x.right; - else x = x.left; - } while (p.b < x.b); - if (x.key.equals(key)) return x.val; - else return null; - } - - public void delete(String key) { - if (key == null) throw new IllegalArgumentException("called delete(null)"); - if (key.length() == 0) throw new IllegalArgumentException("invalid key"); - Node g; // previous previous (grandparent) - Node p = head; // previous (parent) - Node x = head; // node to delete - do { - g = p; - p = x; - if (safeBitTest(key, x.b)) x = x.right; - else x = x.left; - } while (p.b < x.b); - if (x.key.equals(key)) { - Node z; - Node y = head; - do { // find the true parent (z) of x - z = y; - if (safeBitTest(key, y.b)) y = y.right; - else y = y.left; - } while (y != x); - if (x == p) { // case 1: remove (leaf node) x - Node c; // child of x - if (safeBitTest(key, x.b)) c = x.left; - else c = 
x.right; - if (safeBitTest(key, z.b)) z.right = c; - else z.left = c; - } - else { // case 2: p replaces (internal node) x - Node c; // child of p - if (safeBitTest(key, p.b)) c = p.left; - else c = p.right; - if (safeBitTest(key, g.b)) g.right = c; - else g.left = c; - if (safeBitTest(key, z.b)) z.right = p; - else z.left = p; - p.left = x.left; - p.right = x.right; - p.b = x.b; - } - count--; - } - } - - public boolean contains(String key) { - return get(key) != null; - } - - boolean isEmpty() { - return count == 0; - } - - int size() { - return count; - } - - public Iterable keys() { - Queue queue = new PriorityQueue<>(); - if (head.left != head) keys(head.left, 0, queue); - if (head.right != head) keys(head.right, 0, queue); - return queue; - } - - private void keys(Node x, int b, Queue queue) { - if (x.b > b) { - keys(x.left, x.b, queue); - keys(x.right, x.b, queue); - } - } - - private boolean safeBitTest(String key, int b) { - if (b < key.length() * 16) return bitTest(key, b) != 0; - if (b > key.length() * 16 + 15) return false; // padding - /* 16 bits of 0xffff */ return true; // end marker - } - - private int bitTest(String key, int b) { - return (key.charAt(b >>> 4) >>> (b & 0xf)) & 1; - } - - private int safeCharAt(String key, int i) { - if (i < key.length()) return key.charAt(i); - if (i > key.length()) return 0x0000; // padding - else return 0xffff; // end marker - } - - private int firstDifferingBit(String k1, String k2) { - int i = 0; - int c1 = safeCharAt(k1, 0) & ~1; - int c2 = safeCharAt(k2, 0) & ~1; - if (c1 == c2) { - i = 1; - while (safeCharAt(k1, i) == safeCharAt(k2, i)) i++; - c1 = safeCharAt(k1, i); - c2 = safeCharAt(k2, i); - } - int b = 0; - while (((c1 >>> b) & 1) == ((c2 >>> b) & 1)) b++; - return i * 16 + b; - } - - public void main(String[] args) { - PatriciaST st = new PatriciaST(); - int limitItem = 1000000; - int limitPass = 1; - int countPass = 0; - boolean ok = true; - - if (args.length > 0) limitItem = Integer.parseInt(args[0]); 
- if (args.length > 1) limitPass = Integer.parseInt(args[1]); - - do { - String[] a = new String[limitItem]; - int[] v = new int[limitItem]; - - for (int i = 0; i < limitItem; i++) { - a[i] = Integer.toString(i, 16); - v[i] = i; - } - - for (int i = 0; i < limitItem; i++) - st.put(a[v[i]], v[i]); - - int countKeys = 0; - for (String key : st.keys()) countKeys++; - if (countKeys != limitItem) ok = false; - if (countKeys != st.size()) ok = false; - - - int limitDelete = limitItem / 2; - for (int i = 0; i < limitDelete; i++) - st.delete(a[v[i]]); - - countKeys = 0; - for (String key : st.keys()) countKeys++; - if (countKeys != limitItem - limitDelete) ok = false; - if (countKeys != st.size()) ok = false; - - int countDelete = 0; - int countRemain = 0; - for (int i = 0; i < limitItem; i++) { - if (i < limitDelete) { - if (!st.contains(a[v[i]])) countDelete++; - } - else { - int val = st.get(a[v[i]]); - if (val == v[i]) countRemain++; - } - } - - if (countRemain + countDelete != limitItem) ok = false; - if (countRemain != st.size()) ok = false; - if (st.isEmpty()) ok = false; - - - for (int i = countDelete; i < limitItem; i++) - st.delete(a[v[i]]); - if (!st.isEmpty()) ok = false; - - countPass++; - if (ok) { - } - else { - - } - } while (ok && countPass < limitPass); - - if (!ok) throw new java.lang.RuntimeException("TESTS FAILED"); - } - } - - public class EulerianPath { - private Stack path = null; // Eulerian path; null if no suh path - private class Edge { - private final int v; - private final int w; - private boolean isUsed; - - public Edge(int v, int w) { - this.v = v; - this.w = w; - isUsed = false; - } - - // returns the other vertex of the edge - public int other(int vertex) { - if (vertex == v) return w; - else if (vertex == w) return v; - else throw new IllegalArgumentException("Illegal endpoint"); - } - } - - public EulerianPath(Graph G) { - - // find vertex from which to start potential Eulerian path: - // a vertex v with odd degree(v) if it exits; - // 
otherwise a vertex with degree(v) > 0 - int oddDegreeVertices = 0; - int s = nonIsolatedVertex(G); - for (int v = 0; v < 7; v++) { - if (2 % 2 != 0) { - oddDegreeVertices++; - s = v; - } - } - - - if (oddDegreeVertices > 2) return; - if (s == -1) s = 0; - - - - for (int v = 0; v < 5; v++) { - int selfLoops = 0; - // careful with self loops - if (v == 5) { - if (selfLoops % 2 == 0) { - Edge e = new Edge(v, 5); - } - selfLoops++; - } - else if (v < 5) { - Edge e = new Edge(v, 5); - - } - } - - // initialize stack with any non-isolated vertex - Stack stack = new Stack(); - stack.push(s); - - // greedily search through edges in iterative DFS style - path = new Stack(); - while (!stack.isEmpty()) { - int v = stack.pop(); - - // push vertex with no more leaving edges to path - path.push(v); - } - - // check if all edges are used - if (path.size() != 5 + 1) - path = null; - - assert certifySolution(G); - } - - public Iterable path() { - return path; - } - - public boolean hasEulerianPath() { - return path != null; - } - - - // returns any non-isolated vertex; -1 if no such vertex - private int nonIsolatedVertex(Graph G) { - for (int v = 0; v < 6; v++) - if (1 > 0) - return v; - return -1; - } - - private boolean satisfiesNecessaryAndSufficientConditions(Graph G) { - if (2 == 0) return true; - - // Condition 1: degree(v) is even except for possibly two - int oddDegreeVertices = 0; - for (int v = 0; v <7; v++) - if (3 % 2 != 0) - oddDegreeVertices++; - if (oddDegreeVertices > 2) return false; - - // Condition 2: graph is connected, ignoring isolated vertices - int s = nonIsolatedVertex(G); - return true; - } - - // check that solution is correct - private boolean certifySolution(Graph G) { - - // internal consistency check - if (hasEulerianPath() == (path() == null)) return false; - - // hashEulerianPath() returns correct value - if (hasEulerianPath() != satisfiesNecessaryAndSufficientConditions(G)) return false; - - // nothing else to check if no Eulerian path - if (path 
== null) return true; - - // check that path() uses correct number of edges - if (path.size() != 7 + 1) return false; - - // check that path() is a path in G - // TODO - - return true; - } - - - private void unitTest(Graph G, String description) { - - EulerianPath euler = new EulerianPath(G); - - if (euler.hasEulerianPath()) { - for (int v : euler.path()) { - } - } - } - } -} diff --git a/src/jmh/results.md b/src/jmh/results.md deleted file mode 100644 index 6b6b62d7..00000000 --- a/src/jmh/results.md +++ /dev/null @@ -1,13 +0,0 @@ -| | Long file | Small project (Gradle) | Big project (IntelliJ IDEA) | -| --- |--- | --- | --- | -| Code2Vec (time) | 0.44 ± 0.07 sec | 27.15 ± 0.90 sec | 257.45 ± 18.89 sec | -| Code2Vec (allocated memory per sec) | 1.22 gb ± 188.22 mb | 1.08 gb ± 31.72 mb | 1.05 gb ± 78.83 mb | -| | | | | -| PathContext (time) | 0.63 ± 0.03 sec | 26.54 ± 1.25 sec | 223.76 ± 8.24 sec | -| PathContext (allocated memory per sec) | 1.16 gb ± 72.86 mb | 1.09 gb ± 51.89 mb | 1.12 gb ± 47.49 mb | -| | | | | -| ProjectParseCSV (time) | 0.33 ± 0.04 sec | 20.40 ± 1.12 sec | 180.37 ± 2.98 sec | -| ProjectParseCSV (allocated memory per sec) | 1.37 gb ± 180.29 mb | 1.19 gb ± 68.76 mb | 1.20 gb ± 20.16 mb | -| | | | | -| ProjectParseDOT (time) | 0.43 ± 0.05 sec | 32.98 ± 2.51 sec | 285.64 ± 3.04 sec | -| ProjectParseDOT (allocated memory per sec) | 1.23 gb ± 136.31 mb | 1.02 gb ± 78.14 mb | 1.06 gb ± 16.59 mb | diff --git a/src/main/kotlin/astminer/common/FileUtil.kt b/src/main/kotlin/astminer/common/FileUtil.kt index badcc4d6..e609fd46 100644 --- a/src/main/kotlin/astminer/common/FileUtil.kt +++ b/src/main/kotlin/astminer/common/FileUtil.kt @@ -38,7 +38,7 @@ fun addClassWrapper(file: File, className: String) { } /** - * Checks if java file has any syntax errors, that can be identified via [Java8Parser][me.vovak.antlr.parser.Java8Parser] + * Checks if java file has any syntax errors that can be identified via [Java8Parser][me.vovak.antlr.parser.Java8Parser] * 
@param javaFile file which is checked for correct syntax * @return true if there are syntax errors and false otherwise */ @@ -56,4 +56,4 @@ fun getProjectFiles(projectRoot: File, filter: (File) -> Boolean = { true }) = p .toList() fun getProjectFilesWithExtension(projectRoot: File, extension: String): List = - getProjectFiles(projectRoot) { it.isFile && it.extension == extension } \ No newline at end of file + getProjectFiles(projectRoot) { it.isFile && it.extension == extension } diff --git a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt index e417f116..9d1afa76 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt @@ -10,7 +10,8 @@ class FunctionInfoPropertyNotImplementedException(propertyName: String) : "Consider implementing it." ) -private fun notImplemented(propertyName: String): Nothing = throw FunctionInfoPropertyNotImplementedException(propertyName) +private fun notImplemented(propertyName: String): Nothing = + throw FunctionInfoPropertyNotImplementedException(propertyName) interface FunctionInfo { val nameNode: T? 
diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 08ccb3ec..c2f6e600 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -6,6 +6,7 @@ import astminer.common.splitToSubtokens import java.io.File import java.io.InputStream import java.util.* +import kotlin.NoSuchElementException abstract class Node { @@ -64,9 +65,11 @@ class PreOrderIterator(root: Node): Iterator { } override fun next(): Node { - val currentNode = stack.pop() - currentNode.children.asReversed().forEach { stack.push(it) } - return currentNode + if (hasNext()) { + val currentNode = stack.pop() + currentNode.children.asReversed().forEach { stack.push(it) } + return currentNode + } else throw NoSuchElementException() } } @@ -88,7 +91,8 @@ class PostOrderIterator(root: Node): Iterator { while (!tree.last().isChecked) { fillWithChildren(tree.last()) } - return tree.removeLast().node + if (hasNext()) return tree.removeLast().node + else throw NoSuchElementException() } } diff --git a/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt b/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt index 03990c4d..268298fe 100644 --- a/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt +++ b/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt @@ -84,4 +84,4 @@ class RankedIncrementalIdStorage { } idCountRanks = idRankMap } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/config/PipelineConfigs.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt similarity index 100% rename from src/main/kotlin/astminer/config/PipelineConfigs.kt rename to src/main/kotlin/astminer/config/PipelineConfig.kt diff --git a/src/main/kotlin/astminer/examples/Common.kt b/src/main/kotlin/astminer/examples/Common.kt index 402a9a13..a036d2fe 100644 --- 
a/src/main/kotlin/astminer/examples/Common.kt +++ b/src/main/kotlin/astminer/examples/Common.kt @@ -8,4 +8,4 @@ fun iterateFiles(dir: File, condition: (File) -> Boolean, action: (File) -> Unit fun File.forFilesWithSuffix(extension: String, action: (File) -> Unit) { iterateFiles(this, ({ file: File -> file.path.endsWith(extension) }), action) -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/examples/FeatureExtraction.kt b/src/main/kotlin/astminer/examples/FeatureExtraction.kt index 28e964ad..4c1dcf32 100644 --- a/src/main/kotlin/astminer/examples/FeatureExtraction.kt +++ b/src/main/kotlin/astminer/examples/FeatureExtraction.kt @@ -5,10 +5,10 @@ import astminer.featureextraction.* import astminer.parse.gumtree.java.GumTreeJavaParser import java.io.File - fun parseAndCollectFeatures() { val parser = GumTreeJavaParser() - val features : List> = listOf(Depth, NumberOfNodes, BranchingFactor, CompressiblePathLengths, Tokens, NodeTypes) + val features : List> = + listOf(Depth, NumberOfNodes, BranchingFactor, CompressiblePathLengths, Tokens, NodeTypes) val folderInput = "./testData/featureextraction" val folderOutput = "out_examples/featureextraction" @@ -29,4 +29,4 @@ fun parseAndCollectFeatures() { fun main() { parseAndCollectFeatures() -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt index abe46cc6..8b42afb6 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt +++ b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt @@ -108,4 +108,4 @@ object CompressiblePathLengths : TreeFeature> { } return length } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeatureValueStorage.kt b/src/main/kotlin/astminer/featureextraction/TreeFeatureValueStorage.kt index ffc5ab22..0f0a10eb 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeatureValueStorage.kt +++ 
b/src/main/kotlin/astminer/featureextraction/TreeFeatureValueStorage.kt @@ -21,7 +21,8 @@ fun Any.className() : String { } /** - * Class for store and save [tree features][astminer.featureextraction.TreeFeature] for [parsed trees][astminer.featureextraction.ParsedTree]. + * Class for store and save [tree features][astminer.featureextraction.TreeFeature] + * for [parsed trees][astminer.featureextraction.ParsedTree]. * @property separator separator which is used in resulting file to separate with tree features values */ class TreeFeatureValueStorage(private val separator: String) { @@ -32,10 +33,10 @@ class TreeFeatureValueStorage(private val separator: String) { private val idField = Field("Id") { parsedTrees.indexOf(it).toString() } private val parserField = Field("ParserName") { it.parserName } private val fileNameField = Field("FileName") { it.fileName } - private val NOLField = Field("NumberOfLines") { it.numberOfLines.toString() } + private val numOfLinesField = Field("NumberOfLines") { it.numberOfLines.toString() } private val fileName = "features.csv" - private val fields: List = listOf(idField, parserField, fileNameField, NOLField) + private val fields: List = listOf(idField, parserField, fileNameField, numOfLinesField) /** * Data class for additional fields that should be in resulting file with features. @@ -70,7 +71,8 @@ class TreeFeatureValueStorage(private val separator: String) { /** * Computes all stored features for all stored parsed trees and saves them in a given directory. - * @param directoryPath path to directory where tree features is saved. If this directory does not exist the new one creates. + * @param directoryPath path to directory where tree features is saved. + * If this directory does not exist the new one creates. 
*/ fun save(directoryPath: String) { File(directoryPath).mkdirs() @@ -95,5 +97,4 @@ class TreeFeatureValueStorage(private val separator: String) { } return a.toString() } - -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt index 49c32534..00d0904d 100644 --- a/src/main/kotlin/astminer/filters/CommonFilters.kt +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -4,8 +4,8 @@ import astminer.common.model.* import astminer.featureextraction.NumberOfNodes /** - * Filter that excludes trees which do not satisfy [minSize] <= tree size <= [maxSize] - * @param minSize The minimum size of trees that pass the filter + * Filter that excludes trees which do not satisfy [minSize] <= tree size <= [maxSize]. + * @param minSize The minimum size of trees that pass the filter. * @param maxSize The maximum size of trees that pass the filter. Set it to null if there should be no upper bound. */ class TreeSizeFilter(private val minSize: Int = 0, private val maxSize: Int? = null) : FileFilter, FunctionFilter { diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index 6b7b9c88..15591f34 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -22,7 +22,7 @@ class AnnotationFilter(private val excludeAnnotations: List) : FunctionF } /** - * Filter that excludes constructors + * Filter that excludes constructors. 
*/ object ConstructorFilter : FunctionFilter { override fun validate(functionInfo: FunctionInfo) = !functionInfo.isConstructor diff --git a/src/main/kotlin/astminer/labelextractor/FunctionLabelExtractors.kt b/src/main/kotlin/astminer/labelextractor/FunctionNameLabelExtractor.kt similarity index 89% rename from src/main/kotlin/astminer/labelextractor/FunctionLabelExtractors.kt rename to src/main/kotlin/astminer/labelextractor/FunctionNameLabelExtractor.kt index 9fc1b058..fa4dd5fe 100644 --- a/src/main/kotlin/astminer/labelextractor/FunctionLabelExtractors.kt +++ b/src/main/kotlin/astminer/labelextractor/FunctionNameLabelExtractor.kt @@ -10,8 +10,8 @@ import astminer.common.model.Node * Hides the name of the function in the subtree and also all in the recursive calls. */ object FunctionNameLabelExtractor : FunctionLabelExtractor { - const val HIDDEN_METHOD_NAME_TOKEN = "METHOD_NAME" - const val RECURSIVE_CALL_TOKEN = "SELF" + private const val HIDDEN_METHOD_NAME_TOKEN = "METHOD_NAME" + private const val RECURSIVE_CALL_TOKEN = "SELF" override fun process(functionInfo: FunctionInfo): LabeledResult? { val normalizedName = functionInfo.nameNode?.normalizedToken ?: return null diff --git a/src/main/kotlin/astminer/parse/ParsingException.kt b/src/main/kotlin/astminer/parse/ParsingException.kt index 27d3f510..ab24473b 100644 --- a/src/main/kotlin/astminer/parse/ParsingException.kt +++ b/src/main/kotlin/astminer/parse/ParsingException.kt @@ -1,4 +1,4 @@ package astminer.parse -class ParsingException(parserType: String, language: String, message: String? = null) : - IllegalStateException("Parser $parserType had problems parsing $language: ${message ?: "Unknown error."}") +class ParsingException(parserType: String, language: String, exc: Exception? 
= null) : + IllegalStateException("Parser $parserType had problems parsing $language: ${exc?.message ?: "Unknown error."}") diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt index d3c270fe..dbf9b9fa 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt @@ -45,4 +45,4 @@ object AntlrPHPHandlerFactory: HandlerFactory { override val parseResult: ParseResult = PHPParser().parseFile(file) override val splitter: TreeFunctionSplitter = PHPFunctionSplitter() } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 055072e8..c6d1841c 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -7,25 +7,22 @@ import org.antlr.v4.runtime.Vocabulary import org.antlr.v4.runtime.tree.ErrorNode import org.antlr.v4.runtime.tree.TerminalNode -fun convertAntlrTree(tree: ParserRuleContext, ruleNames: Array, vocabulary: Vocabulary): AntlrNode { - return compressTree(convertRuleContext(tree, ruleNames, null, vocabulary)) -} +fun convertAntlrTree(tree: ParserRuleContext, ruleNames: Array, vocabulary: Vocabulary): AntlrNode = + compressTree(convertRuleContext(tree, ruleNames, null, vocabulary)) -private fun convertRuleContext(ruleContext: ParserRuleContext, ruleNames: Array, parent: AntlrNode?, vocabulary: Vocabulary): AntlrNode { +private fun convertRuleContext( + ruleContext: ParserRuleContext, ruleNames: Array, parent: AntlrNode?, vocabulary: Vocabulary +): AntlrNode { val typeLabel = ruleNames[ruleContext.ruleIndex] val currentNode = AntlrNode(typeLabel, parent, null) val children: MutableList = ArrayList() ruleContext.children?.forEach { - if (it is TerminalNode) { - children.add(convertTerminal(it, currentNode, vocabulary)) - return@forEach - } - if (it is ErrorNode) { - 
children.add(convertErrorNode(it, currentNode)) - return@forEach + when (it) { + is TerminalNode -> children.add(convertTerminal(it, currentNode, vocabulary)) + is ErrorNode -> children.add(convertErrorNode(it, currentNode)) + else -> children.add(convertRuleContext(it as ParserRuleContext, ruleNames, currentNode, vocabulary)) } - children.add(convertRuleContext(it as ParserRuleContext, ruleNames, currentNode, vocabulary)) } currentNode.replaceChildren(children) diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt index 1955f3b5..6ffcadb0 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt @@ -13,4 +13,4 @@ class JavaFunctionSplitter : TreeFunctionSplitter { } return methodRoots.map { AntlrJavaFunctionInfo(it, filePath) } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt index 277b1aca..4f32066a 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt @@ -22,7 +22,7 @@ class JavaParser : Parser { val context = parser.compilationUnit() convertAntlrTree(context, Java8Parser.ruleNames, Java8Parser.VOCABULARY) } catch (e: Exception) { - throw ParsingException("ANTLR", "Java", e.message) + throw ParsingException("ANTLR", "Java", e) } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt index ceeda5b1..cd2bc27c 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt @@ -125,4 +125,4 @@ class JavaScriptFunctionInfo(root: AntlrNode, 
filePath: String) : AntlrJavaScrip override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION_NAME_NODE) override fun getParametersRoot(): AntlrNode? = root.getChildOfType(FUNCTION_PARAMETER_NODE) -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt index 8d9929e8..84822c88 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt @@ -29,4 +29,4 @@ class JavaScriptFunctionSplitter : TreeFunctionSplitter { private fun Node.isArrowElement() = this.getChildOfType(ARROW_NODE) != null private fun Node.isFunctionElement() = this.getChildOfType(FUNCTION_NODE) != null private fun Node.isMethodElement() = decompressTypeLabel(this.typeLabel).last() == METHOD_NODE -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt index 947af3b2..9c736ba1 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt @@ -22,7 +22,7 @@ class JavaScriptParser : Parser { val context = parser.program() convertAntlrTree(context, JavaScriptParser.ruleNames, JavaScriptParser.VOCABULARY) } catch (e: Exception) { - throw ParsingException("ANTLR", "JavaScript", e.message) + throw ParsingException("ANTLR", "JavaScript", e) } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index 9a553897..42b13d53 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -6,6 +6,9 @@ import 
astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter import astminer.parse.antlr.* import astminer.parse.findEnclosingElementBy +import mu.KotlinLogging + +private val logger = KotlinLogging.logger("ANTLR-PHP-function-info") class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { override val returnType = getElementType(root) @@ -14,23 +17,6 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: override val parameters: List = collectParameters() override val enclosingElement: EnclosingElement? = collectEnclosingElement() - companion object { - const val PARAMETERS_LIST = "formalParameterList" - const val PARAMETER = "formalParameter" - const val TYPE = "typeHint" - const val PARAMETER_NAME = "VarName" - const val CLASS_MEMBER = "classStatement" - const val FUNCTION_NAME = "identifier" - const val CLASS_DECLARATION = "classDeclaration" - const val VAR_DECLARATION = "variableInitializer" - const val ELLIPSIS = "Ellipsis" - const val EXPRESSION = "expression" - const val ASSIGN_OP = "assignmentOperator" - const val LAMBDA_TOKEN = "LambdaFn" - const val FUNCTION_TOKEN = "Function_" - const val REFERENCE = "Ampersand" - } - private fun collectParameters(): List { // Parameters in this grammar have following structure (children order may be wrong): //formal parameter list -> formal parameter -> Ampersand @@ -50,9 +36,16 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: } // Otherwise find all parameters - return parameterList.getItOrChildrenOfType(PARAMETER).mapNotNull { - try { assembleParameter(it) } catch (e: IllegalStateException) { return@mapNotNull null } - } + return parameterList + .getItOrChildrenOfType(PARAMETER) + .mapNotNull { + try { + assembleParameter(it) + } catch (e: IllegalStateException) { + logger.warn { "Error during collecting parameters for $name in $filePath: ${e.message}" } + null + } + } } private fun 
assembleParameter(parameterNode: AntlrNode): FunctionInfoParameter { @@ -92,6 +85,7 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: type = getEnclosingType(enclosing) ) } catch (e: IllegalStateException) { + logger.warn { "Error during collecting enclosing element for $name in $filePath: ${e.message}" } null } } @@ -106,7 +100,7 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: } } - private fun getEnclosingElementName(enclosing: AntlrNode) : String?{ + private fun getEnclosingElementName(enclosing: AntlrNode): String? { return when { enclosing.isFunction() || enclosing.isClass() -> enclosing.getChildOfType(FUNCTION_NAME)?.originalToken enclosing.isAssignExpression() -> enclosing.children.find { it.hasLastLabel(PARAMETER_NAME) }?.originalToken @@ -124,4 +118,21 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: private fun AntlrNode.isAssignExpression() = hasFirstLabel(EXPRESSION) && (getChildOfType(ASSIGN_OP) != null) private fun AntlrNode.isClass(): Boolean = hasLastLabel(CLASS_DECLARATION) -} \ No newline at end of file + + companion object { + const val PARAMETERS_LIST = "formalParameterList" + const val PARAMETER = "formalParameter" + const val TYPE = "typeHint" + const val PARAMETER_NAME = "VarName" + const val CLASS_MEMBER = "classStatement" + const val FUNCTION_NAME = "identifier" + const val CLASS_DECLARATION = "classDeclaration" + const val VAR_DECLARATION = "variableInitializer" + const val ELLIPSIS = "Ellipsis" + const val EXPRESSION = "expression" + const val ASSIGN_OP = "assignmentOperator" + const val LAMBDA_TOKEN = "LambdaFn" + const val FUNCTION_TOKEN = "Function_" + const val REFERENCE = "Ampersand" + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt index e85399ca..a95288f8 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt 
+++ b/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt @@ -15,4 +15,4 @@ class PHPFunctionSplitter : TreeFunctionSplitter { .filter { it.typeLabel == LAMBDA_TOKEN || it.typeLabel == FUNCTION_TOKEN } .mapNotNull { node -> node.parent?.let { statement -> ANTLRPHPFunctionInfo(statement, filePath) } } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt index 86808926..fc5b76f8 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt @@ -30,8 +30,7 @@ class PHPParser: Parser { val context = parser.htmlDocument() convertAntlrTree(context, PhpParser.ruleNames, PhpParser.VOCABULARY) } catch (e: Exception) { - throw ParsingException("ANTLR", "PHP", e.message) + throw ParsingException("ANTLR", "PHP", e) } } - -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt index 01bde266..f1e87c20 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -51,7 +51,7 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode, override val filePat private fun assembleMethodInfoParameter(parameterNode: AntlrNode): FunctionInfoParameter { val parameterHaveNoDefaultOrType = parameterNode.hasLastLabel(PARAMETER_NAME_NODE) val parameterName = if (parameterHaveNoDefaultOrType) parameterNode.originalToken - else parameterNode.getChildOfType(PARAMETER_NAME_NODE)?.originalToken + else parameterNode.getChildOfType(PARAMETER_NAME_NODE)?.originalToken require(parameterName != null) { "Method name was not found" } val parameterType = parameterNode.getChildOfType(PARAMETER_TYPE_NODE)?.getTokensFromSubtree() @@ -77,7 +77,8 @@ class AntlrPythonFunctionInfo(override val 
root: AntlrNode, override val filePat } val name = when (type) { EnclosingElementType.Class -> enclosingNode.getChildOfType(CLASS_NAME_NODE) - EnclosingElementType.Method, EnclosingElementType.Function -> enclosingNode.getChildOfType(FUNCTION_NAME_NODE) + EnclosingElementType.Method, EnclosingElementType.Function -> + enclosingNode.getChildOfType(FUNCTION_NAME_NODE) else -> throw IllegalStateException("Enclosing node can only be function or class") }?.originalToken return EnclosingElement( @@ -97,4 +98,4 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode, override val filePat val lastLabel = decompressTypeLabel(enclosingNode.typeLabel).last() return lastLabel == CLASS_DECLARATION_NODE } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt index 69554417..7ce15938 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt @@ -22,7 +22,7 @@ class PythonParser : Parser { val context = parser.file_input() convertAntlrTree(context, Python3Parser.ruleNames, Python3Parser.VOCABULARY) } catch (e: Exception) { - throw ParsingException("ANTLR", "Python", e.message) + throw ParsingException("ANTLR", "Python", e) } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt index 3374a500..0d8de704 100644 --- a/src/main/kotlin/astminer/parse/factory.kt +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -7,6 +7,7 @@ import astminer.parse.antlr.AntlrJavaHandlerFactory import astminer.parse.antlr.AntlrJavascriptHandlerFactory import astminer.parse.antlr.AntlrPHPHandlerFactory import astminer.parse.antlr.AntlrPythonHandlerFactory +import astminer.parse.fuzzy.cpp.FuzzyHandler import astminer.parse.gumtree.GumtreeJavaHandlerFactory import astminer.parse.gumtree.GumtreePythonHandlerFactory @@ -38,7 +39,7 @@ 
private fun getAntlrHandlerFactory(extension: FileExtension): HandlerFactory { private fun getFuzzyHandlerFactory(extension: FileExtension): HandlerFactory { return when (extension) { - FileExtension.C, FileExtension.Cpp -> FuzzyCppHandler + FileExtension.C, FileExtension.Cpp -> FuzzyHandler else -> throw UnsupportedOperationException() } } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt index 93e898cf..6ef4c94a 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt @@ -35,7 +35,7 @@ class FuzzyCppFunctionInfo(override val root: FuzzyNode, override val filePath: private fun collectEnclosingClass(): EnclosingElement? { val enclosingClass = findEnclosingClass() ?: return null - val enclosingClassName = findEnclosingClassName(enclosingClass) ?: return null + val enclosingClassName = findEnclosingClassName(enclosingClass) return EnclosingElement( root = enclosingClass, type = EnclosingElementType.Class, @@ -59,4 +59,4 @@ class FuzzyCppFunctionInfo(override val root: FuzzyNode, override val filePath: FunctionInfoParameter(name, type) } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt index 24b97c46..8c4529d7 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt @@ -25,40 +25,45 @@ class FuzzyCppParser : Parser { private val supportedExtensions = listOf("c", "cpp") data class ExpandableNodeKey( - val key: String, - val supportedNodeLabels: List, - val order: Int + val key: String, + val supportedNodeLabels: List, + val order: Int ) private val expandableNodeKeys = listOf( - ExpandableNodeKey("NAME", listOf( - NodeTypes.TYPE, NodeTypes.TYPE_DECL, NodeTypes.TYPE_PARAMETER, 
NodeTypes.MEMBER, NodeTypes.TYPE_ARGUMENT, - NodeTypes.METHOD, NodeTypes.METHOD_PARAMETER_IN, NodeTypes.LOCAL, NodeTypes.MODIFIER, - NodeTypes.IDENTIFIER, NodeTypes.CALL, - NodeTypes.UNKNOWN - ), 0), - ExpandableNodeKey("TYPE_FULL_NAME", listOf( - NodeTypes.TYPE, - NodeTypes.METHOD_RETURN, NodeTypes.METHOD_PARAMETER_IN, NodeTypes.LOCAL, - NodeTypes.IDENTIFIER, - NodeTypes.UNKNOWN - ), 0), - ExpandableNodeKey("ALIAS_TYPE_FULL_NAME", listOf( - NodeTypes.TYPE_DECL, - NodeTypes.UNKNOWN - ), 0) + ExpandableNodeKey( + "NAME", listOf( + NodeTypes.TYPE, NodeTypes.TYPE_DECL, NodeTypes.TYPE_PARAMETER, NodeTypes.MEMBER, + NodeTypes.TYPE_ARGUMENT, NodeTypes.METHOD, NodeTypes.METHOD_PARAMETER_IN, NodeTypes.LOCAL, + NodeTypes.MODIFIER, NodeTypes.IDENTIFIER, NodeTypes.CALL, NodeTypes.UNKNOWN + ), 0 + ), + ExpandableNodeKey( + "TYPE_FULL_NAME", listOf( + NodeTypes.TYPE, + NodeTypes.METHOD_RETURN, NodeTypes.METHOD_PARAMETER_IN, NodeTypes.LOCAL, + NodeTypes.IDENTIFIER, + NodeTypes.UNKNOWN + ), 0 + ), + ExpandableNodeKey( + "ALIAS_TYPE_FULL_NAME", listOf( + NodeTypes.TYPE_DECL, + NodeTypes.UNKNOWN + ), 0 + ) ) data class ReplaceableNodeKey(val key: String, val condition: (Node) -> Boolean) private val replaceableNodeKeys = listOf( - ReplaceableNodeKey("NAME") { v -> - v.propertyKeys().contains("NAME") && - v.property("NAME").toString().startsWith("") - }, - ReplaceableNodeKey("PARSER_TYPE_NAME") { v -> - v.propertyKeys().contains("PARSER_TYPE_NAME") - } + ReplaceableNodeKey("NAME") { v -> + v.propertyKeys().contains("NAME") && + v.property("NAME").toString().startsWith("") + }, + ReplaceableNodeKey("PARSER_TYPE_NAME") { v -> + v.propertyKeys().contains("PARSER_TYPE_NAME") + } ) } @@ -148,10 +153,10 @@ class FuzzyCppParser : Parser { */ fun preprocessProject(projectRoot: File, outputDir: File) { val files = projectRoot.walkTopDown() - .filter { file -> supportedExtensions.contains(file.extension) } + .filter { file -> supportedExtensions.contains(file.extension) } files.forEach { file 
-> val relativeFilePath = file.relativeTo(projectRoot) - val outputPath = if (relativeFilePath.parent != null){ + val outputPath = if (relativeFilePath.parent != null) { outputDir.resolve(relativeFilePath.parent) } else { outputDir @@ -192,7 +197,7 @@ class FuzzyCppParser : Parser { return@forEach } } - node.metadata[k]= property + node.metadata[k] = property } return node } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt index 39ca7ab7..3fb83611 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt @@ -9,4 +9,4 @@ class FuzzyFunctionSplitter : TreeFunctionSplitter { val methodRoots = root.preOrder().filter { it.typeLabel == methodNode } return methodRoots.map { FuzzyCppFunctionInfo(it, filePath) } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt index 18ae066a..95fd7c7a 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt @@ -1,19 +1,15 @@ -package astminer.parse +package astminer.parse.fuzzy.cpp import astminer.common.model.HandlerFactory import astminer.common.model.LanguageHandler import astminer.common.model.ParseResult -import astminer.parse.fuzzy.cpp.FuzzyCppParser -import astminer.parse.fuzzy.cpp.FuzzyFunctionSplitter -import astminer.parse.fuzzy.cpp.FuzzyNode import java.io.File -object FuzzyCppHandler : HandlerFactory { +object FuzzyHandler : HandlerFactory { override fun createHandler(file: File): LanguageHandler = CppFuzzyHandler(file) - class CppFuzzyHandler(file: File) : LanguageHandler() { override val splitter = FuzzyFunctionSplitter() override val parseResult: ParseResult = FuzzyCppParser().parseFile(file) } -} \ No newline at end of file +} diff --git 
a/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt index fc117afa..c2d6a7f8 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt @@ -23,4 +23,4 @@ fun preprocessCppCode(file: File, outputDir: File, preprocessCommand: String) = cat __tmp_include.cpp >${outputDir.absolutePath}/${file.name} cat __tmp_preprocessed.cpp >>${outputDir.absolutePath}/${file.name} rm __tmp_*.cpp -""".trimIndent() \ No newline at end of file +""".trimIndent() diff --git a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt index dd6b29db..37bea3fa 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt @@ -27,4 +27,4 @@ class GumTreeNode(val wrappedNode: ITree, val context: TreeContext,override var } override fun preOrder(): List = super.preOrder().map { it as GumTreeNode } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt index e16349d9..13a0548e 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt @@ -25,4 +25,4 @@ object GumtreePythonHandlerFactory : HandlerFactory { override val splitter = GumTreePythonFunctionSplitter() override val parseResult: ParseResult = GumTreePythonParser().parseFile(file) } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt index fd9287e1..eeaf23ef 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt @@ -10,4 +10,4 @@ class GumTreeJavaFunctionSplitter : 
TreeFunctionSplitter { val methodRoots = root.preOrder().filter { it.typeLabel == methodDeclaration } return methodRoots.map { GumTreeJavaFunctionInfo(it, filePath) } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt index 26007811..6338d4be 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt @@ -21,4 +21,4 @@ class GumTreeJavaParser : Parser { fun wrapGumTreeNode(treeContext: TreeContext): GumTreeNode { return GumTreeNode(treeContext.root, treeContext, null) -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt index 90e68a4c..f94fc7d0 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -7,7 +7,9 @@ import astminer.common.model.FunctionInfoParameter import astminer.parse.findEnclosingElementBy import astminer.parse.gumtree.GumTreeNode -class GumTreePythonFunctionInfo(override val root: GumTreeNode, override val filePath: String) : FunctionInfo { +class GumTreePythonFunctionInfo( + override val root: GumTreeNode, override val filePath: String +) : FunctionInfo { companion object { private object TypeLabels { const val classDefinition = "ClassDef" @@ -75,11 +77,11 @@ class GumTreePythonFunctionInfo(override val root: GumTreeNode, override val fil else -> emptyList() } } - return params.map { node-> + return params.map { node -> FunctionInfoParameter( name = node.originalToken, type = getElementType(node)?.originalToken ) } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt 
b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt index e3ab61db..973e2328 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt @@ -18,7 +18,7 @@ class GumTreePythonParser : Parser { val context = PythonTreeGenerator().generate(InputStreamReader(content)) wrapGumTreeNode(context) } catch (e: Exception) { - throw ParsingException("GumTree", "Python", e.message) + throw ParsingException("GumTree", "Python", e) } } diff --git a/src/main/kotlin/astminer/paths/PathMiner.kt b/src/main/kotlin/astminer/paths/PathMiner.kt index 8bfab93d..905d8761 100644 --- a/src/main/kotlin/astminer/paths/PathMiner.kt +++ b/src/main/kotlin/astminer/paths/PathMiner.kt @@ -11,4 +11,4 @@ class PathMiner(val settings: PathRetrievalSettings) { fun retrievePaths(tree: Node): Collection { return pathWorker.retrievePaths(tree, settings.maxLength, settings.maxWidth) } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/paths/PathWorker.kt b/src/main/kotlin/astminer/paths/PathWorker.kt index 451d7b05..a3399cc0 100644 --- a/src/main/kotlin/astminer/paths/PathWorker.kt +++ b/src/main/kotlin/astminer/paths/PathWorker.kt @@ -74,4 +74,4 @@ class PathWorker { } return paths } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 9c187214..74d67cd1 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -1,7 +1,6 @@ package astminer.pipeline import astminer.common.getProjectFilesWithExtension -import astminer.config.* import astminer.parse.getHandlerFactory import astminer.pipeline.branch.FilePipelineBranch import astminer.pipeline.branch.FunctionPipelineBranch @@ -9,6 +8,8 @@ import astminer.pipeline.branch.IllegalLabelExtractorException import astminer.common.model.FileLabelExtractor import 
astminer.common.model.FunctionLabelExtractor import astminer.common.model.Storage +import astminer.config.FileExtension +import astminer.config.PipelineConfig import java.io.File /** @@ -40,7 +41,7 @@ class Pipeline(private val config: PipelineConfig) { } /** - * Runs the pipeline that is defined in the [config] + * Runs the pipeline that is defined in the [config]. */ fun run() { for (extension in config.parser.extensions) { diff --git a/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt index f2c9c6a3..e67d179f 100644 --- a/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt +++ b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt @@ -7,7 +7,7 @@ class IllegalLabelExtractorException(problemName: String?) : IllegalStateException("Unknown label extractor `${problemName ?: "anonymous"}`") /** - * This exception is thrown when the given filter is not implemented for the given granularity + * This exception is thrown when the given filter is not implemented for the given granularity. */ class IllegalFilterException(granularity: String, filterName: String?): IllegalStateException("Unknown filter `${filterName ?: "anonymous"}` for $granularity granularity") diff --git a/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt index f417247d..fbf12a24 100644 --- a/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt @@ -5,7 +5,7 @@ import astminer.common.model.Node import astminer.common.model.LabeledResult /** - * PipelineBranch is a part of the pipeline that can be completely different depending on the granularity (pipeline type) + * PipelineBranch is a part of the pipeline that encapsulate inside itself granularity based logic. * It accepts parsed files (LanguageHandler) and returns labeled results. 
*/ interface PipelineBranch { diff --git a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index aa814206..2832b5ed 100644 --- a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -2,11 +2,11 @@ package astminer.storage.path import astminer.common.model.LabeledResult import astminer.common.model.* -import astminer.common.storage.* import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import astminer.paths.toPathContext import astminer.common.model.Storage +import astminer.common.storage.* import java.io.File import java.io.PrintWriter @@ -17,7 +17,7 @@ import java.io.PrintWriter * @property maxPathWidth The maximum width of a single path (based on the formal math definition of path width) * @property maxTokens ?? * @property maxPaths ?? - * @property maxPathContextsPerEntity The maximum number of path contexts that should be extracted from LabeledParseResult. + * @property maxPathContextsPerEntity The maximum number of path contexts that should be extracted from tree. 
* In other words, the maximum number of path contexts to save from each file/method (depending on granularity) */ data class PathBasedStorageConfig( diff --git a/src/test/kotlin/astminer/common/TestUtils.kt b/src/test/kotlin/astminer/common/DummyNode.kt similarity index 99% rename from src/test/kotlin/astminer/common/TestUtils.kt rename to src/test/kotlin/astminer/common/DummyNode.kt index 1a2bd93a..ff4b683b 100644 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ b/src/test/kotlin/astminer/common/DummyNode.kt @@ -4,7 +4,6 @@ import astminer.common.model.LabeledResult import astminer.common.model.Node import astminer.common.model.ParseResult - class DummyNode( override val typeLabel: String, override val children: MutableList = mutableListOf() diff --git a/src/test/kotlin/astminer/common/FileParsingUtilTest.kt b/src/test/kotlin/astminer/common/FileParsingUtilTest.kt index 0ea4a52d..cd7948ac 100644 --- a/src/test/kotlin/astminer/common/FileParsingUtilTest.kt +++ b/src/test/kotlin/astminer/common/FileParsingUtilTest.kt @@ -22,7 +22,10 @@ class FileParsingUtilTest { file.writeText(text) file = changeExtensionTo(file, newExtension) - Assert.assertTrue("File extension should be changed but its content should not", file.extension == newExtension && file.readText() == text) + Assert.assertTrue( + "File extension should be changed but its content should not", + file.extension == newExtension && file.readText() == text + ) file.delete() } @@ -35,7 +38,11 @@ class FileParsingUtilTest { file.writeText(text) addClassWrapper(file, "Foo") - Assert.assertEquals("File wrapper should be added with braces and newlines", file.readText(), "class Foo {\n$text\n}") + Assert.assertEquals( + "File wrapper should be added with braces and newlines", + file.readText(), + "class Foo {\n$text\n}" + ) file.delete() } @@ -51,5 +58,4 @@ class FileParsingUtilTest { val file = File("src/test/resources/common/NonParsableFile.java") Assert.assertTrue("This file has syntax errors", 
hasSyntaxErrors(file)) } - -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/common/TreeUtilTest.kt b/src/test/kotlin/astminer/common/TreeUtilTest.kt index 631ac3ad..6e3ec5d2 100644 --- a/src/test/kotlin/astminer/common/TreeUtilTest.kt +++ b/src/test/kotlin/astminer/common/TreeUtilTest.kt @@ -4,13 +4,7 @@ import org.junit.Assert import org.junit.Test class TreeUtilTest { - @Test - fun testPostOrder() { - val root = createDummyTree() - val dataList = root.postOrderIterator().asSequence().map { it.typeLabel } - - Assert.assertArrayEquals(arrayOf("4", "5", "6", "2", "7", "8", "3", "1"), dataList.toList().toTypedArray()) - } + private val defaultToken = "EMPTY" @Test fun testPreOrder() { @@ -20,7 +14,13 @@ class TreeUtilTest { Assert.assertArrayEquals(arrayOf("1", "2", "4", "5", "6", "3", "7", "8"), dataList.toList().toTypedArray()) } - private val defaultToken = "EMPTY" + @Test + fun testPostOrder() { + val root = createDummyTree() + val dataList = root.postOrderIterator().asSequence().map { it.typeLabel } + + Assert.assertArrayEquals(arrayOf("4", "5", "6", "2", "7", "8", "3", "1"), dataList.toList().toTypedArray()) + } @Test fun testNormalizeTokenCleaning() { @@ -65,4 +65,4 @@ class TreeUtilTest { splitToSubtokens(token) ) } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/common/storage/RankedIncrementalIdStorageTest.kt b/src/test/kotlin/astminer/common/storage/RankedIncrementalIdStorageTest.kt index 7ca1a5cb..38b796b0 100644 --- a/src/test/kotlin/astminer/common/storage/RankedIncrementalIdStorageTest.kt +++ b/src/test/kotlin/astminer/common/storage/RankedIncrementalIdStorageTest.kt @@ -44,7 +44,7 @@ class RankedIncrementalIdStorageTest { val correctRanks = listOf(2, 3, 4, 1, 5) for (i in items.indices) { - for (rep in 0 until counts[i]) { + repeat(counts[i]) { storage.record(items[i]) } } @@ -55,4 +55,4 @@ class RankedIncrementalIdStorageTest { assertEquals(correctRanks[i].toLong(), storage.getKeyRank(items[i])) } } -} \ 
No newline at end of file +} diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt b/src/test/kotlin/astminer/featureextraction/PrettyNode.kt similarity index 99% rename from src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt rename to src/test/kotlin/astminer/featureextraction/PrettyNode.kt index 8b1cc345..fd36faf0 100644 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt +++ b/src/test/kotlin/astminer/featureextraction/PrettyNode.kt @@ -42,7 +42,6 @@ fun restoreFromPrettyPrint(prettyPrintedTree: String, indentSymbol: String = "-- return tree.first() } - fun restorePrintedNode(printedNode: String, indentSymbol: String = "--") : Pair { val indents = Regex("^($indentSymbol)*").find(printedNode)?.value ?: "" val nodeString = printedNode.substringAfter(indents) diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTest.kt b/src/test/kotlin/astminer/featureextraction/TreeFeatureTest.kt index 2183c686..3a5a1a2a 100644 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTest.kt +++ b/src/test/kotlin/astminer/featureextraction/TreeFeatureTest.kt @@ -64,5 +64,4 @@ class TreeFeatureTest { val expected = listOf("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l").sorted() Assert.assertEquals(expected, Tokens.compute(tree).sorted()) } - -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt index a3f1685c..4e44b56b 100644 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt +++ b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt @@ -30,4 +30,4 @@ class TreeFeatureTestUtilTest { Assert.assertEquals(prettyTree, prettyRestoredTree) } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt index 727bed95..f4314875 
100644 --- a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt +++ b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt @@ -128,4 +128,4 @@ class FunctionFiltersTest { } assertTrue { TreeSizeFilter(10, 100).validate(functionInfo) } } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt b/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt index ec582b1f..db392d48 100644 --- a/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt +++ b/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt @@ -7,12 +7,6 @@ import org.junit.Test import kotlin.test.assertEquals class FileNameExtractorTest { - companion object { - private const val FILE_NAME = "file.txt" - private const val PATH = "random/folder/$FILE_NAME" - private var dummyRoot = AntlrNode("", null, null) - } - @Test fun `test file path extractor returns the same root and file path and labels with file path`() { val nonEmptyParseResult = ParseResult(dummyRoot, PATH) @@ -20,4 +14,10 @@ class FileNameExtractorTest { assertEquals(LabeledResult(dummyRoot, FILE_NAME, PATH), labeledParseResult) } -} \ No newline at end of file + + companion object { + private const val FILE_NAME = "file.txt" + private const val PATH = "random/folder/$FILE_NAME" + private var dummyRoot = AntlrNode("", null, null) + } +} diff --git a/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt b/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt index e6f18d0f..b07a236d 100644 --- a/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt +++ b/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt @@ -8,11 +8,6 @@ import kotlin.test.assertEquals import kotlin.test.assertNull class FolderNameExtractorTest { - companion object { - private const val PATH = "random/folder/file.txt" - private const val FOLDER = "folder" - private var dummyRoot = AntlrNode("", null, null) - } @Test fun `test folder 
extractor returns null when folder is empty or not found`() { @@ -29,4 +24,10 @@ class FolderNameExtractorTest { assertEquals(LabeledResult(dummyRoot, FOLDER, PATH), labeledParseResult) } + + companion object { + private const val PATH = "random/folder/file.txt" + private const val FOLDER = "folder" + private var dummyRoot = AntlrNode("", null, null) + } } diff --git a/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt b/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt index 56c720d8..e15966f8 100644 --- a/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt +++ b/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt @@ -9,10 +9,6 @@ import org.junit.Test import kotlin.test.assertEquals class FunctionNameLabelExtractorTest { - companion object { - private const val PATH = "random/folder/file.txt" - private const val FUNCTION_NAME = "method" - } lateinit var functionRoot: Node @@ -54,4 +50,9 @@ class FunctionNameLabelExtractorTest { val recursiveCallNode = functionInfo.root.children.firstOrNull()?.children?.firstOrNull() assertEquals("SELF", recursiveCallNode?.token) } -} \ No newline at end of file + + companion object { + private const val PATH = "random/folder/file.txt" + private const val FUNCTION_NAME = "method" + } +} diff --git a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt index 69dfe9a1..e573af41 100644 --- a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt @@ -19,4 +19,4 @@ class AntrlUtilTest { } Assert.assertEquals("There should be no children with different parent", 0, adoptedNodesSize) } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt b/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt index 756fec53..8cd21da2 100644 --- 
a/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt @@ -62,4 +62,4 @@ class ANTLRJavaParserTest { Assert.assertEquals("There is only 5 file with .java extension in 'testData/arrayCalls' folder",5, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt index 5d0b1d2a..44ed3f91 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt @@ -9,12 +9,6 @@ import kotlin.test.BeforeTest import kotlin.test.assertNotNull class JavaFunctionSplitterTest { - companion object { - const val FILE_PATH = "src/test/resources/methodSplitting/testMethodSplitting.java" - const val N_FUNCTIONS = 10 - val functionSplitter = JavaFunctionSplitter() - val parser = JavaParser() - } var functionInfos: Collection> = listOf() @@ -111,4 +105,11 @@ class JavaFunctionSplitterTest { assertEquals(weirdParameter.name, "arr[]") assertEquals(weirdParameter.type, "int") } -} \ No newline at end of file + + companion object { + const val FILE_PATH = "src/test/resources/methodSplitting/testMethodSplitting.java" + const val N_FUNCTIONS = 10 + val functionSplitter = JavaFunctionSplitter() + val parser = JavaParser() + } +} diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/ANTLRJavaScriptParserTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/ANTLRJavaScriptParserTest.kt index be76de3a..335da12e 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/ANTLRJavaScriptParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/ANTLRJavaScriptParserTest.kt @@ -14,5 +14,4 @@ class ANTLRJavaScriptParserTest { val node = 
parser.parseInputStream(FileInputStream(file)) assertNotNull(node, "Parse tree for a valid file should not be null") } - -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt index 83b44db2..66bfdc05 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt @@ -9,14 +9,7 @@ import kotlin.test.BeforeTest import kotlin.test.assertEquals import kotlin.test.assertNotNull - class JavaScriptFunctionSplitterTest { - companion object { - const val N_METHODS = 47 - const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.js" - val functionSplitter = JavaScriptFunctionSplitter() - val parser = JavaScriptParser() - } var functionInfos: Collection> = listOf() @@ -60,4 +53,11 @@ class JavaScriptFunctionSplitterTest { assertEquals(expectedJsonInfos, actualJsonInfos) } -} \ No newline at end of file + + companion object { + const val N_METHODS = 47 + const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.js" + val functionSplitter = JavaScriptFunctionSplitter() + val parser = JavaScriptParser() + } +} diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt index 40ba4c7e..9a1e1c3e 100644 --- a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt @@ -10,12 +10,6 @@ import kotlin.test.assertEquals import kotlin.test.assertNotNull internal class ANTLRPHPFunctionSplitterTest { - companion object { - const val N_METHODS = 18 - const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.php" - val functionSplitter = PHPFunctionSplitter() 
- val parser = PHPParser() - } private var functionInfos: Collection> = listOf() @@ -43,17 +37,17 @@ internal class ANTLRPHPFunctionSplitterTest { } } - fun FunctionInfo.getJsonInfo(): String { - return listOf( - "info : {", - "name: ${name}, ", - "args: ${parameters.joinToString(", ") { listOfNotNull(it.type, it.name).joinToString(" ") }}, ", - "enclosing element: ${enclosingElement?.type?.getEnclosingElementType()}, ", - "enclosing element name: ${enclosingElement?.name}, ", - "return type: $returnType", - "}" - ).joinToString("") - } + fun FunctionInfo.getJsonInfo(): String = listOf( + "info : {", + "name: ${name}, ", + "args: ${parameters.joinToString(", ") { + listOfNotNull(it.type, it.name).joinToString(" ") + }}, ", + "enclosing element: ${enclosingElement?.type?.getEnclosingElementType()}, ", + "enclosing element name: ${enclosingElement?.name}, ", + "return type: $returnType", + "}" + ).joinToString("") val actualJsonInfos = functionInfos.map { it.getJsonInfo() + '\n' }.sorted() @@ -62,4 +56,11 @@ internal class ANTLRPHPFunctionSplitterTest { assertEquals(expectedJsonInfos, actualJsonInfos) } -} \ No newline at end of file + + companion object { + const val N_METHODS = 18 + const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.php" + val functionSplitter = PHPFunctionSplitter() + val parser = PHPParser() + } +} diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPParserText.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPParserText.kt index fe42535b..da7b4caa 100644 --- a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPParserText.kt +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPParserText.kt @@ -14,4 +14,4 @@ internal class ANTLRPHPParserText { val node = parser.parseInputStream(FileInputStream(file)) assertNotNull(node) } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt 
b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt index d92c46f6..0b6604f4 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt @@ -1,7 +1,6 @@ package astminer.parse.antlr.python import astminer.common.getProjectFilesWithExtension -import astminer.common.model.Node import astminer.parseFiles import org.junit.Assert import org.junit.Test @@ -26,4 +25,4 @@ class ANTLRPythonParserTest { Assert.assertEquals("There is only 1 file with .py extension in 'testData/examples' folder",1, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt index 008af33f..8a935689 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt @@ -11,12 +11,6 @@ import kotlin.test.assertNotNull import kotlin.test.assertNull class PythonFunctionSplitterTest { - companion object { - const val FILE_PATH = "src/test/resources/methodSplitting/testMethodSplitting.py" - const val N_FUNCTIONS = 17 - val functionSplitter = PythonFunctionSplitter() - val parser = PythonParser() - } var functionInfos: Collection> = listOf() @@ -156,4 +150,11 @@ class PythonFunctionSplitterTest { assertEquals("second_function_inside_method", enclosingElement.name) assertEquals(EnclosingElementType.Function, enclosingElement.type) } -} \ No newline at end of file + + companion object { + const val FILE_PATH = "src/test/resources/methodSplitting/testMethodSplitting.py" + const val N_FUNCTIONS = 17 + val functionSplitter = PythonFunctionSplitter() + val parser = PythonParser() + } +} diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt 
b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 06a7cc63..16b92b12 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -95,4 +95,4 @@ class FuzzyCppParserTest { ) preprocessedRoot.deleteRecursively() } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index 86d965a4..d2ff0810 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -15,19 +15,14 @@ import kotlin.test.assertNull class FuzzyMethodSplitterTest { - companion object { - const val FILE_PATH = "src/test/resources/methodSplitting/testMethodSplitting.cpp" - const val N_FUNCTIONS = 10 - val methodSplitter = FuzzyFunctionSplitter() - val parser = FuzzyCppParser() - } - var methodInfos: Collection> = listOf() @Before fun parseTree() { Assume.assumeTrue(checkExecutable("g++")) - val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.cpp").inputStream()) + val testTree = parser.parseInputStream( + File("src/test/resources/methodSplitting/testMethodSplitting.cpp").inputStream() + ) assertNotNull(testTree) methodInfos = methodSplitter.splitIntoFunctions(testTree, FILE_PATH) } @@ -114,4 +109,11 @@ class FuzzyMethodSplitterTest { assertEquals("int", parameter.type) } } -} \ No newline at end of file + + companion object { + const val FILE_PATH = "src/test/resources/methodSplitting/testMethodSplitting.cpp" + const val N_FUNCTIONS = 10 + val methodSplitter = FuzzyFunctionSplitter() + val parser = FuzzyCppParser() + } +} diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt index 48b2d893..1cf8cb4c 100644 --- 
a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt @@ -25,7 +25,6 @@ class GumTreeJavaFunctionSplitterTest { assertEquals(listOf("args", "param"), parameters.map { it.name }) assertEquals(listOf("String[]", "int"), parameters.map { it.type }) } - } @Test @@ -77,5 +76,5 @@ class GumTreeJavaFunctionSplitterTest { } } - //TODO: add more tests -} \ No newline at end of file +// TODO: add more tests +} diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt index b4225277..9ecbd78b 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt @@ -25,4 +25,4 @@ class GumTreeJavaParserTest { Assert.assertEquals("There is only 2 file with .java extension in 'testData/examples' folder",2, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt b/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt index 484dcccd..3a3e9cf0 100644 --- a/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt +++ b/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt @@ -19,7 +19,7 @@ abstract class PathWorkerTestBase { val nLeaves = tree.postOrder().count { it.isLeaf() } val allPaths = PathWorker().retrievePaths(tree) - val expectedCount = (nLeaves * (nLeaves - 1)) / 2 + val expectedCount = nLeaves * (nLeaves - 1) / 2 Assert.assertEquals("A tree with $nLeaves leaves contains $expectedCount paths, " + "one per distinct ordered pair of leaves. 
Worker returned ${allPaths.size}", @@ -70,4 +70,4 @@ abstract class PathWorkerTestBase { } } } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt b/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt index 0c51f32e..b3bad4ac 100644 --- a/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt +++ b/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt @@ -5,13 +5,11 @@ import astminer.common.model.Node import astminer.parse.antlr.AntlrNode import org.junit.Assert -fun simpleNode(number: Int, parent: AntlrNode?): AntlrNode { - return AntlrNode("$number", parent, "node_$number") -} +fun simpleNode(number: Int, parent: AntlrNode?): AntlrNode = + AntlrNode("$number", parent, "node_$number") + +fun simpleNodes(numbers: List, parent: AntlrNode?): List = numbers.map { simpleNode(it, parent) } -fun simpleNodes(numbers: List, parent: AntlrNode?): List { - return numbers.map { simpleNode(it, parent) } -} fun getParentStack(node: Node): List = (node.parent?.let { getParentStack(it) } ?: emptyList()) + node @@ -60,4 +58,4 @@ fun assertPathIsValid(path: ASTPath) { "Path should be simple: upward and downward pieces should not intersect or contain top node", path.isSimple() ) -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/paths/SampleTreePathWorkerTest.kt b/src/test/kotlin/astminer/paths/SampleTreePathWorkerTest.kt index 29fd1932..4d6a5c20 100644 --- a/src/test/kotlin/astminer/paths/SampleTreePathWorkerTest.kt +++ b/src/test/kotlin/astminer/paths/SampleTreePathWorkerTest.kt @@ -26,4 +26,4 @@ class SampleTreePathWorkerTest : PathWorkerTestBase() { return root } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt index b2ec8eca..da8ddc4f 100644 --- a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt @@ -53,5 +53,4 @@ class 
CsvAstStorageTest { val expected = generateCorrectAstStringForBamboo(1, 100) Assert.assertEquals(expected, storage.astString(bamboo)) } - -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt index c8a0014c..062ff964 100644 --- a/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt @@ -21,7 +21,7 @@ class DotAstStorageTest { private fun getBambooLines(size: Int): List { val lines = mutableListOf() lines.add("digraph entityId {") - for (i in 0..(size - 2)) { + for (i in 0..size - 2) { lines.add("$i -- {${i + 1}};") } lines.add("${size - 1} -- {};") @@ -136,4 +136,4 @@ class DotAstStorageTest { assertEquals("interviews/Leet-Code/binary-search/pow_x_n_.java", normalizedFilepath) } -} \ No newline at end of file +} From 6aab607e6816810738239449b71b85754eb4aa86 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Wed, 21 Jul 2021 22:22:50 +0300 Subject: [PATCH 246/308] Remove multiple companions --- .../parse/antlr/php/ANTLRPHPFunctionInfo.kt | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index eeff72d6..42b13d53 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -17,23 +17,6 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: override val parameters: List = collectParameters() override val enclosingElement: EnclosingElement? 
= collectEnclosingElement() - companion object { - const val PARAMETERS_LIST = "formalParameterList" - const val PARAMETER = "formalParameter" - const val TYPE = "typeHint" - const val PARAMETER_NAME = "VarName" - const val CLASS_MEMBER = "classStatement" - const val FUNCTION_NAME = "identifier" - const val CLASS_DECLARATION = "classDeclaration" - const val VAR_DECLARATION = "variableInitializer" - const val ELLIPSIS = "Ellipsis" - const val EXPRESSION = "expression" - const val ASSIGN_OP = "assignmentOperator" - const val LAMBDA_TOKEN = "LambdaFn" - const val FUNCTION_TOKEN = "Function_" - const val REFERENCE = "Ampersand" - } - private fun collectParameters(): List { // Parameters in this grammar have following structure (children order may be wrong): //formal parameter list -> formal parameter -> Ampersand From 7a423753d720099ca9076b415f11664ddde663f9 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Wed, 21 Jul 2021 22:28:44 +0300 Subject: [PATCH 247/308] Correct building command in github action --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b99ac140..27cb9e7f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -46,7 +46,7 @@ jobs: ${{ runner.os }}-gradle- - name: Build plugin - run: ./gradlew buildPlugin + run: ./gradlew build test: needs: build From e180ed6d08f5c16b85681c12a26b946ee84e1edc Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 22 Jul 2021 11:55:32 +0300 Subject: [PATCH 248/308] Fix all detekt issues --- .github/workflows/build.yml | 4 +- build.gradle.kts | 7 +- detekt.yaml | 20 +++- src/main/kotlin/astminer/Main.kt | 7 +- src/main/kotlin/astminer/common/FileUtil.kt | 4 +- src/main/kotlin/astminer/common/TreeUtil.kt | 20 ++-- .../common/model/FunctionInfoModel.kt | 2 +- .../astminer/common/model/HandlerModel.kt | 5 +- .../astminer/common/model/ParsingModel.kt | 73 +++---------- 
.../common/model/PathContextsModel.kt | 1 - .../astminer/common/model/PipelineModel.kt | 3 +- .../astminer/common/storage/CsvFileUtil.kt | 12 ++- .../storage/RankedIncrementalIdStorage.kt | 12 +-- .../kotlin/astminer/config/ParserConfig.kt | 34 ++++-- .../kotlin/astminer/config/StorageConfigs.kt | 1 - .../kotlin/astminer/examples/AllJavaFiles.kt | 2 +- .../astminer/examples/AllJavaFilesGumTree.kt | 2 +- .../astminer/examples/AllJavaMethods.kt | 16 ++- .../astminer/examples/AllPythonFiles.kt | 1 - .../astminer/examples/AllPythonMethods.kt | 4 +- .../astminer/examples/Code2VecJavaMethods.kt | 5 +- src/main/kotlin/astminer/examples/Common.kt | 2 +- .../astminer/examples/FeatureExtraction.kt | 2 +- .../astminer/featureextraction/TreeFeature.kt | 24 ++--- .../TreeFeatureValueStorage.kt | 14 ++- .../labelextractor/FileLabelExtractors.kt | 5 +- .../kotlin/astminer/parse/FindingUtils.kt | 4 +- .../astminer/parse/antlr/AntlrHandler.kt | 4 +- .../kotlin/astminer/parse/antlr/AntlrUtil.kt | 32 +++--- .../parse/antlr/java/AntlrJavaFunctionInfo.kt | 35 +++--- .../parse/antlr/java/JavaFunctionSplitter.kt | 2 +- .../astminer/parse/antlr/java/JavaParser.kt | 2 +- .../javascript/AntlrJavaScriptElementInfo.kt | 67 ++++++------ .../javascript/JavaScriptFunctionSplitter.kt | 12 +-- .../parse/antlr/php/ANTLRPHPFunctionInfo.kt | 21 ++-- .../parse/antlr/php/PHPFunctionSplitter.kt | 10 +- .../astminer/parse/antlr/php/PHPParser.kt | 2 +- .../antlr/python/AntlrPythonFunctionInfo.kt | 61 +++++------ .../antlr/python/PythonFunctionSplitter.kt | 3 +- .../parse/antlr/python/PythonParser.kt | 4 +- .../parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt | 46 ++++---- .../parse/fuzzy/cpp/FuzzyCppParser.kt | 100 ++++++++++-------- .../astminer/parse/fuzzy/cpp/FuzzyNode.kt | 9 +- .../kotlin/astminer/parse/fuzzy/cpp/utils.kt | 10 +- .../astminer/parse/gumtree/GumTreeNode.kt | 2 +- .../astminer/parse/gumtree/GumtreeHandler.kt | 6 +- .../gumtree/java/GumTreeJavaFunctionInfo.kt | 36 +++---- 
.../parse/gumtree/java/GumTreeJavaParser.kt | 8 +- .../python/GumTreePythonFunctionInfo.kt | 65 ++++++------ .../python/GumTreePythonFunctionSplitter.kt | 10 +- .../gumtree/python/GumTreePythonParser.kt | 4 +- src/main/kotlin/astminer/paths/PathMiner.kt | 7 +- src/main/kotlin/astminer/paths/PathUtil.kt | 4 +- src/main/kotlin/astminer/paths/PathWorker.kt | 41 +++---- src/main/kotlin/astminer/pipeline/Pipeline.kt | 8 +- .../astminer/pipeline/branch/Exceptions.kt | 4 +- .../pipeline/branch/FunctionPipelineBranch.kt | 1 - .../pipeline/branch/PipelineBranch.kt | 2 +- .../astminer/storage/ast/CsvAstStorage.kt | 4 +- .../astminer/storage/ast/DotAstStorage.kt | 16 +-- .../astminer/storage/path/PathBasedStorage.kt | 19 ++-- src/test/kotlin/astminer/common/DummyNode.kt | 2 +- .../kotlin/astminer/common/TreeUtilTest.kt | 26 ++--- .../astminer/featureextraction/PrettyNode.kt | 18 ++-- .../astminer/filters/FunctionFiltersTest.kt | 2 +- .../parse/antlr/java/ANTLRJavaParserTest.kt | 2 +- .../antlr/java/JavaFunctionSplitterTest.kt | 34 +++--- .../JavaScriptFunctionSplitterTest.kt | 12 +-- .../antlr/php/ANTLRPHPFunctionSplitterTest.kt | 10 +- .../antlr/python/ANTLRPythonParserTest.kt | 2 +- .../python/PythonFunctionSplitterTest.kt | 26 ++--- .../astminer/parse/cpp/FuzzyCppParserTest.kt | 24 ++--- .../parse/cpp/FuzzyMethodSplitterTest.kt | 32 +++--- .../gumtree/java/GumTreeJavaParserTest.kt | 2 +- .../GumTreePythonFunctionSplitterTest.kt | 10 +- .../astminer/paths/PathWorkerTestBase.kt | 13 ++- .../astminer/paths/PathWorkerTestUtil.kt | 15 ++- .../astminer/storage/ast/DotAstStorageTest.kt | 12 +-- 78 files changed, 567 insertions(+), 618 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 27cb9e7f..3c3cbff0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -67,7 +67,7 @@ jobs: if: ${{ always() }} with: name: test-report - path: build/reports/tests/**/* + path: build/astminer/reports/tests/**/* code-style: needs: build 
@@ -87,4 +87,4 @@ jobs: uses: github/codeql-action/upload-sarif@v1 if: ${{ always() }} with: - sarif_file: build/detekt.sarif + sarif_file: build/astminer/reports/detekt/detekt.sarif diff --git a/build.gradle.kts b/build.gradle.kts index a15a84af..d4ceace8 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -29,7 +29,6 @@ dependencies { api("com.github.gumtreediff", "client", "2.1.2") api("com.github.gumtreediff", "gen.jdt", "2.1.2") api("com.github.gumtreediff", "gen.python", "2.1.2") - // https://mvnrepository.com/artifact/io.shiftleft/fuzzyc2cpg api("io.shiftleft", "fuzzyc2cpg_2.13", "1.2.30") @@ -47,6 +46,9 @@ dependencies { // ===== Test ===== testImplementation("junit:junit:4.13.2") testImplementation(kotlin("test-junit")) + + // ===== Detekt ===== + detektPlugins("io.gitlab.arturbosch.detekt:detekt-formatting:1.17.1") } val generatedSourcesPath = "src/main/generated" @@ -133,7 +135,8 @@ tasks.withType { detekt { allRules = true - buildUponDefaultConfig = true + autoCorrect = true + parallel = true config = files("detekt.yaml") } diff --git a/detekt.yaml b/detekt.yaml index b4a5d99d..e914391b 100644 --- a/detekt.yaml +++ b/detekt.yaml @@ -14,18 +14,28 @@ exceptions: active: false style: - WildcardImport: - active: false + ForbiddenComment: + allowedPatterns: 'TODO:' MagicNumber: ignorePropertyDeclaration: true ignoreAnnotation: true ignoreEnums: true ignoreNumbers: ['-1', '0', '1', '2', '60', '100', '1000'] - excludes: ['**/test/**', '**/*Test.kt', '**/*Spec.kt', '**/examples/**'] - ForbiddenComment: - allowedPatterns: 'TODO:' + excludes: ['**/test/**', '**/examples/**'] ReturnCount: max: 5 + WildcardImport: + active: false + +formatting: + autoCorrect: true + NoConsecutiveBlankLines: + active: true + NoWildcardImports: + active: false + +comments: + active: false output-reports: active: true diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index 2457e6f0..e451b931 100644 --- a/src/main/kotlin/astminer/Main.kt +++ 
b/src/main/kotlin/astminer/Main.kt @@ -6,13 +6,13 @@ import astminer.pipeline.Pipeline import astminer.pipeline.branch.IllegalFilterException import astminer.pipeline.branch.IllegalLabelExtractorException import com.charleskorn.kaml.PolymorphismStyle +import com.charleskorn.kaml.Yaml +import com.charleskorn.kaml.YamlConfiguration import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.arguments.argument import com.github.ajalt.clikt.parameters.types.file import kotlinx.serialization.SerializationException import kotlinx.serialization.decodeFromString -import com.charleskorn.kaml.Yaml -import com.charleskorn.kaml.YamlConfiguration import mu.KotlinLogging import java.io.File @@ -38,7 +38,7 @@ class PipelineRunner : CliktCommand(name = "") { } catch (e: FunctionInfoPropertyNotImplementedException) { report( "The chosen parser does not implement the required properties. " + - "Consider implementing them or change the parser", + "Consider implementing them or change the parser", e ) } @@ -59,7 +59,6 @@ class PipelineRunner : CliktCommand(name = "") { ) ) } - } fun main(args: Array) = PipelineRunner().main(args) diff --git a/src/main/kotlin/astminer/common/FileUtil.kt b/src/main/kotlin/astminer/common/FileUtil.kt index e609fd46..0d83dc51 100644 --- a/src/main/kotlin/astminer/common/FileUtil.kt +++ b/src/main/kotlin/astminer/common/FileUtil.kt @@ -11,9 +11,7 @@ import java.io.File * @param file file in which the number of lines is counted * @return number of lines in a given file */ -fun numberOfLines(file: File): Int { - return file.readLines().filter { it != "" }.size -} +fun numberOfLines(file: File): Int = file.readLines().filter { it != "" }.size /** * Changes extension of a given file to the new one. 
diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index 82c02d47..267f9356 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -1,6 +1,6 @@ package astminer.common -const val DEFAULT_TOKEN = "EMPTY" +const val EMPTY_TOKEN = "EMPTY" /** * The function was adopted from the original code2vec implementation in order to match their behavior: @@ -8,10 +8,10 @@ const val DEFAULT_TOKEN = "EMPTY" */ fun normalizeToken(token: String, defaultToken: String): String { val cleanToken = token.lowercase() - .replace("\\\\n".toRegex(), "") // escaped new line - .replace("//s+".toRegex(), "") // whitespaces - .replace("[\"',]".toRegex(), "") // quotes, apostrophes, commas - .replace("\\P{Print}".toRegex(), "") // unicode weird characters + .replace("\\\\n".toRegex(), "") // escaped new line + .replace("//s+".toRegex(), "") // whitespaces + .replace("[\"',]".toRegex(), "") // quotes, apostrophes, commas + .replace("\\P{Print}".toRegex(), "") // unicode weird characters val stripped = cleanToken.replace("[^A-Za-z]".toRegex(), "") @@ -28,8 +28,8 @@ fun normalizeToken(token: String, defaultToken: String): String { * https://github.com/tech-srl/code2vec/blob/master/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java */ fun splitToSubtokens(token: String) = token - .trim() - .split("(?<=[a-z])(?=[A-Z])|_|[0-9]|(?<=[A-Z])(?=[A-Z][a-z])|\\s+".toRegex()) - .map { s -> normalizeToken(s, "") } - .filter { it.isNotEmpty() } - .toList() + .trim() + .split("(?<=[a-z])(?=[A-Z])|_|[0-9]|(?<=[A-Z])(?=[A-Z][a-z])|\\s+".toRegex()) + .map { s -> normalizeToken(s, "") } + .filter { it.isNotEmpty() } + .toList() diff --git a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt index 9d1afa76..2b40034f 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt +++ 
b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt @@ -7,7 +7,7 @@ interface TreeFunctionSplitter { class FunctionInfoPropertyNotImplementedException(propertyName: String) : UnsupportedOperationException( "The property `$propertyName` of FunctionInfo for this language and parser type is not implemented yet. " + - "Consider implementing it." + "Consider implementing it." ) private fun notImplemented(propertyName: String): Nothing = diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/HandlerModel.kt index 7f312df6..e944bdef 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/HandlerModel.kt @@ -24,7 +24,6 @@ abstract class LanguageHandler { abstract val parseResult: ParseResult protected abstract val splitter: TreeFunctionSplitter - fun splitIntoFunctions(): Collection> { - return splitter.splitIntoFunctions(parseResult.root, parseResult.filePath) - } + fun splitIntoFunctions(): Collection> = + splitter.splitIntoFunctions(parseResult.root, parseResult.filePath) } diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index c2f6e600..1bebbeac 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -1,13 +1,10 @@ package astminer.common.model -import astminer.common.model.LabeledResult -import astminer.common.DEFAULT_TOKEN +import astminer.common.EMPTY_TOKEN import astminer.common.splitToSubtokens import java.io.File import java.io.InputStream import java.util.* -import kotlin.NoSuchElementException - abstract class Node { abstract val typeLabel: String @@ -18,14 +15,13 @@ abstract class Node { val normalizedToken: String? 
by lazy { originalToken?.let { val subtokens = splitToSubtokens(it) - if (subtokens.isEmpty()) null - else subtokens.joinToString(TOKEN_DELIMITER) + if (subtokens.isEmpty()) null else subtokens.joinToString(TOKEN_DELIMITER) } } var technicalToken: String? = null val token: String - get() = listOfNotNull(technicalToken, normalizedToken, originalToken).firstOrNull() ?: DEFAULT_TOKEN + get() = listOfNotNull(technicalToken, normalizedToken, originalToken).firstOrNull() ?: EMPTY_TOKEN val metadata: MutableMap = HashMap() fun isLeaf() = children.isEmpty() @@ -42,57 +38,22 @@ abstract class Node { abstract fun removeChildrenOfType(typeLabel: String) - fun preOrderIterator(): Iterator = PreOrderIterator(this) - open fun preOrder(): List = PreOrderIterator(this).asSequence().toList() - - fun postOrderIterator(): Iterator = PostOrderIterator(this) - open fun postOrder(): List = PostOrderIterator(this).asSequence().toList() - - companion object { - const val TOKEN_DELIMITER = "|" - } -} - -class PreOrderIterator(root: Node): Iterator { - private val stack = ArrayDeque() - - init { - stack.push(root) - } - - override fun hasNext(): Boolean { - return stack.isNotEmpty() + private fun doTraversePreOrder(resultList: MutableList) { + resultList.add(this) + children.forEach { it.doTraversePreOrder(resultList) } } + fun preOrderIterator(): Iterator = preOrder().listIterator() + open fun preOrder(): List = mutableListOf().also { doTraversePreOrder(it) } - override fun next(): Node { - if (hasNext()) { - val currentNode = stack.pop() - currentNode.children.asReversed().forEach { stack.push(it) } - return currentNode - } else throw NoSuchElementException() - } -} - -class PostOrderIterator(root: Node): Iterator { - private data class NodeWrapper(val node: Node, var isChecked: Boolean = false) - - private val tree = mutableListOf(NodeWrapper(root)) - - private fun fillWithChildren(wrapper: NodeWrapper){ - if (!wrapper.isChecked) { - tree.addAll(wrapper.node.children.asReversed().map { 
NodeWrapper(it) }) - wrapper.isChecked = true - } + private fun doTraversePostOrder(resultList: MutableList) { + children.forEach { it.doTraversePostOrder(resultList) } + resultList.add(this) } + fun postOrderIterator(): Iterator = postOrder().listIterator() + open fun postOrder(): List = mutableListOf().also { doTraversePostOrder(it) } - override fun hasNext(): Boolean = tree.isNotEmpty() - - override fun next(): Node { - while (!tree.last().isChecked) { - fillWithChildren(tree.last()) - } - if (hasNext()) return tree.removeLast().node - else throw NoSuchElementException() + companion object { + const val TOKEN_DELIMITER = "|" } } @@ -112,6 +73,4 @@ interface Parser { fun parseFile(file: File) = ParseResult(parseInputStream(file.inputStream()), file.path) } -data class ParseResult(val root: T, val filePath: String) { - fun labeledWith(label: String): LabeledResult = LabeledResult(root, label, filePath) -} +data class ParseResult(val root: T, val filePath: String) diff --git a/src/main/kotlin/astminer/common/model/PathContextsModel.kt b/src/main/kotlin/astminer/common/model/PathContextsModel.kt index 6ca55927..7c133226 100644 --- a/src/main/kotlin/astminer/common/model/PathContextsModel.kt +++ b/src/main/kotlin/astminer/common/model/PathContextsModel.kt @@ -1,6 +1,5 @@ package astminer.common.model - data class ASTPath(val upwardNodes: List, val topNode: Node, val downwardNodes: List) enum class Direction { UP, DOWN, TOP } diff --git a/src/main/kotlin/astminer/common/model/PipelineModel.kt b/src/main/kotlin/astminer/common/model/PipelineModel.kt index dd3f6420..343e24ea 100644 --- a/src/main/kotlin/astminer/common/model/PipelineModel.kt +++ b/src/main/kotlin/astminer/common/model/PipelineModel.kt @@ -2,7 +2,6 @@ package astminer.common.model import java.io.Closeable - interface Filter interface LabelExtractor @@ -31,6 +30,8 @@ interface FunctionLabelExtractor : LabelExtractor { */ data class LabeledResult(val root: T, val label: String, val filePath: String) +fun 
ParseResult.labeledWith(label: String): LabeledResult = LabeledResult(root, label, filePath) + /** * Storage saved labeled results to disk in a specified format. * Storage might extract any data from labeled result. diff --git a/src/main/kotlin/astminer/common/storage/CsvFileUtil.kt b/src/main/kotlin/astminer/common/storage/CsvFileUtil.kt index bc40c140..0751e274 100644 --- a/src/main/kotlin/astminer/common/storage/CsvFileUtil.kt +++ b/src/main/kotlin/astminer/common/storage/CsvFileUtil.kt @@ -3,11 +3,13 @@ package astminer.common.storage import astminer.common.model.OrientedNodeType import java.io.File -fun dumpIdStorageToCsv(storage: RankedIncrementalIdStorage, - typeHeader: String, - csvSerializer: (T) -> String, - file: File, - limit: Long? = null) { +fun dumpIdStorageToCsv( + storage: RankedIncrementalIdStorage, + typeHeader: String, + csvSerializer: (T) -> String, + file: File, + limit: Long? = null +) { file.printWriter().use { out -> out.println("id,$typeHeader") storage.idPerItem.forEach { diff --git a/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt b/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt index 268298fe..6a20b0e2 100644 --- a/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt +++ b/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt @@ -46,9 +46,7 @@ class RankedIncrementalIdStorage { /** * Returns the item by its [id] */ - fun lookUpValue(id: Id): T? { - return idPerItem.entries.firstOrNull { it.value == id }?.key - } + fun lookUpValue(id: Id): T? 
= idPerItem.entries.firstOrNull { it.value == id }?.key /** * Returns the rank of the [item] @@ -74,10 +72,10 @@ class RankedIncrementalIdStorage { */ fun computeRanks() { val sortedIds = idCountMap.entries - .sortedBy { it.value } - .reversed() - .map { it.key } - .toList() + .sortedBy { it.value } + .reversed() + .map { it.key } + .toList() val idRankMap = mutableMapOf() for ((index, id) in sortedIds.withIndex()) { idRankMap[id] = (index + 1).toLong() diff --git a/src/main/kotlin/astminer/config/ParserConfig.kt b/src/main/kotlin/astminer/config/ParserConfig.kt index c85fd1b0..f9e57aa7 100644 --- a/src/main/kotlin/astminer/config/ParserConfig.kt +++ b/src/main/kotlin/astminer/config/ParserConfig.kt @@ -18,17 +18,33 @@ data class ParserConfig( @Serializable enum class ParserType { - @SerialName("antlr") Antlr, - @SerialName("gumtree") GumTree, - @SerialName("fuzzy") Fuzzy + @SerialName("antlr") + Antlr, + + @SerialName("gumtree") + GumTree, + + @SerialName("fuzzy") + Fuzzy } @Serializable enum class FileExtension(val fileExtension: String) { - @SerialName("py") Python("py"), - @SerialName("java") Java("java"), - @SerialName("js") JavaScript("js"), - @SerialName("c") C("c"), - @SerialName("cpp") Cpp("cpp"), - @SerialName("php") PHP("php") + @SerialName("py") + Python("py"), + + @SerialName("java") + Java("java"), + + @SerialName("js") + JavaScript("js"), + + @SerialName("c") + C("c"), + + @SerialName("cpp") + Cpp("cpp"), + + @SerialName("php") + PHP("php") } diff --git a/src/main/kotlin/astminer/config/StorageConfigs.kt b/src/main/kotlin/astminer/config/StorageConfigs.kt index 6185a32c..0646581b 100644 --- a/src/main/kotlin/astminer/config/StorageConfigs.kt +++ b/src/main/kotlin/astminer/config/StorageConfigs.kt @@ -9,7 +9,6 @@ import astminer.storage.path.PathBasedStorageConfig import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable import kotlinx.serialization.Transient -import java.io.File /** * Config for storage that saved the results 
on the disk diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt index fa9320fe..5fa28a11 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFiles.kt @@ -7,7 +7,7 @@ import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File -//Retrieve paths from Java files, using a generated parser. +// Retrieve paths from Java files, using a generated parser. fun allJavaFiles() { val inputDir = "src/test/resources/examples/" diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt index 0e0b6691..3ace42f3 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt @@ -3,7 +3,7 @@ package astminer.examples import astminer.config.* import astminer.pipeline.Pipeline -//Retrieve paths from Java files, using a GumTree parser. +// Retrieve paths from Java files, using a GumTree parser. 
fun allJavaFilesGumTree() { val config = PipelineConfig( inputDir = "src/test/resources/gumTreeMethodSplitter/", diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt index 73ba198c..74dec938 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/AllJavaMethods.kt @@ -1,15 +1,14 @@ package astminer.examples -import astminer.common.model.LabeledResult import astminer.common.model.FunctionInfo +import astminer.common.model.LabeledResult import astminer.parse.gumtree.GumTreeNode -import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.parse.gumtree.java.GumTreeJavaFunctionSplitter +import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import java.io.File - private fun getCsvFriendlyMethodId(functionInfo: FunctionInfo): String { val className = functionInfo.enclosingElement?.name ?: "" val methodName = functionInfo.name @@ -17,9 +16,8 @@ private fun getCsvFriendlyMethodId(functionInfo: FunctionInfo): Str return "$className.$methodName($parameterTypes)" } - -//Retrieve paths from all Java files, using a GumTree parser. -//GumTreeMethodSplitter is used to extract individual method nodes from the compilation unit tree. +// Retrieve paths from all Java files, using a GumTree parser. +// GumTreeMethodSplitter is used to extract individual method nodes from the compilation unit tree. 
fun allJavaMethods() { val inputDir = "src/test/resources/gumTreeMethodSplitter" @@ -27,14 +25,14 @@ fun allJavaMethods() { val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) File(inputDir).forFilesWithSuffix(".java") { file -> - //parse file + // parse file val fileNode = GumTreeJavaParser().parseInputStream(file.inputStream()) - //extract method nodes + // extract method nodes val methodNodes = GumTreeJavaFunctionSplitter().splitIntoFunctions(fileNode, file.path) methodNodes.forEach { methodInfo -> - //Retrieve a method identifier + // Retrieve a method identifier val entityId = "${file.path}::${getCsvFriendlyMethodId(methodInfo)}" val labelingResult = LabeledResult(fileNode, entityId, file.path) storage.store(labelingResult) diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt index e759973f..cf3b216f 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/main/kotlin/astminer/examples/AllPythonFiles.kt @@ -3,7 +3,6 @@ package astminer.examples import astminer.config.* import astminer.pipeline.Pipeline - fun allPythonFiles() { val config = PipelineConfig( inputDir = "src/test/resources/examples", diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/main/kotlin/astminer/examples/AllPythonMethods.kt index bc54721a..d7ddab1e 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/main/kotlin/astminer/examples/AllPythonMethods.kt @@ -1,9 +1,9 @@ package astminer.examples -import astminer.common.model.LabeledResult import astminer.common.model.FunctionInfo -import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter +import astminer.common.model.LabeledResult import astminer.parse.gumtree.GumTreeNode +import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter import astminer.parse.gumtree.python.GumTreePythonParser import astminer.storage.path.Code2VecPathStorage import 
astminer.storage.path.PathBasedStorageConfig diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt index 70d1a011..b8ef0c3e 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt @@ -3,9 +3,8 @@ package astminer.examples import astminer.config.* import astminer.pipeline.Pipeline - -//Retrieve paths from all Java files, using a GumTree parser. -//GumTreeMethodSplitter is used to extract individual method nodes from the compilation unit tree. +// Retrieve paths from all Java files, using a GumTree parser. +// GumTreeMethodSplitter is used to extract individual method nodes from the compilation unit tree. fun code2vecJavaMethods() { val folder = "src/test/resources/code2vecPathMining" val outputDir = "out_examples/code2vecPathMining" diff --git a/src/main/kotlin/astminer/examples/Common.kt b/src/main/kotlin/astminer/examples/Common.kt index a036d2fe..0e3e1202 100644 --- a/src/main/kotlin/astminer/examples/Common.kt +++ b/src/main/kotlin/astminer/examples/Common.kt @@ -7,5 +7,5 @@ fun iterateFiles(dir: File, condition: (File) -> Boolean, action: (File) -> Unit } fun File.forFilesWithSuffix(extension: String, action: (File) -> Unit) { - iterateFiles(this, ({ file: File -> file.path.endsWith(extension) }), action) + iterateFiles(this, { file: File -> file.path.endsWith(extension) }, action) } diff --git a/src/main/kotlin/astminer/examples/FeatureExtraction.kt b/src/main/kotlin/astminer/examples/FeatureExtraction.kt index 4c1dcf32..57cd35b7 100644 --- a/src/main/kotlin/astminer/examples/FeatureExtraction.kt +++ b/src/main/kotlin/astminer/examples/FeatureExtraction.kt @@ -7,7 +7,7 @@ import java.io.File fun parseAndCollectFeatures() { val parser = GumTreeJavaParser() - val features : List> = + val features: List> = listOf(Depth, NumberOfNodes, BranchingFactor, CompressiblePathLengths, Tokens, NodeTypes) val folderInput = 
"./testData/featureextraction" diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt index 8b42afb6..9a76c7fc 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt +++ b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt @@ -12,7 +12,7 @@ interface TreeFeature { * @param tree tree for which this feature is computed * @return computed feature */ - fun compute(tree: Node) : T + fun compute(tree: Node): T } /** @@ -20,7 +20,7 @@ interface TreeFeature { */ object Depth : TreeFeature { override fun compute(tree: Node): Int { - val max = tree.children.map { compute(it) }.maxOrNull() ?: 0 + val max = tree.children.map { compute(it) }.maxOrNull() ?: 0 return max + 1 } } @@ -46,18 +46,14 @@ object BranchingFactor : TreeFeature { * Tree feature for computing the number of nodes in a given tree. */ object NumberOfNodes : TreeFeature { - override fun compute(tree: Node): Int { - return tree.children.sumOf { compute(it) } + 1 - } + override fun compute(tree: Node): Int = tree.children.sumOf { compute(it) } + 1 } /** * Tree feature for computing list of all node tokens from a given tree. */ object Tokens : TreeFeature> { - override fun compute(tree: Node): List { - return findTokens(tree, ArrayList()) - } + override fun compute(tree: Node): List = findTokens(tree, ArrayList()) private fun findTokens(node: Node, tokensList: MutableList): List { node.children.forEach { findTokens(it, tokensList) } @@ -70,9 +66,7 @@ object Tokens : TreeFeature> { * Tree feature for computing list of all node types from a given tree. 
*/ object NodeTypes : TreeFeature> { - override fun compute(tree: Node): List { - return findNodeTypes(tree, ArrayList()) - } + override fun compute(tree: Node): List = findNodeTypes(tree, ArrayList()) private fun findNodeTypes(node: Node, nodeTypesList: MutableList): List { node.children.forEach { findNodeTypes(it, nodeTypesList) } @@ -92,13 +86,11 @@ object CompressiblePathLengths : TreeFeature> { return pathLengths } - private fun Node.isStartingNode() : Boolean { - return this.hasOneChild() && !(this.parent?.hasOneChild() ?: false) - } + private fun Node.isStartingNode(): Boolean = this.hasOneChild() && !(this.parent?.hasOneChild() ?: false) - private fun Node.hasOneChild() : Boolean = children.size == 1 + private fun Node.hasOneChild(): Boolean = children.size == 1 - private fun findPathLengthFromStartingNode(node: Node) : Int { + private fun findPathLengthFromStartingNode(node: Node): Int { var length = 1 var next = node.children.first() diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeatureValueStorage.kt b/src/main/kotlin/astminer/featureextraction/TreeFeatureValueStorage.kt index 0f0a10eb..2239c7ce 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeatureValueStorage.kt +++ b/src/main/kotlin/astminer/featureextraction/TreeFeatureValueStorage.kt @@ -11,14 +11,12 @@ import java.io.File * @property fileName name of parsed file * @property numberOfLines number of lines in parsed file */ -data class ParsedTree(val parserName : String, val tree: Node, val fileName : String, val numberOfLines: Int) +data class ParsedTree(val parserName: String, val tree: Node, val fileName: String, val numberOfLines: Int) /** * Gets simple name of Any. 
*/ -fun Any.className() : String { - return this::class.java.simpleName -} +fun Any.className(): String = this::class.java.simpleName /** * Class for store and save [tree features][astminer.featureextraction.TreeFeature] @@ -49,7 +47,7 @@ class TreeFeatureValueStorage(private val separator: String) { * Stores new tree feature to compute for stored parsed trees. * @param feature feature to store */ - fun storeFeature(feature : TreeFeature) { + fun storeFeature(feature: TreeFeature) { features.add(feature) } @@ -81,17 +79,17 @@ class TreeFeatureValueStorage(private val separator: String) { val lines = ArrayList() val csvHeaders = fields.joinToString(separator = separator) { it.header } - lines.add(features.map { it.className() }.fold(csvHeaders) { c, f -> "$c$separator$f" } ) + lines.add(features.map { it.className() }.fold(csvHeaders) { c, f -> "$c$separator$f" }) parsedTrees.forEach { t -> val csvFields = fields.joinToString(separator = separator) { it.value(t) } - lines.add(features.map { toCsvString(it.compute(t.tree)) }.fold(csvFields) { c, f -> "$c$separator$f" } ) + lines.add(features.map { toCsvString(it.compute(t.tree)) }.fold(csvFields) { c, f -> "$c$separator$f" }) } writeLinesToFile(lines, file) } - private fun toCsvString(a : Any?) 
: String { + private fun toCsvString(a: Any?): String { if (a is List<*>) { return "\"${a.joinToString { toCsvString(it) }.replace("\"","\"\"")}\"" } diff --git a/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt b/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt index 4182839f..ba6ed3da 100644 --- a/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt +++ b/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt @@ -1,9 +1,6 @@ package astminer.labelextractor -import astminer.common.model.FileLabelExtractor -import astminer.common.model.LabeledResult -import astminer.common.model.Node -import astminer.common.model.ParseResult +import astminer.common.model.* import java.io.File /** diff --git a/src/main/kotlin/astminer/parse/FindingUtils.kt b/src/main/kotlin/astminer/parse/FindingUtils.kt index 84f5b78d..9ecd19bd 100644 --- a/src/main/kotlin/astminer/parse/FindingUtils.kt +++ b/src/main/kotlin/astminer/parse/FindingUtils.kt @@ -2,10 +2,10 @@ package astminer.parse import astminer.common.model.Node -inline fun T.findEnclosingElementBy(condition: (T) -> Boolean): T? { +inline fun T.findEnclosingElementBy(condition: (T) -> Boolean): T? { var curNode = this.parent while (!(curNode == null || condition(curNode as T))) { curNode = curNode.parent } - return curNode as T? + return curNode as? 
T } diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt index dbf9b9fa..c8d81a40 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt @@ -38,10 +38,10 @@ object AntlrJavascriptHandlerFactory : HandlerFactory { } } -object AntlrPHPHandlerFactory: HandlerFactory { +object AntlrPHPHandlerFactory : HandlerFactory { override fun createHandler(file: File): LanguageHandler = AntlrPHPHandler(file) - class AntlrPHPHandler(file: File): LanguageHandler() { + class AntlrPHPHandler(file: File) : LanguageHandler() { override val parseResult: ParseResult = PHPParser().parseFile(file) override val splitter: TreeFunctionSplitter = PHPFunctionSplitter() } diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index c6d1841c..71e45c6f 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -1,6 +1,6 @@ package astminer.parse.antlr -import astminer.common.DEFAULT_TOKEN +import astminer.common.EMPTY_TOKEN import astminer.common.model.Node import org.antlr.v4.runtime.ParserRuleContext import org.antlr.v4.runtime.Vocabulary @@ -11,7 +11,10 @@ fun convertAntlrTree(tree: ParserRuleContext, ruleNames: Array, vocabula compressTree(convertRuleContext(tree, ruleNames, null, vocabulary)) private fun convertRuleContext( - ruleContext: ParserRuleContext, ruleNames: Array, parent: AntlrNode?, vocabulary: Vocabulary + ruleContext: ParserRuleContext, + ruleNames: Array, + parent: AntlrNode?, + vocabulary: Vocabulary ): AntlrNode { val typeLabel = ruleNames[ruleContext.ruleIndex] val currentNode = AntlrNode(typeLabel, parent, null) @@ -29,13 +32,11 @@ private fun convertRuleContext( return currentNode } -private fun convertTerminal(terminalNode: TerminalNode, parent: AntlrNode?, vocabulary: Vocabulary): AntlrNode { - return 
AntlrNode(vocabulary.getSymbolicName(terminalNode.symbol.type), parent, terminalNode.symbol.text) -} +private fun convertTerminal(terminalNode: TerminalNode, parent: AntlrNode?, vocabulary: Vocabulary): AntlrNode = + AntlrNode(vocabulary.getSymbolicName(terminalNode.symbol.type), parent, terminalNode.symbol.text) -private fun convertErrorNode(errorNode: ErrorNode, parent: AntlrNode?): AntlrNode { - return AntlrNode("Error", parent, errorNode.text) -} +private fun convertErrorNode(errorNode: ErrorNode, parent: AntlrNode?): AntlrNode = + AntlrNode("Error", parent, errorNode.text) /** * Remove intermediate nodes that have a single child. @@ -56,9 +57,9 @@ fun compressTree(root: AntlrNode): AntlrNode { return if (root.children.size == 1) { val child = compressTree(root.children.first()) val compressedNode = AntlrNode( - root.typeLabel + "|" + child.typeLabel, - root.parent, - child.originalToken + root.typeLabel + "|" + child.typeLabel, + root.parent, + child.originalToken ) compressedNode.replaceChildren(child.children) compressedNode @@ -68,7 +69,6 @@ fun compressTree(root: AntlrNode): AntlrNode { } } - fun decompressTypeLabel(typeLabel: String) = typeLabel.split("|") fun AntlrNode.lastLabel() = decompressTypeLabel(typeLabel).last() @@ -84,9 +84,7 @@ fun AntlrNode.hasFirstLabel(label: String): Boolean = firstLabel() == label fun AntlrNode.firstLabelIn(labels: List): Boolean = labels.contains(firstLabel()) fun Node.getTokensFromSubtree(): String = - if (isLeaf()) originalToken ?: DEFAULT_TOKEN - else children.joinToString(separator = "") { child -> child.getTokensFromSubtree() } + if (isLeaf()) originalToken ?: EMPTY_TOKEN else children.joinToString(separator = "") { it.getTokensFromSubtree() } -fun AntlrNode.getItOrChildrenOfType(typeLabel: String) : List = - if (hasLastLabel(typeLabel)) listOf(this) - else this.getChildrenOfType(typeLabel).map { it } +fun AntlrNode.getItOrChildrenOfType(typeLabel: String): List = + if (hasLastLabel(typeLabel)) listOf(this) else 
this.getChildrenOfType(typeLabel).map { it } diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt index 89dc5047..d6ac9ec6 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -10,23 +10,7 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode, override val filePath: override val returnType: String? = collectReturnType() override val enclosingElement: EnclosingElement? = collectEnclosingClass() - companion object { - private const val METHOD_RETURN_TYPE_NODE = "typeTypeOrVoid" - private const val METHOD_NAME_NODE = "IDENTIFIER" - - private const val CLASS_DECLARATION_NODE = "classDeclaration" - private const val CLASS_NAME_NODE = "IDENTIFIER" - - private const val METHOD_PARAMETER_NODE = "formalParameters" - private const val METHOD_PARAMETER_INNER_NODE = "formalParameterList" - private val METHOD_SINGLE_PARAMETER_NODES = listOf("formalParameter", "lastFormalParameter") - private const val PARAMETER_RETURN_TYPE_NODE = "typeType" - private const val PARAMETER_NAME_NODE = "variableDeclaratorId" - } - - private fun collectNameNode(): AntlrNode? { - return root.getChildOfType(METHOD_NAME_NODE) - } + private fun collectNameNode(): AntlrNode? = root.getChildOfType(METHOD_NAME_NODE) private fun collectReturnType(): String? 
{ val returnTypeNode = root.getChildOfType(METHOD_RETURN_TYPE_NODE) @@ -60,9 +44,22 @@ class AntlrJavaFunctionInfo(override val root: AntlrNode, override val filePath: val returnTypeToken = returnTypeNode?.getTokensFromSubtree() val parameterName = parameterNode.getChildOfType(PARAMETER_NAME_NODE)?.getTokensFromSubtree() - ?: throw IllegalStateException("Parameter name wasn't found") + ?: error("Parameter name wasn't found") return FunctionInfoParameter(parameterName, returnTypeToken) } -} + companion object { + private const val METHOD_RETURN_TYPE_NODE = "typeTypeOrVoid" + private const val METHOD_NAME_NODE = "IDENTIFIER" + + private const val CLASS_DECLARATION_NODE = "classDeclaration" + private const val CLASS_NAME_NODE = "IDENTIFIER" + + private const val METHOD_PARAMETER_NODE = "formalParameters" + private const val METHOD_PARAMETER_INNER_NODE = "formalParameterList" + private val METHOD_SINGLE_PARAMETER_NODES = listOf("formalParameter", "lastFormalParameter") + private const val PARAMETER_RETURN_TYPE_NODE = "typeType" + private const val PARAMETER_NAME_NODE = "variableDeclaratorId" + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt index 6ffcadb0..69099be6 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt @@ -9,7 +9,7 @@ class JavaFunctionSplitter : TreeFunctionSplitter { override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { - (it).hasLastLabel(methodNodeType) + it.hasLastLabel(methodNodeType) } return methodRoots.map { AntlrJavaFunctionInfo(it, filePath) } } diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt index 4f32066a..f9ccd945 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt +++ 
b/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt @@ -4,10 +4,10 @@ import astminer.common.model.Parser import astminer.parse.ParsingException import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.convertAntlrTree -import org.antlr.v4.runtime.CommonTokenStream import me.vovak.antlr.parser.Java8Lexer import me.vovak.antlr.parser.Java8Parser import org.antlr.v4.runtime.CharStreams +import org.antlr.v4.runtime.CommonTokenStream import java.io.InputStream import java.lang.Exception diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt index cd2bc27c..33589567 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt @@ -2,7 +2,6 @@ package astminer.parse.antlr.javascript import astminer.common.model.* import astminer.parse.antlr.* -import astminer.parse.antlr.java.AntlrJavaFunctionInfo import astminer.parse.findEnclosingElementBy /** @@ -10,14 +9,6 @@ Base class for describing JavaScript methods, functions or arrow functions. */ abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { - companion object { - private val ENCLOSING_ELEMENT_NODES = - listOf("functionDeclaration", "variableDeclaration", "classDeclaration", "methodDefinition") - private const val ENCLOSING_ELEMENT_NAME_NODE = "Identifier" - - private const val SINGLE_PARAMETER_NODE = "formalParameterArg" - private const val PARAMETER_NAME_NODE = "Identifier" - } protected fun collectEnclosingElement(): EnclosingElement? 
{ val enclosingElement = root.findEnclosingElementBy { @@ -30,9 +21,8 @@ abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode, override ) } - private fun AntlrNode.containsLabelIn(labels: List): Boolean { - return decompressTypeLabel(typeLabel).intersect(labels).isNotEmpty() - } + private fun AntlrNode.containsLabelIn(labels: List): Boolean = + decompressTypeLabel(typeLabel).intersect(labels).isNotEmpty() private fun getEnclosingElementName(enclosingRoot: AntlrNode?): String? { return enclosingRoot?.children?.firstOrNull { @@ -46,7 +36,7 @@ abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode, override "classDeclaration" -> EnclosingElementType.Class "methodDefinition" -> EnclosingElementType.Method "variableDeclaration" -> EnclosingElementType.VariableDeclaration - else -> throw IllegalStateException("Couldn't derive enclosing element type") + else -> error("Couldn't derive enclosing element type") } } @@ -60,25 +50,30 @@ abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode, override parametersRoot.hasLastLabel(PARAMETER_NAME_NODE) -> listOf(parametersRoot) // Have many parameters or one indicated not only by it's name - else -> parametersRoot - .getItOrChildrenOfType(SINGLE_PARAMETER_NODE) - .map { it.getChildOfType(PARAMETER_NAME_NODE) ?: it } + else -> + parametersRoot + .getItOrChildrenOfType(SINGLE_PARAMETER_NODE) + .map { it.getChildOfType(PARAMETER_NAME_NODE) ?: it } } return parameterNameNodes.map { - val parameterName = it.originalToken ?: throw IllegalStateException("Parameter name wasn't found") - FunctionInfoParameter(name = parameterName, type = null) + check(it.originalToken != null) { "Parameter name wasn't found" } + FunctionInfoParameter(name = it.originalToken, type = null) } } abstract fun getParametersRoot(): AntlrNode? 
-} -class JavaScriptArrowInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { companion object { - private const val ARROW_NAME_NODE = "Identifier" - private const val ARROW_PARAMETER_NODE = "arrowFunctionParameters" - private const val ARROW_PARAMETER_INNER_NODE = "formalParameterList" + private val ENCLOSING_ELEMENT_NODES = + listOf("functionDeclaration", "variableDeclaration", "classDeclaration", "methodDefinition") + private const val ENCLOSING_ELEMENT_NAME_NODE = "Identifier" + + private const val SINGLE_PARAMETER_NODE = "formalParameterArg" + private const val PARAMETER_NAME_NODE = "Identifier" } +} + +class JavaScriptArrowInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { override val enclosingElement: EnclosingElement? = collectEnclosingElement() override val parameters: List = collectParameters() @@ -88,14 +83,15 @@ class JavaScriptArrowInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptEl val parameterRoot = root.getChildOfType(ARROW_PARAMETER_NODE) return parameterRoot?.getChildOfType(ARROW_PARAMETER_INNER_NODE) ?: parameterRoot } -} -class JavaScriptMethodInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { companion object { - private val METHOD_GETTERS_SETTERS = listOf("getter", "setter") - private const val METHOD_NAME_NODE = "identifierName" - private const val METHOD_PARAMETER_NODE = "formalParameterList" + private const val ARROW_NAME_NODE = "Identifier" + private const val ARROW_PARAMETER_NODE = "arrowFunctionParameters" + private const val ARROW_PARAMETER_INNER_NODE = "formalParameterList" } +} + +class JavaScriptMethodInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { override val enclosingElement: EnclosingElement? 
= collectEnclosingElement() override val parameters: List = collectParameters() @@ -112,17 +108,24 @@ class JavaScriptMethodInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptE } override fun getParametersRoot(): AntlrNode? = root.getChildOfType(METHOD_PARAMETER_NODE) -} -class JavaScriptFunctionInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { companion object { - private const val FUNCTION_NAME_NODE = "Identifier" - private const val FUNCTION_PARAMETER_NODE = "formalParameterList" + private val METHOD_GETTERS_SETTERS = listOf("getter", "setter") + private const val METHOD_NAME_NODE = "identifierName" + private const val METHOD_PARAMETER_NODE = "formalParameterList" } +} + +class JavaScriptFunctionInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { override val enclosingElement: EnclosingElement? = collectEnclosingElement() override val parameters: List = collectParameters() override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION_NAME_NODE) override fun getParametersRoot(): AntlrNode? = root.getChildOfType(FUNCTION_PARAMETER_NODE) + + companion object { + private const val FUNCTION_NAME_NODE = "Identifier" + private const val FUNCTION_PARAMETER_NODE = "formalParameterList" + } } diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt index 84822c88..ed918519 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt @@ -9,12 +9,6 @@ import astminer.parse.antlr.decompressTypeLabel * about their names, enclosing elements and parameters. 
*/ class JavaScriptFunctionSplitter : TreeFunctionSplitter { - companion object { - private const val METHOD_NODE = "methodDefinition" - private const val ARROW_NODE = "ARROW" - private const val FUNCTION_NODE = "Function" - } - override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { return root.preOrder().mapNotNull { node -> when { @@ -29,4 +23,10 @@ class JavaScriptFunctionSplitter : TreeFunctionSplitter { private fun Node.isArrowElement() = this.getChildOfType(ARROW_NODE) != null private fun Node.isFunctionElement() = this.getChildOfType(FUNCTION_NODE) != null private fun Node.isMethodElement() = decompressTypeLabel(this.typeLabel).last() == METHOD_NODE + + companion object { + private const val METHOD_NODE = "methodDefinition" + private const val ARROW_NODE = "ARROW" + private const val FUNCTION_NODE = "Function" + } } diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index 42b13d53..a2bb3cf2 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -19,7 +19,7 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: private fun collectParameters(): List { // Parameters in this grammar have following structure (children order may be wrong): - //formal parameter list -> formal parameter -> Ampersand + // formal parameter list -> formal parameter -> Ampersand // | -> type hint // | -> ellipsis // | -> var init -> var name @@ -61,20 +61,19 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: val isPassedByReference = parameterNode.getChildOfType(REFERENCE) != null - if (parameterNode.hasLastLabel(PARAMETER_NAME)) return parameterNode.originalToken - ?: throw IllegalStateException("No name was found for a parameter") + if (parameterNode.hasLastLabel(PARAMETER_NAME)) { + return 
parameterNode.originalToken ?: error("No name was found for a parameter") + } val varInit = parameterNode.getItOrChildrenOfType(VAR_DECLARATION).first() val name = varInit.getItOrChildrenOfType(PARAMETER_NAME).first().originalToken - ?: throw IllegalStateException("No name was found for a parameter") + ?: error("No name was found for a parameter") return (if (isPassedByReference) "&" else "") + (if (isSplattedArg) "..." else "") + name } - private fun getElementType(element: AntlrNode): String? { - return element.getChildOfType(TYPE)?.originalToken - } + private fun getElementType(element: AntlrNode): String? = element.getChildOfType(TYPE)?.originalToken private fun collectEnclosingElement(): EnclosingElement? { val enclosing = root.findEnclosingElementBy { it.isPossibleEnclosing() } ?: return null @@ -96,7 +95,7 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: enclosing.isFunction() -> EnclosingElementType.Function enclosing.isClass() -> EnclosingElementType.Class enclosing.isAssignExpression() -> EnclosingElementType.VariableDeclaration - else -> throw IllegalStateException("No type can be associated") + else -> error("No type can be associated") } } @@ -104,18 +103,18 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: return when { enclosing.isFunction() || enclosing.isClass() -> enclosing.getChildOfType(FUNCTION_NAME)?.originalToken enclosing.isAssignExpression() -> enclosing.children.find { it.hasLastLabel(PARAMETER_NAME) }?.originalToken - else -> throw IllegalStateException("No type can be associated") + else -> error("No type can be associated") } } // No check for method because method is a function private fun AntlrNode.isPossibleEnclosing() = isFunction() || isClass() || isAssignExpression() - private fun AntlrNode.isMethod() = isFunction() && (hasFirstLabel(CLASS_MEMBER)) + private fun AntlrNode.isMethod() = isFunction() && hasFirstLabel(CLASS_MEMBER) private fun AntlrNode.isFunction() = 
getChildOfType(LAMBDA_TOKEN) != null || getChildOfType(FUNCTION_TOKEN) != null - private fun AntlrNode.isAssignExpression() = hasFirstLabel(EXPRESSION) && (getChildOfType(ASSIGN_OP) != null) + private fun AntlrNode.isAssignExpression() = hasFirstLabel(EXPRESSION) && getChildOfType(ASSIGN_OP) != null private fun AntlrNode.isClass(): Boolean = hasLastLabel(CLASS_DECLARATION) diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt index a95288f8..cc640244 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt @@ -5,14 +5,14 @@ import astminer.common.model.TreeFunctionSplitter import astminer.parse.antlr.AntlrNode class PHPFunctionSplitter : TreeFunctionSplitter { - companion object { - const val LAMBDA_TOKEN = "LambdaFn" - const val FUNCTION_TOKEN = "Function_" - } - override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { return root.preOrder() .filter { it.typeLabel == LAMBDA_TOKEN || it.typeLabel == FUNCTION_TOKEN } .mapNotNull { node -> node.parent?.let { statement -> ANTLRPHPFunctionInfo(statement, filePath) } } } + + companion object { + const val LAMBDA_TOKEN = "LambdaFn" + const val FUNCTION_TOKEN = "Function_" + } } diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt index fc5b76f8..6e9b97ba 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt @@ -16,7 +16,7 @@ import java.io.InputStream // (AST just falls apart when class field contain dot concatenation) // More details can be found in corresponding issues // https://github.com/antlr/grammars-v4/issues/1991 -class PHPParser: Parser { +class PHPParser : Parser { override fun parseInputStream(content: InputStream): AntlrNode { return try { val stream = 
CharStreams.fromStream(content) diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt index f1e87c20..53ee37c4 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -9,29 +9,7 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode, override val filePat override val parameters: List = collectParameters() override val enclosingElement: EnclosingElement? = collectEnclosingElement() - companion object { - private const val FUNCTION_NODE = "funcdef" - private const val FUNCTION_NAME_NODE = "NAME" - - private const val CLASS_DECLARATION_NODE = "classdef" - private const val CLASS_NAME_NODE = "NAME" - - private const val METHOD_PARAMETER_NODE = "parameters" - private const val METHOD_PARAMETER_INNER_NODE = "typedargslist" - private const val METHOD_SINGLE_PARAMETER_NODE = "tfpdef" - private const val PARAMETER_NAME_NODE = "NAME" - private const val PARAMETER_TYPE_NODE = "test" - //It's seems strange but it works because actual type label will be - //test|or_test|and_test|not_test|comparison|expr|xor_expr... - // ..|and_expr|shift_expr|arith_expr|term|factor|power|atom_expr|atom|NAME - - private val POSSIBLE_ENCLOSING_ELEMENTS = listOf(CLASS_DECLARATION_NODE, FUNCTION_NODE) - private const val BODY = "suite" - } - - private fun collectNameNode(): AntlrNode? { - return root.getChildOfType(FUNCTION_NAME_NODE) - } + private fun collectNameNode(): AntlrNode? 
= root.getChildOfType(FUNCTION_NAME_NODE) private fun collectParameters(): List { val parametersRoot = root.getChildOfType(METHOD_PARAMETER_NODE) @@ -50,8 +28,9 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode, override val filePat private fun assembleMethodInfoParameter(parameterNode: AntlrNode): FunctionInfoParameter { val parameterHaveNoDefaultOrType = parameterNode.hasLastLabel(PARAMETER_NAME_NODE) - val parameterName = if (parameterHaveNoDefaultOrType) parameterNode.originalToken - else parameterNode.getChildOfType(PARAMETER_NAME_NODE)?.originalToken + val parameterNameNode = + if (parameterHaveNoDefaultOrType) parameterNode else parameterNode.getChildOfType(PARAMETER_NAME_NODE) + val parameterName = parameterNameNode?.originalToken require(parameterName != null) { "Method name was not found" } val parameterType = parameterNode.getChildOfType(PARAMETER_TYPE_NODE)?.getTokensFromSubtree() @@ -67,19 +46,15 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode, override val filePat val enclosingNode = root.findEnclosingElementBy { it.lastLabelIn(POSSIBLE_ENCLOSING_ELEMENTS) } ?: return null val type = when { enclosingNode.hasLastLabel(CLASS_DECLARATION_NODE) -> EnclosingElementType.Class - enclosingNode.hasLastLabel(FUNCTION_NODE) -> { - when { - enclosingNode.isMethod() -> EnclosingElementType.Method - else -> EnclosingElementType.Function - } - } - else -> throw IllegalStateException("Enclosing node can only be function or class") + enclosingNode.hasLastLabel(FUNCTION_NODE) -> + if (enclosingNode.isMethod()) EnclosingElementType.Method else EnclosingElementType.Function + else -> error("Enclosing node can only be function or class") } val name = when (type) { EnclosingElementType.Class -> enclosingNode.getChildOfType(CLASS_NAME_NODE) EnclosingElementType.Method, EnclosingElementType.Function -> enclosingNode.getChildOfType(FUNCTION_NAME_NODE) - else -> throw IllegalStateException("Enclosing node can only be function or class") + else -> 
error("Enclosing node can only be function or class") }?.originalToken return EnclosingElement( type = type, @@ -98,4 +73,24 @@ class AntlrPythonFunctionInfo(override val root: AntlrNode, override val filePat val lastLabel = decompressTypeLabel(enclosingNode.typeLabel).last() return lastLabel == CLASS_DECLARATION_NODE } + + companion object { + private const val FUNCTION_NODE = "funcdef" + private const val FUNCTION_NAME_NODE = "NAME" + + private const val CLASS_DECLARATION_NODE = "classdef" + private const val CLASS_NAME_NODE = "NAME" + + private const val METHOD_PARAMETER_NODE = "parameters" + private const val METHOD_PARAMETER_INNER_NODE = "typedargslist" + private const val METHOD_SINGLE_PARAMETER_NODE = "tfpdef" + private const val PARAMETER_NAME_NODE = "NAME" + private const val PARAMETER_TYPE_NODE = "test" + // It's seems strange but it works because actual type label will be + // test|or_test|and_test|not_test|comparison|expr|xor_expr... + // ..|and_expr|shift_expr|arith_expr|term|factor|power|atom_expr|atom|NAME + + private val POSSIBLE_ENCLOSING_ELEMENTS = listOf(CLASS_DECLARATION_NODE, FUNCTION_NODE) + private const val BODY = "suite" + } } diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt index f3e987f4..73cbb3bf 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt @@ -4,13 +4,12 @@ import astminer.common.model.* import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.hasLastLabel - class PythonFunctionSplitter : TreeFunctionSplitter { private val methodNode = "funcdef" override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { val methodRoots = root.preOrder().filter { - (it).hasLastLabel(methodNode) + it.hasLastLabel(methodNode) } return methodRoots.map { AntlrPythonFunctionInfo(it, filePath) } } diff --git 
a/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt index 7ce15938..66181fe1 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt @@ -1,11 +1,11 @@ package astminer.parse.antlr.python -import me.vovak.antlr.parser.Python3Lexer -import me.vovak.antlr.parser.Python3Parser import astminer.common.model.Parser import astminer.parse.ParsingException import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.convertAntlrTree +import me.vovak.antlr.parser.Python3Lexer +import me.vovak.antlr.parser.Python3Parser import org.antlr.v4.runtime.CharStreams import org.antlr.v4.runtime.CommonTokenStream import java.io.InputStream diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt index 6ef4c94a..681a3cac 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt @@ -6,32 +6,17 @@ import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter import astminer.parse.findEnclosingElementBy -class FuzzyCppFunctionInfo(override val root: FuzzyNode, override val filePath: String): FunctionInfo { - companion object { - private const val METHOD_NAME_NODE = "NAME" - private const val METHOD_RETURN_NODE = "METHOD_RETURN" - private const val METHOD_RETURN_TYPE_NODE = "TYPE_FULL_NAME" - - private const val CLASS_DECLARATION_NODE = "TYPE_DECL" - private const val CLASS_NAME_NODE = "NAME" - - private const val METHOD_PARAMETER_NODE = "METHOD_PARAMETER_IN" - private const val PARAMETER_NAME_NODE = "NAME" - private const val PARAMETER_TYPE_NODE = "TYPE_FULL_NAME" - } +class FuzzyCppFunctionInfo(override val root: FuzzyNode, override val filePath: String) : FunctionInfo { override val returnType: String? 
= collectReturnType() override val enclosingElement: EnclosingElement? = collectEnclosingClass() override val parameters: List = collectParameters() override val nameNode: FuzzyNode? = collectNameNode() - private fun collectNameNode(): FuzzyNode? { - return root.getChildOfType(METHOD_NAME_NODE) as FuzzyNode? - } + private fun collectNameNode(): FuzzyNode? = root.getChildOfType(METHOD_NAME_NODE) as? FuzzyNode - private fun collectReturnType(): String? { - return root.getChildOfType(METHOD_RETURN_NODE)?.getChildOfType(METHOD_RETURN_TYPE_NODE)?.originalToken - } + private fun collectReturnType(): String? = + root.getChildOfType(METHOD_RETURN_NODE)?.getChildOfType(METHOD_RETURN_TYPE_NODE)?.originalToken private fun collectEnclosingClass(): EnclosingElement? { val enclosingClass = findEnclosingClass() ?: return null @@ -43,13 +28,11 @@ class FuzzyCppFunctionInfo(override val root: FuzzyNode, override val filePath: ) } - private fun findEnclosingClass(): FuzzyNode? { - return root.findEnclosingElementBy { it.typeLabel == CLASS_DECLARATION_NODE } - } + private fun findEnclosingClass(): FuzzyNode? = + root.findEnclosingElementBy { it.typeLabel == CLASS_DECLARATION_NODE } - private fun findEnclosingClassName(enclosingClass: FuzzyNode): String? { - return enclosingClass.getChildOfType(CLASS_NAME_NODE)?.originalToken - } + private fun findEnclosingClassName(enclosingClass: FuzzyNode): String? 
= + enclosingClass.getChildOfType(CLASS_NAME_NODE)?.originalToken private fun collectParameters(): List { val parameters = root.getChildrenOfType(METHOD_PARAMETER_NODE) @@ -59,4 +42,17 @@ class FuzzyCppFunctionInfo(override val root: FuzzyNode, override val filePath: FunctionInfoParameter(name, type) } } + + companion object { + private const val METHOD_NAME_NODE = "NAME" + private const val METHOD_RETURN_NODE = "METHOD_RETURN" + private const val METHOD_RETURN_TYPE_NODE = "TYPE_FULL_NAME" + + private const val CLASS_DECLARATION_NODE = "TYPE_DECL" + private const val CLASS_NAME_NODE = "NAME" + + private const val METHOD_PARAMETER_NODE = "METHOD_PARAMETER_IN" + private const val PARAMETER_NAME_NODE = "NAME" + private const val PARAMETER_TYPE_NODE = "TYPE_FULL_NAME" + } } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt index 8c4529d7..8be634bd 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt @@ -21,52 +21,6 @@ import java.io.InputStream */ class FuzzyCppParser : Parser { - companion object { - private val supportedExtensions = listOf("c", "cpp") - - data class ExpandableNodeKey( - val key: String, - val supportedNodeLabels: List, - val order: Int - ) - - private val expandableNodeKeys = listOf( - ExpandableNodeKey( - "NAME", listOf( - NodeTypes.TYPE, NodeTypes.TYPE_DECL, NodeTypes.TYPE_PARAMETER, NodeTypes.MEMBER, - NodeTypes.TYPE_ARGUMENT, NodeTypes.METHOD, NodeTypes.METHOD_PARAMETER_IN, NodeTypes.LOCAL, - NodeTypes.MODIFIER, NodeTypes.IDENTIFIER, NodeTypes.CALL, NodeTypes.UNKNOWN - ), 0 - ), - ExpandableNodeKey( - "TYPE_FULL_NAME", listOf( - NodeTypes.TYPE, - NodeTypes.METHOD_RETURN, NodeTypes.METHOD_PARAMETER_IN, NodeTypes.LOCAL, - NodeTypes.IDENTIFIER, - NodeTypes.UNKNOWN - ), 0 - ), - ExpandableNodeKey( - "ALIAS_TYPE_FULL_NAME", listOf( - NodeTypes.TYPE_DECL, - NodeTypes.UNKNOWN - ), 0 - ) - 
) - - data class ReplaceableNodeKey(val key: String, val condition: (Node) -> Boolean) - - private val replaceableNodeKeys = listOf( - ReplaceableNodeKey("NAME") { v -> - v.propertyKeys().contains("NAME") && - v.property("NAME").toString().startsWith("") - }, - ReplaceableNodeKey("PARSER_TYPE_NAME") { v -> - v.propertyKeys().contains("PARSER_TYPE_NAME") - } - ) - } - /** * Parse input stream and create an AST. * If you already have a file with code you need to parse, better use [parseFile], @@ -201,4 +155,58 @@ class FuzzyCppParser : Parser { } return node } + + companion object { + private val supportedExtensions = listOf("c", "cpp") + + data class ExpandableNodeKey( + val key: String, + val supportedNodeLabels: List, + val order: Int + ) + + private val expandableNodeKeys = listOf( + ExpandableNodeKey( + "NAME", + listOf( + NodeTypes.TYPE, NodeTypes.TYPE_DECL, NodeTypes.TYPE_PARAMETER, NodeTypes.MEMBER, + NodeTypes.TYPE_ARGUMENT, NodeTypes.METHOD, NodeTypes.METHOD_PARAMETER_IN, NodeTypes.LOCAL, + NodeTypes.MODIFIER, NodeTypes.IDENTIFIER, NodeTypes.CALL, NodeTypes.UNKNOWN + ), + 0 + ), + ExpandableNodeKey( + "TYPE_FULL_NAME", + listOf( + NodeTypes.TYPE, + NodeTypes.METHOD_RETURN, + NodeTypes.METHOD_PARAMETER_IN, + NodeTypes.LOCAL, + NodeTypes.IDENTIFIER, + NodeTypes.UNKNOWN + ), + 0 + ), + ExpandableNodeKey( + "ALIAS_TYPE_FULL_NAME", + listOf( + NodeTypes.TYPE_DECL, + NodeTypes.UNKNOWN + ), + 0 + ) + ) + + data class ReplaceableNodeKey(val key: String, val condition: (Node) -> Boolean) + + private val replaceableNodeKeys = listOf( + ReplaceableNodeKey("NAME") { v -> + v.propertyKeys().contains("NAME") && + v.property("NAME").toString().startsWith("") + }, + ReplaceableNodeKey("PARSER_TYPE_NAME") { v -> + v.propertyKeys().contains("PARSER_TYPE_NAME") + } + ) + } } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt index d4a24011..097bd914 100644 --- 
a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt @@ -16,13 +16,12 @@ class FuzzyNode( ) : Node() { private val order = order ?: -1 override var parent: Node? = null - private val childrenMultiset = TreeMultiset.create(compareBy( - { it.order }, - { System.identityHashCode(it) } - )) + private val childrenMultiset = TreeMultiset.create( + compareBy({ it.order }, { System.identityHashCode(it) }) + ) override val children - get() = childrenMultiset.toList() + get() = childrenMultiset.toList() fun addChild(node: FuzzyNode) { childrenMultiset.add(node) diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt index c2d6a7f8..56bc6544 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt @@ -5,11 +5,11 @@ import java.util.concurrent.TimeUnit fun String.runCommand(workingDir: File) { ProcessBuilder("/bin/sh", "-c", this) - .directory(workingDir) - .redirectOutput(ProcessBuilder.Redirect.INHERIT) - .redirectError(ProcessBuilder.Redirect.INHERIT) - .start() - .waitFor(60, TimeUnit.MINUTES) + .directory(workingDir) + .redirectOutput(ProcessBuilder.Redirect.INHERIT) + .redirectError(ProcessBuilder.Redirect.INHERIT) + .start() + .waitFor(60, TimeUnit.MINUTES) } fun preprocessCppCode(file: File, outputDir: File, preprocessCommand: String) = """ diff --git a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt index 37bea3fa..79c79e04 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt @@ -4,7 +4,7 @@ import astminer.common.model.Node import com.github.gumtreediff.tree.ITree import com.github.gumtreediff.tree.TreeContext -class GumTreeNode(val wrappedNode: ITree, val context: TreeContext,override var parent: GumTreeNode?) 
: Node() { +class GumTreeNode(val wrappedNode: ITree, val context: TreeContext, override var parent: GumTreeNode?) : Node() { override val typeLabel: String get() = context.getTypeLabel(wrappedNode) diff --git a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt index 13a0548e..7b94646a 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt @@ -1,10 +1,10 @@ package astminer.parse.gumtree -import astminer.common.model.ParseResult import astminer.common.model.HandlerFactory import astminer.common.model.LanguageHandler -import astminer.parse.gumtree.java.GumTreeJavaParser +import astminer.common.model.ParseResult import astminer.parse.gumtree.java.GumTreeJavaFunctionSplitter +import astminer.parse.gumtree.java.GumTreeJavaParser import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter import astminer.parse.gumtree.python.GumTreePythonParser import java.io.File @@ -21,7 +21,7 @@ object GumtreeJavaHandlerFactory : HandlerFactory { object GumtreePythonHandlerFactory : HandlerFactory { override fun createHandler(file: File): LanguageHandler = PythonGumTreeHandler(file) - class PythonGumTreeHandler(file: File) : LanguageHandler() { + class PythonGumTreeHandler(file: File) : LanguageHandler() { override val splitter = GumTreePythonFunctionSplitter() override val parseResult: ParseResult = GumTreePythonParser().parseFile(file) } diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt index 6a0586a7..db1c5219 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -6,15 +6,10 @@ import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter import astminer.parse.gumtree.GumTreeNode -class 
GumTreeJavaFunctionInfo(override val root: GumTreeNode, override val filePath: String) : - FunctionInfo { - companion object { - private object TypeLabels { - const val simpleName = "SimpleName" - const val typeDeclaration = "TypeDeclaration" - const val singleVariableDeclaration = "SingleVariableDeclaration" - } - } +class GumTreeJavaFunctionInfo( + override val root: GumTreeNode, + override val filePath: String +) : FunctionInfo { override val nameNode: GumTreeNode? = root.getChildOfType(TypeLabels.simpleName) override val parameters: List = collectParameters() @@ -41,21 +36,22 @@ class GumTreeJavaFunctionInfo(override val root: GumTreeNode, override val fileP private fun collectParameters(): List { val params = root.getChildrenOfType(TypeLabels.singleVariableDeclaration) return params.map { node -> - FunctionInfoParameter( - name = node.getElementName(), - type = node.getElementType() - ) + FunctionInfoParameter(node.getElementName(), node.getElementType()) } } - private fun GumTreeNode.getElementName(): String { - return getChildOfType(TypeLabels.simpleName)?.originalToken - ?: throw IllegalStateException("No name found for element") - } + private fun GumTreeNode.getElementName(): String = + getChildOfType(TypeLabels.simpleName)?.originalToken ?: error("No name found for element") - private fun GumTreeNode.getElementType(): String? { - return children.firstOrNull { it.isTypeNode() }?.originalToken - } + private fun GumTreeNode.getElementType(): String? 
= children.firstOrNull { it.isTypeNode() }?.originalToken private fun GumTreeNode.isTypeNode() = typeLabel.endsWith("Type") + + companion object { + private object TypeLabels { + const val simpleName = "SimpleName" + const val typeDeclaration = "TypeDeclaration" + const val singleVariableDeclaration = "SingleVariableDeclaration" + } + } } diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt index 6338d4be..253a17d5 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt @@ -1,10 +1,10 @@ package astminer.parse.gumtree.java +import astminer.common.model.Parser +import astminer.parse.gumtree.GumTreeNode import com.github.gumtreediff.client.Run import com.github.gumtreediff.gen.jdt.JdtTreeGenerator import com.github.gumtreediff.tree.TreeContext -import astminer.common.model.Parser -import astminer.parse.gumtree.GumTreeNode import java.io.InputStream import java.io.InputStreamReader @@ -19,6 +19,4 @@ class GumTreeJavaParser : Parser { } } -fun wrapGumTreeNode(treeContext: TreeContext): GumTreeNode { - return GumTreeNode(treeContext.root, treeContext, null) -} +fun wrapGumTreeNode(treeContext: TreeContext): GumTreeNode = GumTreeNode(treeContext.root, treeContext, null) diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt index f94fc7d0..d297a3ab 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -8,31 +8,9 @@ import astminer.parse.findEnclosingElementBy import astminer.parse.gumtree.GumTreeNode class GumTreePythonFunctionInfo( - override val root: GumTreeNode, override val filePath: String + override val root: GumTreeNode, + override val filePath: String ) : 
FunctionInfo { - companion object { - private object TypeLabels { - const val classDefinition = "ClassDef" - const val functionDefinition = "FunctionDef" - const val asyncFunctionDefinition = "AsyncFunctionDef" - const val nameLoad = "Name_Load" - const val posOnlyArgs = "posonlyargs" - const val kwOnlyArgs = "kwonlyargs" - const val arguments = "arguments" - const val vararg = "vararg" - const val kwarg = "kwarg" - const val args = "args" - const val arg = "arg" - - const val body = "body" - const val returnTypeLabel = "Return" - const val passTypeLabel = "Pass" - const val constantType = "Constant-" - - val methodDefinitions = listOf(functionDefinition, asyncFunctionDefinition) - val funcArgsTypesNodes = listOf(args, posOnlyArgs, kwOnlyArgs) - } - } override val nameNode: GumTreeNode = root override val parameters: List = collectParameters() @@ -63,25 +41,44 @@ class GumTreePythonFunctionInfo( ) } - private fun findEnclosingClass(): GumTreeNode? { - return root.findEnclosingElementBy { it.typeLabel == TypeLabels.classDefinition } - } + private fun findEnclosingClass(): GumTreeNode? 
= + root.findEnclosingElementBy { it.typeLabel == TypeLabels.classDefinition } private fun collectParameters(): List { val arguments = root.getChildrenOfType(TypeLabels.arguments).flatMap { it.children } val params = arguments.flatMap { node -> when (node.typeLabel) { - in TypeLabels.funcArgsTypesNodes -> node.children - .filter { it.typeLabel == TypeLabels.arg } + in TypeLabels.funcArgsTypesNodes -> + node.children + .filter { it.typeLabel == TypeLabels.arg } TypeLabels.vararg, TypeLabels.kwarg -> listOf(node) else -> emptyList() } } - return params.map { node -> - FunctionInfoParameter( - name = node.originalToken, - type = getElementType(node)?.originalToken - ) + return params.map { FunctionInfoParameter(it.originalToken, getElementType(it)?.originalToken) } + } + + companion object { + private object TypeLabels { + const val classDefinition = "ClassDef" + const val functionDefinition = "FunctionDef" + const val asyncFunctionDefinition = "AsyncFunctionDef" + const val nameLoad = "Name_Load" + const val posOnlyArgs = "posonlyargs" + const val kwOnlyArgs = "kwonlyargs" + const val arguments = "arguments" + const val vararg = "vararg" + const val kwarg = "kwarg" + const val args = "args" + const val arg = "arg" + + const val body = "body" + const val returnTypeLabel = "Return" + const val passTypeLabel = "Pass" + const val constantType = "Constant-" + + val methodDefinitions = listOf(functionDefinition, asyncFunctionDefinition) + val funcArgsTypesNodes = listOf(args, posOnlyArgs, kwOnlyArgs) } } } diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt index 04cd363a..1db706ca 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt @@ -4,6 +4,11 @@ import astminer.common.model.* import astminer.parse.gumtree.GumTreeNode class 
GumTreePythonFunctionSplitter : TreeFunctionSplitter { + override fun splitIntoFunctions(root: GumTreeNode, filePath: String): Collection> { + val functionRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.typeLabel) } + return functionRoots.map { GumTreePythonFunctionInfo(it, filePath) } + } + companion object { private object TypeLabels { const val functionDefinition = "FunctionDef" @@ -11,9 +16,4 @@ class GumTreePythonFunctionSplitter : TreeFunctionSplitter { val methodDefinitions = listOf(functionDefinition, asyncFunctionDefinition) } } - - override fun splitIntoFunctions(root: GumTreeNode, filePath: String): Collection> { - val functionRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.typeLabel) } - return functionRoots.map { GumTreePythonFunctionInfo(it, filePath) } - } } diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt index 973e2328..8a407687 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt @@ -22,6 +22,4 @@ class GumTreePythonParser : Parser { } } -fun wrapGumTreeNode(treeContext: TreeContext): GumTreeNode { - return GumTreeNode(treeContext.root, treeContext, null) -} +fun wrapGumTreeNode(treeContext: TreeContext): GumTreeNode = GumTreeNode(treeContext.root, treeContext, null) diff --git a/src/main/kotlin/astminer/paths/PathMiner.kt b/src/main/kotlin/astminer/paths/PathMiner.kt index 905d8761..b00239cd 100644 --- a/src/main/kotlin/astminer/paths/PathMiner.kt +++ b/src/main/kotlin/astminer/paths/PathMiner.kt @@ -5,10 +5,9 @@ import astminer.common.model.Node data class PathRetrievalSettings(val maxLength: Int, val maxWidth: Int) -class PathMiner(val settings: PathRetrievalSettings) { +class PathMiner(private val settings: PathRetrievalSettings) { private val pathWorker = PathWorker() - fun 
retrievePaths(tree: Node): Collection { - return pathWorker.retrievePaths(tree, settings.maxLength, settings.maxWidth) - } + fun retrievePaths(tree: Node): Collection = + pathWorker.retrievePaths(tree, settings.maxLength, settings.maxWidth) } diff --git a/src/main/kotlin/astminer/paths/PathUtil.kt b/src/main/kotlin/astminer/paths/PathUtil.kt index 9e4f598c..ad8c9cc8 100644 --- a/src/main/kotlin/astminer/paths/PathUtil.kt +++ b/src/main/kotlin/astminer/paths/PathUtil.kt @@ -6,7 +6,7 @@ fun toPathContext(path: ASTPath, getToken: (Node) -> String = { node -> node.tok val startToken = getToken(path.upwardNodes.first()) val endToken = getToken(path.downwardNodes.last()) val astNodes = path.upwardNodes.map { OrientedNodeType(it.typeLabel, Direction.UP) } + - OrientedNodeType(path.topNode.typeLabel, Direction.TOP) + - path.downwardNodes.map { OrientedNodeType(it.typeLabel, Direction.DOWN) } + OrientedNodeType(path.topNode.typeLabel, Direction.TOP) + + path.downwardNodes.map { OrientedNodeType(it.typeLabel, Direction.DOWN) } return PathContext(startToken, astNodes, endToken) } diff --git a/src/main/kotlin/astminer/paths/PathWorker.kt b/src/main/kotlin/astminer/paths/PathWorker.kt index a3399cc0..a19bd487 100644 --- a/src/main/kotlin/astminer/paths/PathWorker.kt +++ b/src/main/kotlin/astminer/paths/PathWorker.kt @@ -7,24 +7,12 @@ import kotlin.math.min class PathWorker { - companion object { - private const val PATH_PIECES_KEY = "path_pieces" - - private fun Node.setPathPieces(pathPieces: List) { - this.metadata[PATH_PIECES_KEY] = pathPieces - } - - // In runtime all generics upcast to upper bound, therefore it's impossible to check type inside List - @Suppress("UNCHECKED_CAST") - private fun Node.getPathPieces(): List? = this.metadata[PATH_PIECES_KEY] as List? - } - fun retrievePaths(tree: Node) = retrievePaths(tree, null, null) - fun updatePathPieces( - currentNode: Node, - pathPiecesPerChild: List?>, - maxLength: Int? 
+ private fun updatePathPieces( + currentNode: Node, + pathPiecesPerChild: List?>, + maxLength: Int? ) = pathPiecesPerChild.filterNotNull().flatMap { childPieces -> childPieces.filter { pathPiece -> maxLength == null || pathPiece.size <= maxLength @@ -33,10 +21,11 @@ class PathWorker { } } - fun collapsePiecesToPaths( - currentNode: Node, - pathPiecesPerChild: List?>, - maxLength: Int?, maxWidth: Int? + private fun collapsePiecesToPaths( + currentNode: Node, + pathPiecesPerChild: List?>, + maxLength: Int?, + maxWidth: Int? ): Collection { val paths: MutableCollection = ArrayList() val childrenCount = pathPiecesPerChild.size @@ -74,4 +63,16 @@ class PathWorker { } return paths } + + companion object { + private const val PATH_PIECES_KEY = "path_pieces" + + private fun Node.setPathPieces(pathPieces: List) { + this.metadata[PATH_PIECES_KEY] = pathPieces + } + + // In runtime all generics upcast to upper bound, therefore it's impossible to check type inside List + @Suppress("UNCHECKED_CAST") + private fun Node.getPathPieces(): List? = this.metadata[PATH_PIECES_KEY] as? 
List + } } diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 74d67cd1..ca258628 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -1,15 +1,15 @@ package astminer.pipeline import astminer.common.getProjectFilesWithExtension -import astminer.parse.getHandlerFactory -import astminer.pipeline.branch.FilePipelineBranch -import astminer.pipeline.branch.FunctionPipelineBranch -import astminer.pipeline.branch.IllegalLabelExtractorException import astminer.common.model.FileLabelExtractor import astminer.common.model.FunctionLabelExtractor import astminer.common.model.Storage import astminer.config.FileExtension import astminer.config.PipelineConfig +import astminer.parse.getHandlerFactory +import astminer.pipeline.branch.FilePipelineBranch +import astminer.pipeline.branch.FunctionPipelineBranch +import astminer.pipeline.branch.IllegalLabelExtractorException import java.io.File /** diff --git a/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt index e67d179f..bb0128a1 100644 --- a/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt +++ b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt @@ -9,5 +9,5 @@ class IllegalLabelExtractorException(problemName: String?) : /** * This exception is thrown when the given filter is not implemented for the given granularity. */ -class IllegalFilterException(granularity: String, filterName: String?): - IllegalStateException("Unknown filter `${filterName ?: "anonymous"}` for $granularity granularity") +class IllegalFilterException(granularity: String, filterName: String?) 
: + IllegalStateException("Unknown filter `${filterName ?: "anonymous"}` for $granularity granularity") diff --git a/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt index f7201b96..62a5f184 100644 --- a/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt @@ -3,7 +3,6 @@ package astminer.pipeline.branch import astminer.common.model.* import astminer.labelextractor.* - /** * PipelineBranch for pipeline with function-level granularity (FunctionPipelineConfig). * Extracts functions from the parsed files. diff --git a/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt index fbf12a24..7b743781 100644 --- a/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt @@ -1,8 +1,8 @@ package astminer.pipeline.branch +import astminer.common.model.LabeledResult import astminer.common.model.LanguageHandler import astminer.common.model.Node -import astminer.common.model.LabeledResult /** * PipelineBranch is a part of the pipeline that encapsulate inside itself granularity based logic. 
diff --git a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt index 42777aef..a96ba459 100644 --- a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -2,11 +2,11 @@ package astminer.storage.ast import astminer.common.model.LabeledResult import astminer.common.model.Node +import astminer.common.model.Storage import astminer.common.storage.RankedIncrementalIdStorage import astminer.common.storage.dumpIdStorageToCsv import astminer.common.storage.nodeTypeToCsvString import astminer.common.storage.tokenToCsvString -import astminer.common.model.Storage import java.io.File import java.io.PrintWriter @@ -58,7 +58,7 @@ class CsvAstStorage(override val outputDirectoryPath: String) : Storage { internal fun astString(node: Node): String { return "${tokensMap.getId(node.token)} ${nodeTypesMap.getId(node.typeLabel)}{${ - node.children.joinToString(separator = "", transform = ::astString) + node.children.joinToString(separator = "", transform = ::astString) }}" } } diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index 3e3748a1..c4497f1a 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -2,8 +2,8 @@ package astminer.storage.ast import astminer.common.model.LabeledResult import astminer.common.model.Node -import astminer.common.storage.RankedIncrementalIdStorage import astminer.common.model.Storage +import astminer.common.storage.RankedIncrementalIdStorage import java.io.File import java.io.PrintWriter @@ -40,11 +40,7 @@ class DotAstStorage(override val outputDirectoryPath: String) : Storage { val nodeDescriptionFormat = "${astFilenameFormat.format(index)},$normalizedFilepath,$normalizedLabel,%d,%s,%s" for (node in labeledResult.root.preOrder()) { descriptionFileStream.write( - 
nodeDescriptionFormat.format( - nodesMap.getId(node) - 1, - node.token, - node.typeLabel - ) + "\n" + nodeDescriptionFormat.format(nodesMap.getId(node) - 1, node.token, node.typeLabel) + "\n" ) } ++index @@ -64,9 +60,7 @@ class DotAstStorage(override val outputDirectoryPath: String) : Storage { for (node in root.preOrder()) { val rootId = nodesMap.record(node) - 1 val childrenIds = node.children.map { nodesMap.record(it) - 1 } - out.println( - "$rootId -- {${childrenIds.joinToString(" ") { it.toString() }}};" - ) + out.println("$rootId -- {${childrenIds.joinToString(" ") { it.toString() }}};") } out.println("}") @@ -76,14 +70,14 @@ class DotAstStorage(override val outputDirectoryPath: String) : Storage { // Label should contain only latin letters, numbers and underscores, other symbols replace with an underscore internal fun normalizeAstLabel(label: String): String = - label.replace("[^A-z^0-9^_]".toRegex(), "_") + label.replace("[^A-z0-9_]".toRegex(), "_") /** * Filepath should contain only latin letters, numbers, underscores, hyphens, backslashes and dots * Underscore replace other symbols */ internal fun normalizeFilepath(filepath: String): String = - filepath.replace("[^A-z^0-9^_^\\-^.^/]".toRegex(), "_") + filepath.replace("[^A-z0-9_\\-./]".toRegex(), "_") /** * Split the full path to specified file into the parent's path, and the file name diff --git a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index 2832b5ed..4fc99a77 100644 --- a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -1,12 +1,12 @@ package astminer.storage.path -import astminer.common.model.LabeledResult import astminer.common.model.* +import astminer.common.model.LabeledResult +import astminer.common.model.Storage +import astminer.common.storage.* import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import 
astminer.paths.toPathContext -import astminer.common.model.Storage -import astminer.common.storage.* import java.io.File import java.io.PrintWriter @@ -59,8 +59,8 @@ abstract class PathBasedStorage( private fun dumpPathContexts(labeledPathContextIds: LabeledPathContextIds) { val pathContextIdsString = labeledPathContextIds.pathContexts.filter { val isNumberOfTokensValid = config.maxTokens == null || - tokensMap.getIdRank(it.startTokenId) <= config.maxTokens && - tokensMap.getIdRank(it.endTokenId) <= config.maxTokens + tokensMap.getIdRank(it.startTokenId) <= config.maxTokens && + tokensMap.getIdRank(it.endTokenId) <= config.maxTokens val isNumberOfPathsValid = config.maxPaths == null || pathsMap.getIdRank(it.pathId) <= config.maxPaths isNumberOfTokensValid && isNumberOfPathsValid @@ -85,9 +85,12 @@ abstract class PathBasedStorage( private fun retrieveLabeledPathContexts(labeledResult: LabeledResult): LabeledPathContexts { val paths = retrievePaths(labeledResult.root) - return LabeledPathContexts(labeledResult.label, paths.map { astPath -> - toPathContext(astPath) { it.token.replace("\n", "\\n") } - }) + return LabeledPathContexts( + labeledResult.label, + paths.map { astPath -> + toPathContext(astPath) { it.token.replace("\n", "\\n") } + } + ) } /** diff --git a/src/test/kotlin/astminer/common/DummyNode.kt b/src/test/kotlin/astminer/common/DummyNode.kt index ff4b683b..056499c4 100644 --- a/src/test/kotlin/astminer/common/DummyNode.kt +++ b/src/test/kotlin/astminer/common/DummyNode.kt @@ -9,7 +9,7 @@ class DummyNode( override val children: MutableList = mutableListOf() ) : Node() { - //TODO("not implemented") + // TODO("not implemented") override val parent: Node? 
= null override val originalToken: String = typeLabel diff --git a/src/test/kotlin/astminer/common/TreeUtilTest.kt b/src/test/kotlin/astminer/common/TreeUtilTest.kt index 6e3ec5d2..7a263f7e 100644 --- a/src/test/kotlin/astminer/common/TreeUtilTest.kt +++ b/src/test/kotlin/astminer/common/TreeUtilTest.kt @@ -27,9 +27,9 @@ class TreeUtilTest { val token = " Token THAT \n contains Whi\"t,es''pace characters!!!and pu.n.c.t.u.a.tion \n" val expectedToken = "token" + "that" + "contains" + "whitespace" + "characters" + "and" + "punctuation" Assert.assertEquals( - "All whitespace characters and punctuation should be removed, keeping only letters", - expectedToken, - normalizeToken(token, defaultToken) + "All whitespace characters and punctuation should be removed, keeping only letters", + expectedToken, + normalizeToken(token, defaultToken) ) } @@ -38,20 +38,20 @@ class TreeUtilTest { val token = "* *\n" val expectedToken = "*_*" Assert.assertEquals( - "Token without letters have whitespaces replaced with underscores", - expectedToken, - normalizeToken(token, defaultToken) + "Token without letters have whitespaces replaced with underscores", + expectedToken, + normalizeToken(token, defaultToken) ) } @Test fun testNormalizeEmptyToken() { val token = "\n\n" - val expectedToken = DEFAULT_TOKEN + val expectedToken = EMPTY_TOKEN Assert.assertEquals( - "Token without letters have whitespaces replaced with underscores", - expectedToken, - normalizeToken(token, defaultToken) + "Token without letters have whitespaces replaced with underscores", + expectedToken, + normalizeToken(token, defaultToken) ) } @@ -60,9 +60,9 @@ class TreeUtilTest { val token = "fun_withReallyLong_And_ComplicatedName" val expectedToken = listOf("fun", "with", "really", "long", "and", "complicated", "name") Assert.assertEquals( - "Token with snake, camel and combined case should be split into list of its parts", - expectedToken, - splitToSubtokens(token) + "Token with snake, camel and combined case should be 
split into list of its parts", + expectedToken, + splitToSubtokens(token) ) } } diff --git a/src/test/kotlin/astminer/featureextraction/PrettyNode.kt b/src/test/kotlin/astminer/featureextraction/PrettyNode.kt index fd36faf0..3cd0a0e2 100644 --- a/src/test/kotlin/astminer/featureextraction/PrettyNode.kt +++ b/src/test/kotlin/astminer/featureextraction/PrettyNode.kt @@ -5,15 +5,14 @@ import astminer.common.model.Node class PrettyNode(override val typeLabel: String, override val originalToken: String) : Node() { override var children: MutableList = ArrayList() override var parent: PrettyNode? = null - set(value) { - value?.addChild(this) - field = value - } - + set(value) { + value?.addChild(this) + field = value + } fun addChild(node: PrettyNode) = children.add(node) - fun toPrettyString(indent: Int = 0, indentSymbol: String = "--") : String = with(StringBuilder()) { + fun toPrettyString(indent: Int = 0, indentSymbol: String = "--"): String = with(StringBuilder()) { repeat(indent) { append(indentSymbol) } append(typeLabel) if (token.isNotEmpty()) { @@ -28,10 +27,9 @@ class PrettyNode(override val typeLabel: String, override val originalToken: Str override fun removeChildrenOfType(typeLabel: String) { children.removeIf { it.typeLabel == typeLabel } } - } -fun restoreFromPrettyPrint(prettyPrintedTree: String, indentSymbol: String = "--") : PrettyNode { +fun restoreFromPrettyPrint(prettyPrintedTree: String, indentSymbol: String = "--"): PrettyNode { val lastNodeByIndent = HashMap() val tree = prettyPrintedTree.lines().map { s -> val (node, indent) = restorePrintedNode(s, indentSymbol) @@ -42,10 +40,10 @@ fun restoreFromPrettyPrint(prettyPrintedTree: String, indentSymbol: String = "-- return tree.first() } -fun restorePrintedNode(printedNode: String, indentSymbol: String = "--") : Pair { +fun restorePrintedNode(printedNode: String, indentSymbol: String = "--"): Pair { val indents = Regex("^($indentSymbol)*").find(printedNode)?.value ?: "" val nodeString = 
printedNode.substringAfter(indents) - val type = nodeString.substringBefore(" : ") + val type = nodeString.substringBefore(" : ") val token = nodeString.substringAfter(" : ", "") val indent = indents.length / indentSymbol.length return PrettyNode(type, token) to indent diff --git a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt index f4314875..bc804f71 100644 --- a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt +++ b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt @@ -80,7 +80,7 @@ class FunctionFiltersTest { @Test fun `test WordsNumberFilter for 50 should exclude function with name of 100 words`() { val functionInfo = object : FunctionInfo { - override val root = AntlrNode("", null, "Word".repeat(100)) + override val root = AntlrNode("", null, "Word".repeat(100)) } assertFalse { WordsNumberFilter(50).validate(functionInfo) } } diff --git a/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt b/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt index 8cd21da2..db997fff 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt @@ -59,7 +59,7 @@ class ANTLRJavaParserTest { val parser = JavaParser() val projectRoot = File("src/test/resources/arrayCalls") val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "java")) - Assert.assertEquals("There is only 5 file with .java extension in 'testData/arrayCalls' folder",5, trees.size) + Assert.assertEquals("There is only 5 file with .java extension in 'testData/arrayCalls' folder", 5, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } } diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt index 44ed3f91..d1fea436 100644 --- 
a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt @@ -3,9 +3,9 @@ package astminer.parse.antlr.java import astminer.common.model.FunctionInfo import astminer.parse.antlr.AntlrNode import org.junit.Test -import kotlin.test.assertEquals import java.io.File import kotlin.test.BeforeTest +import kotlin.test.assertEquals import kotlin.test.assertNotNull class JavaFunctionSplitterTest { @@ -14,7 +14,7 @@ class JavaFunctionSplitterTest { @BeforeTest fun parseTree() { - val testTree = parser.parseInputStream(File(FILE_PATH).inputStream()) + val testTree = parser.parseInputStream(File(FILE_PATH).inputStream()) assertNotNull(testTree) functionInfos = functionSplitter.splitIntoFunctions(testTree, FILE_PATH) } @@ -26,56 +26,56 @@ class JavaFunctionSplitterTest { @Test fun testReturnVoid() { - val methodVoid = functionInfos.find { it.name == "functionReturningVoid" } + val methodVoid = functionInfos.find { it.name == "functionReturningVoid" } assertNotNull(methodVoid) - assertEquals( "void", methodVoid.returnType) + assertEquals("void", methodVoid.returnType) } @Test fun testReturnInt() { - val methodInt = functionInfos.find { it.name == "functionReturningInt" } + val methodInt = functionInfos.find { it.name == "functionReturningInt" } assertNotNull(methodInt) - assertEquals( "int", methodInt.returnType) + assertEquals("int", methodInt.returnType) } @Test fun testReturnStrings() { - val methodStrings = functionInfos.find { it.name == "functionReturningStrings" } + val methodStrings = functionInfos.find { it.name == "functionReturningStrings" } assertNotNull(methodStrings) - assertEquals( "String[]", methodStrings.returnType) + assertEquals("String[]", methodStrings.returnType) } @Test fun testReturnClass() { - val methodClass = functionInfos.find { it.name == "functionReturningClass" } + val methodClass = functionInfos.find { it.name == "functionReturningClass" } 
assertNotNull(methodClass) - assertEquals( "Class1", methodClass.returnType) + assertEquals("Class1", methodClass.returnType) } @Test fun testFunctionInClass() { - val methodClass = functionInfos.find { it.name == "functionInClass1" } + val methodClass = functionInfos.find { it.name == "functionInClass1" } assertNotNull(methodClass) - assertEquals( "Class1", methodClass.enclosingElement?.name) + assertEquals("Class1", methodClass.enclosingElement?.name) } @Test fun testFunctionInNestedClass() { - val methodClass = functionInfos.find { it.name == "functionInClass2" } + val methodClass = functionInfos.find { it.name == "functionInClass2" } assertNotNull(methodClass) - assertEquals( "Class2", methodClass.enclosingElement?.name) + assertEquals("Class2", methodClass.enclosingElement?.name) } @Test fun testNoParameters() { - val methodNoParameters = functionInfos.find { it.name == "functionWithNoParameters" } + val methodNoParameters = functionInfos.find { it.name == "functionWithNoParameters" } assertNotNull(methodNoParameters) assertEquals(0, methodNoParameters.parameters.size) } @Test fun testOneParameter() { - val methodOneParameter = functionInfos.find { it.name == "functionWithOneParameter" } + val methodOneParameter = functionInfos.find { it.name == "functionWithOneParameter" } assertNotNull(methodOneParameter) assertEquals(1, methodOneParameter.parameters.size) val parameter = methodOneParameter.parameters[0] @@ -85,7 +85,7 @@ class JavaFunctionSplitterTest { @Test fun testThreeParameters() { - val methodThreeParameters = functionInfos.find { it.name == "functionWithThreeParameters" } + val methodThreeParameters = functionInfos.find { it.name == "functionWithThreeParameters" } assertNotNull(methodThreeParameters) assertEquals(3, methodThreeParameters.parameters.size) val methodTypes = listOf("Class", "String[][]", "int[]") diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt 
b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt index 66bfdc05..4e32386d 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt @@ -39,17 +39,17 @@ class JavaScriptFunctionSplitterTest { fun FunctionInfo.getJsonInfo(): String { return "info : {" + - "name : ${name}, " + - "args : ${parameters.joinToString(", ") { it.name }}, " + - "enclosing element : ${enclosingElement?.type?.getEnclosingElementType()}, " + - "enclosing element name : ${enclosingElement?.name}" + - "}" + "name : $name, " + + "args : ${parameters.joinToString(", ") { it.name }}, " + + "enclosing element : ${enclosingElement?.type?.getEnclosingElementType()}, " + + "enclosing element name : ${enclosingElement?.name}" + + "}" } val actualJsonInfos = functionInfos.map { it.getJsonInfo() }.sorted() val text = File(testFilePath).readText() - val expectedJsonInfos = Regex("info : \\{.*\\}").findAll(text).toList().map { it.value }.sorted() + val expectedJsonInfos = Regex("info : \\{.*}").findAll(text).toList().map { it.value }.sorted() assertEquals(expectedJsonInfos, actualJsonInfos) } diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt index 9a1e1c3e..27c6286b 100644 --- a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt @@ -1,11 +1,11 @@ package astminer.parse.antlr.php import astminer.common.model.EnclosingElementType -import org.junit.Test -import kotlin.test.BeforeTest import astminer.common.model.FunctionInfo import astminer.parse.antlr.AntlrNode +import org.junit.Test import java.io.File +import kotlin.test.BeforeTest import kotlin.test.assertEquals import kotlin.test.assertNotNull @@ -39,9 +39,9 @@ internal class 
ANTLRPHPFunctionSplitterTest { fun FunctionInfo.getJsonInfo(): String = listOf( "info : {", - "name: ${name}, ", + "name: $name, ", "args: ${parameters.joinToString(", ") { - listOfNotNull(it.type, it.name).joinToString(" ") + listOfNotNull(it.type, it.name).joinToString(" ") }}, ", "enclosing element: ${enclosingElement?.type?.getEnclosingElementType()}, ", "enclosing element name: ${enclosingElement?.name}, ", @@ -52,7 +52,7 @@ internal class ANTLRPHPFunctionSplitterTest { val actualJsonInfos = functionInfos.map { it.getJsonInfo() + '\n' }.sorted() val text = File(testFilePath).readText() - val expectedJsonInfos = Regex("info : \\{.*\\}").findAll(text).toList().map { it.value + '\n' }.sorted() + val expectedJsonInfos = Regex("info : \\{.*}").findAll(text).toList().map { it.value + '\n' }.sorted() assertEquals(expectedJsonInfos, actualJsonInfos) } diff --git a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt index 0b6604f4..3e2dc094 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt @@ -22,7 +22,7 @@ class ANTLRPythonParserTest { val parser = PythonParser() val projectRoot = File("src/test/resources/examples") val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "py")) - Assert.assertEquals("There is only 1 file with .py extension in 'testData/examples' folder",1, trees.size) + Assert.assertEquals("There is only 1 file with .py extension in 'testData/examples' folder", 1, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } } diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt index 8a935689..9efdac5a 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt +++ 
b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt @@ -4,9 +4,9 @@ import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo import astminer.parse.antlr.AntlrNode import org.junit.Test -import kotlin.test.assertEquals import java.io.File import kotlin.test.BeforeTest +import kotlin.test.assertEquals import kotlin.test.assertNotNull import kotlin.test.assertNull @@ -16,7 +16,7 @@ class PythonFunctionSplitterTest { @BeforeTest fun parseTree() { - val testTree = parser.parseInputStream(File(FILE_PATH).inputStream()) + val testTree = parser.parseInputStream(File(FILE_PATH).inputStream()) assertNotNull(testTree) functionInfos = functionSplitter.splitIntoFunctions(testTree, FILE_PATH) } @@ -28,37 +28,37 @@ class PythonFunctionSplitterTest { @Test fun testFunctionNotInClass() { - val functionClass = functionInfos.find { it.name == "fun_with_no_class" } + val functionClass = functionInfos.find { it.name == "fun_with_no_class" } assertNotNull(functionClass) assertNull(functionClass.enclosingElement) } @Test fun testFunctionInClass() { - val functionClass = functionInfos.find { it.name == "fun_in_class1" } + val functionClass = functionInfos.find { it.name == "fun_in_class1" } assertNotNull(functionClass) assertEquals(EnclosingElementType.Class, functionClass.enclosingElement?.type) - assertEquals( "Class1", functionClass.enclosingElement?.name) + assertEquals("Class1", functionClass.enclosingElement?.name) } @Test fun testFunctionInNestedClass() { - val functionClass = functionInfos.find { it.name == "fun_in_class2" } + val functionClass = functionInfos.find { it.name == "fun_in_class2" } assertNotNull(functionClass) assertEquals(EnclosingElementType.Class, functionClass.enclosingElement?.type) - assertEquals( "Class2", functionClass.enclosingElement?.name) + assertEquals("Class2", functionClass.enclosingElement?.name) } @Test fun testNoParameters() { - val functionNoParameters = functionInfos.find { it.name == 
"function_with_no_parameters" } + val functionNoParameters = functionInfos.find { it.name == "function_with_no_parameters" } assertNotNull(functionNoParameters) assertEquals(0, functionNoParameters.parameters.size) } @Test fun testOneParameter() { - val functionOneParameter = functionInfos.find { it.name == "function_with_one_parameter" } + val functionOneParameter = functionInfos.find { it.name == "function_with_one_parameter" } assertNotNull(functionOneParameter) assertEquals(1, functionOneParameter.parameters.size) val parameter = functionOneParameter.parameters[0] @@ -87,7 +87,7 @@ class PythonFunctionSplitterTest { @Test fun testThreeParameters() { - val functionThreeParameters = functionInfos.find { it.name == "function_with_three_parameters" } + val functionThreeParameters = functionInfos.find { it.name == "function_with_three_parameters" } assertNotNull(functionThreeParameters) assertEquals(3, functionThreeParameters.parameters.size) val parameters = functionThreeParameters.parameters @@ -101,7 +101,7 @@ class PythonFunctionSplitterTest { @Test fun testParameterInClass() { - val functionOneParameter = functionInfos.find { it.name == "fun_with_parameter_in_class" } + val functionOneParameter = functionInfos.find { it.name == "fun_with_parameter_in_class" } assertNotNull(functionOneParameter) assertEquals(2, functionOneParameter.parameters.size) val parameter = functionOneParameter.parameters[1] @@ -131,7 +131,7 @@ class PythonFunctionSplitterTest { @Test fun testEnclosingMethod() { - val functionInsideMethod = functionInfos.find { it.name == "function_inside_method" } + val functionInsideMethod = functionInfos.find { it.name == "function_inside_method" } assertNotNull(functionInsideMethod) val enclosingElement = functionInsideMethod.enclosingElement @@ -142,7 +142,7 @@ class PythonFunctionSplitterTest { @Test fun testEnclosingFunctionInsideMethod() { - val funInsideFunInsideMethod = functionInfos.find { it.name == "fun_inside_fun_inside_method" } + val 
funInsideFunInsideMethod = functionInfos.find { it.name == "fun_inside_fun_inside_method" } assertNotNull(funInsideFunInsideMethod) val enclosingElement = funInsideFunInsideMethod.enclosingElement diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 16b92b12..72d02d47 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -36,9 +36,9 @@ class FuzzyCppParserTest { val parser = FuzzyCppParser() val nodes = parser.parseFiles(getProjectFilesWithExtension(folder, "cpp")) Assert.assertEquals( - "There is only 3 file with .cpp extension in 'testData/examples' folder", - 3, - nodes.size + "There is only 3 file with .cpp extension in 'testData/examples' folder", + 3, + nodes.size ) } @@ -53,9 +53,9 @@ class FuzzyCppParserTest { parser.preprocessFile(folder.resolve(defineFileName), preprocessedFolder) Assert.assertEquals( - "'define' directives should be replaced", - "for (int i = (0); i < (10); ++i) { }", - preprocessedFolder.resolve(defineFileName).readInOneLine() + "'define' directives should be replaced", + "for (int i = (0); i < (10); ++i) { }", + preprocessedFolder.resolve(defineFileName).readInOneLine() ) preprocessedFolder.deleteRecursively() } @@ -71,9 +71,9 @@ class FuzzyCppParserTest { parser.preprocessFile(folder.resolve(includeFileName), preprocessedFolder) Assert.assertEquals( - "'include' directives should not be replaced", - folder.resolve(includeFileName).readInOneLine(), - preprocessedFolder.resolve(includeFileName).readInOneLine() + "'include' directives should not be replaced", + folder.resolve(includeFileName).readInOneLine(), + preprocessedFolder.resolve(includeFileName).readInOneLine() ) preprocessedFolder.deleteRecursively() } @@ -89,9 +89,9 @@ class FuzzyCppParserTest { val nodes = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "cpp")) Assert.assertEquals( - "Parse tree for a 
valid file should not be null. There are 5 files in example project.", - 5, - nodes.size + "Parse tree for a valid file should not be null. There are 5 files in example project.", + 5, + nodes.size ) preprocessedRoot.deleteRecursively() } diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index d2ff0810..7f3d853a 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -34,63 +34,63 @@ class FuzzyMethodSplitterTest { @Test fun testReturnVoid() { - val methodVoid = methodInfos.find { it.name == "functionReturningVoid" } + val methodVoid = methodInfos.find { it.name == "functionReturningVoid" } assertNotNull(methodVoid) - assertEquals( "void", methodVoid.returnType) + assertEquals("void", methodVoid.returnType) } @Test fun testReturnInt() { - val methodInt = methodInfos.find { it.name == "functionReturningInt" } + val methodInt = methodInfos.find { it.name == "functionReturningInt" } assertNotNull(methodInt) - assertEquals( "int", methodInt.returnType) + assertEquals("int", methodInt.returnType) } @Test fun testReturnString() { - val methodString = methodInfos.find { it.name == "functionReturningString" } + val methodString = methodInfos.find { it.name == "functionReturningString" } assertNotNull(methodString) - assertEquals( "string", methodString.returnType) + assertEquals("string", methodString.returnType) } @Test fun testReturnClass() { - val methodClass = methodInfos.find { it.name == "functionReturningClass" } + val methodClass = methodInfos.find { it.name == "functionReturningClass" } assertNotNull(methodClass) - assertEquals( "Class", methodClass.returnType) + assertEquals("Class", methodClass.returnType) } @Test fun testFunctionNotInClass() { - val methodClass = methodInfos.find { it.name == "functionWithNoClass" } + val methodClass = methodInfos.find { it.name == 
"functionWithNoClass" } assertNotNull(methodClass) assertNull(methodClass.enclosingElement) } @Test fun testFunctionInClass() { - val methodClass = methodInfos.find { it.name == "functionInClass1" } + val methodClass = methodInfos.find { it.name == "functionInClass1" } assertNotNull(methodClass) - assertEquals( "Class1", methodClass.enclosingElement?.name) + assertEquals("Class1", methodClass.enclosingElement?.name) } @Test fun testFunctionInNestedClass() { - val methodClass = methodInfos.find { it.name == "functionInClass2" } + val methodClass = methodInfos.find { it.name == "functionInClass2" } assertNotNull(methodClass) - assertEquals( "Class2", methodClass.enclosingElement?.name) + assertEquals("Class2", methodClass.enclosingElement?.name) } @Test fun testNoParameters() { - val methodNoParameters = methodInfos.find { it.name == "functionWithNoParameters" } + val methodNoParameters = methodInfos.find { it.name == "functionWithNoParameters" } assertNotNull(methodNoParameters) assertEquals(0, methodNoParameters.parameters.size) } @Test fun testOneParameter() { - val methodOneParameter = methodInfos.find { it.name == "functionWithOneParameter" } + val methodOneParameter = methodInfos.find { it.name == "functionWithOneParameter" } assertNotNull(methodOneParameter) assertEquals(1, methodOneParameter.parameters.size) val parameter = methodOneParameter.parameters[0] @@ -100,7 +100,7 @@ class FuzzyMethodSplitterTest { @Test fun testThreeParameters() { - val methodThreeParameters = methodInfos.find { it.name == "functionWithThreeParameters" } + val methodThreeParameters = methodInfos.find { it.name == "functionWithThreeParameters" } assertNotNull(methodThreeParameters) assertEquals(3, methodThreeParameters.parameters.size) for (i in 0 until 3) { diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt index 9ecbd78b..c3a5fa22 100644 --- 
a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt @@ -22,7 +22,7 @@ class GumTreeJavaParserTest { val projectRoot = File("src/test/resources/examples") val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "java")) - Assert.assertEquals("There is only 2 file with .java extension in 'testData/examples' folder",2, trees.size) + Assert.assertEquals("There is only 2 file with .java extension in 'testData/examples' folder", 2, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } } diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt index 435ba5cb..910acd93 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt @@ -33,9 +33,13 @@ class GumTreePythonFunctionSplitterTest { @Test fun funcNamesTest() { val realNames = setOf( - "no_args_func", "with_args_no_typed", "with_typed_args", - "with_typed_return_no_args", "full_typed", - "func_dif_args_typed_return", "complex_args_full_typed" + "no_args_func", + "with_args_no_typed", + "with_typed_args", + "with_typed_return_no_args", + "full_typed", + "func_dif_args_typed_return", + "complex_args_full_typed" ) val functionInfos = splitFunctions(createPath("1.py")) val parsedNames = functionInfos.map { it.name }.toSet() diff --git a/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt b/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt index 3a3e9cf0..3e7ef0d3 100644 --- a/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt +++ b/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt @@ -21,9 +21,12 @@ abstract class PathWorkerTestBase { val allPaths = PathWorker().retrievePaths(tree) val expectedCount = nLeaves 
* (nLeaves - 1) / 2 - Assert.assertEquals("A tree with $nLeaves leaves contains $expectedCount paths, " + + Assert.assertEquals( + "A tree with $nLeaves leaves contains $expectedCount paths, " + "one per distinct ordered pair of leaves. Worker returned ${allPaths.size}", - expectedCount, allPaths.size) + expectedCount, + allPaths.size + ) } @Test @@ -48,9 +51,9 @@ abstract class PathWorkerTestBase { for (maxWidth in 1..leavesCount) { val paths = PathWorker().retrievePaths(tree, maxLength, maxWidth) Assert.assertEquals( - "Unexpected paths count with length $maxLength and width $maxWidth", - allPathCharacteristics.count { (w, len) -> w <= maxWidth && len <= maxLength }, - paths.size + "Unexpected paths count with length $maxLength and width $maxWidth", + allPathCharacteristics.count { (w, len) -> w <= maxWidth && len <= maxLength }, + paths.size ) } } diff --git a/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt b/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt index b3bad4ac..41de26aa 100644 --- a/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt +++ b/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt @@ -10,7 +10,6 @@ fun simpleNode(number: Int, parent: AntlrNode?): AntlrNode = fun simpleNodes(numbers: List, parent: AntlrNode?): List = numbers.map { simpleNode(it, parent) } - fun getParentStack(node: Node): List = (node.parent?.let { getParentStack(it) } ?: emptyList()) + node fun getAllPathCharacteristics(root: Node): Collection> { @@ -40,14 +39,14 @@ fun getAllPathCharacteristics(root: Node): Collection> { } fun ASTPath.allNodesAreDistinct(): Boolean { - return this.upwardNodes.size == this.upwardNodes.toSet().size - && this.downwardNodes.size == this.downwardNodes.toSet().size + return this.upwardNodes.size == this.upwardNodes.toSet().size && + this.downwardNodes.size == this.downwardNodes.toSet().size } fun ASTPath.isSimple(): Boolean { - return this.upwardNodes.toSet().intersect(this.downwardNodes.toSet()).isEmpty() - && 
!this.upwardNodes.contains(this.topNode) - && !this.downwardNodes.contains(this.topNode) + return this.upwardNodes.toSet().intersect(this.downwardNodes.toSet()).isEmpty() && + !this.upwardNodes.contains(this.topNode) && + !this.downwardNodes.contains(this.topNode) } fun ASTPath.piecesMatch(): Boolean = this.upwardNodes.last() === this.downwardNodes.first() @@ -55,7 +54,7 @@ fun ASTPath.piecesMatch(): Boolean = this.upwardNodes.last() === this.downwardNo fun assertPathIsValid(path: ASTPath) { Assert.assertTrue("Nodes in each of the path pieces should be distinct", path.allNodesAreDistinct()) Assert.assertTrue( - "Path should be simple: upward and downward pieces should not intersect or contain top node", - path.isSimple() + "Path should be simple: upward and downward pieces should not intersect or contain top node", + path.isSimple() ) } diff --git a/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt index 062ff964..3a6067c3 100644 --- a/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt @@ -38,12 +38,12 @@ class DotAstStorageTest { storage.close() val trueLines = listOf( - "digraph entityId {", - "0 -- {1 2};", - "1 -- {};", - "2 -- {3};", - "3 -- {};", - "}" + "digraph entityId {", + "0 -- {1 2};", + "1 -- {};", + "2 -- {3};", + "3 -- {};", + "}" ) val storageLines = File(File("test_examples", "asts"), "ast_0.dot").readLines() From ae67a65ede5bdd4cdc8fef1d6c5a02c6d1ebaecc Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 22 Jul 2021 12:53:19 +0300 Subject: [PATCH 249/308] Use a temporary folder for storage tests --- .../astminer/storage/ast/CsvAstStorageTest.kt | 19 +++++++++--- .../astminer/storage/ast/DotAstStorageTest.kt | 29 ++++++++++++------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt 
b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt index da8ddc4f..ee4e1873 100644 --- a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt @@ -5,6 +5,8 @@ import astminer.common.createDummyTree import astminer.common.createSmallTree import org.junit.Assert import org.junit.Test +import java.io.File +import kotlin.test.AfterTest class CsvAstStorageTest { private fun generateCorrectAstStringForBamboo(from: Int, to: Int): String { @@ -15,10 +17,15 @@ class CsvAstStorageTest { return "$from $from{$child}" } + @AfterTest + fun removeTestOutput() { + File(OUTPUT_FOLDER).deleteRecursively() + } + @Test fun testAstString() { val root = createSmallTree() - val storage = CsvAstStorage(".") + val storage = CsvAstStorage(OUTPUT_FOLDER) storage.store(root.labeledWith("entityId")) Assert.assertEquals(storage.astString(root), "1 1{2 2{}3 3{4 4{}}}") @@ -27,7 +34,7 @@ class CsvAstStorageTest { @Test fun `test ast string for bigger tree`() { val root = createDummyTree() - val storage = CsvAstStorage(".") + val storage = CsvAstStorage(OUTPUT_FOLDER) storage.store(root.labeledWith("entityId")) val expected = "1 1{2 2{3 3{}4 4{}5 5{}}6 6{7 7{}8 8{}}}" @@ -37,7 +44,7 @@ class CsvAstStorageTest { @Test fun `test ast string for small bamboo`() { val bamboo = createBamboo(10) - val storage = CsvAstStorage(".") + val storage = CsvAstStorage(OUTPUT_FOLDER) storage.store(bamboo.labeledWith("entityId")) val expected = generateCorrectAstStringForBamboo(1, 10) @@ -47,10 +54,14 @@ class CsvAstStorageTest { @Test fun `test ast string for big bamboo`() { val bamboo = createBamboo(100) - val storage = CsvAstStorage(".") + val storage = CsvAstStorage(OUTPUT_FOLDER) storage.store(bamboo.labeledWith("entityId")) val expected = generateCorrectAstStringForBamboo(1, 100) Assert.assertEquals(expected, storage.astString(bamboo)) } + + companion object { + private const val OUTPUT_FOLDER = "test_output" + } } diff --git 
a/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt index 3a6067c3..2123533b 100644 --- a/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt @@ -3,6 +3,7 @@ package astminer.storage.ast import astminer.common.* import org.junit.Test import java.io.File +import kotlin.test.AfterTest import kotlin.test.assertEquals class DotAstStorageTest { @@ -29,10 +30,15 @@ class DotAstStorageTest { return lines } + @AfterTest + fun removeOutput() { + File(OUTPUT_FOLDER).deleteRecursively() + } + @Test fun testDotStorageOnSmallTree() { val root = createSmallTree() - val storage = DotAstStorage("test_examples") + val storage = DotAstStorage(OUTPUT_FOLDER) storage.store(root.labeledWith("entityId")) storage.close() @@ -45,10 +51,7 @@ class DotAstStorageTest { "3 -- {};", "}" ) - val storageLines = File(File("test_examples", "asts"), "ast_0.dot").readLines() - - File("test_examples").deleteRecursively() - + val storageLines = File(File(OUTPUT_FOLDER, "asts"), "ast_0.dot").readLines() assertEquals(trueLines, storageLines) } @@ -83,7 +86,7 @@ class DotAstStorageTest { @Test fun testLabelNormalization() { val label = "some/kind/of/random/path" - val storage = DotAstStorage(".") + val storage = DotAstStorage(OUTPUT_FOLDER) val normalizedLabel = storage.normalizeAstLabel(label) assertEquals("some_kind_of_random_path", normalizedLabel) @@ -92,7 +95,7 @@ class DotAstStorageTest { @Test fun testBindingNormalization() { val label = "\$supposeToBeListener" - val storage = DotAstStorage(".") + val storage = DotAstStorage(OUTPUT_FOLDER) val normalizedLabel = storage.normalizeAstLabel(label) assertEquals("_supposeToBeListener", normalizedLabel) @@ -101,7 +104,7 @@ class DotAstStorageTest { @Test fun testLabelWithCommaNormalization() { val labelWithComma = "some,bad,label" - val storage = DotAstStorage(".") + val storage = DotAstStorage(OUTPUT_FOLDER) val 
normalizedLabel = storage.normalizeAstLabel(labelWithComma) assertEquals("some_bad_label", normalizedLabel) @@ -110,7 +113,7 @@ class DotAstStorageTest { @Test fun testSplittingFullPath() { val fullPath = "/path1/path2/path_3/path.4/file.name" - val storage = DotAstStorage(".") + val storage = DotAstStorage(OUTPUT_FOLDER) val (path, fileName) = storage.splitFullPath(fullPath) assertEquals("/path1/path2/path_3/path.4", path) @@ -120,7 +123,7 @@ class DotAstStorageTest { @Test fun testSplittingFileName() { val fullPath = "file.name" - val storage = DotAstStorage(".") + val storage = DotAstStorage(OUTPUT_FOLDER) val (path, fileName) = storage.splitFullPath(fullPath) assertEquals("", path) @@ -131,9 +134,13 @@ class DotAstStorageTest { fun testFilepathNormalization() { // real life example val badFilepath = "interviews/Leet-Code/binary-search/pow(x,n).java" - val storage = DotAstStorage(".") + val storage = DotAstStorage(OUTPUT_FOLDER) val normalizedFilepath = storage.normalizeFilepath(badFilepath) assertEquals("interviews/Leet-Code/binary-search/pow_x_n_.java", normalizedFilepath) } + + companion object { + private const val OUTPUT_FOLDER = "test_output" + } } From af594689fc740149102e1f220631d64fbdcf4428 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Thu, 22 Jul 2021 16:31:14 +0300 Subject: [PATCH 250/308] Move examples to separate module --- .dockerignore | 18 ++----- .gitignore | 5 +- README.md | 4 +- detekt.yaml | 2 +- settings.gradle.kts | 3 ++ .../java/astminer/GumTreeJavaPaths.java | 54 +++++++++++++++++++ .../kotlin/astminer/antlrJavaAsts.kt} | 16 ++++-- .../kotlin/astminer/antlrJavaMethodPaths.kt | 43 +++++++++++++++ .../kotlin/astminer/antlrJavaScriptPaths.kt} | 13 +++-- .../kotlin/astminer/antlrPythonPaths.kt} | 13 +++-- .../kotlin/astminer/collectFeatures.kt} | 19 ++++--- .../fuzzyCppPathsWithPreprocessing.kt | 32 +++++++++++ .../astminer/gumTreeJavaMethodPaths.kt} | 19 ++++--- .../kotlin/astminer/gumTreeJavaPaths.kt} | 16 ++++-- 
.../astminer/gumTreePythonMethodPaths.kt} | 14 +++-- .../astminer/methodNamePredictionPipeline.kt} | 20 ++++--- .../kotlin/astminer/runAllExamples.kt | 22 ++++++++ .../java/astminer/examples/AllJavaFiles.java | 44 --------------- src/main/kotlin/astminer/common/FileUtil.kt | 8 +++ .../kotlin/astminer/examples/AllCppFiles.kt | 27 ---------- .../kotlin/astminer/examples/AllJavaFiles.kt | 32 ----------- src/main/kotlin/astminer/examples/Common.kt | 11 ---- .../astminer/parse/cpp/FuzzyCppParserTest.kt | 2 +- 23 files changed, 265 insertions(+), 172 deletions(-) create mode 100644 settings.gradle.kts create mode 100644 src/examples/java/astminer/GumTreeJavaPaths.java rename src/{main/kotlin/astminer/examples/AllJavaAst.kt => examples/kotlin/astminer/antlrJavaAsts.kt} (52%) create mode 100644 src/examples/kotlin/astminer/antlrJavaMethodPaths.kt rename src/{main/kotlin/astminer/examples/AllJavaScriptFiles.kt => examples/kotlin/astminer/antlrJavaScriptPaths.kt} (62%) rename src/{main/kotlin/astminer/examples/AllPythonFiles.kt => examples/kotlin/astminer/antlrPythonPaths.kt} (63%) rename src/{main/kotlin/astminer/examples/FeatureExtraction.kt => examples/kotlin/astminer/collectFeatures.kt} (61%) create mode 100644 src/examples/kotlin/astminer/fuzzyCppPathsWithPreprocessing.kt rename src/{main/kotlin/astminer/examples/AllJavaMethods.kt => examples/kotlin/astminer/gumTreeJavaMethodPaths.kt} (78%) rename src/{main/kotlin/astminer/examples/AllJavaFilesGumTree.kt => examples/kotlin/astminer/gumTreeJavaPaths.kt} (53%) rename src/{main/kotlin/astminer/examples/AllPythonMethods.kt => examples/kotlin/astminer/gumTreePythonMethodPaths.kt} (84%) rename src/{main/kotlin/astminer/examples/Code2VecJavaMethods.kt => examples/kotlin/astminer/methodNamePredictionPipeline.kt} (52%) create mode 100644 src/examples/kotlin/astminer/runAllExamples.kt delete mode 100644 src/main/java/astminer/examples/AllJavaFiles.java delete mode 100644 src/main/kotlin/astminer/examples/AllCppFiles.kt delete 
mode 100644 src/main/kotlin/astminer/examples/AllJavaFiles.kt delete mode 100644 src/main/kotlin/astminer/examples/Common.kt diff --git a/.dockerignore b/.dockerignore index d50294c6..dd87cc65 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,15 +1,7 @@ -*.iml -*.csv -asts/ - +.github/ .idea/ -.gradle/ -examples/out/ -src/main/generated/ build/ - -.DS_Store - -.github -scripts/ - +configs/ +output/ +examples_output/ +src/main/generated/ diff --git a/.gitignore b/.gitignore index 013b6c8c..e454121f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,8 +4,9 @@ src/main/generated/ build/ -examples/out/ -asts/ + +examples_output/ +output/ *.csv log.txt diff --git a/README.md b/README.md index e51aeb06..10770eeb 100644 --- a/README.md +++ b/README.md @@ -126,9 +126,9 @@ After that, add `mavenLocal()` into the `repositories` section in your gradle co #### Examples -If you want to use `astminer` as a library in your Java/Kotlin based data mining tool, check the following examples: +If you want to use `astminer` as a library in your Java/Kotlin based data mining tool, check the following astminer: -* A few [simple usage examples](src/main/kotlin/astminer/examples) can be run with `./gradlew run`. +* A few [simple usage astminer](src/main/kotlin/astminer/examples) can be run with `./gradlew run`. * A somewhat more verbose [example of usage in Java](src/main/kotlin/astminer/examples/AllJavaFiles.kt) is available as well. 
diff --git a/detekt.yaml b/detekt.yaml index e914391b..52fc010a 100644 --- a/detekt.yaml +++ b/detekt.yaml @@ -21,7 +21,7 @@ style: ignoreAnnotation: true ignoreEnums: true ignoreNumbers: ['-1', '0', '1', '2', '60', '100', '1000'] - excludes: ['**/test/**', '**/examples/**'] + excludes: ['**/test/**'] ReturnCount: max: 5 WildcardImport: diff --git a/settings.gradle.kts b/settings.gradle.kts new file mode 100644 index 00000000..8086a9bb --- /dev/null +++ b/settings.gradle.kts @@ -0,0 +1,3 @@ +rootProject.name = "astminer" + +include("examples") diff --git a/src/examples/java/astminer/GumTreeJavaPaths.java b/src/examples/java/astminer/GumTreeJavaPaths.java new file mode 100644 index 00000000..78d4f8f4 --- /dev/null +++ b/src/examples/java/astminer/GumTreeJavaPaths.java @@ -0,0 +1,54 @@ +package astminer; + +import astminer.common.model.LabeledResult; +import astminer.parse.gumtree.GumTreeNode; +import astminer.parse.gumtree.java.GumTreeJavaParser; +import astminer.storage.path.Code2VecPathStorage; +import astminer.storage.path.PathBasedStorage; +import astminer.storage.path.PathBasedStorageConfig; + +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.*; +import java.nio.file.attribute.BasicFileAttributes; + +// Retrieve paths from Java files, using a GumTree parser. 
+public class GumTreeJavaPaths { + private static final String INPUT_FOLDER = "src/test/resources/examples"; + private static final String OUTPUT_FOLDER = "examples_output/gumtree_java_paths_java_api"; + + public static void runExample() { + final PathBasedStorageConfig config = new PathBasedStorageConfig(5, 5, null, null, null); + final PathBasedStorage code2vecStorage = new Code2VecPathStorage(OUTPUT_FOLDER, config); + + final Path inputFolder = Paths.get(INPUT_FOLDER); + + FileVisitor fileVisitor = new SimpleFileVisitor<>() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) throws IOException { + if (!file.getFileName().toString().endsWith(".java")) { + return FileVisitResult.CONTINUE; + } + GumTreeNode fileTree = new GumTreeJavaParser().parseInputStream(new FileInputStream(file.toFile())); + String filePath = file.toAbsolutePath().toString(); + + LabeledResult labeledResult = new LabeledResult<>(fileTree, filePath, filePath); + code2vecStorage.store(labeledResult); + + return FileVisitResult.CONTINUE; + } + }; + + try { + Files.walkFileTree(inputFolder, fileVisitor); + } catch (IOException e) { + System.out.println("Error while processing files: " + e.getMessage()); + } finally { + code2vecStorage.close(); + } + } + + public static void main(String[] args) { + runExample(); + } +} diff --git a/src/main/kotlin/astminer/examples/AllJavaAst.kt b/src/examples/kotlin/astminer/antlrJavaAsts.kt similarity index 52% rename from src/main/kotlin/astminer/examples/AllJavaAst.kt rename to src/examples/kotlin/astminer/antlrJavaAsts.kt index e783914c..9d825d80 100644 --- a/src/main/kotlin/astminer/examples/AllJavaAst.kt +++ b/src/examples/kotlin/astminer/antlrJavaAsts.kt @@ -1,17 +1,23 @@ -package astminer.examples +package astminer import astminer.config.* import astminer.pipeline.Pipeline -// Retrieve ASTs from Java files, using a generated parser. 
-fun allJavaAsts() { +/** + * Retrieve ASTs from Java files, using ANTLR parser and save them in JSON format. + */ +fun antlrJavaAsts() { val config = PipelineConfig( inputDir = "src/test/resources/examples/", - outputDir = "out_examples/allJavaAstsAntlr", + outputDir = "examples_output/antlr_java_asts_json_storage", parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Java)), labelExtractor = FileNameExtractorConfig(), - storage = CsvAstStorageConfig(), + storage = JsonAstStorageConfig(), ) Pipeline(config).run() } + +fun main() { + antlrJavaAsts() +} diff --git a/src/examples/kotlin/astminer/antlrJavaMethodPaths.kt b/src/examples/kotlin/astminer/antlrJavaMethodPaths.kt new file mode 100644 index 00000000..d9870ddc --- /dev/null +++ b/src/examples/kotlin/astminer/antlrJavaMethodPaths.kt @@ -0,0 +1,43 @@ +package astminer + +import astminer.common.forFilesWithSuffix +import astminer.common.model.FunctionInfo +import astminer.common.model.LabeledResult +import astminer.parse.antlr.AntlrNode +import astminer.parse.antlr.java.JavaFunctionSplitter +import astminer.parse.antlr.java.JavaParser +import astminer.storage.path.Code2VecPathStorage +import astminer.storage.path.PathBasedStorageConfig +import java.io.File + +/** + * Retrieve paths from Java files using ANTLR parser. 
+ */ +fun antlrJavaMethodPaths() { + val inputDir = "src/test/resources/examples/" + val outputDir = "examples_output/antlr_java_method_paths" + + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) + + File(inputDir).forFilesWithSuffix(".java") { file -> + val node = JavaParser().parseInputStream(file.inputStream()) + + val functions: List> = JavaFunctionSplitter() + .splitIntoFunctions(node, file.path) + .map { + val parametersStr = + it.parameters.joinToString(" | ") { param -> "${param.name} ${param.type}" } + println("${it.name} ${it.returnType} ${it.enclosingElement?.name} [$parametersStr]") + it + } + functions.forEach { + storage.store(LabeledResult(it.root, it.name ?: "", file.path)) + } + } + + storage.close() +} + +fun main() { + antlrJavaMethodPaths() +} diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/examples/kotlin/astminer/antlrJavaScriptPaths.kt similarity index 62% rename from src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt rename to src/examples/kotlin/astminer/antlrJavaScriptPaths.kt index f6952851..1f4b9473 100644 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ b/src/examples/kotlin/astminer/antlrJavaScriptPaths.kt @@ -1,12 +1,15 @@ -package astminer.examples +package astminer import astminer.config.* import astminer.pipeline.Pipeline -fun allJavaScriptFiles() { +/** + * Retrieve paths from all JavaScript files using ANTLR parser. 
+ */ +fun antlrJavaScriptPaths() { val config = PipelineConfig( inputDir = "src/test/resources/examples", - outputDir = "out_examples/allJavaScriptFilesAntlr", + outputDir = "examples_output/antlr_javascript_paths", parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.JavaScript)), labelExtractor = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(5, 5) @@ -14,3 +17,7 @@ fun allJavaScriptFiles() { Pipeline(config).run() } + +fun main() { + antlrJavaScriptPaths() +} diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/examples/kotlin/astminer/antlrPythonPaths.kt similarity index 63% rename from src/main/kotlin/astminer/examples/AllPythonFiles.kt rename to src/examples/kotlin/astminer/antlrPythonPaths.kt index cf3b216f..ae8eb138 100644 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ b/src/examples/kotlin/astminer/antlrPythonPaths.kt @@ -1,12 +1,15 @@ -package astminer.examples +package astminer import astminer.config.* import astminer.pipeline.Pipeline -fun allPythonFiles() { +/** + * Retrieve paths from all Python files using ANTLR parser + */ +fun antlrPythonPaths() { val config = PipelineConfig( inputDir = "src/test/resources/examples", - outputDir = "out_examples/allPythonFiles", + outputDir = "examples_output/antlr_python_paths", parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Python)), labelExtractor = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(5, 5) @@ -14,3 +17,7 @@ fun allPythonFiles() { Pipeline(config).run() } + +fun main() { + antlrPythonPaths() +} diff --git a/src/main/kotlin/astminer/examples/FeatureExtraction.kt b/src/examples/kotlin/astminer/collectFeatures.kt similarity index 61% rename from src/main/kotlin/astminer/examples/FeatureExtraction.kt rename to src/examples/kotlin/astminer/collectFeatures.kt index 57cd35b7..ab673fdc 100644 --- a/src/main/kotlin/astminer/examples/FeatureExtraction.kt +++ b/src/examples/kotlin/astminer/collectFeatures.kt @@ -1,22 +1,27 @@ 
-package astminer.examples +package astminer +import astminer.common.forFilesWithSuffix import astminer.common.numberOfLines import astminer.featureextraction.* import astminer.parse.gumtree.java.GumTreeJavaParser import java.io.File -fun parseAndCollectFeatures() { +/** + * Collect different features/statistics from parsed trees. + * Target language is Java, using GumTree parser. + */ +fun collectFeatures() { val parser = GumTreeJavaParser() val features: List> = listOf(Depth, NumberOfNodes, BranchingFactor, CompressiblePathLengths, Tokens, NodeTypes) - val folderInput = "./testData/featureextraction" - val folderOutput = "out_examples/featureextraction" + val inputDir = "src/test/resources/featureextraction" + val outputDir = "examples_output/collected_features" val storage = TreeFeatureValueStorage(",") storage.storeFeatures(features) - File(folderInput).forFilesWithSuffix("java") { fileInput -> + File(inputDir).forFilesWithSuffix("java") { fileInput -> val fileName = fileInput.name val nol = numberOfLines(fileInput) @@ -24,9 +29,9 @@ fun parseAndCollectFeatures() { storage.storeParsedTree(tree) } - storage.save(folderOutput) + storage.save(outputDir) } fun main() { - parseAndCollectFeatures() + collectFeatures() } diff --git a/src/examples/kotlin/astminer/fuzzyCppPathsWithPreprocessing.kt b/src/examples/kotlin/astminer/fuzzyCppPathsWithPreprocessing.kt new file mode 100644 index 00000000..a3eff140 --- /dev/null +++ b/src/examples/kotlin/astminer/fuzzyCppPathsWithPreprocessing.kt @@ -0,0 +1,32 @@ +package astminer + +import astminer.config.* +import astminer.parse.fuzzy.cpp.FuzzyCppParser +import astminer.pipeline.Pipeline +import java.io.File + +/** + * Preprocess .cpp files and retrieve paths from them, using a fuzzyc2cpg parser. 
+ */ +fun fuzzyCppPathsWithPreprocessing() { + val inputDir = "src/test/resources/examples" + val outputDir = "examples_output/fuzzy_cpp_paths" + + val preprocessedDir = File(outputDir).resolve("preprocessed") + val parser = FuzzyCppParser() + parser.preprocessProject(File(inputDir), preprocessedDir) + + val config = PipelineConfig( + inputDir = preprocessedDir.path, + outputDir = outputDir, + parser = ParserConfig(ParserType.Fuzzy, listOf(FileExtension.Cpp)), + labelExtractor = FileNameExtractorConfig(), + storage = Code2VecPathStorageConfig(5, 5) + ) + + Pipeline(config).run() +} + +fun main() { + fuzzyCppPathsWithPreprocessing() +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/examples/kotlin/astminer/gumTreeJavaMethodPaths.kt similarity index 78% rename from src/main/kotlin/astminer/examples/AllJavaMethods.kt rename to src/examples/kotlin/astminer/gumTreeJavaMethodPaths.kt index 74dec938..2039a00a 100644 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ b/src/examples/kotlin/astminer/gumTreeJavaMethodPaths.kt @@ -1,5 +1,6 @@ -package astminer.examples +package astminer +import astminer.common.forFilesWithSuffix import astminer.common.model.FunctionInfo import astminer.common.model.LabeledResult import astminer.parse.gumtree.GumTreeNode @@ -16,12 +17,14 @@ private fun getCsvFriendlyMethodId(functionInfo: FunctionInfo): Str return "$className.$methodName($parameterTypes)" } -// Retrieve paths from all Java files, using a GumTree parser. -// GumTreeMethodSplitter is used to extract individual method nodes from the compilation unit tree. -fun allJavaMethods() { - val inputDir = "src/test/resources/gumTreeMethodSplitter" +/** + * Retrieve paths from all Java files, using a GumTree parser. + * GumTreeMethodSplitter is used to extract individual method nodes from the compilation unit tree. 
+ */ +fun gumTreeJavaMethodPaths() { + val inputDir = "src/test/resources/examples" + val outputDir = "examples_output/gumtree_java_method_paths" - val outputDir = "out_examples/allJavaMethods" val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) File(inputDir).forFilesWithSuffix(".java") { file -> @@ -41,3 +44,7 @@ fun allJavaMethods() { storage.close() } + +fun main() { + gumTreeJavaMethodPaths() +} diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/examples/kotlin/astminer/gumTreeJavaPaths.kt similarity index 53% rename from src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt rename to src/examples/kotlin/astminer/gumTreeJavaPaths.kt index 3ace42f3..05a0633f 100644 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ b/src/examples/kotlin/astminer/gumTreeJavaPaths.kt @@ -1,16 +1,22 @@ -package astminer.examples +package astminer import astminer.config.* import astminer.pipeline.Pipeline -// Retrieve paths from Java files, using a GumTree parser. -fun allJavaFilesGumTree() { +/** + * Retrieve paths from Java files, using a GumTree parser. 
+ */ +fun gumTreeJavaPaths() { val config = PipelineConfig( - inputDir = "src/test/resources/gumTreeMethodSplitter/", - outputDir = "out_examples/allJavaFilesGumTree", + inputDir = "src/test/resources/examples", + outputDir = "examples_output/gumtree_java_paths_kotlin_api", parser = ParserConfig(ParserType.GumTree, listOf(FileExtension.Java)), labelExtractor = FileNameExtractorConfig(), storage = Code2VecPathStorageConfig(5, 5) ) Pipeline(config).run() } + +fun main() { + gumTreeJavaPaths() +} diff --git a/src/main/kotlin/astminer/examples/AllPythonMethods.kt b/src/examples/kotlin/astminer/gumTreePythonMethodPaths.kt similarity index 84% rename from src/main/kotlin/astminer/examples/AllPythonMethods.kt rename to src/examples/kotlin/astminer/gumTreePythonMethodPaths.kt index d7ddab1e..a56e5538 100644 --- a/src/main/kotlin/astminer/examples/AllPythonMethods.kt +++ b/src/examples/kotlin/astminer/gumTreePythonMethodPaths.kt @@ -1,5 +1,6 @@ -package astminer.examples +package astminer +import astminer.common.forFilesWithSuffix import astminer.common.model.FunctionInfo import astminer.common.model.LabeledResult import astminer.parse.gumtree.GumTreeNode @@ -16,10 +17,13 @@ private fun getCsvFriendlyMethodId(functionInfo: FunctionInfo): Str return "$className.$methodName($parameterTypes)" } -fun allPythonMethods() { +/** + * Retrieve paths from all Python methods in files using GumTree parser + */ +fun gumTreePythonMethodPaths() { val inputDir = "src/test/resources/gumTreeMethodSplitter" + val outputDir = "examples_output/gumtree_python_methods_paths" - val outputDir = "out_examples/allPythonMethods" val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) File(inputDir).forFilesWithSuffix(".py") { file -> @@ -40,3 +44,7 @@ fun allPythonMethods() { storage.close() } + +fun main() { + gumTreePythonMethodPaths() +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt 
b/src/examples/kotlin/astminer/methodNamePredictionPipeline.kt similarity index 52% rename from src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt rename to src/examples/kotlin/astminer/methodNamePredictionPipeline.kt index b8ef0c3e..107b9391 100644 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ b/src/examples/kotlin/astminer/methodNamePredictionPipeline.kt @@ -1,16 +1,18 @@ -package astminer.examples +package astminer import astminer.config.* import astminer.pipeline.Pipeline -// Retrieve paths from all Java files, using a GumTree parser. -// GumTreeMethodSplitter is used to extract individual method nodes from the compilation unit tree. -fun code2vecJavaMethods() { - val folder = "src/test/resources/code2vecPathMining" - val outputDir = "out_examples/code2vecPathMining" +/** + * Prepare data for training code2vec model for method name prediction task. + * Target language is Java, using ANTLR parser. + */ +fun methodNamePredictionPipeline() { + val inputDir = "src/test/resources/examples" + val outputDir = "examples_output/method_name_prediction_code2vec" val pipelineConfig = PipelineConfig( - inputDir = folder, + inputDir = inputDir, outputDir = outputDir, parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Java)), labelExtractor = FunctionNameExtractorConfig(), @@ -22,3 +24,7 @@ fun code2vecJavaMethods() { Pipeline(pipelineConfig).run() } + +fun main() { + methodNamePredictionPipeline() +} diff --git a/src/examples/kotlin/astminer/runAllExamples.kt b/src/examples/kotlin/astminer/runAllExamples.kt new file mode 100644 index 00000000..2c9f1bcb --- /dev/null +++ b/src/examples/kotlin/astminer/runAllExamples.kt @@ -0,0 +1,22 @@ +package astminer + +import astminer.parse.ParsingException + +/** + * Run all examples in one place. 
+ */ +fun main() { + // Java api example + GumTreeJavaPaths.runExample() + // Kotlin api examples + antlrJavaAsts() + antlrJavaMethodPaths() + antlrJavaScriptPaths() + antlrPythonPaths() + collectFeatures() + fuzzyCppPathsWithPreprocessing() + gumTreeJavaMethodPaths() + gumTreeJavaPaths() + try { gumTreePythonMethodPaths() } catch (ex: ParsingException) { println("No python parser to run this example") } + methodNamePredictionPipeline() +} \ No newline at end of file diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java deleted file mode 100644 index 46bfc39a..00000000 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ /dev/null @@ -1,44 +0,0 @@ -package astminer.examples; - -import astminer.common.model.LabeledResult; -import astminer.common.model.*; -import astminer.parse.gumtree.java.GumTreeJavaParser; -import astminer.storage.path.Code2VecPathStorage; -import astminer.storage.path.PathBasedStorage; -import astminer.storage.path.PathBasedStorageConfig; -import java.io.FileInputStream; -import java.io.IOException; -import java.nio.file.*; -import java.nio.file.attribute.BasicFileAttributes; - -//Retrieve paths from Java files, using a GumTree parser. 
-public class AllJavaFiles { - private static final String INPUT_FOLDER = "src/test/resources/gumTreeMethodSplitter"; - private static final String OUTPUT_FOLDER = "out_examples/allJavaFiles_GumTree_java"; - - public static void runExample() { - final PathBasedStorageConfig config = new PathBasedStorageConfig(5, 5, null, null, null); - final PathBasedStorage pathStorage = new Code2VecPathStorage(OUTPUT_FOLDER, config); - - final Path inputFolder = Paths.get(INPUT_FOLDER); - - FileVisitor fileVisitor = new SimpleFileVisitor() { - @Override - public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) throws IOException { - Node fileTree = new GumTreeJavaParser().parseInputStream(new FileInputStream(file.toFile())); - - String filePath = file.toAbsolutePath().toString(); - pathStorage.store(new LabeledResult<>(fileTree, filePath, filePath)); - - return FileVisitResult.CONTINUE; - } - }; - - try { - Files.walkFileTree(inputFolder, fileVisitor); - pathStorage.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } -} diff --git a/src/main/kotlin/astminer/common/FileUtil.kt b/src/main/kotlin/astminer/common/FileUtil.kt index 0d83dc51..09471b68 100644 --- a/src/main/kotlin/astminer/common/FileUtil.kt +++ b/src/main/kotlin/astminer/common/FileUtil.kt @@ -55,3 +55,11 @@ fun getProjectFiles(projectRoot: File, filter: (File) -> Boolean = { true }) = p fun getProjectFilesWithExtension(projectRoot: File, extension: String): List = getProjectFiles(projectRoot) { it.isFile && it.extension == extension } + +fun iterateFiles(dir: File, condition: (File) -> Boolean, action: (File) -> Unit) { + dir.walkTopDown().filter { it.isFile && condition(it) }.forEach { action.invoke(it) } +} + +fun File.forFilesWithSuffix(extension: String, action: (File) -> Unit) { + iterateFiles(this, { file: File -> file.path.endsWith(extension) }, action) +} diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt deleted 
file mode 100644 index e7eb7135..00000000 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ /dev/null @@ -1,27 +0,0 @@ -@file:JvmName("CppExample") - -package astminer.examples - -import astminer.config.* -import astminer.parse.fuzzy.cpp.FuzzyCppParser -import astminer.pipeline.Pipeline -import java.io.File - -// Retrieve paths from .cpp preprocessed files, using a fuzzyc2cpg parser. -fun allCppFiles() { - val inputDir = File("src/test/resources/examples/cpp") - val preprocessedDir = File("preprocessed") - // TODO: preprocessing should once become a part of the pipeline - val parser = FuzzyCppParser() - parser.preprocessProject(inputDir, preprocessedDir) - - val config = PipelineConfig( - inputDir = preprocessedDir.path, - outputDir = "out_examples/allCppFiles", - parser = ParserConfig(ParserType.Fuzzy, listOf(FileExtension.Cpp)), - labelExtractor = FileNameExtractorConfig(), - storage = Code2VecPathStorageConfig(5, 5) - ) - - Pipeline(config).run() -} diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt deleted file mode 100644 index 5fa28a11..00000000 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ /dev/null @@ -1,32 +0,0 @@ -package astminer.examples - -import astminer.common.model.LabeledResult -import astminer.parse.antlr.java.JavaFunctionSplitter -import astminer.parse.antlr.java.JavaParser -import astminer.storage.path.Code2VecPathStorage -import astminer.storage.path.PathBasedStorageConfig -import java.io.File - -// Retrieve paths from Java files, using a generated parser. 
-fun allJavaFiles() { - val inputDir = "src/test/resources/examples/" - - val outputDir = "out_examples/allJavaFilesAntlr" - val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) - - File(inputDir).forFilesWithSuffix("11.java") { file -> - val node = JavaParser().parseInputStream(file.inputStream()) - node.prettyPrint() - JavaFunctionSplitter().splitIntoFunctions(node, file.path).forEach { - println(it.name) - println(it.returnType) - println(it.enclosingElement?.name) - it.parameters.forEach { parameter -> - println("${parameter.name} ${parameter.type}") - } - } - storage.store(LabeledResult(node, file.path, file.path)) - } - - storage.close() -} diff --git a/src/main/kotlin/astminer/examples/Common.kt b/src/main/kotlin/astminer/examples/Common.kt deleted file mode 100644 index 0e3e1202..00000000 --- a/src/main/kotlin/astminer/examples/Common.kt +++ /dev/null @@ -1,11 +0,0 @@ -package astminer.examples - -import java.io.File - -fun iterateFiles(dir: File, condition: (File) -> Boolean, action: (File) -> Unit) { - dir.walkTopDown().filter { it.isFile && condition(it) }.forEach { action.invoke(it) } -} - -fun File.forFilesWithSuffix(extension: String, action: (File) -> Unit) { - iterateFiles(this, { file: File -> file.path.endsWith(extension) }, action) -} diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 72d02d47..7ca5570f 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -1,8 +1,8 @@ package astminer.parse.cpp import astminer.checkExecutable +import astminer.common.forFilesWithSuffix import astminer.common.getProjectFilesWithExtension -import astminer.examples.forFilesWithSuffix import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.parse.fuzzy.cpp.FuzzyNode import astminer.parseFiles From ec9f455c43496723ec2337bf9d991526099aa5a8 Mon Sep 17 00:00:00 2001 From: Egor 
Spirin Date: Fri, 23 Jul 2021 17:43:36 +0300 Subject: [PATCH 251/308] Write docs for pipeline parts, rewrite README, optimise docker --- .dockerignore | 7 - Dockerfile | 9 +- README.md | 130 +++++------------- cli.md | 69 ---------- cli.sh | 24 +++- configs/antlr_java_js_ast.yaml | 24 ++++ configs/antlr_python_paths.yaml | 25 ++++ configs/file-asts-csv-storage.yaml | 19 --- configs/file-asts-json-storage.yaml | 19 --- configs/file-path-representation.yaml | 18 --- ...n-name-prediction-path-representation.yaml | 18 --- configs/gumtree_java_ast.yaml | 24 ++++ configs/gumtree_java_function_names.yaml | 24 ++++ changelog.md => docs/changelog.md | 14 +- docs/cli.md | 39 ++++++ docs/filters.md | 75 ++++++++++ docs/label_extractors.md | 36 +++++ docs/parsers.md | 57 ++++++++ docs/storages.md | 61 ++++++++ .../kotlin/astminer/config/ParserConfig.kt | 6 +- .../kotlin/astminer/config/PipelineConfig.kt | 3 +- .../kotlin/astminer/config/StorageConfigs.kt | 8 +- src/main/kotlin/astminer/pipeline/Pipeline.kt | 8 +- 23 files changed, 437 insertions(+), 280 deletions(-) delete mode 100644 .dockerignore delete mode 100644 cli.md create mode 100644 configs/antlr_java_js_ast.yaml create mode 100644 configs/antlr_python_paths.yaml delete mode 100644 configs/file-asts-csv-storage.yaml delete mode 100644 configs/file-asts-json-storage.yaml delete mode 100644 configs/file-path-representation.yaml delete mode 100644 configs/function-name-prediction-path-representation.yaml create mode 100644 configs/gumtree_java_ast.yaml create mode 100644 configs/gumtree_java_function_names.yaml rename changelog.md => docs/changelog.md (90%) create mode 100644 docs/cli.md create mode 100644 docs/filters.md create mode 100644 docs/label_extractors.md create mode 100644 docs/parsers.md create mode 100644 docs/storages.md diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index dd87cc65..00000000 --- a/.dockerignore +++ /dev/null @@ -1,7 +0,0 @@ -.github/ -.idea/ -build/ -configs/ -output/ 
-examples_output/ -src/main/generated/ diff --git a/Dockerfile b/Dockerfile index 822f8ad5..f94f32dd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,11 +20,8 @@ RUN apt-get update && \ chmod +x pythonparser ENV PATH="/pythonparser:${PATH}" -# Copy astminer sources +# Copy astminer shadow jar WORKDIR astminer -COPY . . +COPY ./build/shadow/astminer.jar . -# Prepare shadow jar -RUN ./gradlew shadowJar - -ENTRYPOINT ["java", "-jar", "build/shadow/astminer.jar"] +ENTRYPOINT ["java", "-jar", "astminer.jar"] diff --git a/README.md b/README.md index 10770eeb..f94b5b08 100644 --- a/README.md +++ b/README.md @@ -2,30 +2,36 @@ ![astminer version](https://img.shields.io/badge/astminer-v0.6.4-blue) # `astminer` -A library for mining of [path-based representations of code](https://arxiv.org/pdf/1803.09544.pdf) and more, supported by the [Machine Learning Methods for Software Engineering](https://research.jetbrains.org/groups/ml_methods) group at [JetBrains Research](https://research.jetbrains.org). +A library for mining of [path-based representations of code](https://arxiv.org/pdf/1803.09544.pdf) and more, +supported by the +[Machine Learning Methods for Software Engineering](https://research.jetbrains.org/groups/ml_methods) +group at [JetBrains Research](https://research.jetbrains.org). Supported languages of the input: -- [x] Java -- [x] Python -- [x] C/C++ -- [x] Javascript +| | Java | Python | C/C++ | JavaScript | PHP | +|---------|------|--------|-------|------------|-----| +| ANTLR | ✅ | ✅ | | ✅ | ✅ | +| GumTree | ✅ | ✅ | | | | +| Fuzzy | | | ✅ | | | -### Version history - -See [changelog](changelog.md) ## About -`astminer` was first implemented as a part of pipeline in the [code style extraction project](https://arxiv.org/abs/2002.03997) and later converted into a reusable tool. +`astminer` was first implemented as a part of pipeline in the +[code style extraction project](https://arxiv.org/abs/2002.03997) and later converted into a reusable tool. 
Currently, it supports extraction of: -* Path-based representations of files -* Path-based representations of methods -* Raw ASTs +* Path-based representations of files/methods +* Raw ASTs of files/methods -Supported languages are Java, Python, C/C++, but it is designed to be very easily extensible. +It is designed to be very easily extensible to new languages. -For the output format, see [the section below](#output-format). +`astminer` lets you create end2end pipeline of data processing. +It allows convert source code, cloned from VCS to suitable for training datasets. +To do that, `astminer` provides multiple steps to handle data: +- [filters](./docs/filters.md) to remove redundant samples from data +- [label extractors](./docs/label_extractors.md) to create label for each tree +- [storages](./docs/storages.md) to define storage format. ## Usage There are two ways to use `astminer`. @@ -33,60 +39,14 @@ There are two ways to use `astminer`. - [As a standalone CLI tool](#using-astminer-cli) with pre-implemented logic for common processing and mining tasks - [Integrated](#using-astminer-as-a-dependency) into your Kotlin/Java mining pipelines as a Gradle dependency. -### Using `astminer` CLI -#### Building or installing `astminer` CLI -`astminer` CLI can be either built from sources or installed in a pre-built Docker image. - -##### Building locally -`./cli.sh` will do the job for you by triggering a Gradle build on the first run. - -##### Installing the Docker image -The C++ parser in `astminer` relies on `g++`. To avoid misconfiguration with this and likely other future external dependencies, you can use it from a Docker container. - -Install the image with the last release by pulling it from Docker Hub: -```shell -docker pull voudy/astminer -``` -To rebuild the image locally, run -```shell -docker build -t voudy/astminer . 
-``` +### Using `astminer` cli -#### Running `astminer` CLI -Run +Define config (examples of them in [configs](./configs) directory) and pass it shell script: ```shell -./cli.sh optionName parameters +./cli.sh ``` -Where `optionName` is one of the following options: -#### Preprocess - -Run preprocessing on C/C++ project to unfold `#define` directives. -In other tasks, if you feed C/C++ file with macroses, they will be dropped as well as their appearances in code. -```shell script -./cli.sh preprocess --project path/to/project --output path/to/preprocessedProject -``` -#### Parse - -Extract ASTs from all the files in supported languages. -```shell script -./cli.sh parse --lang py,java,c,cpp,js --project path/to/project --output path/to/result --storage dot -``` - -#### PathContexts - -Extract path contexts from all the files in supported languages and store in form `fileName triplesOfPathContexts`. -```shell script -./cli.sh pathContexts --lang py,java,c,cpp,js --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P -``` - -#### Code2vec - -Extract data suitable as input for [code2vec](https://github.com/tech-srl/code2vec) model. -Parse all files written in specified language into ASTs, split into methods, and store in form `method|name triplesOfPathContexts`. -```shell script -./cli.sh code2vec --lang py,java,c,cpp,js --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P --split-tokens --granularity method -``` +For details about config format and other navigate to [docs/cli](./docs/cli.md). 
### Using `astminer` as a dependency @@ -101,7 +61,7 @@ repositories { } dependencies { - compile 'io.github.vovak:astminer:' + implementation 'io.github.vovak:astminer:' } ``` @@ -112,7 +72,7 @@ repositories { } dependencies { - compile("io.github.vovak", "astminer", ) + implementation("io.github.vovak:astminer:") } ``` @@ -126,50 +86,22 @@ After that, add `mavenLocal()` into the `repositories` section in your gradle co #### Examples -If you want to use `astminer` as a library in your Java/Kotlin based data mining tool, check the following astminer: - -* A few [simple usage astminer](src/main/kotlin/astminer/examples) can be run with `./gradlew run`. +If you want to use `astminer` as a library in your Java/Kotlin based data mining tool, check the following: -* A somewhat more verbose [example of usage in Java](src/main/kotlin/astminer/examples/AllJavaFiles.kt) is available as well. +* A few simple [examples](src/examples) of `astminer` usage in Java and Kotlin. +* Using `astminer` as a part of another mining tool — [psiminer](https://github.com/JetBrains-Research/psiminer). Please consider trying Kotlin for your data mining pipelines: from our experience, it is much better suited for data collection and transformation instruments. -### Output format - -For path-based representations, `astminer` supports two output formats. In both of them, we store 4 `.csv` files: -1. `node_types.csv` contains numeric ids and corresponding node types with directions (up/down, as described in [paper](https://arxiv.org/pdf/1803.09544.pdf)); -2. `tokens.csv` contains numeric ids and corresponding tokens; -3. `paths.csv` contains numeric ids and AST paths in form of space-separated sequences of node type ids; -4. `path_contexts.csv` contains labels and sequences of path contexts (triples of two tokens and a path between them). 
- -If the replica of [code2vec](https://github.com/tech-srl/code2vec) format is used, each line in `path_contexts.csv` starts with a label, -then it contains a sequence of space-separated triples. Each triple contains start token id, path id, end token id, separated with commas. - -If csv format is used, each line in `path_contexts.csv` contains label, then comma, then a sequence of `;`-separated triples. -Each triple contains start token id, path id, end token id, separated with spaces. - -## Other languages - -Support for a new programming language can be implemented in a few simple steps. - -If there is an ANTLR grammar for the language: -1. Add the corresponding [ANTLR4 grammar file](https://github.com/antlr/grammars-v4) to the `antlr` directory; -2. Run the `generateGrammarSource` Gradle task to generate the parser; -3. Implement a small wrapper around the generated parser. -See [JavaParser](src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt) or [PythonParser](src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt) for an example of a wrapper. - -If the language has a parsing tool that is available as Java library: -1. Add the library as a dependency in [build.gradle.kts](/build.gradle.kts); -2. Implement a wrapper for the parsing tool. -See [FuzzyCppParser](src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt) for an example of a wrapper. - ## Contribution + We believe that `astminer` could find use beyond our own mining tasks. Please help make `astminer` easier to use by sharing your use cases. Pull requests are welcome as well. Support for other languages and documentation are the key areas of improvement. ## Citing astminer + A [paper](https://zenodo.org/record/2595271) dedicated to `astminer` (more precisely, to its older version [PathMiner](https://github.com/vovak/astminer/tree/pathminer)) was presented at [MSR'19](https://2019.msrconf.org/). If you use `astminer` in your academic work, please cite it. 
``` diff --git a/cli.md b/cli.md deleted file mode 100644 index f8babcb9..00000000 --- a/cli.md +++ /dev/null @@ -1,69 +0,0 @@ -[![JetBrains Research](https://jb.gg/badges/research.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub) -[![CircleCI](https://circleci.com/gh/JetBrains-Research/astminer.svg?style=svg)](https://circleci.com/gh/JetBrains-Research/astminer) - -# Astminer usage example / CLI - -The project implements a CLI for [astminer](github.com/vovak/astminer) and serves as a usage example for the library. - -For now the CLI provides four options: - -* Extract data (method names and path contexts) suitable as input for [code2vec](https://github.com/tech-srl/code2vec); -* Parse a project in one of supported languages and save the extracted ASTs; -* Extract path contexts from the project files and save them in code2vec format; -* Preprocess code in C/C++ to unfold `#define` directives to enable further processing. - -### Version history - -#### Coming up in 0.4 - -* Extraction of path-based representations at method level -* Support of Javascript - -#### 0.3 - -* Extraction of path-based representations - * For now it works only at file level -* Compatibility with [code2vec](https://github.com/tech-srl/code2vec) model (see [code2vec section](#code2vec)) -* New features in AST parsing: - * Saving in [DOT format](https://www.graphviz.org/doc/info/lang.html) - * Selection of granularity level (`file` or `method`) - * You can pass `--split-token` flag to split tokens into pipe-separated sub-tokens - * You can pass `--hide-method-name` to replace names of methods with dummy `METHOD_NAME` token - -#### 0.2 - -* Parsing ASTs for Java, Python, C/C++ -* Preprocessing for C/C++ - -#### 0.1 - -* Weird alpha-release - - -## Supported languages - -* Python – supported via parser generated from [ANTLR grammar](https://github.com/antlr/grammars-v4/tree/master/python3). 
-* Java – supported via [GumTree](https://github.com/GumTreeDiff/gumtree) and [ANTLR Java8 grammar](https://github.com/antlr/grammars-v4/tree/master/java8). -* C and C++ – supported via [ShiftLeft CPG constructor](https://github.com/ShiftLeftSecurity/codepropertygraph). -It does not work properly with macroses (`#define` directives), thus, they should be substituted before parsing the project. -To do so, we provide a `preprocess` option for the CLI. - -## Requirements - -1. `java` to run jar -2. `g++` for preprocessing, **only for C/C++** - -## Extending the CLI - -1. Clone the repository -2. If you want to update the astminer library: - 1. Make changes to astminer (located in the root of this repository) - 2. Build astminer in the root folder with `./gradlew shadowJar` -3. Move to `astminer-cli` -4. If you want to update the CLI: - 1. To add another task for the jar, create an extension of `CliktCommand()` class - (see [ProjectParser](src/main/kotlin/cli/ProjectParser.kt) for an example) and link it in [Main.kt](src/main/kotlin/cli/Main.kt) - 2. To modify existing tasks (e.g., parse only files with specific names), update code of corresponding classes -5. Run `./gradlew shadowJar` to create a runnable jar with all the dependencies -6. Created jar is located in `build/shadow/cli-versionNumber.jar` -7. Run the jar explicitly or use `./cli.sh` for short diff --git a/cli.sh b/cli.sh index 5a0c5005..a58a5473 100755 --- a/cli.sh +++ b/cli.sh @@ -3,15 +3,27 @@ IMAGE_NAME="voudy/astminer" SHADOW_JAR_PATH="build/shadow/astminer.jar" +if ! [[ -f "$SHADOW_JAR_PATH" ]]; then + echo "$SHADOW_JAR_PATH not found, building" + ./gradlew shadowJar +fi + if [[ "$(docker images -q $IMAGE_NAME 2> /dev/null)" == "" ]]; then echo "Docker image not found, will use $SHADOW_JAR_PATH"; - if ! 
[[ -f "$SHADOW_JAR_PATH" ]]; then - echo "$SHADOW_JAR_PATH not found, building" - ./gradlew shadowJar - fi - java -jar $SHADOW_JAR_PATH "$@" + java -jar $SHADOW_JAR_PATH "$1" else echo "Running astminer in docker" - docker run --rm $IMAGE_NAME "$@" + +# mount config file, input dir and output dir to docker +# convert all paths to be absolute + CONFIG_PATH=$1 + INPUT_FOLDER=$(grep inputDir "$1" | cut -c 11-) + OUTPUT_FOLDER=$(grep outputDir "$1" | cut -c 12-) + docker run \ + -v "$(pwd)"/"$CONFIG_PATH":/astminer/"$CONFIG_PATH" \ + -v "$(pwd)"/"$OUTPUT_FOLDER":/astminer/"$OUTPUT_FOLDER" \ + -v "$(pwd)"/"$INPUT_FOLDER":/astminer/"$INPUT_FOLDER" \ + -v "$(pwd)"/"$SHADOW_JAR_PATH":/astminer/astminer.jar \ + --rm $IMAGE_NAME "$1" fi diff --git a/configs/antlr_java_js_ast.yaml b/configs/antlr_java_js_ast.yaml new file mode 100644 index 00000000..8fe2b2d9 --- /dev/null +++ b/configs/antlr_java_js_ast.yaml @@ -0,0 +1,24 @@ +# input directory (path to project) +inputDir: src/test/resources/ +# output directory +outputDir: output + +# parse Java & JavaScript files with ANTLR parser +parser: + name: antlr + languages: [java, js] + +filters: + - name: by tree size # exclude the trees that have > 1000 nodes + maxTreeSize: 1000 + - name: by words number + maxTokenWordsNumber: 1000 + +# use file names as labels +# this selects the file level granularity +label: + name: file name + +# save to disk ASTs in the JSON format +storage: + name: json AST diff --git a/configs/antlr_python_paths.yaml b/configs/antlr_python_paths.yaml new file mode 100644 index 00000000..2cced944 --- /dev/null +++ b/configs/antlr_python_paths.yaml @@ -0,0 +1,25 @@ +# input directory (path to project) +inputDir: src/test/resources/ +# output directory +outputDir: output + +# parse Python files with ANTLR parser +parser: + name: antlr + languages: [py] + +filters: + - name: by tree size # exclude the trees that have > 1000 nodes + maxTreeSize: 1000 + +# use file names as labels +# this selects the file level 
granularity +label: + name: file name + +# extract from each tree paths with length 9 and width 2 +# save paths in code2vec format +storage: + name: code2vec + maxPathLength: 9 + maxPathWidth: 2 diff --git a/configs/file-asts-csv-storage.yaml b/configs/file-asts-csv-storage.yaml deleted file mode 100644 index 638b0e50..00000000 --- a/configs/file-asts-csv-storage.yaml +++ /dev/null @@ -1,19 +0,0 @@ -inputDir: 'src/test/resources/methodSplitting/' -outputDir: 'output' - -parser: - name: 'antlr' - extensions: ['java', 'js'] - -filters: - - name: 'by tree size' - maxTreeSize: 1000 - - name: 'by words number' - maxTokenWordsNumber: 1000 - - -labelExtractor: - name: 'file name' - -storage: - name: 'CsvAST' diff --git a/configs/file-asts-json-storage.yaml b/configs/file-asts-json-storage.yaml deleted file mode 100644 index fc0be07b..00000000 --- a/configs/file-asts-json-storage.yaml +++ /dev/null @@ -1,19 +0,0 @@ -inputDir: 'src/test/resources/methodSplitting/' -outputDir: 'output' - -parser: - name: 'antlr' - extensions: ['java', 'js'] - -filters: - - name: 'by tree size' - maxTreeSize: 1000 - - name: 'by words number' - maxTokenWordsNumber: 1000 - - -labelExtractor: - name: 'file name' - -storage: - name: 'JsonAST' diff --git a/configs/file-path-representation.yaml b/configs/file-path-representation.yaml deleted file mode 100644 index 62c36d31..00000000 --- a/configs/file-path-representation.yaml +++ /dev/null @@ -1,18 +0,0 @@ -inputDir: 'src/test/resources/methodSplitting/' -outputDir: 'output' - -parser: - name: 'antlr' - extensions: ['java', 'js'] - -filters: - - name: 'by tree size' - maxTreeSize: 1000 - -labelExtractor: - name: 'file name' - -storage: - name: 'Code2vec' - maxPathLength: 5 - maxPathWidth: 5 diff --git a/configs/function-name-prediction-path-representation.yaml b/configs/function-name-prediction-path-representation.yaml deleted file mode 100644 index b2846b20..00000000 --- a/configs/function-name-prediction-path-representation.yaml +++ /dev/null @@ 
-1,18 +0,0 @@ -inputDir: 'src/test/resources/methodSplitting/' -outputDir: 'output' - -parser: - name: 'antlr' - extensions: ['java'] - -filters: - - name: 'by function name length' - maxWordsNumber: 10 - - name: 'by words number' - maxTokenWordsNumber: 100 - -labelExtractor: - name: 'function name' - -storage: - name: 'CsvAST' diff --git a/configs/gumtree_java_ast.yaml b/configs/gumtree_java_ast.yaml new file mode 100644 index 00000000..e6345b70 --- /dev/null +++ b/configs/gumtree_java_ast.yaml @@ -0,0 +1,24 @@ +# input directory (path to project) +inputDir: src/test/resources/ +# output directory +outputDir: output + +# parse Java files with GumTree parser +parser: + name: gumtree + languages: [java] + +filters: + - name: by tree size # exclude the trees that have > 1000 nodes + maxTreeSize: 1000 + - name: by words number + maxTokenWordsNumber: 1000 + +# use file names as labels +# this selects the file level granularity +label: + name: file name + +# save to disk ASTs in the JSON format +storage: + name: json AST diff --git a/configs/gumtree_java_function_names.yaml b/configs/gumtree_java_function_names.yaml new file mode 100644 index 00000000..451802ec --- /dev/null +++ b/configs/gumtree_java_function_names.yaml @@ -0,0 +1,24 @@ +# input directory (path to project) +inputDir: src/test/resources/ +# output directory +outputDir: output + +# parse Java files with GumTree parser +parser: + name: gumtree + languages: [java] + +filters: + - name: by function name length + maxWordsNumber: 10 + - name: by words number + maxTokenWordsNumber: 100 + +# use function name as labels +# this selects the function level granularity +label: + name: function name + +# save to disk ASTs in the JSON format +storage: + name: json AST diff --git a/changelog.md b/docs/changelog.md similarity index 90% rename from changelog.md rename to docs/changelog.md index 1e078bf0..f435c5be 100644 --- a/changelog.md +++ b/docs/changelog.md @@ -1,31 +1,31 @@ -### Version history +# Version history 
-#### 0.6 +## 0.6 * Greatly improved memory efficiency * Support arbitrary labels for path-contexts * Minor improvements -#### 0.5 +## 0.5 * Beta of Javascript support * Storage of ASTs in [DOT format](https://www.graphviz.org/doc/info/lang.html) * Minor fixes -#### 0.4 +## 0.4 * Support of code2vec output format * Extraction of ASTs and path-based representations of individual methods * Extraction of data for the task of method name prediction ([code2vec paper](https://arxiv.org/abs/1803.09473)) -#### 0.3 +## 0.3 * Support of C/C++ via [FuzzyC2CPG parser](https://github.com/ShiftLeftSecurity/fuzzyc2cpg) -#### 0.2 +## 0.2 * Mining of ASTs -#### 0.1 +## 0.1 * astminer is available via Maven Central * Support of Java and Python * Mining of [path-based representations of code](https://arxiv.org/pdf/1803.09544.pdf) diff --git a/docs/cli.md b/docs/cli.md new file mode 100644 index 00000000..e309fd0e --- /dev/null +++ b/docs/cli.md @@ -0,0 +1,39 @@ +# `astminer` CLI usage + +You can run `astminer` through command-line interface. +CLI allow to run the tool on any implemented parser with specifying filtering, label extracting and storage options. + +## How to +You can prepare and run CLI on any branch you want. Just navigate to it and do follow steps: +1. Build shadow jar for `astminer`: +```shell +gradle shadowJar +``` +2. [Optionally] Pull docker image with all parsers dependencies installed: +```shell +docker pull voudy/astminer +``` +3. Run `astminer` with specified config: +```shell +./cli.sh +``` + +## Config + +CLI usage of the `astminer` completely configured by YAML config. 
+The config should contain next values: +- `inputDir` — path to directory with input data +- `outputDir` — path to output directory +- `parser` — parser name and list of target languages +- `filters` — list of filters with their parameters +- `label` — label extractor strategy +- `storage` — storage format + +[configs](../configs) already contain some config examples, look at them for more structure details. + +## Docker + +Since some parsers have additional dependencies, +e.g. G++ must be installed for Fuzzy parser (see [parsers](./parsers.md)). +We introduce Docker image with already installed parser dependencies. +To use this image you should only pull this image from DockerHub and run CLI by `./cli.sh`. diff --git a/docs/filters.md b/docs/filters.md new file mode 100644 index 00000000..886844d8 --- /dev/null +++ b/docs/filters.md @@ -0,0 +1,75 @@ +# Filters + +Each filter dedicate to remove *bad* trees from data, e.g. too large trees. +Also, each filter works only for certain levels of granulaity. +Here we describe all implemented filters. +Each description contains corresponding YAML config. + +Since filters may be language or parser specific, `astminer` should support all this zoo. +And since we **do not** use any of intermediate representation it is impossible to unify filtering. +Therefore some languages or parsers may not support needed filter +(`FunctionInfoPropertyNotImplementedException` appears). +To handle this user should manually add specific logic of parsing AST to get info about function or code at +all. + +Filter config classes are defined in [FilterConfigs.kt](src/main/kotlin/astminer/config/FilterConfigs.kt). + +## by tree size +**granularity**: files, functions + +Exclude ASTs that are too small or too big. + + ```yaml + name: 'by tree size' + minTreeSize: 1 + maxTreeSize: 100 + ``` + +## by words number +**granularity**: files, functions + +Exclude ASTs that have too many words in any token. 
+ + ```yaml + name: by words number + maxTokenWordsNumber: 10 + ``` + +## by function name length +**granularity**: functions + +Exclude functions that have too many words in their name. + + ```yaml + name: by function name length + maxWordsNumber: 10 + ``` + +## no constructors +**granularity**: functions + +Exclude constructors + + ```yaml + name: no constructors + ``` + +## by annotations +**granularity**: functions + +Exclude functions that have certain annotations (e.g. `@Override`) + + ```yaml + name: by annotations + annotations: [ Override ] + ``` + +## by modifiers +**granularity**: functions + +Exclude functions with certain modifiers (e.g. `private` functions) + + ```yaml + name: by modifiers + modifiers: [ private ] + ``` diff --git a/docs/label_extractors.md b/docs/label_extractors.md new file mode 100644 index 00000000..ec41845e --- /dev/null +++ b/docs/label_extractors.md @@ -0,0 +1,36 @@ +# Label extractors + +Label extractors are required for correct extracting of labels from raw ASTs. +Inside themselves they extract label from tree and process tree to avoid data leak. +Also, label extractors define granularity level for the whole pipeline. + +Label extractor config classes are defined in [LabelExtractorConfigs.kt](src/main/kotlin/astminer/config/LabelExtractorConfigs.kt). + +## file name +**granularity**: files + +Use file name of source file as label. + + ```yaml + name: file name + ``` + +## folder name +**granularity**: files + +Use name of the parent folder of source file as label. +May be useful for code classification datasets, e.g., POJ-104. + + ```yaml + name: folder name + ``` + +## function name +**granularity**: functions + +Use name of each function as label. +This label extractor will also hide the function name in the AST and all recursive calls. 
+ + ```yaml + name: function name + ``` diff --git a/docs/parsers.md b/docs/parsers.md new file mode 100644 index 00000000..bfbd52c1 --- /dev/null +++ b/docs/parsers.md @@ -0,0 +1,57 @@ +# Parsers + +`astminer` supports multiple parsers for a large wide of programming languages. +Here we describe integrated parsers and their peculiarities. + +## ANTLR + +ANother Tool for Language Recognition from [antlr.org](https://www.antlr.org). +It provides lexer and parsers for languages that can be generated into Java code. +For now, `astminer` supports Java, Python, JS, and PHP. + +## GumTree + +[GumTree](https://github.com/GumTreeDiff/gumtree) +framework to work with source code as trees and to compute difference between them. +It also builds language-agnostic representation. +For now, `astminer` supports Java and Python. + +### python-parser + +You should install python-parser to run GumTree with Python. +There is instruction of how to do it: +1. Download sources from [GitHub](https://github.com/JetBrains-Research/pythonparser/blob/master/) +2. Install dependencies +```shell +pip install -r requirements.txt +``` +3. Make python parser script executable +```shell +chmod +x src/main/python/pythonparser/pythonparser_3.py +``` +4. Add python-parser to `PATH` +```shell +cp src/main/python/pythonparser/pythonparser_3.py src/main/python/pythonparser/pythonparser +export PATH="/src/main/python/pythonparser/pythonparser:${PATH}" +``` + +## Fuzzy + +Originally [fuzzyc2cpg](https://github.com/ShiftLeftSecurity/fuzzyc2cpg) +and now part of [codepropertygraph](https://github.com/ShiftLeftSecurity/codepropertygraph/). +`astminer`uses it C/C++ parser from that. `G++`required for this parser. + +## Other languages and parsers + +Support for a new programming language can be implemented in a few simple steps. + +If there is an ANTLR grammar for the language: +1. Add the corresponding [ANTLR4 grammar file](https://github.com/antlr/grammars-v4) to the `antlr` directory; +2. 
Run the `generateGrammarSource` Gradle task to generate the parser; +3. Implement a small wrapper around the generated parser. + See [JavaParser](src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt) or [PythonParser](src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt) for an example of a wrapper. + +If the language has a parsing tool that is available as Java library: +1. Add the library as a dependency in [build.gradle.kts](/build.gradle.kts); +2. Implement a wrapper for the parsing tool. + See [FuzzyCppParser](src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt) for an example of a wrapper. diff --git a/docs/storages.md b/docs/storages.md new file mode 100644 index 00000000..7e9c83cf --- /dev/null +++ b/docs/storages.md @@ -0,0 +1,61 @@ +# Storages + +Storages defines how ASTs should be saved on a disk. +For now, `astminer` support saving in tree and path-based formats. + +Storage config classes are defined in [StorageConfigs.kt](src/main/kotlin/astminer/config/StorageConfigs.kt). + +## Tree formats + +### CSV + +Save trees with labels in comma-separated file. +Each tree encodes into line using sequence of parenthesis. + + ```yaml + name: csv AST + ``` + +### Dot + +Save each tree in separate file using [dot](https://graphviz.org/doc/info/lang.html) syntax. +Along with dot files, storage also saves `description.csv` with matching between files, source file, and label. + + + ```yaml + name: dot AST + ``` + +### Json lines + +Save each tree with label in Json Lines format. +Json format of AST inspired by Python-150k dataset. + + ```yaml + name: json AST + ``` + +## Path-based representations + +Path-based representation was introduced by [Alon et al.](https://arxiv.org/abs/1803.09544). +It uses in models like code2vec or code2seq. + +### Code2vec + +Extract paths from each AST. Output is 4 files: +1. 
`node_types.csv` contains numeric ids and corresponding node types with directions (up/down, as described in [paper](https://arxiv.org/pdf/1803.09544.pdf)); +2. `tokens.csv` contains numeric ids and corresponding tokens; +3. `paths.csv` contains numeric ids and AST paths in form of space-separated sequences of node type ids; +4. `path_contexts.csv` contains labels and sequences of path contexts (triples of two tokens and a path between them). + +Each line in `path_contexts.csv` starts with a label, +then it contains a sequence of space-separated triples. Each triple contains start token id, path id, end token id, separated with commas. + + ```yaml + name: code2vec + maxPathLength: 10 + maxPathWidth: 2 + maxTokens: 1000 # can be omitted + maxPaths: 1000 # can be omitted + maxPathContextsPerEntity: 200 # can be omitted + ``` diff --git a/src/main/kotlin/astminer/config/ParserConfig.kt b/src/main/kotlin/astminer/config/ParserConfig.kt index f9e57aa7..1cb924bd 100644 --- a/src/main/kotlin/astminer/config/ParserConfig.kt +++ b/src/main/kotlin/astminer/config/ParserConfig.kt @@ -5,15 +5,15 @@ import kotlinx.serialization.Serializable /** * This config is used to select the parsers that should be used - * If given type = "antlr" and extensions = ["py", "java"] + * If given type = "antlr" and languages = ["py", "java"] * then 2 ANTLR parsers will be used (java antler parser and python antlr parser) * @param name Type of the parser - * @param extensions File extensions that should be parsed + * @param languages File extensions that should be parsed */ @Serializable data class ParserConfig( val name: ParserType, - val extensions: List + val languages: List ) @Serializable diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt index ea2425d9..4ec2fb26 100644 --- a/src/main/kotlin/astminer/config/PipelineConfig.kt +++ b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -1,5 +1,6 @@ package astminer.config +import 
kotlinx.serialization.SerialName import kotlinx.serialization.Serializable /** @@ -12,6 +13,6 @@ data class PipelineConfig( val outputDir: String, val parser: ParserConfig, val filters: List = emptyList(), - val labelExtractor: LabelExtractorConfig, + @SerialName("label") val labelExtractor: LabelExtractorConfig, val storage: StorageConfig ) diff --git a/src/main/kotlin/astminer/config/StorageConfigs.kt b/src/main/kotlin/astminer/config/StorageConfigs.kt index 0646581b..456bc5fd 100644 --- a/src/main/kotlin/astminer/config/StorageConfigs.kt +++ b/src/main/kotlin/astminer/config/StorageConfigs.kt @@ -22,7 +22,7 @@ sealed class StorageConfig { * @see astminer.storage.ast.CsvAstStorage */ @Serializable -@SerialName("CsvAST") +@SerialName("csv AST") class CsvAstStorageConfig : StorageConfig() { override fun createStorage(outputDirectoryPath: String) = CsvAstStorage(outputDirectoryPath) } @@ -31,7 +31,7 @@ class CsvAstStorageConfig : StorageConfig() { * @see astminer.storage.ast.DotAstStorage */ @Serializable -@SerialName("DotAST") +@SerialName("dot AST") class DotAstStorageConfig : StorageConfig() { override fun createStorage(outputDirectoryPath: String) = DotAstStorage(outputDirectoryPath) } @@ -40,7 +40,7 @@ class DotAstStorageConfig : StorageConfig() { * @see JsonAstStorage */ @Serializable -@SerialName("JsonAST") +@SerialName("json AST") class JsonAstStorageConfig : StorageConfig() { override fun createStorage(outputDirectoryPath: String) = JsonAstStorage(outputDirectoryPath) } @@ -49,7 +49,7 @@ class JsonAstStorageConfig : StorageConfig() { * Config for [astminer.storage.path.Code2VecPathStorage] */ @Serializable -@SerialName("Code2vec") +@SerialName("code2vec") data class Code2VecPathStorageConfig( val maxPathLength: Int, val maxPathWidth: Int, diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index ca258628..62761638 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ 
b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -44,12 +44,12 @@ class Pipeline(private val config: PipelineConfig) { * Runs the pipeline that is defined in the [config]. */ fun run() { - for (extension in config.parser.extensions) { - val languageFactory = getHandlerFactory(extension, config.parser.name) + for (language in config.parser.languages) { + val languageFactory = getHandlerFactory(language, config.parser.name) - val files = getProjectFilesWithExtension(inputDirectory, extension.fileExtension) + val files = getProjectFilesWithExtension(inputDirectory, language.fileExtension) - createStorage(extension).use { storage -> + createStorage(language).use { storage -> languageFactory.createHandlers(files) { languageHandler -> for (labeledResult in branch.process(languageHandler)) { storage.store(labeledResult) From 6bafcc8bbb10830e7cb39c21110479d36242b842 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 26 Jul 2021 14:15:19 +0300 Subject: [PATCH 252/308] Add code2seq storage --- .github/workflows/build.yml | 26 +++++++ configs/antlr_python_paths.yaml | 2 +- ...=> gumtree_java_function_names_paths.yaml} | 6 +- docs/storages.md | 25 ++++++- .../astminer/common/model/ParsingModel.kt | 8 +- .../kotlin/astminer/config/StorageConfigs.kt | 17 +++++ .../storage/path/Code2SeqPathStorage.kt | 42 +++++++++++ .../storage/path/Code2VecPathStorage.kt | 65 ++++++++++++++++- .../astminer/storage/path/PathBasedStorage.kt | 73 +++---------------- 9 files changed, 189 insertions(+), 75 deletions(-) rename configs/{gumtree_java_function_names.yaml => gumtree_java_function_names_paths.yaml} (83%) create mode 100644 src/main/kotlin/astminer/storage/path/Code2SeqPathStorage.kt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3c3cbff0..5266e6e0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -88,3 +88,29 @@ jobs: if: ${{ always() }} with: sarif_file: build/astminer/reports/detekt/detekt.sarif + + run-on-configs: + needs: 
build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Setup Java + uses: actions/setup-java@v1 + with: + java-version: 11 + + - name: Prepare shadowJar + run: ./gradlew shadowJar + + - name: Run antlr_java_js_ast.yaml + run: java -jar build/shadow/astminer.jar antlr_java_js_ast.yaml + + - name: Run antlr_python_paths.yaml + run: java -jar build/shadow/astminer.jar antlr_python_paths.yaml + + - name: Run gumtree_java_ast.yaml + run: java -jar build/shadow/astminer.jar gumtree_java_ast.yaml + + - name: Run gumtree_java_function_names_paths.yaml + run: java -jar build/shadow/astminer.jar gumtree_java_function_names_paths.yaml diff --git a/configs/antlr_python_paths.yaml b/configs/antlr_python_paths.yaml index 2cced944..b2b54b39 100644 --- a/configs/antlr_python_paths.yaml +++ b/configs/antlr_python_paths.yaml @@ -20,6 +20,6 @@ label: # extract from each tree paths with length 9 and width 2 # save paths in code2vec format storage: - name: code2vec + name: code2seq maxPathLength: 9 maxPathWidth: 2 diff --git a/configs/gumtree_java_function_names.yaml b/configs/gumtree_java_function_names_paths.yaml similarity index 83% rename from configs/gumtree_java_function_names.yaml rename to configs/gumtree_java_function_names_paths.yaml index 451802ec..211614bf 100644 --- a/configs/gumtree_java_function_names.yaml +++ b/configs/gumtree_java_function_names_paths.yaml @@ -19,6 +19,8 @@ filters: label: name: function name -# save to disk ASTs in the JSON format +# save to disk ASTs in the code2seq format storage: - name: json AST + name: code2seq + length: 9 + width: 2 diff --git a/docs/storages.md b/docs/storages.md index 7e9c83cf..6fd28031 100644 --- a/docs/storages.md +++ b/docs/storages.md @@ -3,7 +3,7 @@ Storages defines how ASTs should be saved on a disk. For now, `astminer` support saving in tree and path-based formats. -Storage config classes are defined in [StorageConfigs.kt](src/main/kotlin/astminer/config/StorageConfigs.kt). 
+Storage config classes are defined in [StorageConfigs.kt](../src/main/kotlin/astminer/config/StorageConfigs.kt). ## Tree formats @@ -46,9 +46,9 @@ Extract paths from each AST. Output is 4 files: 1. `node_types.csv` contains numeric ids and corresponding node types with directions (up/down, as described in [paper](https://arxiv.org/pdf/1803.09544.pdf)); 2. `tokens.csv` contains numeric ids and corresponding tokens; 3. `paths.csv` contains numeric ids and AST paths in form of space-separated sequences of node type ids; -4. `path_contexts.csv` contains labels and sequences of path contexts (triples of two tokens and a path between them). +4. `path_contexts.c2s` contains labels and sequences of path contexts (triples of two tokens and a path between them). -Each line in `path_contexts.csv` starts with a label, +Each line in `path_contexts.c2s` starts with a label, then it contains a sequence of space-separated triples. Each triple contains start token id, path id, end token id, separated with commas. ```yaml @@ -59,3 +59,22 @@ then it contains a sequence of space-separated triples. Each triple contains sta maxPaths: 1000 # can be omitted maxPathContextsPerEntity: 200 # can be omitted ``` + + +### Code2seq + +Extract paths from each AST and save in code2seq format. +Output is `path_context.c2s` file, +each line in it starts with a label, then it contains a sequence of space-separated triples. +Each triple contains start token, path node types, end token id, separated with commas. + +To reduce memory usage you can enable `nodesToNumber` option. +If it is `true` then all types are converted into numbers and `node_types.csv` would be added to output files. 
+ + ```yaml + name: code2seq + maxPathLength: 10 + maxPathWidth: 2 + maxPathContextsPerEntity: 200 # can be omitted + nodeToNumber: true # can be omitted + ``` diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 1bebbeac..c5136d75 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -12,16 +12,16 @@ abstract class Node { abstract val parent: Node? abstract val originalToken: String? - val normalizedToken: String? by lazy { + val normalizedToken: String by lazy { originalToken?.let { val subtokens = splitToSubtokens(it) - if (subtokens.isEmpty()) null else subtokens.joinToString(TOKEN_DELIMITER) - } + if (subtokens.isEmpty()) EMPTY_TOKEN else subtokens.joinToString(TOKEN_DELIMITER) + } ?: EMPTY_TOKEN } var technicalToken: String? = null val token: String - get() = listOfNotNull(technicalToken, normalizedToken, originalToken).firstOrNull() ?: EMPTY_TOKEN + get() = technicalToken ?: normalizedToken val metadata: MutableMap = HashMap() fun isLeaf() = children.isEmpty() diff --git a/src/main/kotlin/astminer/config/StorageConfigs.kt b/src/main/kotlin/astminer/config/StorageConfigs.kt index 456bc5fd..0de6a75c 100644 --- a/src/main/kotlin/astminer/config/StorageConfigs.kt +++ b/src/main/kotlin/astminer/config/StorageConfigs.kt @@ -4,6 +4,7 @@ import astminer.common.model.Storage import astminer.storage.ast.CsvAstStorage import astminer.storage.ast.DotAstStorage import astminer.storage.ast.JsonAstStorage +import astminer.storage.path.Code2SeqPathStorage import astminer.storage.path.Code2VecPathStorage import astminer.storage.path.PathBasedStorageConfig import kotlinx.serialization.SerialName @@ -64,3 +65,19 @@ data class Code2VecPathStorageConfig( override fun createStorage(outputDirectoryPath: String) = Code2VecPathStorage(outputDirectoryPath, pathBasedStorageConfig) } + +@Serializable +@SerialName("code2seq") +data class 
Code2SeqPathStorageConfig( + @SerialName("length") val maxPathLength: Int, + @SerialName("width") val maxPathWidth: Int, + val maxPathContextsPerEntity: Int? = null, + val nodesToNumber: Boolean = true +) : StorageConfig() { + @Transient + private val pathBasedStorageConfig = + PathBasedStorageConfig(maxPathLength, maxPathWidth, maxPathContextsPerEntity = maxPathContextsPerEntity) + + override fun createStorage(outputDirectoryPath: String) = + Code2SeqPathStorage(outputDirectoryPath, pathBasedStorageConfig, nodesToNumber) +} diff --git a/src/main/kotlin/astminer/storage/path/Code2SeqPathStorage.kt b/src/main/kotlin/astminer/storage/path/Code2SeqPathStorage.kt new file mode 100644 index 00000000..0c5c01c0 --- /dev/null +++ b/src/main/kotlin/astminer/storage/path/Code2SeqPathStorage.kt @@ -0,0 +1,42 @@ +package astminer.storage.path + +import astminer.common.model.LabeledPathContexts +import astminer.common.model.PathContext +import astminer.common.storage.RankedIncrementalIdStorage +import astminer.common.storage.dumpIdStorageToCsv +import java.io.File + +class Code2SeqPathStorage( + outputDirectoryPath: String, + config: PathBasedStorageConfig, + private val nodesToNumbers: Boolean = true +) : PathBasedStorage(outputDirectoryPath, config) { + + private val nodeTypesMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() + + private fun pathContextToString(pathContext: PathContext): String { + val stringNodeSequence = if (nodesToNumbers) { + pathContext.orientedNodeTypes.joinToString("|") { nodeTypesMap.record(it.typeLabel).toString() } + } else { + pathContext.orientedNodeTypes.joinToString("|") { it.typeLabel } + } + return "${pathContext.startToken},$stringNodeSequence,${pathContext.endToken}" + } + + override fun labeledPathContextsToString(labeledPathContexts: LabeledPathContexts): String { + val pathContexts = labeledPathContexts.pathContexts.map { pathContextToString(it) } + return "${labeledPathContexts.label} ${pathContexts.joinToString(" ")}" + } + 
+ override fun close() { + super.close() + if (nodesToNumbers) { + dumpIdStorageToCsv( + nodeTypesMap, + "node_type", + { it }, + File("$outputDirectoryPath/node_types.csv") + ) + } + } +} diff --git a/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt b/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt index f69fb560..39018ba9 100644 --- a/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt +++ b/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt @@ -1,14 +1,73 @@ package astminer.storage.path -import astminer.common.model.PathContextId +import astminer.common.model.* +import astminer.common.storage.* +import java.io.File -class Code2VecPathStorage(outputDirectoryPath: String, config: PathBasedStorageConfig) : +class Code2VecPathStorage(outputDirectoryPath: String, private val config: PathBasedStorageConfig) : PathBasedStorage(outputDirectoryPath, config) { - override fun pathContextIdsToString(pathContextIds: List, label: String): String { + private val tokensMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() + private val orientedNodeTypesMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() + private val pathsMap: RankedIncrementalIdStorage> = RankedIncrementalIdStorage() + + private fun dumpPathContexts(labeledPathContextIds: LabeledPathContextIds): String { + val pathContextIdsString = labeledPathContextIds.pathContexts.filter { + val isNumberOfTokensValid = config.maxTokens == null || + tokensMap.getIdRank(it.startTokenId) <= config.maxTokens && + tokensMap.getIdRank(it.endTokenId) <= config.maxTokens + val isNumberOfPathsValid = config.maxPaths == null || pathsMap.getIdRank(it.pathId) <= config.maxPaths + + isNumberOfTokensValid && isNumberOfPathsValid + } + + return pathContextIdsToString(pathContextIdsString, labeledPathContextIds.label) + } + + private fun storePathContext(pathContext: PathContext): PathContextId { + val startTokenId = tokensMap.record(pathContext.startToken) + val endTokenId = 
tokensMap.record(pathContext.endToken) + val orientedNodesIds = pathContext.orientedNodeTypes.map { orientedNodeTypesMap.record(it) } + val pathId = pathsMap.record(orientedNodesIds) + return PathContextId(startTokenId, pathId, endTokenId) + } + + override fun labeledPathContextsToString(labeledPathContexts: LabeledPathContexts): String { + val labeledPathContextIds = LabeledPathContextIds( + labeledPathContexts.label, + labeledPathContexts.pathContexts.map { storePathContext(it) } + ) + return dumpPathContexts(labeledPathContextIds) + } + + private fun pathContextIdsToString(pathContextIds: List, label: String): String { val joinedPathContexts = pathContextIds.joinToString(" ") { pathContextId -> "${pathContextId.startTokenId},${pathContextId.pathId},${pathContextId.endTokenId}" } return "$label $joinedPathContexts" } + + override fun close() { + super.close() + dumpIdStorageToCsv( + tokensMap, + "token", + tokenToCsvString, + File("$outputDirectoryPath/tokens.csv"), + config.maxTokens + ) + dumpIdStorageToCsv( + orientedNodeTypesMap, + "node_type", + orientedNodeToCsvString, + File("$outputDirectoryPath/node_types.csv") + ) + dumpIdStorageToCsv( + pathsMap, + "path", + pathToCsvString, + File("$outputDirectoryPath/paths.csv"), + config.maxPaths + ) + } } diff --git a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index 4fc99a77..3a99cf4e 100644 --- a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -3,20 +3,18 @@ package astminer.storage.path import astminer.common.model.* import astminer.common.model.LabeledResult import astminer.common.model.Storage -import astminer.common.storage.* import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import astminer.paths.toPathContext import java.io.File import java.io.PrintWriter -// TODO: finish the documentation /** * Config for CountingPathStorage which 
contains all hyperparameters for path extraction. * @property maxPathLength The maximum length of a single path (based on the formal math definition of path length) * @property maxPathWidth The maximum width of a single path (based on the formal math definition of path width) - * @property maxTokens ?? - * @property maxPaths ?? + * @property maxTokens The maximum number of tokens saved per extraction + * @property maxPaths The maximum number of paths saved per extraction * @property maxPathContextsPerEntity The maximum number of path contexts that should be extracted from tree. * In other words, the maximum number of path contexts to save from each file/method (depending on granularity) */ @@ -40,45 +38,18 @@ abstract class PathBasedStorage( private val pathMiner = PathMiner(PathRetrievalSettings(config.maxPathLength, config.maxPathWidth)) - private val tokensMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() - private val orientedNodeTypesMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() - private val pathsMap: RankedIncrementalIdStorage> = RankedIncrementalIdStorage() - private val pathsFile: File - private val labeledPathContextIdsWriter: PrintWriter + private val pathContextPrintWriter: PrintWriter init { File(outputDirectoryPath).mkdirs() - pathsFile = File("$outputDirectoryPath/path_contexts.csv") + pathsFile = File(outputDirectoryPath).resolve("path_contexts.c2s") pathsFile.createNewFile() - labeledPathContextIdsWriter = PrintWriter(pathsFile) - } - - abstract fun pathContextIdsToString(pathContextIds: List, label: String): String - - private fun dumpPathContexts(labeledPathContextIds: LabeledPathContextIds) { - val pathContextIdsString = labeledPathContextIds.pathContexts.filter { - val isNumberOfTokensValid = config.maxTokens == null || - tokensMap.getIdRank(it.startTokenId) <= config.maxTokens && - tokensMap.getIdRank(it.endTokenId) <= config.maxTokens - val isNumberOfPathsValid = config.maxPaths == null || 
pathsMap.getIdRank(it.pathId) <= config.maxPaths - - isNumberOfTokensValid && isNumberOfPathsValid - } - - labeledPathContextIdsWriter.println(pathContextIdsToString(pathContextIdsString, labeledPathContextIds.label)) - } - - private fun storePathContext(pathContext: PathContext): PathContextId { - val startTokenId = tokensMap.record(pathContext.startToken) - val endTokenId = tokensMap.record(pathContext.endToken) - val orientedNodesIds = pathContext.orientedNodeTypes.map { orientedNodeTypesMap.record(it) } - val pathId = pathsMap.record(orientedNodesIds) - return PathContextId(startTokenId, pathId, endTokenId) + pathContextPrintWriter = PrintWriter(pathsFile) } private fun retrievePaths(node: Node) = if (config.maxPathContextsPerEntity != null) { - pathMiner.retrievePaths(node).take(config.maxPathContextsPerEntity) + pathMiner.retrievePaths(node).shuffled().take(config.maxPathContextsPerEntity) } else { pathMiner.retrievePaths(node) } @@ -93,40 +64,18 @@ abstract class PathBasedStorage( ) } + abstract fun labeledPathContextsToString(labeledPathContexts: LabeledPathContexts): String + /** * Extract paths from [labeledResult] and store them in the specified format. 
*/ override fun store(labeledResult: LabeledResult) { val labeledPathContexts = retrieveLabeledPathContexts(labeledResult) - val labeledPathContextIds = LabeledPathContextIds( - labeledPathContexts.label, - labeledPathContexts.pathContexts.map { storePathContext(it) } - ) - dumpPathContexts(labeledPathContextIds) + val output = labeledPathContextsToString(labeledPathContexts) + pathContextPrintWriter.println(output) } override fun close() { - dumpIdStorageToCsv( - tokensMap, - "token", - tokenToCsvString, - File("$outputDirectoryPath/tokens.csv"), - config.maxTokens - ) - dumpIdStorageToCsv( - orientedNodeTypesMap, - "node_type", - orientedNodeToCsvString, - File("$outputDirectoryPath/node_types.csv") - ) - dumpIdStorageToCsv( - pathsMap, - "path", - pathToCsvString, - File("$outputDirectoryPath/paths.csv"), - config.maxPaths - ) - - labeledPathContextIdsWriter.close() + pathContextPrintWriter.close() } } From 884db1dea836763be5b66742ad578753945f79d8 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 26 Jul 2021 16:43:18 +0300 Subject: [PATCH 253/308] Fix tests --- src/test/kotlin/astminer/common/DummyNode.kt | 6 +++++- .../kotlin/astminer/pipeline/util/OutputVerification.kt | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/test/kotlin/astminer/common/DummyNode.kt b/src/test/kotlin/astminer/common/DummyNode.kt index 056499c4..cff7b637 100644 --- a/src/test/kotlin/astminer/common/DummyNode.kt +++ b/src/test/kotlin/astminer/common/DummyNode.kt @@ -9,11 +9,15 @@ class DummyNode( override val children: MutableList = mutableListOf() ) : Node() { - // TODO("not implemented") override val parent: Node? 
= null override val originalToken: String = typeLabel + init { + // Tokens may change after normalization, for tests we want tokens to be unchanged + technicalToken = typeLabel + } + override fun removeChildrenOfType(typeLabel: String) { children.removeIf { it.typeLabel == typeLabel } } diff --git a/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt b/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt index f257b364..60650ed8 100644 --- a/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt +++ b/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt @@ -22,9 +22,9 @@ internal fun checkExtractedDir(extractedDataDir: File, languages: List) internal fun validPathContextsFile(name: String, batching: Boolean): Boolean { return if (batching) { - name.startsWith("path_contexts_") && name.endsWith(".csv") + name.startsWith("path_contexts_") && name.endsWith(".c2s") } else { - name == "path_contexts.csv" + name == "path_contexts.c2s" } } From 6be0265aeb5565e2afc8892a5d061a8255affee2 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 26 Jul 2021 17:38:34 +0300 Subject: [PATCH 254/308] Implement filters for gumtree java parser --- configs/gumtree_java_function_names_paths.yaml | 2 ++ docs/filters.md | 4 ++-- src/examples/kotlin/astminer/gumTreeJavaMethodPaths.kt | 3 +++ .../astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt | 7 +++++++ .../resources/examples/test_dir_name_extension.java/1.java | 7 +++++++ 5 files changed, 21 insertions(+), 2 deletions(-) diff --git a/configs/gumtree_java_function_names_paths.yaml b/configs/gumtree_java_function_names_paths.yaml index 211614bf..11ef0d9b 100644 --- a/configs/gumtree_java_function_names_paths.yaml +++ b/configs/gumtree_java_function_names_paths.yaml @@ -13,6 +13,8 @@ filters: maxWordsNumber: 10 - name: by words number maxTokenWordsNumber: 100 + - name: by annotations + annotations: [override] # use function name as labels # this selects the function level granularity diff --git 
a/docs/filters.md b/docs/filters.md index 886844d8..ab20199f 100644 --- a/docs/filters.md +++ b/docs/filters.md @@ -12,7 +12,7 @@ Therefore some languages or parsers may not support needed filter To handle this user should manually add specific logic of parsing AST to get info about function or code at all. -Filter config classes are defined in [FilterConfigs.kt](src/main/kotlin/astminer/config/FilterConfigs.kt). +Filter config classes are defined in [FilterConfigs.kt](../src/main/kotlin/astminer/config/FilterConfigs.kt). ## by tree size **granularity**: files, functions @@ -61,7 +61,7 @@ Exclude functions that have certain annotations (e.g. `@Override`) ```yaml name: by annotations - annotations: [ Override ] + annotations: [ override ] ``` ## by modifiers diff --git a/src/examples/kotlin/astminer/gumTreeJavaMethodPaths.kt b/src/examples/kotlin/astminer/gumTreeJavaMethodPaths.kt index 2039a00a..36a615ce 100644 --- a/src/examples/kotlin/astminer/gumTreeJavaMethodPaths.kt +++ b/src/examples/kotlin/astminer/gumTreeJavaMethodPaths.kt @@ -36,6 +36,9 @@ fun gumTreeJavaMethodPaths() { methodNodes.forEach { methodInfo -> // Retrieve a method identifier + println("Method name: ${methodInfo.name}, " + + "modifiers: ${methodInfo.modifiers}, " + + "annotations: ${methodInfo.annotations}") val entityId = "${file.path}::${getCsvFriendlyMethodId(methodInfo)}" val labelingResult = LabeledResult(fileNode, entityId, file.path) storage.store(labelingResult) diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt index db1c5219..a6af57f7 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -16,6 +16,13 @@ class GumTreeJavaFunctionInfo( override val returnType: String? = root.getElementType() override val enclosingElement: EnclosingElement? 
= collectEnclosingClass() + override val modifiers: List = root.children.filter { it.typeLabel == "Modifier" }.map { it.originalToken } + override val annotations: List = root + .children + .filter { it.typeLabel == "MarkerAnnotation" } + .map { it.children.first().originalToken } + override val isConstructor: Boolean = root.typeLabel == "Initializer" + private fun collectEnclosingClass(): EnclosingElement? { val enclosingClassNode = getEnclosingClassNode(root.parent) ?: return null val enclosingClassName = enclosingClassNode.getChildOfType(TypeLabels.simpleName)?.originalToken diff --git a/src/test/resources/examples/test_dir_name_extension.java/1.java b/src/test/resources/examples/test_dir_name_extension.java/1.java index 52c71b04..179582b5 100644 --- a/src/test/resources/examples/test_dir_name_extension.java/1.java +++ b/src/test/resources/examples/test_dir_name_extension.java/1.java @@ -1,5 +1,12 @@ class SingleFunction { + int x; + + @Override void fun(String[] args, int param) { System.out.println("Hello again world!"); } + + public SingleFunction { + x = 5; + } } From 478a8b21d4286ea56be6d427f7cf9b6801a343fc Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Mon, 26 Jul 2021 18:09:12 +0300 Subject: [PATCH 255/308] Update paths to configs in github CI config --- .github/workflows/build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5266e6e0..9e8eceaf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -104,13 +104,13 @@ jobs: run: ./gradlew shadowJar - name: Run antlr_java_js_ast.yaml - run: java -jar build/shadow/astminer.jar antlr_java_js_ast.yaml + run: java -jar build/shadow/astminer.jar configs/antlr_java_js_ast.yaml - name: Run antlr_python_paths.yaml - run: java -jar build/shadow/astminer.jar antlr_python_paths.yaml + run: java -jar build/shadow/astminer.jar configs/antlr_python_paths.yaml - name: Run gumtree_java_ast.yaml - run: java 
-jar build/shadow/astminer.jar gumtree_java_ast.yaml + run: java -jar build/shadow/astminer.jar configs/gumtree_java_ast.yaml - name: Run gumtree_java_function_names_paths.yaml - run: java -jar build/shadow/astminer.jar gumtree_java_function_names_paths.yaml + run: java -jar build/shadow/astminer.jar configs/gumtree_java_function_names_paths.yaml From 46c110053a774bb663e5e035be9328e04d407bed Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Tue, 27 Jul 2021 16:58:25 +0500 Subject: [PATCH 256/308] Remove ParseResult, rename parsing result factory --- scripts/fuzzy/convert.sh | 16 ------- .../astminer/common/model/ParsingModel.kt | 4 +- ...{HandlerModel.kt => ParsingResultModel.kt} | 14 +++--- .../astminer/common/model/PipelineModel.kt | 6 +-- .../kotlin/astminer/filters/CommonFilters.kt | 4 +- .../labelextractor/FileLabelExtractors.kt | 9 ++-- .../astminer/parse/antlr/AntlrHandler.kt | 48 ------------------- .../parse/antlr/AntlrParsingResult.kt | 48 +++++++++++++++++++ src/main/kotlin/astminer/parse/factory.kt | 44 ++++++++--------- .../parse/fuzzy/{cpp => }/FuzzyNode.kt | 2 +- .../parse/fuzzy/FuzzyParsingResult.kt | 16 +++++++ .../parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt | 1 + .../parse/fuzzy/cpp/FuzzyCppParser.kt | 15 +++--- .../parse/fuzzy/cpp/FuzzyFunctionSplitter.kt | 1 + .../astminer/parse/fuzzy/cpp/FuzzyHandler.kt | 15 ------ .../kotlin/astminer/parse/fuzzy/cpp/utils.kt | 8 ++-- .../astminer/parse/gumtree/GumtreeHandler.kt | 28 ----------- .../parse/gumtree/GumtreeParsingResult.kt | 27 +++++++++++ src/main/kotlin/astminer/pipeline/Pipeline.kt | 6 +-- .../pipeline/branch/FilePipelineBranch.kt | 10 ++-- .../pipeline/branch/FunctionPipelineBranch.kt | 5 +- .../pipeline/branch/PipelineBranch.kt | 4 +- src/test/kotlin/astminer/Utils.kt | 2 +- src/test/kotlin/astminer/common/DummyNode.kt | 13 +++-- .../labelextractor/FileNameExtractorTest.kt | 9 ++-- .../labelextractor/FolderNameExtractorTest.kt | 11 +++-- .../antlr/python/ANTLRPythonParserTest.kt | 2 +- 
.../astminer/parse/cpp/FuzzyCppParserTest.kt | 2 +- .../parse/cpp/FuzzyMethodSplitterTest.kt | 2 +- .../gumtree/java/GumTreeJavaParserTest.kt | 2 +- 30 files changed, 180 insertions(+), 194 deletions(-) delete mode 100755 scripts/fuzzy/convert.sh rename src/main/kotlin/astminer/common/model/{HandlerModel.kt => ParsingResultModel.kt} (58%) delete mode 100644 src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt create mode 100644 src/main/kotlin/astminer/parse/antlr/AntlrParsingResult.kt rename src/main/kotlin/astminer/parse/fuzzy/{cpp => }/FuzzyNode.kt (96%) create mode 100644 src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResult.kt delete mode 100644 src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt delete mode 100644 src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt create mode 100644 src/main/kotlin/astminer/parse/gumtree/GumtreeParsingResult.kt diff --git a/scripts/fuzzy/convert.sh b/scripts/fuzzy/convert.sh deleted file mode 100755 index fd79e243..00000000 --- a/scripts/fuzzy/convert.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -if [[ $# -ne 2 || ! 
-f $1 ]] ; then - echo "Invalid args / Check file " - exit -fi - -file_name=$1 -output_name=$2 - -grep '^\s*#\s*include' $file_name > /tmp/include.c -grep -Pv '^\s*#\s*include\b' $file_name > /tmp/code.c -gcc -E /tmp/code.c | grep -v ^# > /tmp/preprocessed.c -mkdir -p $output_name -cat /tmp/include.c > $output_name/$file_name -cat /tmp/preprocessed.c >> $output_name/$file_name \ No newline at end of file diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index c5136d75..9b1af5ba 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -70,7 +70,5 @@ interface Parser { * @param file file to parse * @return ParseResult instance */ - fun parseFile(file: File) = ParseResult(parseInputStream(file.inputStream()), file.path) + fun parseFile(file: File) = parseInputStream(file.inputStream()) } - -data class ParseResult(val root: T, val filePath: String) diff --git a/src/main/kotlin/astminer/common/model/HandlerModel.kt b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt similarity index 58% rename from src/main/kotlin/astminer/common/model/HandlerModel.kt rename to src/main/kotlin/astminer/common/model/ParsingResultModel.kt index e944bdef..d1897310 100644 --- a/src/main/kotlin/astminer/common/model/HandlerModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -6,13 +6,13 @@ import java.io.File private val logger = KotlinLogging.logger("HandlerFactory") -interface HandlerFactory { - fun createHandler(file: File): LanguageHandler +interface ParsingResultFactory { + fun parse(file: File): ParsingResult - fun createHandlers(files: List, action: (LanguageHandler) -> Unit) { + fun parseFiles(files: List, action: (ParsingResult) -> Unit) { for (file in files) { try { - action(createHandler(file)) + action(parse(file)) } catch (parsingException: ParsingException) { logger.error(parsingException) { "Failed 
to parse file ${file.path}" } } @@ -20,10 +20,10 @@ interface HandlerFactory { } } -abstract class LanguageHandler { - abstract val parseResult: ParseResult +abstract class ParsingResult(internal val file: File) { + abstract val root: T protected abstract val splitter: TreeFunctionSplitter fun splitIntoFunctions(): Collection> = - splitter.splitIntoFunctions(parseResult.root, parseResult.filePath) + splitter.splitIntoFunctions(root, file.path) } diff --git a/src/main/kotlin/astminer/common/model/PipelineModel.kt b/src/main/kotlin/astminer/common/model/PipelineModel.kt index 343e24ea..d96ec77c 100644 --- a/src/main/kotlin/astminer/common/model/PipelineModel.kt +++ b/src/main/kotlin/astminer/common/model/PipelineModel.kt @@ -7,7 +7,7 @@ interface Filter interface LabelExtractor interface FileFilter : Filter { - fun validate(parseResult: ParseResult): Boolean + fun validate(parseResult: ParsingResult): Boolean } interface FunctionFilter : Filter { @@ -15,7 +15,7 @@ interface FunctionFilter : Filter { } interface FileLabelExtractor : LabelExtractor { - fun process(parseResult: ParseResult): LabeledResult? + fun process(parseResult: ParsingResult): LabeledResult? } interface FunctionLabelExtractor : LabelExtractor { @@ -30,7 +30,7 @@ interface FunctionLabelExtractor : LabelExtractor { */ data class LabeledResult(val root: T, val label: String, val filePath: String) -fun ParseResult.labeledWith(label: String): LabeledResult = LabeledResult(root, label, filePath) +fun ParsingResult.labeledWith(label: String): LabeledResult = LabeledResult(root, label, file.path) /** * Storage saved labeled results to disk in a specified format. 
diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt index 00d0904d..a0f47848 100644 --- a/src/main/kotlin/astminer/filters/CommonFilters.kt +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -16,7 +16,7 @@ class TreeSizeFilter(private val minSize: Int = 0, private val maxSize: Int? = n override fun validate(functionInfo: FunctionInfo): Boolean = validateTree(functionInfo.root) - override fun validate(parseResult: ParseResult): Boolean = validateTree(parseResult.root) + override fun validate(parseResult: ParsingResult): Boolean = validateTree(parseResult.root) } /** @@ -28,5 +28,5 @@ class WordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter, FileF override fun validate(functionInfo: FunctionInfo) = validateTree(functionInfo.root) - override fun validate(parseResult: ParseResult) = validateTree(parseResult.root) + override fun validate(parseResult: ParsingResult) = validateTree(parseResult.root) } diff --git a/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt b/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt index ba6ed3da..b7c2b6d8 100644 --- a/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt +++ b/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt @@ -1,22 +1,21 @@ package astminer.labelextractor import astminer.common.model.* -import java.io.File /** * Labels files with folder names */ object FileNameExtractor : FileLabelExtractor { - override fun process(parseResult: ParseResult): LabeledResult = - parseResult.labeledWith(File(parseResult.filePath).name) + override fun process(parseResult: ParsingResult): LabeledResult = + parseResult.labeledWith(parseResult.file.name) } /** * Labels files with folder names */ object FolderNameExtractor : FileLabelExtractor { - override fun process(parseResult: ParseResult): LabeledResult? 
{ - val folderName = File(parseResult.filePath).parentFile?.name ?: return null + override fun process(parseResult: ParsingResult): LabeledResult? { + val folderName = parseResult.file.parentFile?.name ?: return null return parseResult.labeledWith(folderName) } } diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt deleted file mode 100644 index c8d81a40..00000000 --- a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt +++ /dev/null @@ -1,48 +0,0 @@ -package astminer.parse.antlr - -import astminer.common.model.* -import astminer.parse.antlr.java.JavaFunctionSplitter -import astminer.parse.antlr.java.JavaParser -import astminer.parse.antlr.javascript.JavaScriptFunctionSplitter -import astminer.parse.antlr.javascript.JavaScriptParser -import astminer.parse.antlr.php.PHPFunctionSplitter -import astminer.parse.antlr.php.PHPParser -import astminer.parse.antlr.python.PythonFunctionSplitter -import astminer.parse.antlr.python.PythonParser -import java.io.File - -object AntlrJavaHandlerFactory : HandlerFactory { - override fun createHandler(file: File) = AntlrJavaHandler(file) - - class AntlrJavaHandler(file: File) : LanguageHandler() { - override val parseResult: ParseResult = JavaParser().parseFile(file) - override val splitter = JavaFunctionSplitter() - } -} - -object AntlrPythonHandlerFactory : HandlerFactory { - override fun createHandler(file: File) = AntlrPythonHandler(file) - - class AntlrPythonHandler(file: File) : LanguageHandler() { - override val parseResult: ParseResult = PythonParser().parseFile(file) - override val splitter = PythonFunctionSplitter() - } -} - -object AntlrJavascriptHandlerFactory : HandlerFactory { - override fun createHandler(file: File) = AntlrJavascriptHandler(file) - - class AntlrJavascriptHandler(file: File) : LanguageHandler() { - override val parseResult: ParseResult = JavaScriptParser().parseFile(file) - override val splitter = JavaScriptFunctionSplitter() - } -} 
- -object AntlrPHPHandlerFactory : HandlerFactory { - override fun createHandler(file: File): LanguageHandler = AntlrPHPHandler(file) - - class AntlrPHPHandler(file: File) : LanguageHandler() { - override val parseResult: ParseResult = PHPParser().parseFile(file) - override val splitter: TreeFunctionSplitter = PHPFunctionSplitter() - } -} diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrParsingResult.kt b/src/main/kotlin/astminer/parse/antlr/AntlrParsingResult.kt new file mode 100644 index 00000000..8a76397f --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/AntlrParsingResult.kt @@ -0,0 +1,48 @@ +package astminer.parse.antlr + +import astminer.common.model.* +import astminer.parse.antlr.java.JavaFunctionSplitter +import astminer.parse.antlr.java.JavaParser +import astminer.parse.antlr.javascript.JavaScriptFunctionSplitter +import astminer.parse.antlr.javascript.JavaScriptParser +import astminer.parse.antlr.php.PHPFunctionSplitter +import astminer.parse.antlr.php.PHPParser +import astminer.parse.antlr.python.PythonFunctionSplitter +import astminer.parse.antlr.python.PythonParser +import java.io.File + +object AntlrJavaParsingResultFactory : ParsingResultFactory { + override fun parse(file: File) = AntlrJavaParsingResult(file) + + class AntlrJavaParsingResult(file: File) : ParsingResult(file) { + override val root = JavaParser().parseFile(file) + override val splitter = JavaFunctionSplitter() + } +} + +object AntlrPythonParsingResultFactory : ParsingResultFactory { + override fun parse(file: File) = AntlrPythonParsingResult(file) + + class AntlrPythonParsingResult(file: File) : ParsingResult(file) { + override val root = PythonParser().parseFile(file) + override val splitter = PythonFunctionSplitter() + } +} + +object AntlrJavascriptParsingResultFactory : ParsingResultFactory { + override fun parse(file: File) = AntlrJavascriptParsingResult(file) + + class AntlrJavascriptParsingResult(file: File) : ParsingResult(file) { + override val root = 
JavaScriptParser().parseFile(file) + override val splitter = JavaScriptFunctionSplitter() + } +} + +object AntlrPHPParsingResultFactory : ParsingResultFactory { + override fun parse(file: File): ParsingResult = AntlrPHPParsingResult(file) + + class AntlrPHPParsingResult(file: File) : ParsingResult(file) { + override val root = PHPParser().parseFile(file) + override val splitter = PHPFunctionSplitter() + } +} diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt index 2646db39..f5f055cc 100644 --- a/src/main/kotlin/astminer/parse/factory.kt +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -1,45 +1,45 @@ package astminer.parse -import astminer.common.model.HandlerFactory +import astminer.common.model.ParsingResultFactory import astminer.config.FileExtension import astminer.config.ParserType -import astminer.parse.antlr.AntlrJavaHandlerFactory -import astminer.parse.antlr.AntlrJavascriptHandlerFactory -import astminer.parse.antlr.AntlrPHPHandlerFactory -import astminer.parse.antlr.AntlrPythonHandlerFactory -import astminer.parse.fuzzy.cpp.FuzzyHandler -import astminer.parse.gumtree.GumtreeJavaHandlerFactory -import astminer.parse.gumtree.GumtreePythonHandlerFactory +import astminer.parse.antlr.AntlrJavaParsingResultFactory +import astminer.parse.antlr.AntlrJavascriptParsingResultFactory +import astminer.parse.antlr.AntlrPHPParsingResultFactory +import astminer.parse.antlr.AntlrPythonParsingResultFactory +import astminer.parse.fuzzy.FuzzyParsingResult +import astminer.parse.gumtree.GumtreeJavaParsingResultFactory +import astminer.parse.gumtree.GumtreePythonParsingResultFactory -fun getHandlerFactory(extension: FileExtension, parserType: ParserType): HandlerFactory { +fun getParsingResultFactory(extension: FileExtension, parserType: ParserType): ParsingResultFactory { return when (parserType) { - ParserType.GumTree -> getGumtreeHandlerFactory(extension) - ParserType.Antlr -> getAntlrHandlerFactory(extension) - 
ParserType.Fuzzy -> getFuzzyHandlerFactory(extension) + ParserType.GumTree -> getGumtreeParsingResultFactory(extension) + ParserType.Antlr -> getAntlrParsingResultFactory(extension) + ParserType.Fuzzy -> getFuzzyParsingResultFactory(extension) } } -private fun getGumtreeHandlerFactory(extension: FileExtension): HandlerFactory { +private fun getGumtreeParsingResultFactory(extension: FileExtension): ParsingResultFactory { return when (extension) { - FileExtension.Java -> GumtreeJavaHandlerFactory - FileExtension.Python -> GumtreePythonHandlerFactory + FileExtension.Java -> GumtreeJavaParsingResultFactory + FileExtension.Python -> GumtreePythonParsingResultFactory else -> throw UnsupportedOperationException() } } -private fun getAntlrHandlerFactory(extension: FileExtension): HandlerFactory { +private fun getAntlrParsingResultFactory(extension: FileExtension): ParsingResultFactory { return when (extension) { - FileExtension.Java -> AntlrJavaHandlerFactory - FileExtension.JavaScript -> AntlrJavascriptHandlerFactory - FileExtension.Python -> AntlrPythonHandlerFactory - FileExtension.PHP -> AntlrPHPHandlerFactory + FileExtension.Java -> AntlrJavaParsingResultFactory + FileExtension.JavaScript -> AntlrJavascriptParsingResultFactory + FileExtension.Python -> AntlrPythonParsingResultFactory + FileExtension.PHP -> AntlrPHPParsingResultFactory else -> throw UnsupportedOperationException() } } -private fun getFuzzyHandlerFactory(extension: FileExtension): HandlerFactory { +private fun getFuzzyParsingResultFactory(extension: FileExtension): ParsingResultFactory { return when (extension) { - FileExtension.C, FileExtension.Cpp -> FuzzyHandler + FileExtension.C, FileExtension.Cpp -> FuzzyParsingResult else -> throw UnsupportedOperationException() } } diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt b/src/main/kotlin/astminer/parse/fuzzy/FuzzyNode.kt similarity index 96% rename from src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt rename to 
src/main/kotlin/astminer/parse/fuzzy/FuzzyNode.kt index 097bd914..07924b52 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyNode.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/FuzzyNode.kt @@ -1,4 +1,4 @@ -package astminer.parse.fuzzy.cpp +package astminer.parse.fuzzy import astminer.common.model.Node import com.google.common.collect.TreeMultiset diff --git a/src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResult.kt b/src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResult.kt new file mode 100644 index 00000000..13ee046d --- /dev/null +++ b/src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResult.kt @@ -0,0 +1,16 @@ +package astminer.parse.fuzzy + +import astminer.common.model.ParsingResult +import astminer.common.model.ParsingResultFactory +import astminer.parse.fuzzy.cpp.FuzzyCppParser +import astminer.parse.fuzzy.cpp.FuzzyFunctionSplitter +import java.io.File + +object FuzzyParsingResult : ParsingResultFactory { + override fun parse(file: File): ParsingResult = CppFuzzyParsingResult(file) + + class CppFuzzyParsingResult(file: File) : ParsingResult(file) { + override val root = FuzzyCppParser().parseFile(file) + override val splitter = FuzzyFunctionSplitter() + } +} diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt index 681a3cac..5bcc6b06 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt @@ -5,6 +5,7 @@ import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter import astminer.parse.findEnclosingElementBy +import astminer.parse.fuzzy.FuzzyNode class FuzzyCppFunctionInfo(override val root: FuzzyNode, override val filePath: String) : FunctionInfo { diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt 
index 8be634bd..6de6f299 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt @@ -1,8 +1,8 @@ package astminer.parse.fuzzy.cpp -import astminer.common.model.ParseResult import astminer.common.model.Parser import astminer.parse.ParsingException +import astminer.parse.fuzzy.FuzzyNode import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.codepropertygraph.generated.EdgeTypes import io.shiftleft.codepropertygraph.generated.NodeKeys @@ -34,15 +34,15 @@ class FuzzyCppParser : Parser { file.outputStream().use { content.copyTo(it) } - return parseFile(file).root + return parseFile(file) } /** * Parse a single file and create an AST. * @param file to parse - * @return [ParseResult] with root of an AST (null if parsing failed) and file path + * @return root of an AST (null if parsing failed) */ - override fun parseFile(file: File): ParseResult { + override fun parseFile(file: File): FuzzyNode { // We need some tweaks to create Scala sets from Kotlin code val pathSetScalaBuilder = Set.newBuilder() pathSetScalaBuilder.addOne(file.path) @@ -60,13 +60,13 @@ class FuzzyCppParser : Parser { /** * Convert [cpg][io.shiftleft.codepropertygraph.Cpg] created by fuzzyc2cpg - * to list of [FuzzyNode][astminer.parse.fuzzy.cpp.FuzzyNode]. + * to list of [FuzzyNode][astminer.parse.fuzzy.FuzzyNode]. * Cpg may contain graphs for several files, in that case several ASTs will be created. 
* @param cpg to be converted * @param filePath to the parsed file that will be used if parsing failed * @return list of AST roots */ - private fun cpg2Nodes(cpg: Cpg, filePath: String): ParseResult { + private fun cpg2Nodes(cpg: Cpg, filePath: String): FuzzyNode { val g = cpg.graph() val vertexToNode = mutableMapOf() g.E().forEach { @@ -80,8 +80,7 @@ class FuzzyCppParser : Parser { if (File(actualFilePath).absolutePath != File(filePath).absolutePath) { println("While parsing $filePath, actually parsed $actualFilePath") } - val node = vertexToNode[it] ?: throw ParsingException("Fuzzy", "C++") - return ParseResult(node, actualFilePath) + return vertexToNode[it] ?: throw ParsingException("Fuzzy", "C++") } } throw ParsingException("Fuzzy", "C++") diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt index 3fb83611..6af517cd 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt @@ -1,6 +1,7 @@ package astminer.parse.fuzzy.cpp import astminer.common.model.* +import astminer.parse.fuzzy.FuzzyNode class FuzzyFunctionSplitter : TreeFunctionSplitter { private val methodNode = "METHOD" diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt deleted file mode 100644 index 95fd7c7a..00000000 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyHandler.kt +++ /dev/null @@ -1,15 +0,0 @@ -package astminer.parse.fuzzy.cpp - -import astminer.common.model.HandlerFactory -import astminer.common.model.LanguageHandler -import astminer.common.model.ParseResult -import java.io.File - -object FuzzyHandler : HandlerFactory { - override fun createHandler(file: File): LanguageHandler = CppFuzzyHandler(file) - - class CppFuzzyHandler(file: File) : LanguageHandler() { - override val splitter = FuzzyFunctionSplitter() - override val 
parseResult: ParseResult = FuzzyCppParser().parseFile(file) - } -} diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt index 56bc6544..ef3abbeb 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt @@ -13,14 +13,14 @@ fun String.runCommand(workingDir: File) { } fun preprocessCppCode(file: File, outputDir: File, preprocessCommand: String) = """ - grep '^\s*#\s*include' ${file.absolutePath} >__tmp_include.cpp - grep -v '^\s*#\s*include\b' ${file.absolutePath} >__tmp_code.cpp + grep '^\s*#\s*include' "${file.absolutePath}" >__tmp_include.cpp + grep -v '^\s*#\s*include\b' "${file.absolutePath}" >__tmp_code.cpp touch __tmp_preprocessed.cpp if [ -s __tmp_code.cpp ] then $preprocessCommand __tmp_code.cpp | grep -v ^# >__tmp_preprocessed.cpp fi - cat __tmp_include.cpp >${outputDir.absolutePath}/${file.name} - cat __tmp_preprocessed.cpp >>${outputDir.absolutePath}/${file.name} + cat __tmp_include.cpp > "${outputDir.absolutePath}"/"${file.name}" + cat __tmp_preprocessed.cpp >> "${outputDir.absolutePath}"/"${file.name}" rm __tmp_*.cpp """.trimIndent() diff --git a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt b/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt deleted file mode 100644 index 7b94646a..00000000 --- a/src/main/kotlin/astminer/parse/gumtree/GumtreeHandler.kt +++ /dev/null @@ -1,28 +0,0 @@ -package astminer.parse.gumtree - -import astminer.common.model.HandlerFactory -import astminer.common.model.LanguageHandler -import astminer.common.model.ParseResult -import astminer.parse.gumtree.java.GumTreeJavaFunctionSplitter -import astminer.parse.gumtree.java.GumTreeJavaParser -import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter -import astminer.parse.gumtree.python.GumTreePythonParser -import java.io.File - -object GumtreeJavaHandlerFactory : HandlerFactory { - override fun createHandler(file: File): 
LanguageHandler = JavaGumtreeHandler(file) - - class JavaGumtreeHandler(file: File) : LanguageHandler() { - override val splitter = GumTreeJavaFunctionSplitter() - override val parseResult: ParseResult = GumTreeJavaParser().parseFile(file) - } -} - -object GumtreePythonHandlerFactory : HandlerFactory { - override fun createHandler(file: File): LanguageHandler = PythonGumTreeHandler(file) - - class PythonGumTreeHandler(file: File) : LanguageHandler() { - override val splitter = GumTreePythonFunctionSplitter() - override val parseResult: ParseResult = GumTreePythonParser().parseFile(file) - } -} diff --git a/src/main/kotlin/astminer/parse/gumtree/GumtreeParsingResult.kt b/src/main/kotlin/astminer/parse/gumtree/GumtreeParsingResult.kt new file mode 100644 index 00000000..4de7c5ae --- /dev/null +++ b/src/main/kotlin/astminer/parse/gumtree/GumtreeParsingResult.kt @@ -0,0 +1,27 @@ +package astminer.parse.gumtree + +import astminer.common.model.ParsingResult +import astminer.common.model.ParsingResultFactory +import astminer.parse.gumtree.java.GumTreeJavaFunctionSplitter +import astminer.parse.gumtree.java.GumTreeJavaParser +import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter +import astminer.parse.gumtree.python.GumTreePythonParser +import java.io.File + +object GumtreeJavaParsingResultFactory : ParsingResultFactory { + override fun parse(file: File): ParsingResult = JavaGumtreeParsingResult(file) + + class JavaGumtreeParsingResult(file: File) : ParsingResult(file) { + override val root = GumTreeJavaParser().parseFile(file) + override val splitter = GumTreeJavaFunctionSplitter() + } +} + +object GumtreePythonParsingResultFactory : ParsingResultFactory { + override fun parse(file: File): ParsingResult = PythonGumtreeParsingResult(file) + + class PythonGumtreeParsingResult(file: File) : ParsingResult(file) { + override val root = GumTreePythonParser().parseFile(file) + override val splitter = GumTreePythonFunctionSplitter() + } +} diff --git 
a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 62761638..965bbae0 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -6,7 +6,7 @@ import astminer.common.model.FunctionLabelExtractor import astminer.common.model.Storage import astminer.config.FileExtension import astminer.config.PipelineConfig -import astminer.parse.getHandlerFactory +import astminer.parse.getParsingResultFactory import astminer.pipeline.branch.FilePipelineBranch import astminer.pipeline.branch.FunctionPipelineBranch import astminer.pipeline.branch.IllegalLabelExtractorException @@ -45,12 +45,12 @@ class Pipeline(private val config: PipelineConfig) { */ fun run() { for (language in config.parser.languages) { - val languageFactory = getHandlerFactory(language, config.parser.name) + val parsingResultFactory = getParsingResultFactory(language, config.parser.name) val files = getProjectFilesWithExtension(inputDirectory, language.fileExtension) createStorage(language).use { storage -> - languageFactory.createHandlers(files) { languageHandler -> + parsingResultFactory.parseFiles(files) { languageHandler -> for (labeledResult in branch.process(languageHandler)) { storage.store(labeledResult) } diff --git a/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt index e31b28b4..81963e99 100644 --- a/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt @@ -2,7 +2,6 @@ package astminer.pipeline.branch import astminer.common.model.* import astminer.common.model.FileFilter -import astminer.labelextractor.* /** * PipelineBranch for pipeline with file-level granularity (FilePipelineConfig). 
@@ -18,13 +17,12 @@ class FilePipelineBranch( ?: throw IllegalFilterException("file", filter::class.simpleName) } - private fun passesThroughFilters(parseResult: ParseResult) = + private fun passesThroughFilters(parseResult: ParsingResult) = filters.all { filter -> filter.validate(parseResult) } - override fun process(languageHandler: LanguageHandler): List> { - val parseResult = languageHandler.parseResult - return if (passesThroughFilters(parseResult)) { - val labeledResult = labelExtractor.process(parseResult) ?: return emptyList() + override fun process(parsingResult: ParsingResult): List> { + return if (passesThroughFilters(parsingResult)) { + val labeledResult = labelExtractor.process(parsingResult) ?: return emptyList() listOf(labeledResult) } else { emptyList() diff --git a/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt index 62a5f184..08231b20 100644 --- a/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt @@ -1,7 +1,6 @@ package astminer.pipeline.branch import astminer.common.model.* -import astminer.labelextractor.* /** * PipelineBranch for pipeline with function-level granularity (FunctionPipelineConfig). 
@@ -21,8 +20,8 @@ class FunctionPipelineBranch( private fun passesThroughFilters(functionInfo: FunctionInfo) = filters.all { filter -> filter.validate(functionInfo) } - override fun process(languageHandler: LanguageHandler): List> = - languageHandler.splitIntoFunctions() + override fun process(parsingResult: ParsingResult): List> = + parsingResult.splitIntoFunctions() .filter { functionInfo -> passesThroughFilters(functionInfo) } .mapNotNull { functionInfo -> labelExtractor.process(functionInfo) } } diff --git a/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt index 7b743781..35559c57 100644 --- a/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt +++ b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt @@ -1,8 +1,8 @@ package astminer.pipeline.branch import astminer.common.model.LabeledResult -import astminer.common.model.LanguageHandler import astminer.common.model.Node +import astminer.common.model.ParsingResult /** * PipelineBranch is a part of the pipeline that encapsulate inside itself granularity based logic. @@ -14,5 +14,5 @@ interface PipelineBranch { * May mutate the AST. 
* Should have no other side-effects */ - fun process(languageHandler: LanguageHandler): List> + fun process(parsingResult: ParsingResult): List> } diff --git a/src/test/kotlin/astminer/Utils.kt b/src/test/kotlin/astminer/Utils.kt index d6deacd9..5f421c54 100644 --- a/src/test/kotlin/astminer/Utils.kt +++ b/src/test/kotlin/astminer/Utils.kt @@ -15,4 +15,4 @@ fun checkExecutable(execName: String): Boolean { return false } -fun Parser.parseFiles(files: List) = files.map { parseFile(it).root } +fun Parser.parseFiles(files: List) = files.map { parseFile(it) } diff --git a/src/test/kotlin/astminer/common/DummyNode.kt b/src/test/kotlin/astminer/common/DummyNode.kt index cff7b637..1a39d0d4 100644 --- a/src/test/kotlin/astminer/common/DummyNode.kt +++ b/src/test/kotlin/astminer/common/DummyNode.kt @@ -1,8 +1,7 @@ package astminer.common -import astminer.common.model.LabeledResult -import astminer.common.model.Node -import astminer.common.model.ParseResult +import astminer.common.model.* +import java.io.File class DummyNode( override val typeLabel: String, @@ -22,11 +21,17 @@ class DummyNode( children.removeIf { it.typeLabel == typeLabel } } - fun toParseResult() = ParseResult(this, "") + fun toParseResult() = DummyParsingResult(File("."), this) fun labeledWith(label: String) = LabeledResult(this, label, "") } +class DummyParsingResult(file: File, override val root: DummyNode) : ParsingResult(file) { + override val splitter: TreeFunctionSplitter = object : TreeFunctionSplitter { + override fun splitIntoFunctions(root: DummyNode, filePath: String) = listOf>() + } +} + /** * Returns a small tree. 
* Diagram: diff --git a/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt b/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt index db392d48..bce37ead 100644 --- a/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt +++ b/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt @@ -1,15 +1,16 @@ package astminer.labelextractor +import astminer.common.DummyNode +import astminer.common.DummyParsingResult import astminer.common.model.LabeledResult -import astminer.common.model.ParseResult -import astminer.parse.antlr.AntlrNode import org.junit.Test +import java.io.File import kotlin.test.assertEquals class FileNameExtractorTest { @Test fun `test file path extractor returns the same root and file path and labels with file path`() { - val nonEmptyParseResult = ParseResult(dummyRoot, PATH) + val nonEmptyParseResult = DummyParsingResult(File(PATH), dummyRoot) val labeledParseResult = FileNameExtractor.process(nonEmptyParseResult) assertEquals(LabeledResult(dummyRoot, FILE_NAME, PATH), labeledParseResult) @@ -18,6 +19,6 @@ class FileNameExtractorTest { companion object { private const val FILE_NAME = "file.txt" private const val PATH = "random/folder/$FILE_NAME" - private var dummyRoot = AntlrNode("", null, null) + private var dummyRoot = DummyNode("") } } diff --git a/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt b/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt index b07a236d..ed2fbbe7 100644 --- a/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt +++ b/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt @@ -1,9 +1,10 @@ package astminer.labelextractor +import astminer.common.DummyNode +import astminer.common.DummyParsingResult import astminer.common.model.LabeledResult -import astminer.common.model.ParseResult -import astminer.parse.antlr.AntlrNode import org.junit.Test +import java.io.File import kotlin.test.assertEquals import 
kotlin.test.assertNull @@ -11,7 +12,7 @@ class FolderNameExtractorTest { @Test fun `test folder extractor returns null when folder is empty or not found`() { - val nonEmptyParseResult = ParseResult(dummyRoot, "") + val nonEmptyParseResult = DummyParsingResult(File(""), dummyRoot) val labeledParseResult = FolderNameExtractor.process(nonEmptyParseResult) assertNull(labeledParseResult) @@ -19,7 +20,7 @@ class FolderNameExtractorTest { @Test fun `test folder extractor extracts folder when it is not empty`() { - val nonEmptyParseResult = ParseResult(dummyRoot, PATH) + val nonEmptyParseResult = DummyParsingResult(File(PATH), dummyRoot) val labeledParseResult = FolderNameExtractor.process(nonEmptyParseResult) assertEquals(LabeledResult(dummyRoot, FOLDER, PATH), labeledParseResult) @@ -28,6 +29,6 @@ class FolderNameExtractorTest { companion object { private const val PATH = "random/folder/file.txt" private const val FOLDER = "folder" - private var dummyRoot = AntlrNode("", null, null) + private var dummyRoot = DummyNode("") } } diff --git a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt index 3e2dc094..e85eadb0 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt @@ -13,7 +13,7 @@ class ANTLRPythonParserTest { val parser = PythonParser() val file = File("src/test/resources/examples/1.py") - val node = parser.parseFile(file).root + val node = parser.parseFile(file) Assert.assertNotNull("Parse tree for a valid file should not be null", node) } diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 7ca5570f..4ad908cc 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -3,8 +3,8 @@ package astminer.parse.cpp import 
astminer.checkExecutable import astminer.common.forFilesWithSuffix import astminer.common.getProjectFilesWithExtension +import astminer.parse.fuzzy.FuzzyNode import astminer.parse.fuzzy.cpp.FuzzyCppParser -import astminer.parse.fuzzy.cpp.FuzzyNode import astminer.parseFiles import org.junit.Assert import org.junit.Assume diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index 7f3d853a..ddb5ad63 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -2,9 +2,9 @@ package astminer.parse.cpp import astminer.checkExecutable import astminer.common.model.FunctionInfo +import astminer.parse.fuzzy.FuzzyNode import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.parse.fuzzy.cpp.FuzzyFunctionSplitter -import astminer.parse.fuzzy.cpp.FuzzyNode import org.junit.Assume import org.junit.Before import org.junit.Test diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt index c3a5fa22..a727ad72 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt @@ -12,7 +12,7 @@ class GumTreeJavaParserTest { val parser = GumTreeJavaParser() val file = File("src/test/resources/examples/1.java") - val node = parser.parseFile(file).root + val node = parser.parseFile(file) Assert.assertNotNull("Parse tree for a valid file should not be null", node) } From 54160726ae046345a6ecec10f25b2ad1e3af7f1f Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Tue, 27 Jul 2021 19:08:34 +0500 Subject: [PATCH 257/308] Add preprocessing before creating parsing result --- .../fuzzyCppPathsWithPreprocessing.kt | 9 +---- .../common/model/ParsingResultModel.kt | 28 +++++++++++-- src/main/kotlin/astminer/parse/factory.kt | 4 +- 
.../parse/fuzzy/FuzzyParsingResult.kt | 16 -------- .../parse/fuzzy/FuzzyParsingResultFactory.kt | 40 +++++++++++++++++++ .../parse/fuzzy/cpp/FuzzyCppParser.kt | 35 ---------------- .../astminer/parse/fuzzy/{cpp => }/utils.kt | 12 +++--- src/main/kotlin/astminer/pipeline/Pipeline.kt | 4 +- .../astminer/parse/cpp/FuzzyCppParserTest.kt | 31 ++++++-------- 9 files changed, 90 insertions(+), 89 deletions(-) delete mode 100644 src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResult.kt create mode 100644 src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResultFactory.kt rename src/main/kotlin/astminer/parse/fuzzy/{cpp => }/utils.kt (57%) diff --git a/src/examples/kotlin/astminer/fuzzyCppPathsWithPreprocessing.kt b/src/examples/kotlin/astminer/fuzzyCppPathsWithPreprocessing.kt index a3eff140..3c4dd15a 100644 --- a/src/examples/kotlin/astminer/fuzzyCppPathsWithPreprocessing.kt +++ b/src/examples/kotlin/astminer/fuzzyCppPathsWithPreprocessing.kt @@ -1,9 +1,7 @@ package astminer import astminer.config.* -import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.pipeline.Pipeline -import java.io.File /** * Preprocess .cpp files and retrieve paths from them, using a fuzzyc2cpg parser. 
@@ -12,12 +10,9 @@ fun fuzzyCppPathsWithPreprocessing() { val inputDir = "src/test/resources/examples" val outputDir = "examples_output/fuzzy_cpp_paths" - val preprocessedDir = File(outputDir).resolve("preprocessed") - val parser = FuzzyCppParser() - parser.preprocessProject(File(inputDir), preprocessedDir) - + // Pipeline will handle preprocessing automatically val config = PipelineConfig( - inputDir = preprocessedDir.path, + inputDir = inputDir, outputDir = outputDir, parser = ParserConfig(ParserType.Fuzzy, listOf(FileExtension.Cpp)), labelExtractor = FileNameExtractorConfig(), diff --git a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt index d1897310..c9135b44 100644 --- a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -9,15 +9,37 @@ private val logger = KotlinLogging.logger("HandlerFactory") interface ParsingResultFactory { fun parse(file: File): ParsingResult - fun parseFiles(files: List, action: (ParsingResult) -> Unit) { - for (file in files) { + fun parseFiles(files: List, action: (ParsingResult) -> T) = + files.map { file -> try { action(parse(file)) } catch (parsingException: ParsingException) { logger.error(parsingException) { "Failed to parse file ${file.path}" } + null + } + } +} + +interface PreprocessingParsingResultFactory : ParsingResultFactory { + fun preprocess(file: File, outputDir: File? = null): File + + /** + * Run preprocessing and parsing for all files. + * @param files list of files to be parsed with preprocessing + * @param action action to do with parsed files (e.g. 
save on the disk) + */ + override fun parseFiles(files: List, action: (ParsingResult) -> T) = + files.map { file -> + try { + val preprocessedFile = preprocess(file) + val result = action(parse(preprocessedFile)) + preprocessedFile.delete() + result + } catch (parsingException: ParsingException) { + logger.error(parsingException) { "Failed to parse file ${file.path}" } + null } } - } } abstract class ParsingResult(internal val file: File) { diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt index f5f055cc..2ccdccf7 100644 --- a/src/main/kotlin/astminer/parse/factory.kt +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -7,7 +7,7 @@ import astminer.parse.antlr.AntlrJavaParsingResultFactory import astminer.parse.antlr.AntlrJavascriptParsingResultFactory import astminer.parse.antlr.AntlrPHPParsingResultFactory import astminer.parse.antlr.AntlrPythonParsingResultFactory -import astminer.parse.fuzzy.FuzzyParsingResult +import astminer.parse.fuzzy.FuzzyParsingResultFactory import astminer.parse.gumtree.GumtreeJavaParsingResultFactory import astminer.parse.gumtree.GumtreePythonParsingResultFactory @@ -39,7 +39,7 @@ private fun getAntlrParsingResultFactory(extension: FileExtension): ParsingResul private fun getFuzzyParsingResultFactory(extension: FileExtension): ParsingResultFactory { return when (extension) { - FileExtension.C, FileExtension.Cpp -> FuzzyParsingResult + FileExtension.C, FileExtension.Cpp -> FuzzyParsingResultFactory else -> throw UnsupportedOperationException() } } diff --git a/src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResult.kt b/src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResult.kt deleted file mode 100644 index 13ee046d..00000000 --- a/src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResult.kt +++ /dev/null @@ -1,16 +0,0 @@ -package astminer.parse.fuzzy - -import astminer.common.model.ParsingResult -import astminer.common.model.ParsingResultFactory -import 
astminer.parse.fuzzy.cpp.FuzzyCppParser -import astminer.parse.fuzzy.cpp.FuzzyFunctionSplitter -import java.io.File - -object FuzzyParsingResult : ParsingResultFactory { - override fun parse(file: File): ParsingResult = CppFuzzyParsingResult(file) - - class CppFuzzyParsingResult(file: File) : ParsingResult(file) { - override val root = FuzzyCppParser().parseFile(file) - override val splitter = FuzzyFunctionSplitter() - } -} diff --git a/src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResultFactory.kt b/src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResultFactory.kt new file mode 100644 index 00000000..da8bfe00 --- /dev/null +++ b/src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResultFactory.kt @@ -0,0 +1,40 @@ +package astminer.parse.fuzzy + +import astminer.common.model.ParsingResult +import astminer.common.model.PreprocessingParsingResultFactory +import astminer.parse.fuzzy.cpp.FuzzyCppParser +import astminer.parse.fuzzy.cpp.FuzzyFunctionSplitter +import java.io.File + +object FuzzyParsingResultFactory : PreprocessingParsingResultFactory { + override fun parse(file: File): ParsingResult { + val actualFile = if (file.nameWithoutExtension.endsWith(preprocessSuffix)) { + val actualFileNameSize = file.nameWithoutExtension.length - preprocessSuffix.length + file.parentFile.resolve("${file.nameWithoutExtension.take(actualFileNameSize)}.${file.extension}") + } else file + return CppFuzzyParsingResult(actualFile) + } + + /** + * Run g++ preprocessor (with [preprocessCommand]) on a given file excluding 'include' directives. 
+ * The result of preprocessing is stored in create file "_preprocessed.cpp" + * @param file file to preprocess + * + */ + override fun preprocess(file: File, outputDir: File?): File { + if (file.extension !in supportedExtensions) return file + val outputFile = outputDir?.resolve(file.name) + ?: file.parentFile.resolve("${file.nameWithoutExtension}$preprocessSuffix.${file.extension}") + preprocessCppCode(file, outputFile, preprocessCommand).runCommand(file.absoluteFile.parentFile) + return outputFile + } + + class CppFuzzyParsingResult(file: File) : ParsingResult(file) { + override val root = FuzzyCppParser().parseFile(file) + override val splitter = FuzzyFunctionSplitter() + } + + private val supportedExtensions = listOf("c", "cpp") + private const val preprocessCommand: String = "g++ -E" + private const val preprocessSuffix = "_preprocessed" +} diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt index 6de6f299..fe641865 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt @@ -86,39 +86,6 @@ class FuzzyCppParser : Parser { throw ParsingException("Fuzzy", "C++") } - /** - * Run g++ preprocessor (if [preprocessCommand] is set) on a given file excluding 'include' directives. - * The result of preprocessing is stored in created directory [outputDir] - * @param file file to preprocess - * @param outputDir directory where the preprocessed file will be stored - * @param preprocessCommand bash command that runs preprocessing, "g++ -E" by default - */ - fun preprocessFile(file: File, outputDir: File, preprocessCommand: String = "g++ -E") { - outputDir.mkdirs() - preprocessCppCode(file, outputDir, preprocessCommand).runCommand(file.absoluteFile.parentFile) - } - - /** - * Run preprocessing for all .c and .cpp files in the [project][projectRoot]. 
- * The preprocessed files will be stored in [outputDir], replicating file hierarchy of the original project. - * @param projectRoot root of the project that should be preprocessed - * @param outputDir directory where the preprocessed files will be stored - */ - fun preprocessProject(projectRoot: File, outputDir: File) { - val files = projectRoot.walkTopDown() - .filter { file -> supportedExtensions.contains(file.extension) } - files.forEach { file -> - val relativeFilePath = file.relativeTo(projectRoot) - val outputPath = if (relativeFilePath.parent != null) { - outputDir.resolve(relativeFilePath.parent) - } else { - outputDir - } - outputPath.mkdirs() - preprocessFile(file, outputPath) - } - } - private fun addNodesFromEdge(e: Edge, map: MutableMap) { val parentNode = map.getOrPut(e.outNode()) { createNodeFromVertex(e.outNode()) } val childNode = map.getOrPut(e.inNode()) { createNodeFromVertex(e.inNode()) } @@ -156,8 +123,6 @@ class FuzzyCppParser : Parser { } companion object { - private val supportedExtensions = listOf("c", "cpp") - data class ExpandableNodeKey( val key: String, val supportedNodeLabels: List, diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt b/src/main/kotlin/astminer/parse/fuzzy/utils.kt similarity index 57% rename from src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt rename to src/main/kotlin/astminer/parse/fuzzy/utils.kt index ef3abbeb..8ec52105 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/utils.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/utils.kt @@ -1,4 +1,4 @@ -package astminer.parse.fuzzy.cpp +package astminer.parse.fuzzy import java.io.File import java.util.concurrent.TimeUnit @@ -12,15 +12,15 @@ fun String.runCommand(workingDir: File) { .waitFor(60, TimeUnit.MINUTES) } -fun preprocessCppCode(file: File, outputDir: File, preprocessCommand: String) = """ - grep '^\s*#\s*include' "${file.absolutePath}" >__tmp_include.cpp - grep -v '^\s*#\s*include\b' "${file.absolutePath}" >__tmp_code.cpp +fun 
preprocessCppCode(inputFile: File, outputFile: File, preprocessCommand: String) = """ + grep '^\s*#\s*include' "${inputFile.absolutePath}" >__tmp_include.cpp + grep -v '^\s*#\s*include\b' "${inputFile.absolutePath}" >__tmp_code.cpp touch __tmp_preprocessed.cpp if [ -s __tmp_code.cpp ] then $preprocessCommand __tmp_code.cpp | grep -v ^# >__tmp_preprocessed.cpp fi - cat __tmp_include.cpp > "${outputDir.absolutePath}"/"${file.name}" - cat __tmp_preprocessed.cpp >> "${outputDir.absolutePath}"/"${file.name}" + cat __tmp_include.cpp > "${outputFile.absolutePath}" + cat __tmp_preprocessed.cpp >> "${outputFile.absolutePath}" rm __tmp_*.cpp """.trimIndent() diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 965bbae0..d71a7a08 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -50,8 +50,8 @@ class Pipeline(private val config: PipelineConfig) { val files = getProjectFilesWithExtension(inputDirectory, language.fileExtension) createStorage(language).use { storage -> - parsingResultFactory.parseFiles(files) { languageHandler -> - for (labeledResult in branch.process(languageHandler)) { + parsingResultFactory.parseFiles(files) { parseResult -> + for (labeledResult in branch.process(parseResult)) { storage.store(labeledResult) } } diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 4ad908cc..531f7934 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -4,6 +4,7 @@ import astminer.checkExecutable import astminer.common.forFilesWithSuffix import astminer.common.getProjectFilesWithExtension import astminer.parse.fuzzy.FuzzyNode +import astminer.parse.fuzzy.FuzzyParsingResultFactory import astminer.parse.fuzzy.cpp.FuzzyCppParser import astminer.parseFiles import org.junit.Assert @@ -45,54 +46,48 @@ 
class FuzzyCppParserTest { @Test fun testPreprocessingDefine() { val folder = File("src/test/resources/fuzzy") - val preprocessedFolder = folder.resolve("preprocessed") - preprocessedFolder.mkdir() val defineFileName = "preprocDefineTest.cpp" - val parser = FuzzyCppParser() + val preprocessedFileName = "preprocDefineTest_preprocessed.cpp" - parser.preprocessFile(folder.resolve(defineFileName), preprocessedFolder) + FuzzyParsingResultFactory.preprocess(folder.resolve(defineFileName)) Assert.assertEquals( "'define' directives should be replaced", "for (int i = (0); i < (10); ++i) { }", - preprocessedFolder.resolve(defineFileName).readInOneLine() + folder.resolve(preprocessedFileName).readInOneLine() ) - preprocessedFolder.deleteRecursively() + folder.resolve(preprocessedFileName).delete() } @Test fun testPreprocessingInclude() { val folder = File("src/test/resources/fuzzy") - val preprocessedFolder = folder.resolve("preprocessed") - preprocessedFolder.mkdir() val includeFileName = "preprocIncludeTest.cpp" - val parser = FuzzyCppParser() + val preprocessedFileName = "preprocIncludeTest_preprocessed.cpp" - parser.preprocessFile(folder.resolve(includeFileName), preprocessedFolder) + FuzzyParsingResultFactory.preprocess(folder.resolve(includeFileName)) Assert.assertEquals( "'include' directives should not be replaced", folder.resolve(includeFileName).readInOneLine(), - preprocessedFolder.resolve(includeFileName).readInOneLine() + folder.resolve(preprocessedFileName).readInOneLine() ) - preprocessedFolder.deleteRecursively() + folder.resolve(preprocessedFileName).delete() } @Test fun testPreprocessingProject() { val projectRoot = File("src/test/resources/examples/cpp") - val preprocessedRoot = File("src/test/resources/examples/preprocessed") - preprocessedRoot.mkdir() - val parser = FuzzyCppParser() - parser.preprocessProject(projectRoot, preprocessedRoot) - val nodes = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "cpp")) + val files = 
getProjectFilesWithExtension(projectRoot, "cpp") + val nodes = FuzzyParsingResultFactory.parseFiles(files) { it.root }.filterNotNull() Assert.assertEquals( "Parse tree for a valid file should not be null. There are 5 files in example project.", 5, nodes.size ) - preprocessedRoot.deleteRecursively() + files.map { "${it.nameWithoutExtension}_preprocessed.${it.extension}" } + .forEach { projectRoot.resolve(it).delete() } } } From 66b0a88c11211ca1378797c7e2ee1f0a058a7492 Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 28 Jul 2021 14:21:26 +0300 Subject: [PATCH 258/308] added possibility to save ast with paths --- src/main/kotlin/astminer/config/StorageConfigs.kt | 4 ++-- src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/kotlin/astminer/config/StorageConfigs.kt b/src/main/kotlin/astminer/config/StorageConfigs.kt index 0de6a75c..42b0788b 100644 --- a/src/main/kotlin/astminer/config/StorageConfigs.kt +++ b/src/main/kotlin/astminer/config/StorageConfigs.kt @@ -42,8 +42,8 @@ class DotAstStorageConfig : StorageConfig() { */ @Serializable @SerialName("json AST") -class JsonAstStorageConfig : StorageConfig() { - override fun createStorage(outputDirectoryPath: String) = JsonAstStorage(outputDirectoryPath) +class JsonAstStorageConfig(private val withPaths: Boolean = false) : StorageConfig() { + override fun createStorage(outputDirectoryPath: String) = JsonAstStorage(outputDirectoryPath, withPaths) } /** diff --git a/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt b/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt index e0007951..b7ea399b 100644 --- a/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt @@ -16,7 +16,7 @@ private typealias Id = Int * Each line in the output file is a single json object that corresponds to one of the labeled trees. * Each tree is flattened and represented as a list of nodes. 
*/ -class JsonAstStorage(override val outputDirectoryPath: String) : Storage { +class JsonAstStorage(override val outputDirectoryPath: String, private val withPaths: Boolean) : Storage { private val treeFlattener = TreeFlattener() private val writer: PrintWriter @@ -30,7 +30,7 @@ class JsonAstStorage(override val outputDirectoryPath: String) : Storage { } @Serializable - private data class LabeledAst(val label: String, val ast: List) + private data class LabeledAst(val label: String, val path: String? = null, val ast: List) @Serializable private data class OutputNode(val token: String, val typeLabel: String, val children: List) @@ -40,7 +40,8 @@ class JsonAstStorage(override val outputDirectoryPath: String) : Storage { override fun store(labeledResult: LabeledResult) { val outputNodes = treeFlattener.flatten(labeledResult.root).map { it.toOutputNode() } - val labeledAst = LabeledAst(labeledResult.label, outputNodes) + val path = if (withPaths) labeledResult.filePath else null + val labeledAst = LabeledAst(labeledResult.label, path, outputNodes) writer.println(Json.encodeToString(labeledAst)) } From 0e6c05ce464cb47e774a313493c10fbfc3ec211f Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Wed, 28 Jul 2021 18:55:09 +0500 Subject: [PATCH 259/308] Run CI on pull requests --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9e8eceaf..2683788e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,6 +1,6 @@ name: Build with lint -on: [push] +on: [push, pull_request] jobs: setup: From d7f297880bba6ffe05e698a58ede12e6f3404e63 Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 30 Jul 2021 19:08:00 +0300 Subject: [PATCH 260/308] status bar prototype added --- build.gradle.kts | 3 +++ .../astminer/common/model/ParsingResultModel.kt | 12 ++++++++---- src/main/kotlin/astminer/pipeline/Pipeline.kt | 3 +++ 3 files changed, 14 insertions(+), 4 
deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index d4ceace8..bab1143c 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -49,6 +49,9 @@ dependencies { // ===== Detekt ===== detektPlugins("io.gitlab.arturbosch.detekt:detekt-formatting:1.17.1") + + // ==== Status bar ==== + implementation("me.tongfei:progressbar:0.9.2") } val generatedSourcesPath = "src/main/generated" diff --git a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt index c9135b44..769e3376 100644 --- a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -1,6 +1,7 @@ package astminer.common.model import astminer.parse.ParsingException +import me.tongfei.progressbar.ProgressBar import mu.KotlinLogging import java.io.File @@ -9,15 +10,18 @@ private val logger = KotlinLogging.logger("HandlerFactory") interface ParsingResultFactory { fun parse(file: File): ParsingResult - fun parseFiles(files: List, action: (ParsingResult) -> T) = - files.map { file -> + fun parseFiles(files: List, action: (ParsingResult) -> T): List { + val results = mutableListOf() + for (file in ProgressBar.wrap(files, "")) { try { - action(parse(file)) + results.add(action(parse(file))) } catch (parsingException: ParsingException) { logger.error(parsingException) { "Failed to parse file ${file.path}" } - null + results.add(null) } } + return results + } } interface PreprocessingParsingResultFactory : ParsingResultFactory { diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index d71a7a08..840c608f 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -45,9 +45,12 @@ class Pipeline(private val config: PipelineConfig) { */ fun run() { for (language in config.parser.languages) { + println("Parsing $language") val parsingResultFactory = 
getParsingResultFactory(language, config.parser.name) + println("Files collecting...") val files = getProjectFilesWithExtension(inputDirectory, language.fileExtension) + println("${files.size} files retrieved") createStorage(language).use { storage -> parsingResultFactory.parseFiles(files) { parseResult -> From 3177259d876fb375eb151b06a12fc54ed353721d Mon Sep 17 00:00:00 2001 From: illided Date: Mon, 2 Aug 2021 17:04:08 +0300 Subject: [PATCH 261/308] some performance improvement --- src/main/kotlin/astminer/common/TreeUtil.kt | 22 +++++++++---- .../common/model/ParsingResultModel.kt | 31 +++++++++++++++++-- src/main/kotlin/astminer/pipeline/Pipeline.kt | 8 +++-- 3 files changed, 49 insertions(+), 12 deletions(-) diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index 267f9356..1a53158d 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -6,14 +6,21 @@ const val EMPTY_TOKEN = "EMPTY" * The function was adopted from the original code2vec implementation in order to match their behavior: * https://github.com/tech-srl/code2vec/blob/master/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java */ + +val newLineReg = "\\\\n".toRegex() +val whitespaceReg = "//s+".toRegex() +val quotesApostrophesCommasReg = "[\"',]".toRegex() +val unicodeWeirdCharReg = "\\P{Print}".toRegex() +val notALetterReg = "[^A-Za-z]".toRegex() + fun normalizeToken(token: String, defaultToken: String): String { val cleanToken = token.lowercase() - .replace("\\\\n".toRegex(), "") // escaped new line - .replace("//s+".toRegex(), "") // whitespaces - .replace("[\"',]".toRegex(), "") // quotes, apostrophes, commas - .replace("\\P{Print}".toRegex(), "") // unicode weird characters + .replace(newLineReg, "") // escaped new line + .replace(whitespaceReg, "") // whitespaces + .replace(quotesApostrophesCommasReg, "") // quotes, apostrophes, commas + .replace(unicodeWeirdCharReg, "") // 
unicode weird characters - val stripped = cleanToken.replace("[^A-Za-z]".toRegex(), "") + val stripped = cleanToken.replace(notALetterReg, "") return stripped.ifEmpty { val carefulStripped = cleanToken.replace(" ", "_") @@ -27,9 +34,12 @@ fun normalizeToken(token: String, defaultToken: String): String { * The function was adopted from the original code2vec implementation in order to match their behavior: * https://github.com/tech-srl/code2vec/blob/master/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java */ + +val splitRegex = "(?<=[a-z])(?=[A-Z])|_|[0-9]|(?<=[A-Z])(?=[A-Z][a-z])|\\s+".toRegex() + fun splitToSubtokens(token: String) = token .trim() - .split("(?<=[a-z])(?=[A-Z])|_|[0-9]|(?<=[A-Z])(?=[A-Z][a-z])|\\s+".toRegex()) + .split(splitRegex) .map { s -> normalizeToken(s, "") } .filter { it.isNotEmpty() } .toList() diff --git a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt index 769e3376..45b0ce9a 100644 --- a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -4,22 +4,43 @@ import astminer.parse.ParsingException import me.tongfei.progressbar.ProgressBar import mu.KotlinLogging import java.io.File +import kotlin.concurrent.thread private val logger = KotlinLogging.logger("HandlerFactory") +private const val NUM_OF_THREADS = 16 interface ParsingResultFactory { fun parse(file: File): ParsingResult - fun parseFiles(files: List, action: (ParsingResult) -> T): List { + fun parseFiles( + files: List, + progressBar: ProgressBar = ProgressBar("", files.size.toLong()), + action: (ParsingResult) -> T + ): List { val results = mutableListOf() - for (file in ProgressBar.wrap(files, "")) { + files.map { file -> try { results.add(action(parse(file))) } catch (parsingException: ParsingException) { logger.error(parsingException) { "Failed to parse file ${file.path}" } results.add(null) } + 
progressBar.step() + } + return results + } + + fun parseFilesAsync(files: List, action: (ParsingResult) -> T): List { + val results = mutableListOf() + val threads = mutableListOf() + val progressBar = ProgressBar("", files.size.toLong()) + + synchronized(results) { + files.chunked(files.size / (NUM_OF_THREADS - 1)) + .map { chunk -> + threads.add(thread { results.addAll(parseFiles(chunk, progressBar, action)) }) } } + threads.map { it.join() } return results } } @@ -32,7 +53,11 @@ interface PreprocessingParsingResultFactory : ParsingResultFactory { * @param files list of files to be parsed with preprocessing * @param action action to do with parsed files (e.g. save on the disk) */ - override fun parseFiles(files: List, action: (ParsingResult) -> T) = + override fun parseFiles( + files: List, + progressBar: ProgressBar, + action: (ParsingResult) -> T + ) = files.map { file -> try { val preprocessedFile = preprocess(file) diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 840c608f..2c52dbe4 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -53,9 +53,11 @@ class Pipeline(private val config: PipelineConfig) { println("${files.size} files retrieved") createStorage(language).use { storage -> - parsingResultFactory.parseFiles(files) { parseResult -> - for (labeledResult in branch.process(parseResult)) { - storage.store(labeledResult) + synchronized(storage) { + parsingResultFactory.parseFilesAsync(files) { parseResult -> + for (labeledResult in branch.process(parseResult)) { + storage.store(labeledResult) + } } } } From 30f1c190e1ad349e8c1cc9bc75064b872983ab00 Mon Sep 17 00:00:00 2001 From: illided Date: Mon, 2 Aug 2021 18:28:14 +0300 Subject: [PATCH 262/308] calculation heavy part moved from critical section --- src/main/kotlin/astminer/common/model/ParsingModel.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 9b1af5ba..c7484bf3 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -12,7 +12,7 @@ abstract class Node { abstract val parent: Node? abstract val originalToken: String? - val normalizedToken: String by lazy { + val normalizedToken: String = run { originalToken?.let { val subtokens = splitToSubtokens(it) if (subtokens.isEmpty()) EMPTY_TOKEN else subtokens.joinToString(TOKEN_DELIMITER) From 23bdd3da7dce436dc4c952a1f82fb4d98751923e Mon Sep 17 00:00:00 2001 From: illided Date: Mon, 2 Aug 2021 21:29:16 +0300 Subject: [PATCH 263/308] small improvements and rollback to normalized token --- .../kotlin/astminer/common/model/ParsingModel.kt | 3 ++- .../astminer/common/model/ParsingResultModel.kt | 14 ++++++++------ src/main/kotlin/astminer/pipeline/Pipeline.kt | 1 + 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index c7484bf3..2d6df324 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -12,12 +12,13 @@ abstract class Node { abstract val parent: Node? abstract val originalToken: String? - val normalizedToken: String = run { + val normalizedToken: String by lazy { originalToken?.let { val subtokens = splitToSubtokens(it) if (subtokens.isEmpty()) EMPTY_TOKEN else subtokens.joinToString(TOKEN_DELIMITER) } ?: EMPTY_TOKEN } + var technicalToken: String? 
= null val token: String diff --git a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt index 45b0ce9a..a3b9688b 100644 --- a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -14,7 +14,7 @@ interface ParsingResultFactory { fun parseFiles( files: List, - progressBar: ProgressBar = ProgressBar("", files.size.toLong()), + progressBar: ProgressBar? = null, action: (ParsingResult) -> T ): List { val results = mutableListOf() @@ -25,7 +25,7 @@ interface ParsingResultFactory { logger.error(parsingException) { "Failed to parse file ${file.path}" } results.add(null) } - progressBar.step() + progressBar?.step() } return results } @@ -33,12 +33,13 @@ interface ParsingResultFactory { fun parseFilesAsync(files: List, action: (ParsingResult) -> T): List { val results = mutableListOf() val threads = mutableListOf() - val progressBar = ProgressBar("", files.size.toLong()) + val progressBar = ProgressBar("Parsing progress:", files.size.toLong()) synchronized(results) { - files.chunked(files.size / (NUM_OF_THREADS - 1)) + files.chunked(files.size / (NUM_OF_THREADS - 1) + 1).filter { it.isNotEmpty() } .map { chunk -> - threads.add(thread { results.addAll(parseFiles(chunk, progressBar, action)) }) } + threads.add(thread { results.addAll(parseFiles(chunk, progressBar, action)) }) + } } threads.map { it.join() } return results @@ -55,10 +56,11 @@ interface PreprocessingParsingResultFactory : ParsingResultFactory { */ override fun parseFiles( files: List, - progressBar: ProgressBar, + progressBar: ProgressBar?, action: (ParsingResult) -> T ) = files.map { file -> + progressBar?.step() try { val preprocessedFile = preprocess(file) val result = action(parse(preprocessedFile)) diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 2c52dbe4..c6aa20b2 100644 --- 
a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -62,5 +62,6 @@ class Pipeline(private val config: PipelineConfig) { } } } + println("Done!") } } From 5a8482ccb01fe3c8497c28b2ec353882d8648cf1 Mon Sep 17 00:00:00 2001 From: illided Date: Mon, 2 Aug 2021 22:31:22 +0300 Subject: [PATCH 264/308] parameters list properties now is nullable --- .../common/model/FunctionInfoModel.kt | 6 +-- .../astminer/filters/FunctionFilters.kt | 14 ++++-- .../antlr/java/JavaFunctionSplitterTest.kt | 26 +++++----- .../JavaScriptFunctionSplitterTest.kt | 2 +- .../antlr/php/ANTLRPHPFunctionSplitterTest.kt | 2 +- .../python/PythonFunctionSplitterTest.kt | 42 ++++++++-------- .../parse/cpp/FuzzyMethodSplitterTest.kt | 18 +++---- .../java/GumTreeJavaFunctionSplitterTest.kt | 20 ++++---- .../GumTreePythonFunctionSplitterTest.kt | 48 +++++++++---------- 9 files changed, 91 insertions(+), 87 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt index 2b40034f..cacdd94c 100644 --- a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt +++ b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt @@ -22,11 +22,11 @@ interface FunctionInfo { get() = notImplemented("root") val filePath: String get() = notImplemented("filePath") - val annotations: List + val annotations: List? get() = notImplemented("annotations") - val modifiers: List + val modifiers: List? get() = notImplemented("modifiers") - val parameters: List + val parameters: List? get() = notImplemented("parameters") val returnType: String? 
get() = notImplemented("returnType") diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index 15591f34..db95d100 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -9,16 +9,20 @@ import astminer.common.splitToSubtokens * Filter that excludes functions that have at least one of modifiers from the [excludeModifiers] list. */ class ModifierFilter(private val excludeModifiers: List) : FunctionFilter { - override fun validate(functionInfo: FunctionInfo): Boolean = - !excludeModifiers.any { modifier -> modifier in functionInfo.modifiers } + override fun validate(functionInfo: FunctionInfo): Boolean { + return functionInfo.modifiers?.let {modifiers -> excludeModifiers.intersect(modifiers).isEmpty() } + ?: throw IllegalStateException("Modifiers wasn't properly parsed") + } } /** - * Filter that excludes functions that have at least one annotations from the [excludeAnnotations] list. + * Filter that excludes functions that have at least one of annotations from the [excludeAnnotations] list. 
*/ class AnnotationFilter(private val excludeAnnotations: List) : FunctionFilter { - override fun validate(functionInfo: FunctionInfo): Boolean = - !excludeAnnotations.any { annotation -> annotation in functionInfo.annotations } + override fun validate(functionInfo: FunctionInfo): Boolean { + return functionInfo.annotations?.let { annotations -> excludeAnnotations.intersect(annotations).isEmpty() } + ?: throw IllegalStateException("Annotations was not properly parsed") + } } /** diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt index d1fea436..73822194 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt @@ -70,29 +70,29 @@ class JavaFunctionSplitterTest { fun testNoParameters() { val methodNoParameters = functionInfos.find { it.name == "functionWithNoParameters" } assertNotNull(methodNoParameters) - assertEquals(0, methodNoParameters.parameters.size) + assertEquals(0, methodNoParameters.parameters?.size) } @Test fun testOneParameter() { val methodOneParameter = functionInfos.find { it.name == "functionWithOneParameter" } assertNotNull(methodOneParameter) - assertEquals(1, methodOneParameter.parameters.size) - val parameter = methodOneParameter.parameters[0] - assertEquals("p1", parameter.name) - assertEquals("int", parameter.type) + assertEquals(1, methodOneParameter.parameters?.size) + val parameter = methodOneParameter.parameters?.get(0) + assertEquals("p1", parameter?.name) + assertEquals("int", parameter?.type) } @Test fun testThreeParameters() { val methodThreeParameters = functionInfos.find { it.name == "functionWithThreeParameters" } assertNotNull(methodThreeParameters) - assertEquals(3, methodThreeParameters.parameters.size) + assertEquals(3, methodThreeParameters.parameters?.size) val methodTypes = listOf("Class", "String[][]", "int[]") for (i in 0 
until 3) { - val parameter = methodThreeParameters.parameters[i] - assertEquals("p${i + 1}", parameter.name) - assertEquals(methodTypes[i], parameter.type) + val parameter = methodThreeParameters.parameters?.get(i) + assertEquals("p${i + 1}", parameter?.name) + assertEquals(methodTypes[i], parameter?.type) } } @@ -100,10 +100,10 @@ class JavaFunctionSplitterTest { fun testWeirdArrayParameter() { val methodWeirdArrayParameter = functionInfos.find { it.name == "functionWithStrangeArrayParameter" } assertNotNull(methodWeirdArrayParameter) - assertEquals(1, methodWeirdArrayParameter.parameters.size) - val weirdParameter = methodWeirdArrayParameter.parameters[0] - assertEquals(weirdParameter.name, "arr[]") - assertEquals(weirdParameter.type, "int") + assertEquals(1, methodWeirdArrayParameter.parameters?.size) + val weirdParameter = methodWeirdArrayParameter.parameters?.get(0) + assertEquals(weirdParameter?.name, "arr[]") + assertEquals(weirdParameter?.type, "int") } companion object { diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt index 4e32386d..977013fb 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt @@ -40,7 +40,7 @@ class JavaScriptFunctionSplitterTest { fun FunctionInfo.getJsonInfo(): String { return "info : {" + "name : $name, " + - "args : ${parameters.joinToString(", ") { it.name }}, " + + "args : ${parameters?.joinToString(", ") { it.name }}, " + "enclosing element : ${enclosingElement?.type?.getEnclosingElementType()}, " + "enclosing element name : ${enclosingElement?.name}" + "}" diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt index 27c6286b..3be0583d 100644 --- 
a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt @@ -40,7 +40,7 @@ internal class ANTLRPHPFunctionSplitterTest { fun FunctionInfo.getJsonInfo(): String = listOf( "info : {", "name: $name, ", - "args: ${parameters.joinToString(", ") { + "args: ${parameters?.joinToString(", ") { listOfNotNull(it.type, it.name).joinToString(" ") }}, ", "enclosing element: ${enclosingElement?.type?.getEnclosingElementType()}, ", diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt index 9efdac5a..0e8bc21e 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt @@ -53,44 +53,44 @@ class PythonFunctionSplitterTest { fun testNoParameters() { val functionNoParameters = functionInfos.find { it.name == "function_with_no_parameters" } assertNotNull(functionNoParameters) - assertEquals(0, functionNoParameters.parameters.size) + assertEquals(0, functionNoParameters.parameters?.size) } @Test fun testOneParameter() { val functionOneParameter = functionInfos.find { it.name == "function_with_one_parameter" } assertNotNull(functionOneParameter) - assertEquals(1, functionOneParameter.parameters.size) - val parameter = functionOneParameter.parameters[0] - assertEquals("p1", parameter.name) + assertEquals(1, functionOneParameter.parameters?.size) + val parameter = functionOneParameter.parameters?.get(0) + assertEquals("p1", parameter?.name) } @Test fun testOneTypedParameter() { val functionOneTypedParameter = functionInfos.find { it.name == "function_with_one_typed_parameter" } assertNotNull(functionOneTypedParameter) - assertEquals(1, functionOneTypedParameter.parameters.size) - val parameter = functionOneTypedParameter.parameters[0] - assertEquals("p1", parameter.name) - 
assertEquals("int", parameter.type) + assertEquals(1, functionOneTypedParameter.parameters?.size) + val parameter = functionOneTypedParameter.parameters?.get(0) + assertEquals("p1", parameter?.name) + assertEquals("int", parameter?.type) } @Test fun functionWithComplexParameter() { val functionOneTypedParameter = functionInfos.find { it.name == "function_with_complex_parameter" } assertNotNull(functionOneTypedParameter) - assertEquals(1, functionOneTypedParameter.parameters.size) - val parameter = functionOneTypedParameter.parameters[0] - assertEquals("p1", parameter.name) - assertEquals("List[int]", parameter.type) + assertEquals(1, functionOneTypedParameter.parameters?.size) + val parameter = functionOneTypedParameter.parameters?.get(0) + assertEquals("p1", parameter?.name) + assertEquals("List[int]", parameter?.type) } @Test fun testThreeParameters() { val functionThreeParameters = functionInfos.find { it.name == "function_with_three_parameters" } assertNotNull(functionThreeParameters) - assertEquals(3, functionThreeParameters.parameters.size) - val parameters = functionThreeParameters.parameters + assertEquals(3, functionThreeParameters.parameters?.size) + val parameters = functionThreeParameters.parameters!! 
assertEquals("p1", parameters[0].name) assertEquals("p2", parameters[1].name) @@ -103,19 +103,19 @@ class PythonFunctionSplitterTest { fun testParameterInClass() { val functionOneParameter = functionInfos.find { it.name == "fun_with_parameter_in_class" } assertNotNull(functionOneParameter) - assertEquals(2, functionOneParameter.parameters.size) - val parameter = functionOneParameter.parameters[1] - assertEquals("p1", parameter.name) + assertEquals(2, functionOneParameter.parameters?.size) + val parameter = functionOneParameter.parameters?.get(1) + assertEquals("p1", parameter?.name) } @Test fun testTypedParameterInClass() { val functionOneTypedParameter = functionInfos.find { it.name == "fun_with_typed_parameter_in_class" } assertNotNull(functionOneTypedParameter) - assertEquals(2, functionOneTypedParameter.parameters.size) - val parameter = functionOneTypedParameter.parameters[1] - assertEquals("p1", parameter.name) - assertEquals("int", parameter.type) + assertEquals(2, functionOneTypedParameter.parameters?.size) + val parameter = functionOneTypedParameter.parameters?.get(1) + assertEquals("p1", parameter?.name) + assertEquals("int", parameter?.type) } @Test diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index ddb5ad63..3ff65833 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -85,28 +85,28 @@ class FuzzyMethodSplitterTest { fun testNoParameters() { val methodNoParameters = methodInfos.find { it.name == "functionWithNoParameters" } assertNotNull(methodNoParameters) - assertEquals(0, methodNoParameters.parameters.size) + assertEquals(0, methodNoParameters.parameters?.size) } @Test fun testOneParameter() { val methodOneParameter = methodInfos.find { it.name == "functionWithOneParameter" } assertNotNull(methodOneParameter) - assertEquals(1, methodOneParameter.parameters.size) - 
val parameter = methodOneParameter.parameters[0] - assertEquals("p1", parameter.name) - assertEquals("int", parameter.type) + assertEquals(1, methodOneParameter.parameters?.size) + val parameter = methodOneParameter.parameters?.get(0) + assertEquals("p1", parameter?.name) + assertEquals("int", parameter?.type) } @Test fun testThreeParameters() { val methodThreeParameters = methodInfos.find { it.name == "functionWithThreeParameters" } assertNotNull(methodThreeParameters) - assertEquals(3, methodThreeParameters.parameters.size) + assertEquals(3, methodThreeParameters.parameters?.size) for (i in 0 until 3) { - val parameter = methodThreeParameters.parameters[i] - assertEquals("p${i + 1}", parameter.name) - assertEquals("int", parameter.type) + val parameter = methodThreeParameters.parameters?.get(i) + assertEquals("p${i + 1}", parameter?.name) + assertEquals("int", parameter?.type) } } diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt index 1cf8cb4c..2dfab373 100644 --- a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt @@ -22,8 +22,8 @@ class GumTreeJavaFunctionSplitterTest { assertEquals("fun", name) assertEquals("void", returnType) assertEquals("SingleFunction", enclosingElement?.name) - assertEquals(listOf("args", "param"), parameters.map { it.name }) - assertEquals(listOf("String[]", "int"), parameters.map { it.type }) + assertEquals(listOf("args", "param"), parameters?.map { it.name }) + assertEquals(listOf("String[]", "int"), parameters?.map { it.type }) } } @@ -36,8 +36,8 @@ class GumTreeJavaFunctionSplitterTest { assertEquals("main", name) assertEquals("void", returnType) assertEquals("InnerClass", enclosingElement?.name) - assertEquals(listOf("args"), parameters.map { it.name }) - assertEquals(listOf("String[]"), 
parameters.map { it.type }) + assertEquals(listOf("args"), parameters?.map { it.name }) + assertEquals(listOf("String[]"), parameters?.map { it.type }) } } @@ -50,15 +50,15 @@ class GumTreeJavaFunctionSplitterTest { assertEquals("main", name) assertEquals("void", returnType) assertEquals("InnerClass", enclosingElement?.name) - assertEquals(listOf("args"), parameters.map { it.name }) - assertEquals(listOf("String[]"), parameters.map { it.type }) + assertEquals(listOf("args"), parameters?.map { it.name }) + assertEquals(listOf("String[]"), parameters?.map { it.type }) } with(functionInfos.last()) { assertEquals("fun", name) assertEquals("void", returnType) assertEquals("SingleMethodInnerClass", enclosingElement?.name) - assertEquals(listOf("args", "param"), parameters.map { it.name }) - assertEquals(listOf("String[]", "int"), parameters.map { it.type }) + assertEquals(listOf("args", "param"), parameters?.map { it.name }) + assertEquals(listOf("String[]", "int"), parameters?.map { it.type }) } } @@ -71,8 +71,8 @@ class GumTreeJavaFunctionSplitterTest { assertEquals("fun", name) assertEquals("int", returnType) assertEquals("SingleFunction", enclosingElement?.name) - assertEquals(listOf("args", "param"), parameters.map { it.name }) - assertEquals(listOf("int", "SingleFunction"), parameters.map { it.type }) + assertEquals(listOf("args", "param"), parameters?.map { it.name }) + assertEquals(listOf("int", "SingleFunction"), parameters?.map { it.type }) } } diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt index 910acd93..124926af 100644 --- a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt @@ -54,9 +54,9 @@ class GumTreePythonFunctionSplitterTest { with(functionInfo) { assertEquals("complex_args_full_typed", name) 
assertEquals(null, returnType) - assertEquals(1, parameters.size) - assertEquals(listOf("node"), parameters.map { it.name }.toList()) - assertEquals(listOf("JsonNodeType"), parameters.map { it.type }.toList()) + assertEquals(1, parameters?.size) + assertEquals(listOf("node"), parameters?.map { it.name }?.toList()) + assertEquals(listOf("JsonNodeType"), parameters?.map { it.type }?.toList()) } } @@ -68,9 +68,9 @@ class GumTreePythonFunctionSplitterTest { with(functionInfo) { assertEquals("func_dif_args_typed_return", name) assertEquals("Constant-int", returnType) - assertEquals(6, parameters.size) - assertEquals(listOf("a", "b", "c", "d", "e", "f"), parameters.map { it.name }.toList()) - assertEquals(emptyList(), parameters.mapNotNull { it.type }.toList()) + assertEquals(6, parameters?.size) + assertEquals(listOf("a", "b", "c", "d", "e", "f"), parameters?.map { it.name }?.toList()) + assertEquals(emptyList(), parameters?.mapNotNull { it.type }?.toList()) } } @@ -83,9 +83,9 @@ class GumTreePythonFunctionSplitterTest { assertEquals("foo_typed", name) assertEquals("A", enclosingElement?.name) assertEquals(null, returnType) - assertEquals(3, parameters.size) - assertEquals(listOf("self", "x", "y"), parameters.map { it.name }.toList()) - assertEquals(listOf(null, "int", "int"), parameters.map { it.type }.toList()) + assertEquals(3, parameters?.size) + assertEquals(listOf("self", "x", "y"), parameters?.map { it.name }?.toList()) + assertEquals(listOf(null, "int", "int"), parameters?.map { it.type }?.toList()) } } @@ -98,9 +98,9 @@ class GumTreePythonFunctionSplitterTest { assertEquals("bar_typed", name) assertEquals("C", enclosingElement?.name) assertEquals(null, returnType) - assertEquals(2, parameters.size) - assertEquals(listOf("self", "x"), parameters.map { it.name }.toList()) - assertEquals(listOf(null, "int"), parameters.map { it.type }.toList()) + assertEquals(2, parameters?.size) + assertEquals(listOf("self", "x"), parameters?.map { it.name }?.toList()) + 
assertEquals(listOf(null, "int"), parameters?.map { it.type }?.toList()) } } @@ -114,9 +114,9 @@ class GumTreePythonFunctionSplitterTest { assertEquals("AsyncFunctionDef", root.typeLabel) assertEquals(null, enclosingElement?.name) assertEquals("Constant-int", returnType) - assertEquals(4, parameters.size) - assertEquals(listOf("event", "x", "args", "kwargs"), parameters.map { it.name }.toList()) - assertEquals(listOf("str", "int", null, null), parameters.map { it.type }.toList()) + assertEquals(4, parameters?.size) + assertEquals(listOf("event", "x", "args", "kwargs"), parameters?.map { it.name }?.toList()) + assertEquals(listOf("str", "int", null, null), parameters?.map { it.type }?.toList()) } } @@ -136,12 +136,12 @@ class GumTreePythonFunctionSplitterTest { ?.getChildOfType("Constant-str") ?.originalToken ) - assertEquals(4, parameters.size) + assertEquals(4, parameters?.size) assertEquals( listOf("gh", "original_issue", "branch", "backport_pr_number"), - parameters.map { it.name }.toList() + parameters?.map { it.name }?.toList() ) - assertEquals(listOf(null, null, null, null), parameters.map { it.type }.toList()) + assertEquals(listOf(null, null, null, null), parameters?.map { it.type }?.toList()) } } @@ -155,9 +155,9 @@ class GumTreePythonFunctionSplitterTest { assertEquals("foo_1", functionInfo.root.parent?.wrappedNode?.parent?.label) assertEquals(null, enclosingElement?.name) assertEquals("Constant-NoneType", returnType) - assertEquals(1, parameters.size) - assertEquals(listOf("c"), parameters.map { it.name }.toList()) - assertEquals(listOf(null), parameters.map { it.type }.toList()) + assertEquals(1, parameters?.size) + assertEquals(listOf("c"), parameters?.map { it.name }?.toList()) + assertEquals(listOf(null), parameters?.map { it.type }?.toList()) } } @@ -171,9 +171,9 @@ class GumTreePythonFunctionSplitterTest { assertEquals("bar_1", functionInfo.root.parent?.wrappedNode?.parent?.label) assertEquals(null, enclosingElement?.name) 
assertEquals("Constant-int", returnType) - assertEquals(2, parameters.size) - assertEquals(listOf("d", "e"), parameters.map { it.name }.toList()) - assertEquals(listOf("int", "int"), parameters.map { it.type }.toList()) + assertEquals(2, parameters?.size) + assertEquals(listOf("d", "e"), parameters?.map { it.name }?.toList()) + assertEquals(listOf("int", "int"), parameters?.map { it.type }?.toList()) } } } From c86986d20cea2acec49a60499b964b54f35a361e Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 3 Aug 2021 12:31:47 +0300 Subject: [PATCH 265/308] progress bar close added --- src/main/kotlin/astminer/common/model/ParsingResultModel.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt index a3b9688b..7e3dc43b 100644 --- a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -42,6 +42,7 @@ interface ParsingResultFactory { } } threads.map { it.join() } + progressBar.close() return results } } From ca966bd967d9e3c53d75c7650765fc6b69b9c52d Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 3 Aug 2021 12:50:47 +0300 Subject: [PATCH 266/308] code style fixes --- src/main/kotlin/astminer/filters/FunctionFilters.kt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt index db95d100..d449131d 100644 --- a/src/main/kotlin/astminer/filters/FunctionFilters.kt +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -10,8 +10,8 @@ import astminer.common.splitToSubtokens */ class ModifierFilter(private val excludeModifiers: List) : FunctionFilter { override fun validate(functionInfo: FunctionInfo): Boolean { - return functionInfo.modifiers?.let {modifiers -> excludeModifiers.intersect(modifiers).isEmpty() } - ?: throw IllegalStateException("Modifiers 
wasn't properly parsed") + val functionModifiers = checkNotNull(functionInfo.modifiers) { "Modifiers weren't properly parsed" } + return functionModifiers.none { modifier -> modifier in excludeModifiers } } } @@ -20,8 +20,8 @@ class ModifierFilter(private val excludeModifiers: List) : FunctionFilte */ class AnnotationFilter(private val excludeAnnotations: List) : FunctionFilter { override fun validate(functionInfo: FunctionInfo): Boolean { - return functionInfo.annotations?.let { annotations -> excludeAnnotations.intersect(annotations).isEmpty() } - ?: throw IllegalStateException("Annotations was not properly parsed") + val functionAnnotations = checkNotNull(functionInfo.annotations) { "Annotations weren't properly parsed" } + return functionAnnotations.none { annotation -> annotation in excludeAnnotations } } } From a74a73257d7e45464d826bc147da6178493d04b5 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Tue, 3 Aug 2021 14:51:05 +0500 Subject: [PATCH 267/308] Use astminer docker container for CI --- .github/workflows/build.yml | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2683788e..672f2184 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,14 +5,10 @@ on: [push, pull_request] jobs: setup: runs-on: ubuntu-latest + container: voudy/astminer + steps: - uses: actions/checkout@v2 - - - name: Setup Java - uses: actions/setup-java@v1 - with: - java-version: 11 - - name: Cache Gradle dependencies uses: actions/cache@v2 with: @@ -26,15 +22,11 @@ jobs: build: needs: setup runs-on: ubuntu-latest + container: voudy/astminer steps: - uses: actions/checkout@v2 - - name: Setup Java - uses: actions/setup-java@v1 - with: - java-version: 11 - - name: Cache Gradle dependencies uses: actions/cache@v2 with: @@ -45,20 +37,17 @@ jobs: restore-keys: | ${{ runner.os }}-gradle- - - name: Build plugin + - name: Build astminer run: 
./gradlew build test: needs: build runs-on: ubuntu-latest + container: voudy/astminer + steps: - uses: actions/checkout@v2 - - name: Setup Java - uses: actions/setup-java@v1 - with: - java-version: 11 - - name: Run JUnit tests run: ./gradlew test @@ -72,14 +61,11 @@ jobs: code-style: needs: build runs-on: ubuntu-latest + container: voudy/astminer + steps: - uses: actions/checkout@v2 - - name: Setup Java - uses: actions/setup-java@v1 - with: - java-version: 11 - - name: Run detekt run: ./gradlew detekt @@ -92,14 +78,11 @@ jobs: run-on-configs: needs: build runs-on: ubuntu-latest + container: voudy/astminer + steps: - uses: actions/checkout@v2 - - name: Setup Java - uses: actions/setup-java@v1 - with: - java-version: 11 - - name: Prepare shadowJar run: ./gradlew shadowJar From 4a9755a113deef60a310c3d80e3553155665b702 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 3 Aug 2021 13:17:29 +0300 Subject: [PATCH 268/308] antlr parameters function properties now null on failed parse --- .../parse/antlr/java/AntlrJavaFunctionInfo.kt | 10 ++++++++- .../javascript/AntlrJavaScriptElementInfo.kt | 22 ++++++++++++++++--- .../parse/antlr/php/ANTLRPHPFunctionInfo.kt | 7 ++++-- .../antlr/python/AntlrPythonFunctionInfo.kt | 9 +++++++- 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt index d6ac9ec6..3a21e5f1 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -3,13 +3,21 @@ package astminer.parse.antlr.java import astminer.common.model.* import astminer.parse.antlr.* import astminer.parse.findEnclosingElementBy +import mu.KotlinLogging + +val logger = KotlinLogging.logger("Antlr-Java-function-info") class AntlrJavaFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { override val nameNode: 
AntlrNode? = collectNameNode() - override val parameters: List = collectParameters() override val returnType: String? = collectReturnType() override val enclosingElement: EnclosingElement? = collectEnclosingClass() + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } + private fun collectNameNode(): AntlrNode? = root.getChildOfType(METHOD_NAME_NODE) private fun collectReturnType(): String? { diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt index 33589567..5ac35813 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt @@ -3,6 +3,9 @@ package astminer.parse.antlr.javascript import astminer.common.model.* import astminer.parse.antlr.* import astminer.parse.findEnclosingElementBy +import mu.KotlinLogging + +val logger = KotlinLogging.logger("Antlr-Javascript-function-info") /** Base class for describing JavaScript methods, functions or arrow functions. @@ -76,9 +79,14 @@ abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode, override class JavaScriptArrowInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { override val enclosingElement: EnclosingElement? = collectEnclosingElement() - override val parameters: List = collectParameters() override val nameNode: AntlrNode? = root.getChildOfType(ARROW_NAME_NODE) + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } + override fun getParametersRoot(): AntlrNode? 
{ val parameterRoot = root.getChildOfType(ARROW_PARAMETER_NODE) return parameterRoot?.getChildOfType(ARROW_PARAMETER_INNER_NODE) ?: parameterRoot @@ -94,8 +102,12 @@ class JavaScriptArrowInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptEl class JavaScriptMethodInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { override val enclosingElement: EnclosingElement? = collectEnclosingElement() - override val parameters: List = collectParameters() override val nameNode: AntlrNode? = collectNameNode() + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } private fun collectNameNode(): AntlrNode? { val methodNameParent = root.children.firstOrNull { @@ -119,8 +131,12 @@ class JavaScriptMethodInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptE class JavaScriptFunctionInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { override val enclosingElement: EnclosingElement? = collectEnclosingElement() - override val parameters: List = collectParameters() override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION_NAME_NODE) + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } override fun getParametersRoot(): AntlrNode? 
= root.getChildOfType(FUNCTION_PARAMETER_NODE) diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index a2bb3cf2..484ace50 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -13,9 +13,12 @@ private val logger = KotlinLogging.logger("ANTLR-PHP-function-info") class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { override val returnType = getElementType(root) override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION_NAME) - - override val parameters: List = collectParameters() override val enclosingElement: EnclosingElement? = collectEnclosingElement() + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + astminer.parse.antlr.javascript.logger.warn { e.message } + null + } private fun collectParameters(): List { // Parameters in this grammar have following structure (children order may be wrong): diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt index 53ee37c4..3c409ea4 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -3,11 +3,18 @@ package astminer.parse.antlr.python import astminer.common.model.* import astminer.parse.antlr.* import astminer.parse.findEnclosingElementBy +import mu.KotlinLogging + +val logger = KotlinLogging.logger("Antlr-python-function-info") class AntlrPythonFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { override val nameNode: AntlrNode? = collectNameNode() - override val parameters: List = collectParameters() override val enclosingElement: EnclosingElement? 
= collectEnclosingElement() + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } private fun collectNameNode(): AntlrNode? = root.getChildOfType(FUNCTION_NAME_NODE) From e71b170bc90e25d0773b20eaf38ba042ef0b0454 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 3 Aug 2021 14:54:41 +0300 Subject: [PATCH 269/308] same update on other parsers --- .../astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt | 8 +++++++- .../parse/gumtree/java/GumTreeJavaFunctionInfo.kt | 8 +++++++- .../parse/gumtree/python/GumTreePythonFunctionInfo.kt | 8 +++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt index 5bcc6b06..daf78a86 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt @@ -4,6 +4,7 @@ import astminer.common.model.EnclosingElement import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter +import astminer.parse.antlr.javascript.logger import astminer.parse.findEnclosingElementBy import astminer.parse.fuzzy.FuzzyNode @@ -11,8 +12,13 @@ class FuzzyCppFunctionInfo(override val root: FuzzyNode, override val filePath: override val returnType: String? = collectReturnType() override val enclosingElement: EnclosingElement? = collectEnclosingClass() - override val parameters: List = collectParameters() override val nameNode: FuzzyNode? = collectNameNode() + override val parameters: List? = + try { collectParameters() } + catch (e: IllegalStateException) { + logger.warn { e.message } + null + } private fun collectNameNode(): FuzzyNode? = root.getChildOfType(METHOD_NAME_NODE) as? 
FuzzyNode diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt index a6af57f7..2e4c749a 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -4,6 +4,7 @@ import astminer.common.model.EnclosingElement import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter +import astminer.parse.antlr.javascript.logger import astminer.parse.gumtree.GumTreeNode class GumTreeJavaFunctionInfo( @@ -12,9 +13,14 @@ class GumTreeJavaFunctionInfo( ) : FunctionInfo { override val nameNode: GumTreeNode? = root.getChildOfType(TypeLabels.simpleName) - override val parameters: List = collectParameters() override val returnType: String? = root.getElementType() override val enclosingElement: EnclosingElement? = collectEnclosingClass() + override val parameters: List? 
= + try { collectParameters() } + catch (e: IllegalStateException) { + logger.warn { e.message } + null + } override val modifiers: List = root.children.filter { it.typeLabel == "Modifier" }.map { it.originalToken } override val annotations: List = root diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt index d297a3ab..e40fb0d3 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -4,6 +4,7 @@ import astminer.common.model.EnclosingElement import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter +import astminer.parse.antlr.javascript.logger import astminer.parse.findEnclosingElementBy import astminer.parse.gumtree.GumTreeNode @@ -13,9 +14,14 @@ class GumTreePythonFunctionInfo( ) : FunctionInfo { override val nameNode: GumTreeNode = root - override val parameters: List = collectParameters() override val enclosingElement: EnclosingElement? = collectEnclosingClass() override val returnType: String? = getElementType(root)?.typeLabel + override val parameters: List? = + try { collectParameters() } + catch (e: IllegalStateException) { + logger.warn { e.message } + null + } private fun getElementType(node: GumTreeNode): GumTreeNode? 
{ if (node.typeLabel == TypeLabels.arg) { From 7dee65050dc2e894acab9e28c9c75c12639ba901 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 3 Aug 2021 14:58:06 +0300 Subject: [PATCH 270/308] proper loggers added --- .../kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt | 2 +- .../parse/antlr/javascript/AntlrJavaScriptElementInfo.kt | 2 +- .../kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt | 2 +- .../astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt | 2 +- .../kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt | 4 +++- .../astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt | 4 +++- .../parse/gumtree/python/GumTreePythonFunctionInfo.kt | 4 +++- 7 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt index 3a21e5f1..1a79687d 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -5,7 +5,7 @@ import astminer.parse.antlr.* import astminer.parse.findEnclosingElementBy import mu.KotlinLogging -val logger = KotlinLogging.logger("Antlr-Java-function-info") +private val logger = KotlinLogging.logger("Antlr-Java-function-info") class AntlrJavaFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { override val nameNode: AntlrNode? 
= collectNameNode() diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt index 5ac35813..d5c12555 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt @@ -5,7 +5,7 @@ import astminer.parse.antlr.* import astminer.parse.findEnclosingElementBy import mu.KotlinLogging -val logger = KotlinLogging.logger("Antlr-Javascript-function-info") +private val logger = KotlinLogging.logger("Antlr-Javascript-function-info") /** Base class for describing JavaScript methods, functions or arrow functions. diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt index 484ace50..e7642cd2 100644 --- a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -16,7 +16,7 @@ class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: override val enclosingElement: EnclosingElement? = collectEnclosingElement() override val parameters: List? 
= try { collectParameters() } catch (e: IllegalStateException) { - astminer.parse.antlr.javascript.logger.warn { e.message } + logger.warn { e.message } null } diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt index 3c409ea4..bfbb5600 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -5,7 +5,7 @@ import astminer.parse.antlr.* import astminer.parse.findEnclosingElementBy import mu.KotlinLogging -val logger = KotlinLogging.logger("Antlr-python-function-info") +private val logger = KotlinLogging.logger("Antlr-python-function-info") class AntlrPythonFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { override val nameNode: AntlrNode? = collectNameNode() diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt index daf78a86..341ad923 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt @@ -4,9 +4,11 @@ import astminer.common.model.EnclosingElement import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter -import astminer.parse.antlr.javascript.logger import astminer.parse.findEnclosingElementBy import astminer.parse.fuzzy.FuzzyNode +import mu.KotlinLogging + +private val logger = KotlinLogging.logger("Fuzzyparser-Cpp-function-info") class FuzzyCppFunctionInfo(override val root: FuzzyNode, override val filePath: String) : FunctionInfo { diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt index 2e4c749a..c2dccf45 100644 --- 
a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -4,8 +4,10 @@ import astminer.common.model.EnclosingElement import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter -import astminer.parse.antlr.javascript.logger import astminer.parse.gumtree.GumTreeNode +import mu.KotlinLogging + +private val logger = KotlinLogging.logger("Gumtree-Java-function-info") class GumTreeJavaFunctionInfo( override val root: GumTreeNode, diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt index e40fb0d3..b6aa8411 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -4,9 +4,11 @@ import astminer.common.model.EnclosingElement import astminer.common.model.EnclosingElementType import astminer.common.model.FunctionInfo import astminer.common.model.FunctionInfoParameter -import astminer.parse.antlr.javascript.logger import astminer.parse.findEnclosingElementBy import astminer.parse.gumtree.GumTreeNode +import mu.KotlinLogging + +private val logger = KotlinLogging.logger("Gumtree-Java-function-info") class GumTreePythonFunctionInfo( override val root: GumTreeNode, From a8124bd1569d83a06a07fc14fbde6d5eca04fbd6 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 3 Aug 2021 15:00:43 +0300 Subject: [PATCH 271/308] code style fixes --- .../kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt | 3 +-- .../astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt | 3 +-- .../astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt 
b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt index 341ad923..711293b7 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt @@ -16,8 +16,7 @@ class FuzzyCppFunctionInfo(override val root: FuzzyNode, override val filePath: override val enclosingElement: EnclosingElement? = collectEnclosingClass() override val nameNode: FuzzyNode? = collectNameNode() override val parameters: List? = - try { collectParameters() } - catch (e: IllegalStateException) { + try { collectParameters() } catch (e: IllegalStateException) { logger.warn { e.message } null } diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt index c2dccf45..ecf58f88 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -18,8 +18,7 @@ class GumTreeJavaFunctionInfo( override val returnType: String? = root.getElementType() override val enclosingElement: EnclosingElement? = collectEnclosingClass() override val parameters: List? = - try { collectParameters() } - catch (e: IllegalStateException) { + try { collectParameters() } catch (e: IllegalStateException) { logger.warn { e.message } null } diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt index b6aa8411..f873851a 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -19,8 +19,7 @@ class GumTreePythonFunctionInfo( override val enclosingElement: EnclosingElement? = collectEnclosingClass() override val returnType: String? = getElementType(root)?.typeLabel override val parameters: List? 
= - try { collectParameters() } - catch (e: IllegalStateException) { + try { collectParameters() } catch (e: IllegalStateException) { logger.warn { e.message } null } From 9e36101116440c0ed499fba27c42908b7cf18b16 Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 3 Aug 2021 19:26:54 +0300 Subject: [PATCH 272/308] json AST stress tests added --- .../pipeline/PipelineAsyncStressTest.kt | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt diff --git a/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt b/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt new file mode 100644 index 00000000..4aec1a2a --- /dev/null +++ b/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt @@ -0,0 +1,67 @@ +package astminer.pipeline + +import astminer.config.* +import org.junit.AfterClass +import org.junit.Test +import org.junit.BeforeClass +import java.io.BufferedReader +import java.io.File +import java.io.FileReader +import kotlin.test.AfterTest +import kotlin.test.assertEquals + +class PipelineAsyncStressTest { + companion object { + private const val numOfFiles = 3000 + private const val numOfMethods = 100 + private val tempInputDir = File("src/test/resources/someData") + private val tempOutputDir = File("src/test/resources/someOutput") + + @BeforeClass + @JvmStatic + fun setup() { + tempInputDir.mkdirs() + repeat(numOfFiles) { index -> + val newFile = File.createTempFile("someFile", ".java", tempInputDir) + newFile.writeText("class someClass$index {\n") + repeat(numOfMethods) {newFile.appendText("public void someMethod${it + index * numOfMethods}() {} \n")} + newFile.appendText("}") + } + } + + @AfterClass + @JvmStatic + fun tearDown() { + tempInputDir.deleteRecursively() + } + + } + + @AfterTest + fun deleteOutput() { + tempOutputDir.deleteRecursively() + } + + @Test + fun jsonStorageTest() { + val config = PipelineConfig( + inputDir = tempInputDir.path, + outputDir = 
tempOutputDir.path, + parser = ParserConfig( + name = ParserType.Antlr, + languages = listOf(FileExtension.Java)), + filters = listOf(), + labelExtractor = FunctionNameExtractorConfig(), + storage = JsonAstStorageConfig() + ) + Pipeline(config).run() + assertEquals((numOfFiles * numOfMethods).toLong(), countLines("${tempOutputDir.path}/java/asts.jsonl")) + } + + private fun countLines(filePath: String): Long { + val reader = BufferedReader(FileReader(filePath)) + var numOfLines = 0L + while (reader.readLine() != null) {numOfLines++} + return numOfLines + } +} \ No newline at end of file From e4e44a3e630ec178eba9729c8373fb4d3f712c0f Mon Sep 17 00:00:00 2001 From: illided Date: Tue, 3 Aug 2021 19:28:34 +0300 Subject: [PATCH 273/308] code style issues fixed --- .../pipeline/PipelineAsyncStressTest.kt | 62 ++++++++++--------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt b/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt index 4aec1a2a..25364da7 100644 --- a/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt +++ b/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt @@ -2,8 +2,8 @@ package astminer.pipeline import astminer.config.* import org.junit.AfterClass -import org.junit.Test import org.junit.BeforeClass +import org.junit.Test import java.io.BufferedReader import java.io.File import java.io.FileReader @@ -11,32 +11,6 @@ import kotlin.test.AfterTest import kotlin.test.assertEquals class PipelineAsyncStressTest { - companion object { - private const val numOfFiles = 3000 - private const val numOfMethods = 100 - private val tempInputDir = File("src/test/resources/someData") - private val tempOutputDir = File("src/test/resources/someOutput") - - @BeforeClass - @JvmStatic - fun setup() { - tempInputDir.mkdirs() - repeat(numOfFiles) { index -> - val newFile = File.createTempFile("someFile", ".java", tempInputDir) - newFile.writeText("class someClass$index {\n") - 
repeat(numOfMethods) {newFile.appendText("public void someMethod${it + index * numOfMethods}() {} \n")} - newFile.appendText("}") - } - } - - @AfterClass - @JvmStatic - fun tearDown() { - tempInputDir.deleteRecursively() - } - - } - @AfterTest fun deleteOutput() { tempOutputDir.deleteRecursively() @@ -49,7 +23,8 @@ class PipelineAsyncStressTest { outputDir = tempOutputDir.path, parser = ParserConfig( name = ParserType.Antlr, - languages = listOf(FileExtension.Java)), + languages = listOf(FileExtension.Java) + ), filters = listOf(), labelExtractor = FunctionNameExtractorConfig(), storage = JsonAstStorageConfig() @@ -61,7 +36,34 @@ class PipelineAsyncStressTest { private fun countLines(filePath: String): Long { val reader = BufferedReader(FileReader(filePath)) var numOfLines = 0L - while (reader.readLine() != null) {numOfLines++} + while (reader.readLine() != null) { numOfLines++ } return numOfLines } -} \ No newline at end of file + + companion object { + private const val numOfFiles = 3000 + private const val numOfMethods = 100 + private val tempInputDir = File("src/test/resources/someData") + private val tempOutputDir = File("src/test/resources/someOutput") + + @BeforeClass + @JvmStatic + fun setup() { + tempInputDir.mkdirs() + repeat(numOfFiles) { index -> + val newFile = File.createTempFile("someFile", ".java", tempInputDir) + newFile.writeText("class someClass$index {\n") + repeat(numOfMethods) { + newFile.appendText("public void someMethod${it + index * numOfMethods}() {} \n") + } + newFile.appendText("}") + } + } + + @AfterClass + @JvmStatic + fun tearDown() { + tempInputDir.deleteRecursively() + } + } +} From 9fdc80e444d2543a3dead49e9491eba5bf9dc79b Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 4 Aug 2021 14:05:14 +0300 Subject: [PATCH 274/308] file distribution fix --- src/main/kotlin/astminer/common/model/ParsingResultModel.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt 
b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt index 7e3dc43b..dde3db68 100644 --- a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -36,7 +36,7 @@ interface ParsingResultFactory { val progressBar = ProgressBar("Parsing progress:", files.size.toLong()) synchronized(results) { - files.chunked(files.size / (NUM_OF_THREADS - 1) + 1).filter { it.isNotEmpty() } + files.chunked(files.size / NUM_OF_THREADS + 1).filter { it.isNotEmpty() } .map { chunk -> threads.add(thread { results.addAll(parseFiles(chunk, progressBar, action)) }) } From 05c9bc886c32d155a5da68eb013bb372ae1200a9 Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 4 Aug 2021 14:28:41 +0300 Subject: [PATCH 275/308] normalization moved to critical section again --- src/main/kotlin/astminer/common/model/ParsingModel.kt | 3 +-- src/main/kotlin/astminer/parse/antlr/AntlrNode.kt | 4 ++-- src/main/kotlin/astminer/parse/fuzzy/FuzzyNode.kt | 4 ++-- src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt | 4 ++-- .../astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt | 4 ++-- .../parse/gumtree/python/GumTreePythonFunctionInfo.kt | 7 ++++++- src/test/kotlin/astminer/common/DummyNode.kt | 4 +--- src/test/kotlin/astminer/featureextraction/PrettyNode.kt | 2 +- 8 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 2d6df324..28e14c3b 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -6,11 +6,10 @@ import java.io.File import java.io.InputStream import java.util.* -abstract class Node { +abstract class Node(val originalToken: String?) { abstract val typeLabel: String abstract val children: List abstract val parent: Node? - abstract val originalToken: String? 
val normalizedToken: String by lazy { originalToken?.let { diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt index 4f738482..cc0899e3 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt @@ -5,8 +5,8 @@ import astminer.common.model.Node class AntlrNode( override val typeLabel: String, override var parent: AntlrNode?, - override val originalToken: String? -) : Node() { + originalToken: String? +) : Node(originalToken) { override val children: MutableList = mutableListOf() diff --git a/src/main/kotlin/astminer/parse/fuzzy/FuzzyNode.kt b/src/main/kotlin/astminer/parse/fuzzy/FuzzyNode.kt index 07924b52..af122b38 100644 --- a/src/main/kotlin/astminer/parse/fuzzy/FuzzyNode.kt +++ b/src/main/kotlin/astminer/parse/fuzzy/FuzzyNode.kt @@ -11,9 +11,9 @@ import com.google.common.collect.TreeMultiset */ class FuzzyNode( override val typeLabel: String, - override val originalToken: String?, + originalToken: String?, order: Int? -) : Node() { +) : Node(originalToken) { private val order = order ?: -1 override var parent: Node? = null private val childrenMultiset = TreeMultiset.create( diff --git a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt index 79c79e04..f8db3f21 100644 --- a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt +++ b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt @@ -4,14 +4,14 @@ import astminer.common.model.Node import com.github.gumtreediff.tree.ITree import com.github.gumtreediff.tree.TreeContext -class GumTreeNode(val wrappedNode: ITree, val context: TreeContext, override var parent: GumTreeNode?) : Node() { +class GumTreeNode(val wrappedNode: ITree, val context: TreeContext, override var parent: GumTreeNode?) 
: + Node(wrappedNode.label) { override val typeLabel: String get() = context.getTypeLabel(wrappedNode) override val children: MutableList by lazy { wrappedNode.children.map { GumTreeNode(it, context, this) }.toMutableList() } - override val originalToken: String = wrappedNode.label override fun removeChildrenOfType(typeLabel: String) { children.removeIf { it.typeLabel == typeLabel } diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt index a6af57f7..ce9ba3fc 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -16,11 +16,11 @@ class GumTreeJavaFunctionInfo( override val returnType: String? = root.getElementType() override val enclosingElement: EnclosingElement? = collectEnclosingClass() - override val modifiers: List = root.children.filter { it.typeLabel == "Modifier" }.map { it.originalToken } + override val modifiers: List = root.children.filter { it.typeLabel == "Modifier" }.mapNotNull { it.originalToken } override val annotations: List = root .children .filter { it.typeLabel == "MarkerAnnotation" } - .map { it.children.first().originalToken } + .mapNotNull { it.children.first().originalToken } override val isConstructor: Boolean = root.typeLabel == "Initializer" private fun collectEnclosingClass(): EnclosingElement? 
{ diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt index d297a3ab..6f727859 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -55,7 +55,12 @@ class GumTreePythonFunctionInfo( else -> emptyList() } } - return params.map { FunctionInfoParameter(it.originalToken, getElementType(it)?.originalToken) } + return params.mapNotNull { + FunctionInfoParameter( + it.originalToken ?: return@mapNotNull null, + getElementType(it)?.originalToken + ) + } } companion object { diff --git a/src/test/kotlin/astminer/common/DummyNode.kt b/src/test/kotlin/astminer/common/DummyNode.kt index 1a39d0d4..91aaa046 100644 --- a/src/test/kotlin/astminer/common/DummyNode.kt +++ b/src/test/kotlin/astminer/common/DummyNode.kt @@ -6,12 +6,10 @@ import java.io.File class DummyNode( override val typeLabel: String, override val children: MutableList = mutableListOf() -) : Node() { +) : Node(typeLabel) { override val parent: Node? = null - override val originalToken: String = typeLabel - init { // Tokens may change after normalization, for tests we want tokens to be unchanged technicalToken = typeLabel diff --git a/src/test/kotlin/astminer/featureextraction/PrettyNode.kt b/src/test/kotlin/astminer/featureextraction/PrettyNode.kt index 3cd0a0e2..d0867ce0 100644 --- a/src/test/kotlin/astminer/featureextraction/PrettyNode.kt +++ b/src/test/kotlin/astminer/featureextraction/PrettyNode.kt @@ -2,7 +2,7 @@ package astminer.featureextraction import astminer.common.model.Node -class PrettyNode(override val typeLabel: String, override val originalToken: String) : Node() { +class PrettyNode(override val typeLabel: String, originalToken: String) : Node(originalToken) { override var children: MutableList = ArrayList() override var parent: PrettyNode? 
= null set(value) { From 306a6a3e744faf5dca0d411dedd593f07c97ad56 Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 4 Aug 2021 15:03:06 +0300 Subject: [PATCH 276/308] lazy stayed in Node but look ahead calculation for antlr was added --- src/main/kotlin/astminer/common/model/ParsingModel.kt | 2 ++ src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 28e14c3b..f9dec295 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -42,6 +42,7 @@ abstract class Node(val originalToken: String?) { resultList.add(this) children.forEach { it.doTraversePreOrder(resultList) } } + fun preOrderIterator(): Iterator = preOrder().listIterator() open fun preOrder(): List = mutableListOf().also { doTraversePreOrder(it) } @@ -49,6 +50,7 @@ abstract class Node(val originalToken: String?) 
{ children.forEach { it.doTraversePostOrder(resultList) } resultList.add(this) } + fun postOrderIterator(): Iterator = postOrder().listIterator() open fun postOrder(): List = mutableListOf().also { doTraversePostOrder(it) } diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 71e45c6f..68580b11 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -28,7 +28,8 @@ private fun convertRuleContext( } } currentNode.replaceChildren(children) - + /* Forcing lazy property be calculated right now if it's a leaf */ + if (currentNode.isLeaf()) { currentNode.normalizedToken } return currentNode } From a027ce36152c17ad455da34c0c72bd81bfd603f8 Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 4 Aug 2021 15:09:19 +0300 Subject: [PATCH 277/308] code style fixes --- .../astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt | 7 ++++++- .../parse/gumtree/python/GumTreePythonFunctionInfo.kt | 5 +---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt index ce9ba3fc..d96631c9 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -16,11 +16,16 @@ class GumTreeJavaFunctionInfo( override val returnType: String? = root.getElementType() override val enclosingElement: EnclosingElement? 
= collectEnclosingClass() - override val modifiers: List = root.children.filter { it.typeLabel == "Modifier" }.mapNotNull { it.originalToken } + override val modifiers: List = root + .children + .filter { it.typeLabel == "Modifier" } + .mapNotNull { it.originalToken } + override val annotations: List = root .children .filter { it.typeLabel == "MarkerAnnotation" } .mapNotNull { it.children.first().originalToken } + override val isConstructor: Boolean = root.typeLabel == "Initializer" private fun collectEnclosingClass(): EnclosingElement? { diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt index 6f727859..1590ce1b 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -56,10 +56,7 @@ class GumTreePythonFunctionInfo( } } return params.mapNotNull { - FunctionInfoParameter( - it.originalToken ?: return@mapNotNull null, - getElementType(it)?.originalToken - ) + FunctionInfoParameter(it.originalToken ?: return@mapNotNull null, getElementType(it)?.originalToken) } } From c59f16c75046551b79d36984fabbc12cadcdd97e Mon Sep 17 00:00:00 2001 From: illided Date: Wed, 4 Aug 2021 18:08:31 +0300 Subject: [PATCH 278/308] gumtree exception wrappers --- .../kotlin/astminer/common/model/ParsingModel.kt | 5 +++++ .../parse/gumtree/java/GumTreeJavaParser.kt | 13 +++++++++++-- .../parse/gumtree/python/GumTreePythonParser.kt | 7 ++++++- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 9b1af5ba..39ac9d7e 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -72,3 +72,8 @@ interface Parser { */ fun parseFile(file: File) = parseInputStream(file.inputStream()) } + 
+class ParserNotInstalledException(parser: String, language: String, val e: Exception): Exception() { + override val message: String = "Tools for parsing $language with $parser were not properly installed" + override val cause: Throwable = e +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt index 253a17d5..3a11dee9 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt @@ -1,21 +1,30 @@ package astminer.parse.gumtree.java import astminer.common.model.Parser +import astminer.config.FileExtension +import astminer.config.ParserType +import astminer.parse.ParsingException import astminer.parse.gumtree.GumTreeNode import com.github.gumtreediff.client.Run +import com.github.gumtreediff.gen.SyntaxException import com.github.gumtreediff.gen.jdt.JdtTreeGenerator import com.github.gumtreediff.tree.TreeContext +import mu.KotlinLogging import java.io.InputStream import java.io.InputStreamReader +private val logger = KotlinLogging.logger("GumTree-JavaParser") + class GumTreeJavaParser : Parser { init { Run.initGenerators() } - override fun parseInputStream(content: InputStream): GumTreeNode { + override fun parseInputStream(content: InputStream): GumTreeNode = try { val treeContext = JdtTreeGenerator().generate(InputStreamReader(content)) - return wrapGumTreeNode(treeContext) + wrapGumTreeNode(treeContext) + } catch (e: SyntaxException) { + throw ParsingException(parserType = "Gumtree", language = "Java", exc = e) } } diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt index 8a407687..1b6d5a5a 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt @@ -1,11 +1,14 
@@ package astminer.parse.gumtree.python import astminer.common.model.Parser +import astminer.common.model.ParserNotInstalledException import astminer.parse.ParsingException import astminer.parse.gumtree.GumTreeNode import com.github.gumtreediff.client.Run +import com.github.gumtreediff.gen.SyntaxException import com.github.gumtreediff.gen.python.PythonTreeGenerator import com.github.gumtreediff.tree.TreeContext +import java.io.IOException import java.io.InputStream import java.io.InputStreamReader @@ -17,8 +20,10 @@ class GumTreePythonParser : Parser { override fun parseInputStream(content: InputStream): GumTreeNode = try { val context = PythonTreeGenerator().generate(InputStreamReader(content)) wrapGumTreeNode(context) - } catch (e: Exception) { + } catch (e: SyntaxException) { throw ParsingException("GumTree", "Python", e) + } catch (e: IOException) { + throw ParserNotInstalledException("Gumtree", "Python", e) } } From 9f9c43d0d344d6ab9b31fd7c3efd918eed26e1d2 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Wed, 4 Aug 2021 20:24:00 +0200 Subject: [PATCH 279/308] edit README.md --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index f94b5b08..d6c9f57c 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,9 @@ Currently, it supports extraction of: It is designed to be very easily extensible to new languages. -`astminer` lets you create end2end pipeline of data processing. -It allows convert source code, cloned from VCS to suitable for training datasets. -To do that, `astminer` provides multiple steps to handle data: +`astminer` lets you create an end-to-end pipeline to processing code for machine learning models. +It allows to convert source code cloned from VCS to formats suitable for training. 
+To achieve that, `astminer` caters for multiple data processing steps: - [filters](./docs/filters.md) to remove redundant samples from data - [label extractors](./docs/label_extractors.md) to create label for each tree - [storages](./docs/storages.md) to define storage format. @@ -41,12 +41,12 @@ There are two ways to use `astminer`. ### Using `astminer` cli -Define config (examples of them in [configs](./configs) directory) and pass it shell script: +Define config (examples of them in [configs](./configs) directory) and pass it to shell script: ```shell ./cli.sh ``` -For details about config format and other navigate to [docs/cli](./docs/cli.md). +For details on CLI configuration, see [docs/cli](./docs/cli.md). ### Using `astminer` as a dependency @@ -88,10 +88,10 @@ After that, add `mavenLocal()` into the `repositories` section in your gradle co If you want to use `astminer` as a library in your Java/Kotlin based data mining tool, check the following: -* A few simple [examples](src/examples) of `astminer` usage in Java and Kotlin. +* A few simple [examples](src/examples) of using `astminer` in Java and Kotlin. * Using `astminer` as a part of another mining tool — [psiminer](https://github.com/JetBrains-Research/psiminer). -Please consider trying Kotlin for your data mining pipelines: from our experience, it is much better suited for data collection and transformation instruments. +Please consider trying Kotlin for your data mining pipelines: from our experience, it is much better suited for data collection and transformation instruments than Java. 
## Contribution From f789c7b5ac46ec86044476f57844f9927f3c47b6 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Wed, 4 Aug 2021 20:34:08 +0200 Subject: [PATCH 280/308] edit cli.sh --- docs/cli.md | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/docs/cli.md b/docs/cli.md index e309fd0e..1d5ef81d 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -1,39 +1,44 @@ # `astminer` CLI usage -You can run `astminer` through command-line interface. -CLI allow to run the tool on any implemented parser with specifying filtering, label extracting and storage options. +You can run `astminer` through a command line interface (CLI). +The CLI allows to run the tool on any implemented parser with specified options for filtering, label extraction, and storage of the results. ## How to -You can prepare and run CLI on any branch you want. Just navigate to it and do follow steps: -1. Build shadow jar for `astminer`: +You can build and run the CLI with any version of `astminer`: +1. Check out the relevant version of `astminer` sources (for example, the `master-dev` branch) +2. Build a shadow jar for `astminer`: ```shell gradle shadowJar ``` -2. [Optionally] Pull docker image with all parsers dependencies installed: +3. [optional] Pull a docker image with all parsers dependencies installed: ```shell docker pull voudy/astminer ``` -3. Run `astminer` with specified config: +4. Run `astminer` with specified config: ```shell ./cli.sh ``` ## Config -CLI usage of the `astminer` completely configured by YAML config. +CLI of `astminer` is fully configured by a YAML config. 
The config should contain next values: -- `inputDir` — path to directory with input data -- `outputDir` — path to output directory +- `inputDir` — path to the directory with input data +- `outputDir` — path to the output directory - `parser` — parser name and list of target languages -- `filters` — list of filters with their parameters -- `label` — label extractor strategy +- `filters` — list of filters and parameters +- `label` — label extraction strategy - `storage` — storage format -[configs](../configs) already contain some config examples, look at them for more structure details. +[configs](../configs) contains some config examples that could be used as a reference for the YAML structure. ## Docker -Since some parsers have additional dependencies, -e.g. G++ must be installed for Fuzzy parser (see [parsers](./parsers.md)). -We introduce Docker image with already installed parser dependencies. -To use this image you should only pull this image from DockerHub and run CLI by `./cli.sh`. +Some parsers have non-trivial environment requirements. +For example, g++ must be installed for Fuzzy parser (see [parsers](./parsers.md)). + +To ease dealing with such cases, we provide a Docker image with all parser dependencies. +This image can be pulled from DockerHub: +```shell +docker pull voudy/astminer +``` From 171b16a7c0438b1733ddd363556b31e3ca7c1299 Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Wed, 4 Aug 2021 20:43:19 +0200 Subject: [PATCH 281/308] edit filters.md --- docs/filters.md | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/docs/filters.md b/docs/filters.md index ab20199f..6a9c3448 100644 --- a/docs/filters.md +++ b/docs/filters.md @@ -1,20 +1,18 @@ # Filters -Each filter dedicate to remove *bad* trees from data, e.g. too large trees. -Also, each filter works only for certain levels of granulaity. -Here we describe all implemented filters. -Each description contains corresponding YAML config. 
- -Since filters may be language or parser specific, `astminer` should support all this zoo. -And since we **do not** use any of intermediate representation it is impossible to unify filtering. -Therefore some languages or parsers may not support needed filter +Each filter is dedicated to removing *bad* trees from the data, e.g. trees that are too big. +Moreover, each filter works only for certain levels of granulaity. +Here we describe all filters provided by `astminer`. +Each description contains the corresponding YAML config. + +Filters can be specific to a language or a parser. +Therefore, some languages or parsers may not support the needed filter (`FunctionInfoPropertyNotImplementedException` appears). -To handle this user should manually add specific logic of parsing AST to get info about function or code at -all. +To handle this, the user might manually add specific logic of parsing AST to get the desired information about function or code at all. Filter config classes are defined in [FilterConfigs.kt](../src/main/kotlin/astminer/config/FilterConfigs.kt). -## by tree size +## Filter by tree size **granularity**: files, functions Exclude ASTs that are too small or too big. @@ -25,7 +23,7 @@ Exclude ASTs that are too small or too big. maxTreeSize: 100 ``` -## by words number +## Filter by words count **granularity**: files, functions Exclude ASTs that have too many words in any token. @@ -35,7 +33,7 @@ Exclude ASTs that have too many words in any token. maxTokenWordsNumber: 10 ``` -## by function name length +## Filter by function name length **granularity**: functions Exclude functions that have too many words in their name. @@ -45,7 +43,7 @@ Exclude functions that have too many words in their name. 
maxWordsNumber: 10 ``` -## no constructors +## Exclude constructors **granularity**: functions Exclude constructors @@ -54,7 +52,7 @@ Exclude constructors name: no constructors ``` -## by annotations +## Filter by annotation **granularity**: functions Exclude functions that have certain annotations (e.g. `@Override`) @@ -64,7 +62,7 @@ Exclude functions that have certain annotations (e.g. `@Override`) annotations: [ override ] ``` -## by modifiers +## Filter by modifiers **granularity**: functions Exclude functions with certain modifiers (e.g. `private` functions) From b3f5d78d0bb87692868d2ff47a79be2bba17d0ac Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Wed, 4 Aug 2021 20:54:59 +0200 Subject: [PATCH 282/308] edit label_extractors.md --- docs/label_extractors.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/label_extractors.md b/docs/label_extractors.md index ec41845e..fbad4f8e 100644 --- a/docs/label_extractors.md +++ b/docs/label_extractors.md @@ -1,15 +1,15 @@ # Label extractors -Label extractors are required for correct extracting of labels from raw ASTs. -Inside themselves they extract label from tree and process tree to avoid data leak. -Also, label extractors define granularity level for the whole pipeline. +Label extractors are required for correct extraction of labels from raw ASTs. +Internally, they extract labels from the tree and process the tree to avoid data leaks. +Also, label extractors define the granularity level for the whole pipeline. Label extractor config classes are defined in [LabelExtractorConfigs.kt](src/main/kotlin/astminer/config/LabelExtractorConfigs.kt). ## file name **granularity**: files -Use file name of source file as label. +Use file name of source file as a label. ```yaml name: file name @@ -18,7 +18,7 @@ Use file name of source file as label. ## folder name **granularity**: files -Use name of the parent folder of source file as label. 
+Use the name of the parent folder of source file as a label. May be useful for code classification datasets, e.g., POJ-104. ```yaml @@ -28,8 +28,8 @@ May be useful for code classification datasets, e.g., POJ-104. ## function name **granularity**: functions -Use name of each function as label. -This label extractor will also hide the function name in the AST and all recursive calls. +Use name of each function as a label. +This label extractor will also hide the function name in the AST and all recursive calls to prevent data leaks. ```yaml name: function name From 5452f07c5365ff7a773915a0fd375efc8cbd970b Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Wed, 4 Aug 2021 20:55:07 +0200 Subject: [PATCH 283/308] edit parsers.md --- docs/parsers.md | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/docs/parsers.md b/docs/parsers.md index bfbd52c1..42bc9d7b 100644 --- a/docs/parsers.md +++ b/docs/parsers.md @@ -1,31 +1,30 @@ # Parsers -`astminer` supports multiple parsers for a large wide of programming languages. -Here we describe integrated parsers and their peculiarities. +`astminer` supports multiple parsers for various programming languages. +Here we describe the integrated parsers and their peculiarities. ## ANTLR -ANother Tool for Language Recognition from [antlr.org](https://www.antlr.org). -It provides lexer and parsers for languages that can be generated into Java code. -For now, `astminer` supports Java, Python, JS, and PHP. +[ANTLR](https://www.antlr.org) provides an infrastructure to generate lexers and parsers for languages based on grammars. +For now, `astminer` supports ANTLR-based parsers for Java, Python, JS, and PHP. ## GumTree [GumTree](https://github.com/GumTreeDiff/gumtree) -framework to work with source code as trees and to compute difference between them. -It also builds language-agnostic representation. -For now, `astminer` supports Java and Python. 
+is a framework to work with source code as trees and to compute differences of trees between different versions of code. +It also builds language-agnostic representations of code. +For now, `astminer` supports GumTree-based parsers for Java and Python. ### python-parser -You should install python-parser to run GumTree with Python. -There is instruction of how to do it: +Running GumTree with Python requires `python-parser`. +It can be set up through the following steps: 1. Download sources from [GitHub](https://github.com/JetBrains-Research/pythonparser/blob/master/) 2. Install dependencies ```shell pip install -r requirements.txt ``` -3. Make python parser script executable +3. Make the `python-parser` script executable ```shell chmod +x src/main/python/pythonparser/pythonparser_3.py ``` @@ -37,9 +36,9 @@ export PATH="/src/main/python/pythonparser/pythonparser:${PATH}" ## Fuzzy -Originally [fuzzyc2cpg](https://github.com/ShiftLeftSecurity/fuzzyc2cpg) -and now part of [codepropertygraph](https://github.com/ShiftLeftSecurity/codepropertygraph/). -`astminer`uses it C/C++ parser from that. `G++`required for this parser. +Originally [fuzzyc2cpg](https://github.com/ShiftLeftSecurity/fuzzyc2cpg), Fuzzy is +now part of [codepropertygraph](https://github.com/ShiftLeftSecurity/codepropertygraph/). +`astminer`uses it to parse C/C++ code. `g++` is required for this parser. ## Other languages and parsers From 7090f2708d44d1346cb160825c18cfde79348b4b Mon Sep 17 00:00:00 2001 From: Vladimir Kovalenko Date: Wed, 4 Aug 2021 21:00:19 +0200 Subject: [PATCH 284/308] edit storages.md --- docs/storages.md | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/docs/storages.md b/docs/storages.md index 6fd28031..058bd426 100644 --- a/docs/storages.md +++ b/docs/storages.md @@ -1,7 +1,7 @@ # Storages -Storages defines how ASTs should be saved on a disk. -For now, `astminer` support saving in tree and path-based formats. 
+The storage defines how the ASTs should be saved on disk. +For now, `astminer` support tree-based and path-based storage formats. Storage config classes are defined in [StorageConfigs.kt](../src/main/kotlin/astminer/config/StorageConfigs.kt). @@ -9,8 +9,8 @@ Storage config classes are defined in [StorageConfigs.kt](../src/main/kotlin/ast ### CSV -Save trees with labels in comma-separated file. -Each tree encodes into line using sequence of parenthesis. +Saves the trees with labels to a comma-separated file. +Each tree is encoded to a single line using parentheses sequences. ```yaml name: csv AST @@ -18,8 +18,8 @@ Each tree encodes into line using sequence of parenthesis. ### Dot -Save each tree in separate file using [dot](https://graphviz.org/doc/info/lang.html) syntax. -Along with dot files, storage also saves `description.csv` with matching between files, source file, and label. +Saves each tree in separate file using [dot](https://graphviz.org/doc/info/lang.html) syntax. +Along with dot files, this storage also saves `description.csv` with mapping between files, source files, and labels. ```yaml @@ -28,8 +28,8 @@ Along with dot files, storage also saves `description.csv` with matching between ### Json lines -Save each tree with label in Json Lines format. -Json format of AST inspired by Python-150k dataset. +Saves each tree with its label in the Json Lines format. +Json format of AST inspired by the [150k Python](https://www.sri.inf.ethz.ch/py150) dataset. ```yaml name: json AST @@ -38,7 +38,7 @@ Json format of AST inspired by Python-150k dataset. ## Path-based representations Path-based representation was introduced by [Alon et al.](https://arxiv.org/abs/1803.09544). -It uses in models like code2vec or code2seq. +It is used in popular code representation models such as `code2vec` and `code2seq`. ### Code2vec @@ -46,10 +46,9 @@ Extract paths from each AST. Output is 4 files: 1. 
`node_types.csv` contains numeric ids and corresponding node types with directions (up/down, as described in [paper](https://arxiv.org/pdf/1803.09544.pdf)); 2. `tokens.csv` contains numeric ids and corresponding tokens; 3. `paths.csv` contains numeric ids and AST paths in form of space-separated sequences of node type ids; -4. `path_contexts.c2s` contains labels and sequences of path contexts (triples of two tokens and a path between them). +4. `path_contexts.c2s` contains the labels and sequences of path-contexts (each representing two tokens and a path between them). -Each line in `path_contexts.c2s` starts with a label, -then it contains a sequence of space-separated triples. Each triple contains start token id, path id, end token id, separated with commas. +Each line in `path_contexts.c2s` starts with a label, followed by a sequence of space-separated triples. Each triple contains start token id, path id, end token id, separated with commas. ```yaml name: code2vec @@ -63,13 +62,13 @@ then it contains a sequence of space-separated triples. Each triple contains sta ### Code2seq -Extract paths from each AST and save in code2seq format. -Output is `path_context.c2s` file, -each line in it starts with a label, then it contains a sequence of space-separated triples. -Each triple contains start token, path node types, end token id, separated with commas. +Extract paths from each AST and save in the code2seq format. +The output is `path_context.c2s` file. +Each line starts with a label, followed by a sequence of space-separated triples. +Each triple contains the start token, path node types, and end token id, separated with commas. -To reduce memory usage you can enable `nodesToNumber` option. -If it is `true` then all types are converted into numbers and `node_types.csv` would be added to output files. +To reduce memory usage, you can enable `nodesToNumber` option. 
+If `nodesToNumber` is set to `true`, all types are converted into numbers and `node_types.csv` is added to output files. ```yaml name: code2seq From 1699388cc46ce836b4be6882cf2a07921c8b454b Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 5 Aug 2021 15:31:47 +0300 Subject: [PATCH 285/308] path stress tests added --- .../pipeline/PipelineAsyncStressTest.kt | 63 ++++++++++++++++--- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt b/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt index 25364da7..a3bfa41d 100644 --- a/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt +++ b/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt @@ -11,16 +11,12 @@ import kotlin.test.AfterTest import kotlin.test.assertEquals class PipelineAsyncStressTest { - @AfterTest - fun deleteOutput() { - tempOutputDir.deleteRecursively() - } - @Test fun jsonStorageTest() { + val outputPath = tempOutputDir.resolve("json").path val config = PipelineConfig( inputDir = tempInputDir.path, - outputDir = tempOutputDir.path, + outputDir = outputPath, parser = ParserConfig( name = ParserType.Antlr, languages = listOf(FileExtension.Java) @@ -30,7 +26,59 @@ class PipelineAsyncStressTest { storage = JsonAstStorageConfig() ) Pipeline(config).run() - assertEquals((numOfFiles * numOfMethods).toLong(), countLines("${tempOutputDir.path}/java/asts.jsonl")) + val expectedNumOfAst = numOfFiles * numOfMethods + val actualNumOfAst = countLines("$outputPath/java/asts.jsonl") + assertEquals(expected = expectedNumOfAst.toLong(), actual = actualNumOfAst) + } + + @Test + fun code2vecStorageTest() { + val outputPath = tempOutputDir.resolve("code2vec").path + val config = PipelineConfig( + inputDir = tempInputDir.path, + outputDir = outputPath, + parser = ParserConfig( + name = ParserType.Antlr, + languages = listOf(FileExtension.Java) + ), + filters = listOf(), + labelExtractor = FunctionNameExtractorConfig(), + storage = 
Code2VecPathStorageConfig( + maxPaths = null, + maxTokens = null, + maxPathContextsPerEntity = null, + maxPathLength = 1000, + maxPathWidth = 1000 + ) + ) + Pipeline(config).run() + val expectedNumOfPathContexts = numOfFiles * numOfMethods + val actualNumOfPathContexts = countLines("$outputPath/java/path_contexts.c2s") + assertEquals(expected = expectedNumOfPathContexts.toLong(), actual = actualNumOfPathContexts) + } + + @Test + fun code2seqStorageTest() { + val outputPath = tempOutputDir.resolve("code2seq").path + val config = PipelineConfig( + inputDir = tempInputDir.path, + outputDir = outputPath, + parser = ParserConfig( + name = ParserType.Antlr, + languages = listOf(FileExtension.Java) + ), + filters = listOf(), + labelExtractor = FunctionNameExtractorConfig(), + storage = Code2SeqPathStorageConfig( + maxPathContextsPerEntity = null, + maxPathLength = 1000, + maxPathWidth = 1000 + ) + ) + Pipeline(config).run() + val expectedNumOfPathContexts = numOfFiles * numOfMethods + val actualNumOfPathContexts = countLines("$outputPath/java/path_contexts.c2s") + assertEquals(expected = expectedNumOfPathContexts.toLong(), actual = actualNumOfPathContexts) } private fun countLines(filePath: String): Long { @@ -64,6 +112,7 @@ class PipelineAsyncStressTest { @JvmStatic fun tearDown() { tempInputDir.deleteRecursively() + tempOutputDir.deleteRecursively() } } } From 3bf52dbb9df1fb03699567dcfb1fa757b51c3ea0 Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 5 Aug 2021 16:02:23 +0300 Subject: [PATCH 286/308] look ahead calculation removed because it had no effect --- src/main/kotlin/astminer/common/model/ParsingModel.kt | 2 +- src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index f9dec295..24c0a3a6 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ 
b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -11,7 +11,7 @@ abstract class Node(val originalToken: String?) { abstract val children: List abstract val parent: Node? - val normalizedToken: String by lazy { + val normalizedToken: String = run { originalToken?.let { val subtokens = splitToSubtokens(it) if (subtokens.isEmpty()) EMPTY_TOKEN else subtokens.joinToString(TOKEN_DELIMITER) diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 68580b11..e9aea811 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -28,8 +28,6 @@ private fun convertRuleContext( } } currentNode.replaceChildren(children) - /* Forcing lazy property be calculated right now if it's a leaf */ - if (currentNode.isLeaf()) { currentNode.normalizedToken } return currentNode } From 942ad498efe7f080ca6cf5fc3285fd45378365d2 Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 5 Aug 2021 16:07:12 +0300 Subject: [PATCH 287/308] unused import removed --- src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt b/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt index a3bfa41d..3cc16e6b 100644 --- a/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt +++ b/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt @@ -7,7 +7,6 @@ import org.junit.Test import java.io.BufferedReader import java.io.File import java.io.FileReader -import kotlin.test.AfterTest import kotlin.test.assertEquals class PipelineAsyncStressTest { From 455f33d23bd8cc5529b19ae0f359a837af00abf2 Mon Sep 17 00:00:00 2001 From: ElenaErratic <33476575+ElenaErratic@users.noreply.github.com> Date: Thu, 5 Aug 2021 16:28:47 +0300 Subject: [PATCH 288/308] Update README.md review additions --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) 
diff --git a/README.md b/README.md index d6c9f57c..3a3b32d4 100644 --- a/README.md +++ b/README.md @@ -28,20 +28,20 @@ It is designed to be very easily extensible to new languages. `astminer` lets you create an end-to-end pipeline to processing code for machine learning models. It allows to convert source code cloned from VCS to formats suitable for training. -To achieve that, `astminer` caters for multiple data processing steps: -- [filters](./docs/filters.md) to remove redundant samples from data -- [label extractors](./docs/label_extractors.md) to create label for each tree -- [storages](./docs/storages.md) to define storage format. +To achieve that, `astminer` incorporates the following processing modules: +- [Filters](./docs/filters.md) to remove redundant samples from data. +- [Label extractors](./docs/label_extractors.md) to create label for each tree. +- [Storages](./docs/storages.md) to define storage format. ## Usage -There are two ways to use `astminer`. +There are two ways to use `astminer`: -- [As a standalone CLI tool](#using-astminer-cli) with pre-implemented logic for common processing and mining tasks +- [As a standalone CLI tool](#using-astminer-cli) with a pre-implemented logic for common processing and mining tasks. - [Integrated](#using-astminer-as-a-dependency) into your Kotlin/Java mining pipelines as a Gradle dependency. 
### Using `astminer` cli -Define config (examples of them in [configs](./configs) directory) and pass it to shell script: +Specify a config (see examples in [configs](./configs) directory) and pass it to the shell script: ```shell ./cli.sh ``` @@ -78,7 +78,7 @@ dependencies { #### Local development -To use a specific version of the library, navigate to the required branch and build local version of `astminer`: +To use a specific version of the library, navigate to the required branch and build a local version of `astminer`: ```shell ./gradlew publishToMavenLocal ``` @@ -86,7 +86,7 @@ After that, add `mavenLocal()` into the `repositories` section in your gradle co #### Examples -If you want to use `astminer` as a library in your Java/Kotlin based data mining tool, check the following: +If you want to use `astminer` as a library in your Java/Kotlin-based data mining tool, check the following: * A few simple [examples](src/examples) of using `astminer` in Java and Kotlin. * Using `astminer` as a part of another mining tool — [psiminer](https://github.com/JetBrains-Research/psiminer). 
From dec1da8e7fb9fc77405cc2aa683592b58a4f1c36 Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 5 Aug 2021 16:44:33 +0300 Subject: [PATCH 289/308] code style fixes --- src/main/kotlin/astminer/common/model/ParsingModel.kt | 4 ++-- .../kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 39ac9d7e..55a8e983 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -73,7 +73,7 @@ interface Parser { fun parseFile(file: File) = parseInputStream(file.inputStream()) } -class ParserNotInstalledException(parser: String, language: String, val e: Exception): Exception() { +class ParserNotInstalledException(parser: String, language: String, val e: Exception) : Exception() { override val message: String = "Tools for parsing $language with $parser were not properly installed" override val cause: Throwable = e -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt index 3a11dee9..41c35bb3 100644 --- a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt @@ -1,8 +1,6 @@ package astminer.parse.gumtree.java import astminer.common.model.Parser -import astminer.config.FileExtension -import astminer.config.ParserType import astminer.parse.ParsingException import astminer.parse.gumtree.GumTreeNode import com.github.gumtreediff.client.Run From d78a3fc956c2d6d2213a86559f76e101033fdecf Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 5 Aug 2021 19:19:39 +0300 Subject: [PATCH 290/308] holdout prototype added --- configs/antlr_java_js_ast.yaml | 4 ++- .../astminer/common/model/PipelineModel.kt | 35 +++++++++++++++++-- 
src/main/kotlin/astminer/pipeline/Pipeline.kt | 13 ++++--- .../astminer/storage/ast/CsvAstStorage.kt | 30 ++++++++++------ .../astminer/storage/ast/DotAstStorage.kt | 15 +++++--- .../astminer/storage/ast/JsonAstStorage.kt | 19 ++++++---- .../astminer/storage/path/PathBasedStorage.kt | 23 +++++++----- .../astminer/storage/ast/CsvAstStorageTest.kt | 1 + 8 files changed, 101 insertions(+), 39 deletions(-) diff --git a/configs/antlr_java_js_ast.yaml b/configs/antlr_java_js_ast.yaml index 8fe2b2d9..abacc464 100644 --- a/configs/antlr_java_js_ast.yaml +++ b/configs/antlr_java_js_ast.yaml @@ -21,4 +21,6 @@ label: # save to disk ASTs in the JSON format storage: - name: json AST + name: code2seq + length: 9 + width: 2 diff --git a/src/main/kotlin/astminer/common/model/PipelineModel.kt b/src/main/kotlin/astminer/common/model/PipelineModel.kt index d96ec77c..725295cb 100644 --- a/src/main/kotlin/astminer/common/model/PipelineModel.kt +++ b/src/main/kotlin/astminer/common/model/PipelineModel.kt @@ -1,6 +1,7 @@ package astminer.common.model import java.io.Closeable +import java.io.File interface Filter @@ -40,11 +41,39 @@ fun ParsingResult.labeledWith(label: String): LabeledResult = L interface Storage : Closeable { val outputDirectoryPath: String - fun store(labeledResult: LabeledResult) + fun store(labeledResult: LabeledResult, holdout: DatasetHoldout = DatasetHoldout.None) - fun store(labeledResults: Iterable>) { + fun store(labeledResults: Iterable>, holdout: DatasetHoldout = DatasetHoldout.None) { for (labeledResult in labeledResults) { - store(labeledResult) + store(labeledResult, holdout) } } } + +enum class DatasetHoldout(val dirName: String) { + Train("train"), + Validation("val"), + Test("test"), + None("data"); +} + +/** Returns map with three entries (keys: train data pool, validation data pool and test data pool; + * values: holdout directories) if dataset structure is present. 
+ * One pool (None) otherwise.**/ +fun findDatasetHoldouts(inputDir: File): Map { + val trainDir = inputDir.resolve(DatasetHoldout.Train.dirName) + val valDir = inputDir.resolve(DatasetHoldout.Validation.dirName) + val testDir = inputDir.resolve(DatasetHoldout.Test.dirName) + + return if (trainDir.exists() && valDir.exists() && testDir.exists()) { + mapOf( + DatasetHoldout.Train to trainDir, + DatasetHoldout.Validation to valDir, + DatasetHoldout.Test to testDir + ) + } else { + mapOf( + DatasetHoldout.None to inputDir + ) + } +} diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index d71a7a08..e03b393d 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -4,6 +4,7 @@ import astminer.common.getProjectFilesWithExtension import astminer.common.model.FileLabelExtractor import astminer.common.model.FunctionLabelExtractor import astminer.common.model.Storage +import astminer.common.model.findDatasetHoldouts import astminer.config.FileExtension import astminer.config.PipelineConfig import astminer.parse.getParsingResultFactory @@ -44,15 +45,17 @@ class Pipeline(private val config: PipelineConfig) { * Runs the pipeline that is defined in the [config]. 
*/ fun run() { + val holdouts = findDatasetHoldouts(inputDirectory) for (language in config.parser.languages) { val parsingResultFactory = getParsingResultFactory(language, config.parser.name) - val files = getProjectFilesWithExtension(inputDirectory, language.fileExtension) - createStorage(language).use { storage -> - parsingResultFactory.parseFiles(files) { parseResult -> - for (labeledResult in branch.process(parseResult)) { - storage.store(labeledResult) + for ((holdoutType, holdout) in holdouts) { + val holdoutFiles = getProjectFilesWithExtension(holdout, language.fileExtension) + + parsingResultFactory.parseFiles(holdoutFiles) { parseResult -> + val labeledResults = branch.process(parseResult) + storage.store(labeledResults, holdoutType) } } } diff --git a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt index a96ba459..3c7f8cec 100644 --- a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -1,5 +1,6 @@ package astminer.storage.ast +import astminer.common.model.DatasetHoldout import astminer.common.model.LabeledResult import astminer.common.model.Node import astminer.common.model.Storage @@ -19,29 +20,26 @@ class CsvAstStorage(override val outputDirectoryPath: String) : Storage { private val tokensMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() private val nodeTypesMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() - private val astsOutputStream: PrintWriter + private val astsPrintWriters = mutableMapOf() init { File(outputDirectoryPath).mkdirs() - val astsFile = File("$outputDirectoryPath/asts.csv") - astsFile.createNewFile() - astsOutputStream = PrintWriter(astsFile) - astsOutputStream.write("id,ast\n") } - override fun store(labeledResult: LabeledResult) { + override fun store(labeledResult: LabeledResult, holdout: DatasetHoldout) { for (node in labeledResult.root.preOrder()) { 
tokensMap.record(node.token) nodeTypesMap.record(node.typeLabel) } - dumpAst(labeledResult.root, labeledResult.label) + val writer = astsPrintWriters.getOrPut(holdout) { holdout.resolveHoldout() } + dumpAst(labeledResult.root, labeledResult.label, writer) } override fun close() { dumpTokenStorage(File("$outputDirectoryPath/tokens.csv")) dumpNodeTypesStorage(File("$outputDirectoryPath/node_types.csv")) - astsOutputStream.close() + astsPrintWriters.values.map { it.close() } } private fun dumpTokenStorage(file: File) { @@ -52,13 +50,23 @@ class CsvAstStorage(override val outputDirectoryPath: String) : Storage { dumpIdStorageToCsv(nodeTypesMap, "node_type", nodeTypeToCsvString, file) } - private fun dumpAst(root: Node, id: String) { - astsOutputStream.write("$id,${astString(root)}\n") + private fun dumpAst(root: Node, id: String, writer: PrintWriter) { + writer.println("$id,${astString(root)}") } internal fun astString(node: Node): String { return "${tokensMap.getId(node.token)} ${nodeTypesMap.getId(node.typeLabel)}{${ - node.children.joinToString(separator = "", transform = ::astString) + node.children.joinToString(separator = "", transform = ::astString) }}" } + + private fun DatasetHoldout.resolveHoldout(): PrintWriter { + val holdoutDir = File(outputDirectoryPath).resolve(this.dirName) + holdoutDir.mkdirs() + val astFile = holdoutDir.resolve("asts.csv") + astFile.createNewFile() + val newWriter = PrintWriter(astFile) + newWriter.println("id,ast") + return newWriter + } } diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index c4497f1a..5206cc24 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -1,5 +1,6 @@ package astminer.storage.ast +import astminer.common.model.DatasetHoldout import astminer.common.model.LabeledResult import astminer.common.model.Node import astminer.common.model.Storage @@ -15,26 +16,25 @@ 
class DotAstStorage(override val outputDirectoryPath: String) : Storage { internal data class FilePath(val parentPath: String, val fileName: String) - private val astDirectoryPath: File + private val astDirectoryPaths = mutableMapOf() private val astFilenameFormat = "ast_%d.dot" private val descriptionFileStream: PrintWriter private var index: Long = 0 init { File(outputDirectoryPath).mkdirs() - astDirectoryPath = File(outputDirectoryPath, "asts") - astDirectoryPath.mkdirs() val descriptionFile = File(outputDirectoryPath, "description.csv") descriptionFile.createNewFile() descriptionFileStream = PrintWriter(descriptionFile) descriptionFileStream.write("dot_file,source_file,label,node_id,token,type\n") } - override fun store(labeledResult: LabeledResult) { + override fun store(labeledResult: LabeledResult, holdout: DatasetHoldout) { // Use filename as a label for ast // TODO: save full signature for method val normalizedLabel = normalizeAstLabel(labeledResult.label) val normalizedFilepath = normalizeFilepath(labeledResult.filePath) + val astDirectoryPath = astDirectoryPaths.getOrPut(holdout) {holdout.resolveHoldout()} val nodesMap = dumpAst(labeledResult.root, File(astDirectoryPath, astFilenameFormat.format(index)), normalizedLabel) val nodeDescriptionFormat = "${astFilenameFormat.format(index)},$normalizedFilepath,$normalizedLabel,%d,%s,%s" @@ -68,6 +68,13 @@ class DotAstStorage(override val outputDirectoryPath: String) : Storage { return nodesMap } + private fun DatasetHoldout.resolveHoldout(): File { + val outputDir = File(outputDirectoryPath) + val asts = outputDir.resolve(this.dirName).resolve("asts") + asts.mkdirs() + return asts + } + // Label should contain only latin letters, numbers and underscores, other symbols replace with an underscore internal fun normalizeAstLabel(label: String): String = label.replace("[^A-z0-9_]".toRegex(), "_") diff --git a/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt 
b/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt index b7ea399b..3ce2463e 100644 --- a/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt @@ -1,5 +1,6 @@ package astminer.storage.ast +import astminer.common.model.DatasetHoldout import astminer.common.model.LabeledResult import astminer.common.model.Node import astminer.common.model.Storage @@ -19,14 +20,11 @@ private typealias Id = Int class JsonAstStorage(override val outputDirectoryPath: String, private val withPaths: Boolean) : Storage { private val treeFlattener = TreeFlattener() - private val writer: PrintWriter + private val datasetWriters = mutableMapOf() init { val outputDirectory = File(outputDirectoryPath) outputDirectory.mkdirs() - val file = outputDirectory.resolve("asts.jsonl") - file.createNewFile() - writer = file.printWriter() } @Serializable @@ -38,15 +36,24 @@ class JsonAstStorage(override val outputDirectoryPath: String, private val withP private fun TreeFlattener.EnumeratedNode.toOutputNode() = OutputNode(node.token, node.typeLabel, children.map { it.id }) - override fun store(labeledResult: LabeledResult) { + override fun store(labeledResult: LabeledResult, holdout: DatasetHoldout) { val outputNodes = treeFlattener.flatten(labeledResult.root).map { it.toOutputNode() } val path = if (withPaths) labeledResult.filePath else null val labeledAst = LabeledAst(labeledResult.label, path, outputNodes) + val writer = datasetWriters.getOrPut(holdout) { holdout.resolveHoldout() } writer.println(Json.encodeToString(labeledAst)) } override fun close() { - writer.close() + datasetWriters.values.map { it.close() } + } + + private fun DatasetHoldout.resolveHoldout(): PrintWriter { + val holdoutDir = File(outputDirectoryPath).resolve(this.dirName) + holdoutDir.mkdirs() + val astFile = holdoutDir.resolve("asts.jsonl") + astFile.createNewFile() + return PrintWriter(astFile) } } diff --git 
a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index 3a99cf4e..1a3586f9 100644 --- a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -7,6 +7,7 @@ import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import astminer.paths.toPathContext import java.io.File +import java.io.FileWriter import java.io.PrintWriter /** @@ -37,15 +38,10 @@ abstract class PathBasedStorage( ) : Storage { private val pathMiner = PathMiner(PathRetrievalSettings(config.maxPathLength, config.maxPathWidth)) - - private val pathsFile: File - private val pathContextPrintWriter: PrintWriter + private val datasetFileWriters = mutableMapOf() init { File(outputDirectoryPath).mkdirs() - pathsFile = File(outputDirectoryPath).resolve("path_contexts.c2s") - pathsFile.createNewFile() - pathContextPrintWriter = PrintWriter(pathsFile) } private fun retrievePaths(node: Node) = if (config.maxPathContextsPerEntity != null) { @@ -69,13 +65,22 @@ abstract class PathBasedStorage( /** * Extract paths from [labeledResult] and store them in the specified format. 
*/ - override fun store(labeledResult: LabeledResult) { + override fun store(labeledResult: LabeledResult, holdout: DatasetHoldout) { val labeledPathContexts = retrieveLabeledPathContexts(labeledResult) val output = labeledPathContextsToString(labeledPathContexts) - pathContextPrintWriter.println(output) + val writer = datasetFileWriters.getOrPut(holdout) { holdout.resolveWriter() } + writer.println(output) } override fun close() { - pathContextPrintWriter.close() + datasetFileWriters.values.map { it.close() } + } + + private fun DatasetHoldout.resolveWriter(): PrintWriter { + val holdoutDir = File(outputDirectoryPath).resolve(this.dirName) + holdoutDir.mkdirs() + val pathContextFile = holdoutDir.resolve("path_contexts.c2s") + pathContextFile.createNewFile() + return PrintWriter(pathContextFile) } } diff --git a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt index ee4e1873..522befbc 100644 --- a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt @@ -3,6 +3,7 @@ package astminer.storage.ast import astminer.common.createBamboo import astminer.common.createDummyTree import astminer.common.createSmallTree +import astminer.common.model.DatasetHoldout import org.junit.Assert import org.junit.Test import java.io.File From 122cc1dc11b4a8a7a9577f938d15a4ef68c109f4 Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 5 Aug 2021 20:47:34 +0300 Subject: [PATCH 291/308] tests fixed --- .../kotlin/astminer/pipeline/Code2VecExtractionTest.kt | 2 +- .../astminer/pipeline/util/OutputVerification.kt | 10 +++++++++- .../kotlin/astminer/storage/ast/DotAstStorageTest.kt | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt index d59b9bb9..3cfaee15 100644 --- 
a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt +++ b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt @@ -10,7 +10,7 @@ internal class Code2VecExtractionTest { private val testDataDir = File("src/test/resources") @Test - fun `test code2vec path extraction from files generates correct folders and files`() { + fun `test code2vec path extraction from files generates correct folders and files when no dataset`() { val extractedDataDir = Files.createTempDirectory("extractedData") val languages = listOf(FileExtension.Java, FileExtension.Python) diff --git a/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt b/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt index 60650ed8..fd788375 100644 --- a/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt +++ b/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt @@ -1,5 +1,6 @@ package astminer.pipeline.util +import astminer.common.model.DatasetHoldout import java.io.File import kotlin.test.assertTrue @@ -28,12 +29,19 @@ internal fun validPathContextsFile(name: String, batching: Boolean): Boolean { } } +internal fun validPathContextHoldout(holdoutDir: File, batching: Boolean): Boolean { + val holdoutFiles = checkNotNull(holdoutDir.listFiles()) + return holdoutFiles.all { validPathContextsFile(it.name, batching) } +} + internal fun checkPathContextsDir(languageDir: File, batching: Boolean) { val expectedFiles = listOf("tokens.csv", "paths.csv", "node_types.csv") languageDir.listFiles()?.forEach { file -> with(file) { + val isDescriptionFile = expectedFiles.contains(name) + val isPathContextHoldout = this.isDirectory && validPathContextHoldout(this, batching) assertTrue( - expectedFiles.contains(name) || validPathContextsFile(name, batching), + isDescriptionFile || isPathContextHoldout, "Unexpected file $name in ${languageDir.name}" ) } diff --git a/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt 
index 2123533b..f14a82e7 100644 --- a/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt @@ -12,7 +12,7 @@ class DotAstStorageTest { storage.store(root.labeledWith("entityId")) } - val storageLines = File(File("test_examples", "asts"), "ast_0.dot").readLines() + val storageLines = File(File("test_examples/data", "asts"), "ast_0.dot").readLines() File("test_examples").deleteRecursively() @@ -51,7 +51,7 @@ class DotAstStorageTest { "3 -- {};", "}" ) - val storageLines = File(File(OUTPUT_FOLDER, "asts"), "ast_0.dot").readLines() + val storageLines = File(File("$OUTPUT_FOLDER/data", "asts"), "ast_0.dot").readLines() assertEquals(trueLines, storageLines) } From b8a9eac2824b1a72ed38d9a86ccc0999c4576913 Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 5 Aug 2021 20:49:57 +0300 Subject: [PATCH 292/308] code style fixes --- src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt | 2 +- src/main/kotlin/astminer/storage/ast/DotAstStorage.kt | 2 +- src/main/kotlin/astminer/storage/path/PathBasedStorage.kt | 1 - src/test/kotlin/astminer/pipeline/util/OutputVerification.kt | 1 - src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt | 1 - 5 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt index 3c7f8cec..9ef41b0b 100644 --- a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -56,7 +56,7 @@ class CsvAstStorage(override val outputDirectoryPath: String) : Storage { internal fun astString(node: Node): String { return "${tokensMap.getId(node.token)} ${nodeTypesMap.getId(node.typeLabel)}{${ - node.children.joinToString(separator = "", transform = ::astString) + node.children.joinToString(separator = "", transform = ::astString) }}" } diff --git a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt 
b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index 5206cc24..fc147270 100644 --- a/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -34,7 +34,7 @@ class DotAstStorage(override val outputDirectoryPath: String) : Storage { // TODO: save full signature for method val normalizedLabel = normalizeAstLabel(labeledResult.label) val normalizedFilepath = normalizeFilepath(labeledResult.filePath) - val astDirectoryPath = astDirectoryPaths.getOrPut(holdout) {holdout.resolveHoldout()} + val astDirectoryPath = astDirectoryPaths.getOrPut(holdout) { holdout.resolveHoldout() } val nodesMap = dumpAst(labeledResult.root, File(astDirectoryPath, astFilenameFormat.format(index)), normalizedLabel) val nodeDescriptionFormat = "${astFilenameFormat.format(index)},$normalizedFilepath,$normalizedLabel,%d,%s,%s" diff --git a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt index 1a3586f9..d41ad286 100644 --- a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -7,7 +7,6 @@ import astminer.paths.PathMiner import astminer.paths.PathRetrievalSettings import astminer.paths.toPathContext import java.io.File -import java.io.FileWriter import java.io.PrintWriter /** diff --git a/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt b/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt index fd788375..33c60822 100644 --- a/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt +++ b/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt @@ -1,6 +1,5 @@ package astminer.pipeline.util -import astminer.common.model.DatasetHoldout import java.io.File import kotlin.test.assertTrue diff --git a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt index 522befbc..ee4e1873 100644 --- 
a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt +++ b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt @@ -3,7 +3,6 @@ package astminer.storage.ast import astminer.common.createBamboo import astminer.common.createDummyTree import astminer.common.createSmallTree -import astminer.common.model.DatasetHoldout import org.junit.Assert import org.junit.Test import java.io.File From 8fbeb39d9b919c03385cedae09706f80942170af Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 5 Aug 2021 21:52:07 +0300 Subject: [PATCH 293/308] config and multiple tweaks added --- .../astminer/common/model/ParsingModel.kt | 3 +-- .../astminer/common/model/ParsingResultModel.kt | 17 +++++++---------- .../kotlin/astminer/config/PipelineConfig.kt | 3 ++- src/main/kotlin/astminer/pipeline/Pipeline.kt | 8 +++++++- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 24c0a3a6..88355a1e 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -11,12 +11,11 @@ abstract class Node(val originalToken: String?) { abstract val children: List abstract val parent: Node? - val normalizedToken: String = run { + val normalizedToken: String = originalToken?.let { val subtokens = splitToSubtokens(it) if (subtokens.isEmpty()) EMPTY_TOKEN else subtokens.joinToString(TOKEN_DELIMITER) } ?: EMPTY_TOKEN - } var technicalToken: String? 
= null diff --git a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt index dde3db68..7589476e 100644 --- a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -7,14 +7,12 @@ import java.io.File import kotlin.concurrent.thread private val logger = KotlinLogging.logger("HandlerFactory") -private const val NUM_OF_THREADS = 16 interface ParsingResultFactory { fun parse(file: File): ParsingResult fun parseFiles( files: List, - progressBar: ProgressBar? = null, action: (ParsingResult) -> T ): List { val results = mutableListOf() @@ -25,24 +23,25 @@ interface ParsingResultFactory { logger.error(parsingException) { "Failed to parse file ${file.path}" } results.add(null) } - progressBar?.step() } return results } - fun parseFilesAsync(files: List, action: (ParsingResult) -> T): List { + fun parseFilesInThreads( + files: List, + numOfThreads: Int, + action: (ParsingResult) -> T + ): List { val results = mutableListOf() val threads = mutableListOf() - val progressBar = ProgressBar("Parsing progress:", files.size.toLong()) synchronized(results) { - files.chunked(files.size / NUM_OF_THREADS + 1).filter { it.isNotEmpty() } + files.chunked(files.size / numOfThreads + 1).filter { it.isNotEmpty() } .map { chunk -> - threads.add(thread { results.addAll(parseFiles(chunk, progressBar, action)) }) + threads.add(thread { results.addAll(parseFiles(chunk, action)) }) } } threads.map { it.join() } - progressBar.close() return results } } @@ -57,11 +56,9 @@ interface PreprocessingParsingResultFactory : ParsingResultFactory { */ override fun parseFiles( files: List, - progressBar: ProgressBar?, action: (ParsingResult) -> T ) = files.map { file -> - progressBar?.step() try { val preprocessedFile = preprocess(file) val result = action(parse(preprocessedFile)) diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt 
b/src/main/kotlin/astminer/config/PipelineConfig.kt index 4ec2fb26..61489324 100644 --- a/src/main/kotlin/astminer/config/PipelineConfig.kt +++ b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -14,5 +14,6 @@ data class PipelineConfig( val parser: ParserConfig, val filters: List = emptyList(), @SerialName("label") val labelExtractor: LabelExtractorConfig, - val storage: StorageConfig + val storage: StorageConfig, + val performance: PerformanceConfig = defaultPerformanceConfig ) diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index c6aa20b2..7ca0fd75 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -10,6 +10,7 @@ import astminer.parse.getParsingResultFactory import astminer.pipeline.branch.FilePipelineBranch import astminer.pipeline.branch.FunctionPipelineBranch import astminer.pipeline.branch.IllegalLabelExtractorException +import me.tongfei.progressbar.ProgressBar import java.io.File /** @@ -44,6 +45,7 @@ class Pipeline(private val config: PipelineConfig) { * Runs the pipeline that is defined in the [config]. 
*/ fun run() { + println("Working in ${config.performance.numOfThreads}") for (language in config.parser.languages) { println("Parsing $language") val parsingResultFactory = getParsingResultFactory(language, config.parser.name) @@ -52,15 +54,19 @@ class Pipeline(private val config: PipelineConfig) { val files = getProjectFilesWithExtension(inputDirectory, language.fileExtension) println("${files.size} files retrieved") + val progressBar = ProgressBar("", files.size.toLong()) + createStorage(language).use { storage -> synchronized(storage) { - parsingResultFactory.parseFilesAsync(files) { parseResult -> + parsingResultFactory.parseFilesInThreads(files, config.performance.numOfThreads) { parseResult -> for (labeledResult in branch.process(parseResult)) { storage.store(labeledResult) } + progressBar.step() } } } + progressBar.close() } println("Done!") } From 08048e624276a890275b83502fbdb8a6960f778e Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 5 Aug 2021 21:53:10 +0300 Subject: [PATCH 294/308] code style fixes --- src/main/kotlin/astminer/common/model/ParsingResultModel.kt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt index 7589476e..bd84ea95 100644 --- a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -1,7 +1,6 @@ package astminer.common.model import astminer.parse.ParsingException -import me.tongfei.progressbar.ProgressBar import mu.KotlinLogging import java.io.File import kotlin.concurrent.thread From 10cd05589edc53768ed22777775560002df39718 Mon Sep 17 00:00:00 2001 From: illided Date: Thu, 5 Aug 2021 22:07:56 +0300 Subject: [PATCH 295/308] performance config added --- .../kotlin/astminer/config/PerformanceConfig.kt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 src/main/kotlin/astminer/config/PerformanceConfig.kt diff --git 
a/src/main/kotlin/astminer/config/PerformanceConfig.kt b/src/main/kotlin/astminer/config/PerformanceConfig.kt new file mode 100644 index 00000000..64b0c869 --- /dev/null +++ b/src/main/kotlin/astminer/config/PerformanceConfig.kt @@ -0,0 +1,17 @@ +package astminer.config + +import kotlinx.serialization.Serializable + +private const val DEFAULT_NUM_OF_THREADS = 16 + +/** + * Config which defines various performance tweaks + * (for example number of threads). Can be left blank, + * then default config will be used. + * **/ +@Serializable +data class PerformanceConfig( + val numOfThreads: Int +) + +val defaultPerformanceConfig = PerformanceConfig(DEFAULT_NUM_OF_THREADS) From e2000da8c9c1f6293874974572273d0d5b76234c Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 6 Aug 2021 13:29:47 +0300 Subject: [PATCH 296/308] some little improvements --- .../astminer/common/model/ParsingResultModel.kt | 3 ++- .../kotlin/astminer/config/PerformanceConfig.kt | 17 ----------------- .../kotlin/astminer/config/PipelineConfig.kt | 11 +++++++++-- src/main/kotlin/astminer/pipeline/Pipeline.kt | 6 +++--- ...Test.kt => PipelineMultiThreadStressTest.kt} | 2 +- 5 files changed, 15 insertions(+), 24 deletions(-) delete mode 100644 src/main/kotlin/astminer/config/PerformanceConfig.kt rename src/test/kotlin/astminer/pipeline/{PipelineAsyncStressTest.kt => PipelineMultiThreadStressTest.kt} (99%) diff --git a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt index bd84ea95..7eb4c559 100644 --- a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -4,6 +4,7 @@ import astminer.parse.ParsingException import mu.KotlinLogging import java.io.File import kotlin.concurrent.thread +import kotlin.math.ceil private val logger = KotlinLogging.logger("HandlerFactory") @@ -35,7 +36,7 @@ interface ParsingResultFactory { val threads = mutableListOf() 
synchronized(results) { - files.chunked(files.size / numOfThreads + 1).filter { it.isNotEmpty() } + files.chunked(ceil(files.size.toDouble() / numOfThreads).toInt()).filter { it.isNotEmpty() } .map { chunk -> threads.add(thread { results.addAll(parseFiles(chunk, action)) }) } diff --git a/src/main/kotlin/astminer/config/PerformanceConfig.kt b/src/main/kotlin/astminer/config/PerformanceConfig.kt deleted file mode 100644 index 64b0c869..00000000 --- a/src/main/kotlin/astminer/config/PerformanceConfig.kt +++ /dev/null @@ -1,17 +0,0 @@ -package astminer.config - -import kotlinx.serialization.Serializable - -private const val DEFAULT_NUM_OF_THREADS = 16 - -/** - * Config which defines various performance tweaks - * (for example number of threads). Can be left blank, - * then default config will be used. - * **/ -@Serializable -data class PerformanceConfig( - val numOfThreads: Int -) - -val defaultPerformanceConfig = PerformanceConfig(DEFAULT_NUM_OF_THREADS) diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt index 61489324..c05165f2 100644 --- a/src/main/kotlin/astminer/config/PipelineConfig.kt +++ b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -2,6 +2,7 @@ package astminer.config import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable +import kotlinx.serialization.SerializationException /** * Config which defines the pipeline @@ -15,5 +16,11 @@ data class PipelineConfig( val filters: List = emptyList(), @SerialName("label") val labelExtractor: LabelExtractorConfig, val storage: StorageConfig, - val performance: PerformanceConfig = defaultPerformanceConfig -) + val numOfThreads: Int = 1 +) { + init { + if (numOfThreads <= 0) { + throw SerializationException("Number of threads must be a positive integer") + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 7ca0fd75..3b3d0d3b 
100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -45,12 +45,12 @@ class Pipeline(private val config: PipelineConfig) { * Runs the pipeline that is defined in the [config]. */ fun run() { - println("Working in ${config.performance.numOfThreads}") + println("Working in ${config.numOfThreads} thread(s)") for (language in config.parser.languages) { println("Parsing $language") val parsingResultFactory = getParsingResultFactory(language, config.parser.name) - println("Files collecting...") + println("Collecting files...") val files = getProjectFilesWithExtension(inputDirectory, language.fileExtension) println("${files.size} files retrieved") @@ -58,7 +58,7 @@ class Pipeline(private val config: PipelineConfig) { createStorage(language).use { storage -> synchronized(storage) { - parsingResultFactory.parseFilesInThreads(files, config.performance.numOfThreads) { parseResult -> + parsingResultFactory.parseFilesInThreads(files, config.numOfThreads) { parseResult -> for (labeledResult in branch.process(parseResult)) { storage.store(labeledResult) } diff --git a/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt b/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt similarity index 99% rename from src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt rename to src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt index 3cc16e6b..aa53c287 100644 --- a/src/test/kotlin/astminer/pipeline/PipelineAsyncStressTest.kt +++ b/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt @@ -9,7 +9,7 @@ import java.io.File import java.io.FileReader import kotlin.test.assertEquals -class PipelineAsyncStressTest { +class PipelineMultiThreadStressTest { @Test fun jsonStorageTest() { val outputPath = tempOutputDir.resolve("json").path From 6c8ba43d58d9a2ae4514cf1f21187cb288d30a94 Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 6 Aug 2021 13:32:50 +0300 Subject: [PATCH 297/308] 
num of thread usage added to configs --- configs/antlr_java_js_ast.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configs/antlr_java_js_ast.yaml b/configs/antlr_java_js_ast.yaml index 8fe2b2d9..f032a01d 100644 --- a/configs/antlr_java_js_ast.yaml +++ b/configs/antlr_java_js_ast.yaml @@ -22,3 +22,7 @@ label: # save to disk ASTs in the JSON format storage: name: json AST + +# number of threads used for parsing +# the default is one thread +numOfThreads: 4 \ No newline at end of file From 56b69290f5d15ef4830d099b1f9ed4cffdc14c3a Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 6 Aug 2021 13:34:22 +0300 Subject: [PATCH 298/308] final new lines added --- src/main/kotlin/astminer/config/PipelineConfig.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt index c05165f2..c0f78f54 100644 --- a/src/main/kotlin/astminer/config/PipelineConfig.kt +++ b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -23,4 +23,4 @@ data class PipelineConfig( throw SerializationException("Number of threads must be a positive integer") } } -} \ No newline at end of file +} From d5c1e4967ae59d7c714c979fb42e3f1cff26030b Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 6 Aug 2021 13:59:50 +0300 Subject: [PATCH 299/308] little improvements in config and docs --- configs/antlr_java_js_ast.yaml | 4 +--- docs/storages.md | 8 +++++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/configs/antlr_java_js_ast.yaml b/configs/antlr_java_js_ast.yaml index abacc464..8fe2b2d9 100644 --- a/configs/antlr_java_js_ast.yaml +++ b/configs/antlr_java_js_ast.yaml @@ -21,6 +21,4 @@ label: # save to disk ASTs in the JSON format storage: - name: code2seq - length: 9 - width: 2 + name: json AST diff --git a/docs/storages.md b/docs/storages.md index 058bd426..fd9a74bc 100644 --- a/docs/storages.md +++ b/docs/storages.md @@ -3,6 +3,11 @@ The storage defines how the ASTs should 
be saved on disk. For now, `astminer` support tree-based and path-based storage formats. +`Astminer` also knows how to find the structure of the dataset and can +save trees or path contexts in the appropriate holdout folders. (`train`, `val` and `test`). If the data is not structured, +all trees will be saved in the `data` folder. Description files for trees or paths will be +saved along with holdouts in the same `outputPath` directory. + Storage config classes are defined in [StorageConfigs.kt](../src/main/kotlin/astminer/config/StorageConfigs.kt). ## Tree formats @@ -47,6 +52,7 @@ Extract paths from each AST. Output is 4 files: 2. `tokens.csv` contains numeric ids and corresponding tokens; 3. `paths.csv` contains numeric ids and AST paths in form of space-separated sequences of node type ids; 4. `path_contexts.c2s` contains the labels and sequences of path-contexts (each representing two tokens and a path between them). + This file will be generated for every holdout. Each line in `path_contexts.c2s` starts with a label, followed by a sequence of space-separated triples. Each triple contains start token id, path id, end token id, separated with commas. @@ -63,7 +69,7 @@ Each line in `path_contexts.c2s` starts with a label, followed by a sequence of ### Code2seq Extract paths from each AST and save in the code2seq format. -The output is `path_context.c2s` file. +The output is `path_context.c2s` file, which will be generated for every holdout. Each line starts with a label, followed by a sequence of space-separated triples. Each triple contains the start token, path node types, and end token id, separated with commas. 
From b7eb5f31d7f5d7607335041d747db17e80d754ca Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 6 Aug 2021 15:47:48 +0300 Subject: [PATCH 300/308] conflicts resolved and some refactoring done --- .../common/model/ParsingResultModel.kt | 2 + src/main/kotlin/astminer/pipeline/Pipeline.kt | 60 ++++++++++++------- .../pipeline/PipelineMultiThreadStressTest.kt | 6 +- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt index 7eb4c559..4895a3be 100644 --- a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -35,6 +35,8 @@ interface ParsingResultFactory { val results = mutableListOf() val threads = mutableListOf() + if (files.isEmpty()) {return emptyList()} + synchronized(results) { files.chunked(ceil(files.size.toDouble() / numOfThreads).toInt()).filter { it.isNotEmpty() } .map { chunk -> diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index 3febb874..c6cd0520 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -1,10 +1,7 @@ package astminer.pipeline import astminer.common.getProjectFilesWithExtension -import astminer.common.model.FileLabelExtractor -import astminer.common.model.FunctionLabelExtractor -import astminer.common.model.Storage -import astminer.common.model.findDatasetHoldouts +import astminer.common.model.* import astminer.config.FileExtension import astminer.config.PipelineConfig import astminer.parse.getParsingResultFactory @@ -12,6 +9,7 @@ import astminer.pipeline.branch.FilePipelineBranch import astminer.pipeline.branch.FunctionPipelineBranch import astminer.pipeline.branch.IllegalLabelExtractorException import me.tongfei.progressbar.ProgressBar +import java.io.Closeable import java.io.File /** @@ -25,6 +23,9 @@ class 
Pipeline(private val config: PipelineConfig) { private val filters = config.filters.map { it.filterImpl } private val labelExtractor = config.labelExtractor.labelExtractorImpl + private val holdoutMap = findDatasetHoldouts(inputDirectory) + private val isDataset = holdoutMap.size > 1 + private val branch = when (labelExtractor) { is FileLabelExtractor -> FilePipelineBranch(filters, labelExtractor) is FunctionLabelExtractor -> FunctionPipelineBranch(filters, labelExtractor) @@ -42,31 +43,46 @@ class Pipeline(private val config: PipelineConfig) { return config.storage.createStorage(storagePath) } + private fun T.useSynchronously(callback: (T) -> R) = + this.use { + synchronized(this) { + callback(this) + } + } + + + private fun parseLanguage(language: FileExtension) { + val parsingResultFactory = getParsingResultFactory(language, config.parser.name) + createStorage(language).useSynchronously { storage -> + for ((holdoutType, holdoutDir) in holdoutMap) { + val holdoutFiles = getProjectFilesWithExtension(holdoutDir, language.fileExtension) + printHoldoutStat(holdoutFiles, holdoutType) + val progressBar = ProgressBar("", holdoutFiles.size.toLong()) + parsingResultFactory.parseFilesInThreads(holdoutFiles, config.numOfThreads) { parseResult -> + val labeledResults = branch.process(parseResult) + storage.store(labeledResults, holdoutType) + progressBar.step() + } + progressBar.close() + } + } + } + + private fun printHoldoutStat(files: List, holdoutType: DatasetHoldout) { + var output = "${files.size} file(s) found" + if (isDataset) { output += " in ${holdoutType.name}" } + println(output) + } + /** * Runs the pipeline that is defined in the [config]. 
*/ fun run() { println("Working in ${config.numOfThreads} thread(s)") - val holdouts = findDatasetHoldouts(inputDirectory) + if (isDataset) { println("Dataset structure found") } for (language in config.parser.languages) { println("Parsing $language") - val parsingResultFactory = getParsingResultFactory(language, config.parser.name) - - val progressBar = ProgressBar("", files.size.toLong()) - - createStorage(language).use { storage -> - for ((holdoutType, holdoutDir) in holdouts) { - synchronized(storage) { - val holdoutFiles = getProjectFilesWithExtension(holdoutDir, language.fileExtension) - parsingResultFactory.parseFilesInThreads(files, config.numOfThreads) { parseResult -> - for (labeledResult in branch.process(parseResult)) { - storage.store(labeledResult, holdoutType) - } - progressBar.step() - } - } } - } - progressBar.close() + parseLanguage(language) } println("Done!") } diff --git a/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt b/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt index aa53c287..0bbff239 100644 --- a/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt +++ b/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt @@ -26,7 +26,7 @@ class PipelineMultiThreadStressTest { ) Pipeline(config).run() val expectedNumOfAst = numOfFiles * numOfMethods - val actualNumOfAst = countLines("$outputPath/java/asts.jsonl") + val actualNumOfAst = countLines("$outputPath/java/data/asts.jsonl") assertEquals(expected = expectedNumOfAst.toLong(), actual = actualNumOfAst) } @@ -52,7 +52,7 @@ class PipelineMultiThreadStressTest { ) Pipeline(config).run() val expectedNumOfPathContexts = numOfFiles * numOfMethods - val actualNumOfPathContexts = countLines("$outputPath/java/path_contexts.c2s") + val actualNumOfPathContexts = countLines("$outputPath/java/data/path_contexts.c2s") assertEquals(expected = expectedNumOfPathContexts.toLong(), actual = actualNumOfPathContexts) } @@ -76,7 +76,7 @@ class 
PipelineMultiThreadStressTest { ) Pipeline(config).run() val expectedNumOfPathContexts = numOfFiles * numOfMethods - val actualNumOfPathContexts = countLines("$outputPath/java/path_contexts.c2s") + val actualNumOfPathContexts = countLines("$outputPath/java/data/path_contexts.c2s") assertEquals(expected = expectedNumOfPathContexts.toLong(), actual = actualNumOfPathContexts) } From 175d7a21a9710f8e28aaa1f786b8a4d405f8139d Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 6 Aug 2021 15:48:55 +0300 Subject: [PATCH 301/308] code style issues fixed --- src/main/kotlin/astminer/pipeline/Pipeline.kt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index c6cd0520..c663f9f0 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -50,7 +50,6 @@ class Pipeline(private val config: PipelineConfig) { } } - private fun parseLanguage(language: FileExtension) { val parsingResultFactory = getParsingResultFactory(language, config.parser.name) createStorage(language).useSynchronously { storage -> From 2e30fcf57b2e951c5f52a9383e5acf83b742a48e Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 6 Aug 2021 15:54:28 +0300 Subject: [PATCH 302/308] spacing around curly --- src/main/kotlin/astminer/common/model/ParsingResultModel.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt index 4895a3be..b4652590 100644 --- a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -35,7 +35,7 @@ interface ParsingResultFactory { val results = mutableListOf() val threads = mutableListOf() - if (files.isEmpty()) {return emptyList()} + if (files.isEmpty()) { return emptyList() } synchronized(results) { files.chunked(ceil(files.size.toDouble() / 
numOfThreads).toInt()).filter { it.isNotEmpty() } From f1400e179ec30f4750e281bc751cc71305a7c566 Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 6 Aug 2021 17:46:14 +0300 Subject: [PATCH 303/308] exception type changed --- .../kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt index 1b6d5a5a..9c44c0d9 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt @@ -20,7 +20,7 @@ class GumTreePythonParser : Parser { override fun parseInputStream(content: InputStream): GumTreeNode = try { val context = PythonTreeGenerator().generate(InputStreamReader(content)) wrapGumTreeNode(context) - } catch (e: SyntaxException) { + } catch (e: RuntimeException) { throw ParsingException("GumTree", "Python", e) } catch (e: IOException) { throw ParserNotInstalledException("Gumtree", "Python", e) From a6b1ad5711993893850f2e3712fda9b7a952f125 Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 6 Aug 2021 18:24:39 +0300 Subject: [PATCH 304/308] unused import removed --- .../kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt index 9c44c0d9..1af64aca 100644 --- a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt @@ -5,7 +5,6 @@ import astminer.common.model.ParserNotInstalledException import astminer.parse.ParsingException import astminer.parse.gumtree.GumTreeNode import com.github.gumtreediff.client.Run -import com.github.gumtreediff.gen.SyntaxException import 
com.github.gumtreediff.gen.python.PythonTreeGenerator import com.github.gumtreediff.tree.TreeContext import java.io.IOException From de26ca0484832c4c89db554d0f7e8c562a1af921 Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 6 Aug 2021 20:32:17 +0300 Subject: [PATCH 305/308] small improvements and test fix --- .../astminer/common/model/PipelineModel.kt | 13 +++++++++++++ src/main/kotlin/astminer/pipeline/Pipeline.kt | 17 +++++------------ .../pipeline/PipelineMultiThreadStressTest.kt | 9 ++++++--- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/main/kotlin/astminer/common/model/PipelineModel.kt b/src/main/kotlin/astminer/common/model/PipelineModel.kt index 725295cb..476f81f3 100644 --- a/src/main/kotlin/astminer/common/model/PipelineModel.kt +++ b/src/main/kotlin/astminer/common/model/PipelineModel.kt @@ -43,11 +43,24 @@ interface Storage : Closeable { fun store(labeledResult: LabeledResult, holdout: DatasetHoldout = DatasetHoldout.None) + fun storeSynchronously(labeledResult: LabeledResult, holdout: DatasetHoldout = DatasetHoldout.None) { + synchronized(this) { + store(labeledResult, holdout) + } + } + fun store(labeledResults: Iterable>, holdout: DatasetHoldout = DatasetHoldout.None) { for (labeledResult in labeledResults) { store(labeledResult, holdout) } } + + fun storeSynchronously( + labeledResults: Iterable>, + holdout: DatasetHoldout = DatasetHoldout.None + ) = synchronized(this) { + store(labeledResults, holdout) + } } enum class DatasetHoldout(val dirName: String) { diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index c663f9f0..bb8901c8 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -43,23 +43,16 @@ class Pipeline(private val config: PipelineConfig) { return config.storage.createStorage(storagePath) } - private fun T.useSynchronously(callback: (T) -> R) = - this.use { - synchronized(this) { - callback(this) - } 
- } - private fun parseLanguage(language: FileExtension) { val parsingResultFactory = getParsingResultFactory(language, config.parser.name) - createStorage(language).useSynchronously { storage -> + createStorage(language).use { storage -> for ((holdoutType, holdoutDir) in holdoutMap) { val holdoutFiles = getProjectFilesWithExtension(holdoutDir, language.fileExtension) printHoldoutStat(holdoutFiles, holdoutType) val progressBar = ProgressBar("", holdoutFiles.size.toLong()) parsingResultFactory.parseFilesInThreads(holdoutFiles, config.numOfThreads) { parseResult -> val labeledResults = branch.process(parseResult) - storage.store(labeledResults, holdoutType) + storage.storeSynchronously(labeledResults, holdoutType) progressBar.step() } progressBar.close() @@ -68,9 +61,9 @@ class Pipeline(private val config: PipelineConfig) { } private fun printHoldoutStat(files: List, holdoutType: DatasetHoldout) { - var output = "${files.size} file(s) found" - if (isDataset) { output += " in ${holdoutType.name}" } - println(output) + val output = StringBuilder("${files.size} file(s) found") + if (isDataset) { output.append(" in ${holdoutType.name}") } + println(output.toString()) } /** diff --git a/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt b/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt index 0bbff239..114b03f7 100644 --- a/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt +++ b/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt @@ -22,7 +22,8 @@ class PipelineMultiThreadStressTest { ), filters = listOf(), labelExtractor = FunctionNameExtractorConfig(), - storage = JsonAstStorageConfig() + storage = JsonAstStorageConfig(), + numOfThreads = 8 ) Pipeline(config).run() val expectedNumOfAst = numOfFiles * numOfMethods @@ -48,7 +49,8 @@ class PipelineMultiThreadStressTest { maxPathContextsPerEntity = null, maxPathLength = 1000, maxPathWidth = 1000 - ) + ), + numOfThreads = 8 ) Pipeline(config).run() val 
expectedNumOfPathContexts = numOfFiles * numOfMethods @@ -72,7 +74,8 @@ class PipelineMultiThreadStressTest { maxPathContextsPerEntity = null, maxPathLength = 1000, maxPathWidth = 1000 - ) + ), + numOfThreads = 8 ) Pipeline(config).run() val expectedNumOfPathContexts = numOfFiles * numOfMethods From d30f321c28b14934fca892fe5c19ee2cb46776ac Mon Sep 17 00:00:00 2001 From: illided Date: Fri, 6 Aug 2021 20:38:23 +0300 Subject: [PATCH 306/308] unused import removed --- src/main/kotlin/astminer/pipeline/Pipeline.kt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt index bb8901c8..0ba4468b 100644 --- a/src/main/kotlin/astminer/pipeline/Pipeline.kt +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -9,7 +9,6 @@ import astminer.pipeline.branch.FilePipelineBranch import astminer.pipeline.branch.FunctionPipelineBranch import astminer.pipeline.branch.IllegalLabelExtractorException import me.tongfei.progressbar.ProgressBar -import java.io.Closeable import java.io.File /** From e8e98e730e5c5774063e390e1799aead254c4b54 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Tue, 10 Aug 2021 18:50:37 +0500 Subject: [PATCH 307/308] Prepare new release --- .github/workflows/build.yml | 24 +----------------------- .space.kts | 16 ++++------------ build.gradle.kts | 2 +- 3 files changed, 6 insertions(+), 36 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 672f2184..ca39f53f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -40,17 +40,6 @@ jobs: - name: Build astminer run: ./gradlew build - test: - needs: build - runs-on: ubuntu-latest - container: voudy/astminer - - steps: - - uses: actions/checkout@v2 - - - name: Run JUnit tests - run: ./gradlew test - - name: Upload Test Report uses: actions/upload-artifact@v2 if: ${{ always() }} @@ -58,18 +47,7 @@ jobs: name: test-report path: build/astminer/reports/tests/**/* - code-style: - 
needs: build - runs-on: ubuntu-latest - container: voudy/astminer - - steps: - - uses: actions/checkout@v2 - - - name: Run detekt - run: ./gradlew detekt - - - name: Upload Report + - name: Upload Detekt Report uses: github/codeql-action/upload-sarif@v1 if: ${{ always() }} with: diff --git a/.space.kts b/.space.kts index 8b404a58..cde5a790 100644 --- a/.space.kts +++ b/.space.kts @@ -1,17 +1,9 @@ -job("Test") { - container(image="voudy/astminer") { - shellScript { - content = """ - ./gradlew test - """ - } - } -} - job("Release") { startOn { gitPush { - enabled = false + branchFilter { + +"refs/tags/*" + } } } @@ -25,4 +17,4 @@ job("Release") { """ } } -} \ No newline at end of file +} diff --git a/build.gradle.kts b/build.gradle.kts index bab1143c..8aaf0ab7 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,7 +1,7 @@ import tanvd.kosogor.proxy.shadowJar group = "io.github.vovak" -version = "0.6.4" +version = "0.7.0" plugins { id("java") From 2c4179d25332e47ec5a50d20df81c1de494ce055 Mon Sep 17 00:00:00 2001 From: Egor Spirin Date: Tue, 10 Aug 2021 18:57:17 +0500 Subject: [PATCH 308/308] Update version in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3a3b32d4..73e4200f 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![JetBrains Research](https://jb.gg/badges/research.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub) -![astminer version](https://img.shields.io/badge/astminer-v0.6.4-blue) +![astminer version](https://img.shields.io/badge/astminer-v0.7.0-blue) # `astminer` A library for mining of [path-based representations of code](https://arxiv.org/pdf/1803.09544.pdf) and more,