diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 29c651f9..00000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,66 +0,0 @@ -version: 2.1 -jobs: - # build with machine executor - build: - machine: - enabled: true - working_directory: ~/astminer - steps: - - checkout: - path: ~/astminer - - run: ./gradlew build --stacktrace - - # release - release: - machine: - enabled: true - working_directory: ~/astminer - environment: - GIT_BRANCH: << pipeline.git.branch >> - steps: - - checkout: - path: ~/astminer - - run: ./gradlew bintrayUpload "-PbranchName=$GIT_BRANCH" - - release-dev: - machine: - enabled: true - working_directory: ~/astminer - environment: - CI_VERSION: << pipeline.number >> - GIT_BRANCH: << pipeline.git.branch >> - steps: - - checkout: - path: ~/astminer - - run: ./gradlew bintrayUpload "-PciVersion=1.$CI_VERSION" "-PbranchName=$GIT_BRANCH" - - -workflows: - version: 2 - - # release with manual approval in CircleCI app - deploy-library: - jobs: - - build - - approve-release: - type: approval - requires: - - build - filters: - branches: - only: - - master - - release: - requires: - - approve-release - filters: - branches: - only: - - master - - release-dev: - requires: - - build - filters: - branches: - only: - - master-dev diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..ca39f53f --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,77 @@ +name: Build with lint + +on: [push, pull_request] + +jobs: + setup: + runs-on: ubuntu-latest + container: voudy/astminer + + steps: + - uses: actions/checkout@v2 + - name: Cache Gradle dependencies + uses: actions/cache@v2 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} + restore-keys: | + ${{ runner.os }}-gradle- + + build: + needs: setup + runs-on: ubuntu-latest + container: voudy/astminer + + steps: + - uses: 
actions/checkout@v2 + + - name: Cache Gradle dependencies + uses: actions/cache@v2 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} + restore-keys: | + ${{ runner.os }}-gradle- + + - name: Build astminer + run: ./gradlew build + + - name: Upload Test Report + uses: actions/upload-artifact@v2 + if: ${{ always() }} + with: + name: test-report + path: build/astminer/reports/tests/**/* + + - name: Upload Detekt Report + uses: github/codeql-action/upload-sarif@v1 + if: ${{ always() }} + with: + sarif_file: build/astminer/reports/detekt/detekt.sarif + + run-on-configs: + needs: build + runs-on: ubuntu-latest + container: voudy/astminer + + steps: + - uses: actions/checkout@v2 + + - name: Prepare shadowJar + run: ./gradlew shadowJar + + - name: Run antlr_java_js_ast.yaml + run: java -jar build/shadow/astminer.jar configs/antlr_java_js_ast.yaml + + - name: Run antlr_python_paths.yaml + run: java -jar build/shadow/astminer.jar configs/antlr_python_paths.yaml + + - name: Run gumtree_java_ast.yaml + run: java -jar build/shadow/astminer.jar configs/gumtree_java_ast.yaml + + - name: Run gumtree_java_function_names_paths.yaml + run: java -jar build/shadow/astminer.jar configs/gumtree_java_function_names_paths.yaml diff --git a/.gitignore b/.gitignore index 3e0eeee6..e454121f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,12 @@ -*.iml .idea/ .gradle/ -examples/out/ +.DS_Store + +src/main/generated/ +build/ + +examples_output/ +output/ +*.csv +log.txt + diff --git a/.space.kts b/.space.kts new file mode 100644 index 00000000..cde5a790 --- /dev/null +++ b/.space.kts @@ -0,0 +1,20 @@ +job("Release") { + startOn { + gitPush { + branchFilter { + +"refs/tags/*" + } + } + } + + container(image="voudy/astminer") { + env["PUBLISH_USER"] = Secrets("publish_user") + env["PUBLISH_PASSWORD"] = Secrets("publish_password") + + shellScript { + content = """ + ./gradlew test publish + """ 
+ } + } +} diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..f94f32dd --- /dev/null +++ b/Dockerfile @@ -0,0 +1,27 @@ +FROM ubuntu:20.04 + +LABEL desc="Docker container to run ASTMiner with all preinstalled requirements" + +# Install OpenJDK 11 +RUN apt-get update && apt-get install -y openjdk-11-jdk + +# Install G++ (required for Fuzzy parser) +RUN apt-get update && apt-get install -y g++ + +# Install PythonParser for GumTree +ARG PYTHONPARSER_REPO=https://raw.githubusercontent.com/JetBrains-Research/pythonparser/master +RUN apt-get update && \ + apt-get install -y python3.8 python3-pip git wget && \ + mkdir pythonparser && \ + cd pythonparser && \ + wget $PYTHONPARSER_REPO/requirements.txt && \ + wget $PYTHONPARSER_REPO/src/main/python/pythonparser/pythonparser_3.py -O pythonparser && \ + pip3 install -r requirements.txt && \ + chmod +x pythonparser +ENV PATH="/pythonparser:${PATH}" + +# Copy astminer shadow jar +WORKDIR astminer +COPY ./build/shadow/astminer.jar . + +ENTRYPOINT ["java", "-jar", "astminer.jar"] diff --git a/README.md b/README.md index 01b07bee..73e4200f 100644 --- a/README.md +++ b/README.md @@ -1,145 +1,109 @@ [![JetBrains Research](https://jb.gg/badges/research.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub) -[![CircleCI](https://circleci.com/gh/JetBrains-Research/astminer.svg?style=svg)](https://circleci.com/gh/JetBrains-Research/astminer) -[ ![Download](https://api.bintray.com/packages/egor-bogomolov/astminer/astminer/images/download.svg) ](https://bintray.com/egor-bogomolov/astminer/astminer/_latestVersion) +![astminer version](https://img.shields.io/badge/astminer-v0.7.0-blue) -# astminer -A library for mining of [path-based representations of code](https://arxiv.org/pdf/1803.09544.pdf) and more, supported by the [Machine Learning Methods for Software Engineering](https://research.jetbrains.org/groups/ml_methods) group at [JetBrains Research](https://research.jetbrains.org).
+# `astminer` +A library for mining of [path-based representations of code](https://arxiv.org/pdf/1803.09544.pdf) and more, +supported by the +[Machine Learning Methods for Software Engineering](https://research.jetbrains.org/groups/ml_methods) +group at [JetBrains Research](https://research.jetbrains.org). Supported languages of the input: -- [x] Java -- [x] Python -- [x] C/C++ -- [x] Javascript (beta) (see [issue](https://github.com/vovak/astminer/issues/22)) +| | Java | Python | C/C++ | JavaScript | PHP | +|---------|------|--------|-------|------------|-----| +| ANTLR | ✅ | ✅ | | ✅ | ✅ | +| GumTree | ✅ | ✅ | | | | +| Fuzzy | | | ✅ | | | -### Version history - -See [changelog](changelog.md) ## About -Astminer was first implemented as a part of pipeline in the [the code style extraction project](https://arxiv.org/abs/2002.03997) and later converted into a reusable tool. +`astminer` was first implemented as a part of the pipeline in the +[code style extraction project](https://arxiv.org/abs/2002.03997) and later converted into a reusable tool. -Currently it supports extraction of: -* Path-based representations of files -* Path-based representations of methods -* Raw ASTs +Currently, it supports extraction of: +* Path-based representations of files/methods +* Raw ASTs of files/methods -Supported languages are Java, Python, C/C++, but it is designed to be very easily extensible. +It is designed to be very easily extensible to new languages. -For the output format, see the section below. +`astminer` lets you create an end-to-end pipeline for processing code for machine learning models. +It allows you to convert source code cloned from VCS to formats suitable for training. +To achieve that, `astminer` incorporates the following processing modules: +- [Filters](./docs/filters.md) to remove redundant samples from data. +- [Label extractors](./docs/label_extractors.md) to create a label for each tree. +- [Storages](./docs/storages.md) to define the storage format.
## Usage +There are two ways to use `astminer`: -### Use as CLI - -1. Run `./gradlew shadowJar` in project directory -2. Now you can use shell script to run cli `./cli.sh optionName parameters`, where `optionName` is one of the following options: - -#### Preprocess +- [As a standalone CLI tool](#using-astminer-cli) with a pre-implemented logic for common processing and mining tasks. +- [Integrated](#using-astminer-as-a-dependency) into your Kotlin/Java mining pipelines as a Gradle dependency. -Run preprocessing on C/C++ project to unfold `#define` directives. -In other tasks, if you feed C/C++ file with macroses, they will be dropped as well as their appearances in code. -```shell script -./cli.sh preprocess --project path/to/project --output path/to/preprocessedProject -``` -#### Parse +### Using `astminer` cli -Extract ASTs from all the files in supported languages. -```shell script -./cli.sh parse --lang py,java,c,cpp --project path/to/project --output path/to/result --storage dot +Specify a config (see examples in [configs](./configs) directory) and pass it to the shell script: +```shell +./cli.sh ``` -#### PathContexts - -Extract path contexts from all the files in supported languages and store in form `fileName triplesOfPathContexts`. -```shell script -./cli.sh pathContexts --lang py,java,c,cpp --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P -``` +For details on CLI configuration, see [docs/cli](./docs/cli.md). -#### Code2vec - -Extract data suitable as input for [code2vec](https://github.com/tech-srl/code2vec) model. -Parse all files written in specified language into ASTs, split into methods, and store in form `method|name triplesOfPathContexts`. 
-```shell script -./cli.sh code2vec --lang py,java,c,cpp --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P --split-tokens --granularity method -``` - -### Integrate in your mining pipeline +### Using `astminer` as a dependency #### Import -Astminer is available in [Bintray repo](https://bintray.com/egor-bogomolov/astminer/astminer). You can add the dependency in your `build.gradle` file: +`astminer` is available in the JetBrains Space package repository. You can add the dependency in your `build.gradle` file: ``` repositories { maven { - url "https://dl.bintray.com/egor-bogomolov/astminer" + url "https://packages.jetbrains.team/maven/p/astminer/astminer" } } dependencies { - compile 'io.github.vovak.astminer:astminer:0.6' + implementation 'io.github.vovak:astminer:' } ``` If you use `build.gradle.kts`: ``` repositories { - maven(url = "https://dl.bintray.com/egor-bogomolov/astminer/") + maven(url = uri("https://packages.jetbrains.team/maven/p/astminer/astminer")) } dependencies { - compile("io.github.vovak.astminer", "astminer", "0.6") + implementation("io.github.vovak:astminer:") } ``` -#### Examples - -If you want to use astminer as a library in your Java/Kotlin based data mining tool, check the following examples: +#### Local development -* A few [simple usage examples](src/main/kotlin/astminer/examples) can be run with `./gradlew run`. - -* A somewhat more verbose [example of usage in Java](src/main/kotlin/astminer/examples/AllJavaFiles.kt) is available as well. - -Please consider trying Kotlin for your data mining pipelines: from our experience, it is much better suited for data collection and transformation instruments. - -### Output format - -For path-based representations, astminer supports two output formats. In both of them, we store 4 `.csv` files: -1. 
`node_types.csv` contains numeric ids and corresponding node types with directions (up/down, as described in [paper](https://arxiv.org/pdf/1803.09544.pdf)); -2. `tokens.csv` contains numeric ids and corresponding tokens; -3. `paths.csv` contains numeric ids and AST paths in form of space-separated sequences of node type ids; -4. `path_contexts.csv` contains labels and sequences of path contexts (triples of two tokens and a path between them). - -If the replica of [code2vec](https://github.com/tech-srl/code2vec) format is used, each line in `path_contexts.csv` starts with a label, -then it contains a sequence of space-separated triples. Each triple contains start token id, path id, end token id, separated with commas. - -If csv format is used, each line in `path_contexts.csv` contains label, then comma, then a sequence of `;`-separated triples. -Each triple contains start token id, path id, end token id, separated with spaces. +To use a specific version of the library, navigate to the required branch and build a local version of `astminer`: +```shell +./gradlew publishToMavenLocal +``` +After that, add `mavenLocal()` into the `repositories` section in your gradle configuration. -## Other languages +#### Examples -Support for a new programming language can be implemented in a few simple steps. +If you want to use `astminer` as a library in your Java/Kotlin-based data mining tool, check the following: -If there is an ANTLR grammar for the language: -1. Add the corresponding [ANTLR4 grammar file](https://github.com/antlr/grammars-v4) to the `antlr` directory; -2. Run the `generateGrammarSource` Gradle task to generate the parser; -3. Implement a small wrapper around the generated parser. -See [JavaParser](src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt) or [PythonParser](src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt) for an example of a wrapper. +* A few simple [examples](src/examples) of using `astminer` in Java and Kotlin. 
+* Using `astminer` as a part of another mining tool — [psiminer](https://github.com/JetBrains-Research/psiminer). -If the language has a parsing tool that is available as Java library: -1. Add the library as a dependency in [build.gradle.kts](/build.gradle.kts); -2. Implement a wrapper for the parsing tool. -See [FuzzyCppParser](src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt) for an example of a wrapper. +Please consider trying Kotlin for your data mining pipelines: from our experience, it is much better suited for data collection and transformation instruments than Java. ## Contribution -We believe that astminer could find use beyond our own mining tasks. -Please help make astminer easier to use by sharing your use cases. Pull requests are welcome as well. +We believe that `astminer` could find use beyond our own mining tasks. + +Please help make `astminer` easier to use by sharing your use cases. Pull requests are welcome as well. Support for other languages and documentation are the key areas of improvement. ## Citing astminer -A [paper](https://zenodo.org/record/2595271) dedicated to astminer (more precisely, to its older version [PathMiner](https://github.com/vovak/astminer/tree/pathminer)) was presented at [MSR'19](https://2019.msrconf.org/). -If you use astminer in your academic work, please consider citing it. + +A [paper](https://zenodo.org/record/2595271) dedicated to `astminer` (more precisely, to its older version [PathMiner](https://github.com/vovak/astminer/tree/pathminer)) was presented at [MSR'19](https://2019.msrconf.org/). +If you use `astminer` in your academic work, please cite it. 
``` @inproceedings{kovalenko2019pathminer, title={PathMiner: a library for mining of path-based representations of code}, diff --git a/build.gradle.kts b/build.gradle.kts index ae0ab76d..8aaf0ab7 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,84 +1,66 @@ -import tanvd.kosogor.proxy.publishJar import tanvd.kosogor.proxy.shadowJar -import org.jetbrains.kotlin.gradle.tasks.KotlinCompile -group = "io.github.vovak.astminer" - -val branchName: String by project -val ciVersion: String by project - -version = if (project.hasProperty("ciVersion")) { - ciVersion -} else { - "0.6" -} - -println(version) +group = "io.github.vovak" +version = "0.7.0" plugins { id("java") - kotlin("jvm") version "1.3.61" apply true id("antlr") - id("idea") id("application") - id("tanvd.kosogor") version "1.0.6" - id("org.jetbrains.dokka") version "0.9.18" - id("me.champeau.gradle.jmh") version "0.5.0" -} - - -application { - mainClassName = "astminer.MainKt" + id("maven-publish") + id("org.jetbrains.dokka") version "1.4.32" + id("tanvd.kosogor") version "1.0.10" + id("io.gitlab.arturbosch.detekt") version "1.17.1" + kotlin("jvm") version "1.5.21" apply true + kotlin("plugin.serialization") version "1.5.21" } defaultTasks("run") repositories { - mavenLocal() mavenCentral() - jcenter() } - -val generatedSourcesPath = "src/main/generated" - dependencies { + // ===== Parsers ===== antlr("org.antlr:antlr4:4.7.1") - implementation(kotlin("stdlib")) - // https://mvnrepository.com/artifact/com.github.gumtreediff - api("com.github.gumtreediff", "core", "2.1.0") - api("com.github.gumtreediff", "client", "2.1.0") - api("com.github.gumtreediff", "gen.jdt", "2.1.0") - + api("com.github.gumtreediff", "core", "2.1.2") + api("com.github.gumtreediff", "client", "2.1.2") + api("com.github.gumtreediff", "gen.jdt", "2.1.2") + api("com.github.gumtreediff", "gen.python", "2.1.2") // https://mvnrepository.com/artifact/io.shiftleft/fuzzyc2cpg - api("io.shiftleft", "fuzzyc2cpg_2.12", "0.1.74") { - 
exclude("org.slf4j", "slf4j-simple") - } + api("io.shiftleft", "fuzzyc2cpg_2.13", "1.2.30") - testImplementation("junit:junit:4.11") - testImplementation(kotlin("test-junit")) + // ===== Main ===== + implementation(kotlin("stdlib")) + implementation("com.github.ajalt.clikt:clikt:3.2.0") + implementation("com.charleskorn.kaml:kaml:0.33.0") + implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.2.2") - implementation("com.github.ajalt", "clikt", "2.1.0") + // ===== Logging ===== + // https://mvnrepository.com/artifact/org.slf4j/slf4j-simple + implementation("org.slf4j", "slf4j-simple", "1.7.30") + implementation("io.github.microutils:kotlin-logging:1.5.9") - jmhImplementation("org.jetbrains.kotlin:kotlin-reflect:1.3.61") - jmhImplementation("org.openjdk.jmh:jmh-core:1.21") - jmhImplementation("org.openjdk.jmh:jmh-generator-annprocess:1.21") -} + // ===== Test ===== + testImplementation("junit:junit:4.13.2") + testImplementation(kotlin("test-junit")) -val shadowJar = shadowJar { - jar { - archiveName = "lib-$version.jar" - mainClass = "astminer.MainKt" - } -}.apply { - task.archiveClassifier.set("") + // ===== Detekt ===== + detektPlugins("io.gitlab.arturbosch.detekt:detekt-formatting:1.17.1") + + // ==== Status bar ==== + implementation("me.tongfei:progressbar:0.9.2") } +val generatedSourcesPath = "src/main/generated" +sourceSets["main"].java.srcDir(file(generatedSourcesPath)) +idea.module.generatedSourceDirs.add(file(generatedSourcesPath)) tasks.generateGrammarSource { - maxHeapSize = "64m" - arguments = arguments + listOf("-package", "me.vovak.antlr.parser") + // maxHeapSize = "64m" + arguments.addAll(listOf("-package", "me.vovak.antlr.parser")) // Keep a copy of generated sources doLast { println("Copying generated grammar lexer/parser files to main directory.") @@ -100,77 +82,67 @@ tasks.clean { tasks.compileKotlin { dependsOn(tasks.generateGrammarSource) + kotlinOptions.jvmTarget = "11" } tasks.compileJava { dependsOn(tasks.generateGrammarSource) + 
targetCompatibility = "11" + sourceCompatibility = "11" } -configure { - sourceCompatibility = JavaVersion.VERSION_1_8 -} -tasks.withType { - kotlinOptions.jvmTarget = "1.8" -} - -sourceSets["main"].java.srcDir(file(generatedSourcesPath)) - - -idea { - module { - generatedSourceDirs.add(file(generatedSourcesPath)) +publishing { + publications { + create("maven") { + groupId = project.group.toString() + artifactId = project.name + version = project.version.toString() + from(components["java"]) + } } -} - -publishJar { - publication { - artifactId = if (project.hasProperty("branchName")) { - when(branchName) { - "master" -> "astminer" - "master-dev" -> "astminer-dev" - else -> "" + repositories { + maven { + url = uri("https://packages.jetbrains.team/maven/p/astminer/astminer") + credentials { + username = System.getenv("PUBLISH_USER")?.takeIf { it.isNotBlank() } ?: "" + password = System.getenv("PUBLISH_PASSWORD")?.takeIf { it.isNotBlank() } ?: "" } - } else { - "astminer" } } +} - bintray { - - // If username and secretKey not set, will be taken from System environment param `bintray_user`, 'bintray_key' - repository = "astminer" - - info { - githubRepo = "JetBrains-Research/astminer" - vcsUrl = "https://github.com/JetBrains-Research/astminer" - labels.addAll(listOf("mining", "ast", "ml4se", "code2vec", "path-based representations")) - license = "MIT" - description = "Extract AST, AST-related metrics, and path-based representations from source code" - } +application.mainClassName = "astminer.MainKt" +shadowJar { + jar { + archiveName = "astminer.jar" } +}.apply { + task.archiveClassifier.set("") } -tasks.dokka { - outputFormat = "html" - outputDirectory = "$buildDir/javadoc" +tasks.withType { + // Kotlin DSL workaround from https://github.com/gradle/kotlin-dsl-samples/issues/836#issuecomment-384206237 + addTestListener(object : TestListener { + override fun beforeSuite(suite: TestDescriptor) {} + override fun beforeTest(testDescriptor: TestDescriptor) {} + override 
fun afterTest(testDescriptor: TestDescriptor, result: TestResult) {} + override fun afterSuite(suite: TestDescriptor, result: TestResult) { + if (suite.parent == null) { + println( + "${result.resultType} (${result.testCount} tests, ${result.successfulTestCount} successes, " + + "${result.failedTestCount} failures, ${result.skippedTestCount} skipped)" + ) + } + } + }) } -configure { - sourceCompatibility = JavaVersion.VERSION_1_8 -} -tasks.withType { - kotlinOptions.jvmTarget = "1.8" +detekt { + allRules = true + autoCorrect = true + parallel = true + config = files("detekt.yaml") } -jmh { - duplicateClassesStrategy = DuplicatesStrategy.WARN - profilers = listOf("gc") - resultFormat = "CSV" - isZip64 = true - failOnError = true - forceGC = true - warmupIterations = 1 - iterations = 4 - fork = 2 - benchmarkMode = listOf("AverageTime") - resultsFile = file("build/reports/benchmarks.csv") +tasks.dokkaHtml.configure { + outputDirectory.set(buildDir.resolve("javadoc")) } diff --git a/cli.md b/cli.md deleted file mode 100644 index f8babcb9..00000000 --- a/cli.md +++ /dev/null @@ -1,69 +0,0 @@ -[![JetBrains Research](https://jb.gg/badges/research.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub) -[![CircleCI](https://circleci.com/gh/JetBrains-Research/astminer.svg?style=svg)](https://circleci.com/gh/JetBrains-Research/astminer) - -# Astminer usage example / CLI - -The project implements a CLI for [astminer](github.com/vovak/astminer) and serves as a usage example for the library. - -For now the CLI provides four options: - -* Extract data (method names and path contexts) suitable as input for [code2vec](https://github.com/tech-srl/code2vec); -* Parse a project in one of supported languages and save the extracted ASTs; -* Extract path contexts from the project files and save them in code2vec format; -* Preprocess code in C/C++ to unfold `#define` directives to enable further processing. 
- -### Version history - -#### Coming up in 0.4 - -* Extraction of path-based representations at method level -* Support of Javascript - -#### 0.3 - -* Extraction of path-based representations - * For now it works only at file level -* Compatibility with [code2vec](https://github.com/tech-srl/code2vec) model (see [code2vec section](#code2vec)) -* New features in AST parsing: - * Saving in [DOT format](https://www.graphviz.org/doc/info/lang.html) - * Selection of granularity level (`file` or `method`) - * You can pass `--split-token` flag to split tokens into pipe-separated sub-tokens - * You can pass `--hide-method-name` to replace names of methods with dummy `METHOD_NAME` token - -#### 0.2 - -* Parsing ASTs for Java, Python, C/C++ -* Preprocessing for C/C++ - -#### 0.1 - -* Weird alpha-release - - -## Supported languages - -* Python – supported via parser generated from [ANTLR grammar](https://github.com/antlr/grammars-v4/tree/master/python3). -* Java – supported via [GumTree](https://github.com/GumTreeDiff/gumtree) and [ANTLR Java8 grammar](https://github.com/antlr/grammars-v4/tree/master/java8). -* C and C++ – supported via [ShiftLeft CPG constructor](https://github.com/ShiftLeftSecurity/codepropertygraph). -It does not work properly with macroses (`#define` directives), thus, they should be substituted before parsing the project. -To do so, we provide a `preprocess` option for the CLI. - -## Requirements - -1. `java` to run jar -2. `g++` for preprocessing, **only for C/C++** - -## Extending the CLI - -1. Clone the repository -2. If you want to update the astminer library: - 1. Make changes to astminer (located in the root of this repository) - 2. Build astminer in the root folder with `./gradlew shadowJar` -3. Move to `astminer-cli` -4. If you want to update the CLI: - 1. 
To add another task for the jar, create an extension of `CliktCommand()` class - (see [ProjectParser](src/main/kotlin/cli/ProjectParser.kt) for an example) and link it in [Main.kt](src/main/kotlin/cli/Main.kt) - 2. To modify existing tasks (e.g., parse only files with specific names), update code of corresponding classes -5. Run `./gradlew shadowJar` to create a runnable jar with all the dependencies -6. Created jar is located in `build/shadow/cli-versionNumber.jar` -7. Run the jar explicitly or use `./cli.sh` for short diff --git a/cli.sh b/cli.sh index 41dc7ffd..a58a5473 100755 --- a/cli.sh +++ b/cli.sh @@ -1,3 +1,29 @@ #!/bin/bash -java -jar build/shadow/lib-0.5.jar "$@" \ No newline at end of file +IMAGE_NAME="voudy/astminer" +SHADOW_JAR_PATH="build/shadow/astminer.jar" + +if ! [[ -f "$SHADOW_JAR_PATH" ]]; then + echo "$SHADOW_JAR_PATH not found, building" + ./gradlew shadowJar +fi + +if [[ "$(docker images -q $IMAGE_NAME 2> /dev/null)" == "" ]]; then + echo "Docker image not found, will use $SHADOW_JAR_PATH"; + java -jar $SHADOW_JAR_PATH "$1" +else + echo "Running astminer in docker" + +# mount config file, input dir and output dir to docker +# convert all paths to be absolute + CONFIG_PATH=$1 + INPUT_FOLDER=$(grep inputDir "$1" | cut -c 11-) + OUTPUT_FOLDER=$(grep outputDir "$1" | cut -c 12-) + docker run \ + -v "$(pwd)"/"$CONFIG_PATH":/astminer/"$CONFIG_PATH" \ + -v "$(pwd)"/"$OUTPUT_FOLDER":/astminer/"$OUTPUT_FOLDER" \ + -v "$(pwd)"/"$INPUT_FOLDER":/astminer/"$INPUT_FOLDER" \ + -v "$(pwd)"/"$SHADOW_JAR_PATH":/astminer/astminer.jar \ + --rm $IMAGE_NAME "$1" +fi + diff --git a/configs/antlr_java_js_ast.yaml b/configs/antlr_java_js_ast.yaml new file mode 100644 index 00000000..f032a01d --- /dev/null +++ b/configs/antlr_java_js_ast.yaml @@ -0,0 +1,28 @@ +# input directory (path to project) +inputDir: src/test/resources/ +# output directory +outputDir: output + +# parse Java & JavaScript files with ANTLR parser +parser: + name: antlr + languages: [java, js] + 
+filters: + - name: by tree size # exclude the trees that have > 1000 nodes + maxTreeSize: 1000 + - name: by words number + maxTokenWordsNumber: 1000 + +# use file names as labels +# this selects the file level granularity +label: + name: file name + +# save to disk ASTs in the JSON format +storage: + name: json AST + +# number of threads used for parsing +# the default is one thread +numOfThreads: 4 \ No newline at end of file diff --git a/configs/antlr_python_paths.yaml b/configs/antlr_python_paths.yaml new file mode 100644 index 00000000..b2b54b39 --- /dev/null +++ b/configs/antlr_python_paths.yaml @@ -0,0 +1,25 @@ +# input directory (path to project) +inputDir: src/test/resources/ +# output directory +outputDir: output + +# parse Python files with ANTLR parser +parser: + name: antlr + languages: [py] + +filters: + - name: by tree size # exclude the trees that have > 1000 nodes + maxTreeSize: 1000 + +# use file names as labels +# this selects the file level granularity +label: + name: file name + +# extract from each tree paths with maximum length 9 and maximum width 2 +# save paths in code2seq format +storage: + name: code2seq + maxPathLength: 9 + maxPathWidth: 2 diff --git a/configs/gumtree_java_ast.yaml b/configs/gumtree_java_ast.yaml new file mode 100644 index 00000000..e6345b70 --- /dev/null +++ b/configs/gumtree_java_ast.yaml @@ -0,0 +1,24 @@ +# input directory (path to project) +inputDir: src/test/resources/ +# output directory +outputDir: output + +# parse Java files with GumTree parser +parser: + name: gumtree + languages: [java] + +filters: + - name: by tree size # exclude the trees that have > 1000 nodes + maxTreeSize: 1000 + - name: by words number + maxTokenWordsNumber: 1000 + +# use file names as labels +# this selects the file level granularity +label: + name: file name + +# save to disk ASTs in the JSON format +storage: + name: json AST diff --git a/configs/gumtree_java_function_names_paths.yaml b/configs/gumtree_java_function_names_paths.yaml new file mode
100644 index 00000000..11ef0d9b --- /dev/null +++ b/configs/gumtree_java_function_names_paths.yaml @@ -0,0 +1,28 @@ +# input directory (path to project) +inputDir: src/test/resources/ +# output directory +outputDir: output + +# parse Java files with GumTree parser +parser: + name: gumtree + languages: [java] + +filters: + - name: by function name length + maxWordsNumber: 10 + - name: by words number + maxTokenWordsNumber: 100 + - name: by annotations + annotations: [override] + +# use function name as labels +# this selects the function level granularity +label: + name: function name + +# save to disk ASTs in the code2seq format +storage: + name: code2seq + length: 9 + width: 2 diff --git a/detekt.yaml b/detekt.yaml new file mode 100644 index 00000000..52fc010a --- /dev/null +++ b/detekt.yaml @@ -0,0 +1,46 @@ +complexity: + TooManyFunctions: + thresholdInClasses: 15 + thresholdInFiles: 16 + ComplexInterface: + threshold: 15 + StringLiteralDuplication: + active: false + LabeledExpression: + active: false + +exceptions: + TooGenericExceptionCaught: + active: false + +style: + ForbiddenComment: + allowedPatterns: 'TODO:' + MagicNumber: + ignorePropertyDeclaration: true + ignoreAnnotation: true + ignoreEnums: true + ignoreNumbers: ['-1', '0', '1', '2', '60', '100', '1000'] + excludes: ['**/test/**'] + ReturnCount: + max: 5 + WildcardImport: + active: false + +formatting: + autoCorrect: true + NoConsecutiveBlankLines: + active: true + NoWildcardImports: + active: false + +comments: + active: false + +output-reports: + active: true + exclude: + - 'TxtOutputReport' + - 'XmlOutputReport' + - 'HtmlOutputReport' + diff --git a/changelog.md b/docs/changelog.md similarity index 90% rename from changelog.md rename to docs/changelog.md index 1e078bf0..f435c5be 100644 --- a/changelog.md +++ b/docs/changelog.md @@ -1,31 +1,31 @@ -### Version history +# Version history -#### 0.6 +## 0.6 * Greatly improved memory efficiency * Support arbitrary labels for path-contexts * Minor 
improvements -#### 0.5 +## 0.5 * Beta of Javascript support * Storage of ASTs in [DOT format](https://www.graphviz.org/doc/info/lang.html) * Minor fixes -#### 0.4 +## 0.4 * Support of code2vec output format * Extraction of ASTs and path-based representations of individual methods * Extraction of data for the task of method name prediction ([code2vec paper](https://arxiv.org/abs/1803.09473)) -#### 0.3 +## 0.3 * Support of C/C++ via [FuzzyC2CPG parser](https://github.com/ShiftLeftSecurity/fuzzyc2cpg) -#### 0.2 +## 0.2 * Mining of ASTs -#### 0.1 +## 0.1 * astminer is available via Maven Central * Support of Java and Python * Mining of [path-based representations of code](https://arxiv.org/pdf/1803.09544.pdf) diff --git a/docs/cli.md b/docs/cli.md new file mode 100644 index 00000000..1d5ef81d --- /dev/null +++ b/docs/cli.md @@ -0,0 +1,44 @@ +# `astminer` CLI usage + +You can run `astminer` through a command line interface (CLI). +The CLI allows to run the tool on any implemented parser with specified options for filtering, label extraction, and storage of the results. + +## How to +You can build and run the CLI with any version of `astminer`: +1. Check out the relevant version of `astminer` sources (for example, the `master-dev` branch) +2. Build a shadow jar for `astminer`: +```shell +gradle shadowJar +``` +3. [optional] Pull a docker image with all parsers dependencies installed: +```shell +docker pull voudy/astminer +``` +4. Run `astminer` with specified config: +```shell +./cli.sh +``` + +## Config + +CLI of `astminer` is fully configured by a YAML config. 
+The config should contain the following values: +- `inputDir` — path to the directory with input data +- `outputDir` — path to the output directory +- `parser` — parser name and list of target languages +- `filters` — list of filters and parameters +- `label` — label extraction strategy +- `storage` — storage format + +[configs](../configs) contains some config examples that could be used as a reference for the YAML structure. + +## Docker + +Some parsers have non-trivial environment requirements. +For example, g++ must be installed for Fuzzy parser (see [parsers](./parsers.md)). + +To ease dealing with such cases, we provide a Docker image with all parser dependencies. +This image can be pulled from DockerHub: +```shell +docker pull voudy/astminer +``` diff --git a/docs/filters.md b/docs/filters.md new file mode 100644 index 00000000..6a9c3448 --- /dev/null +++ b/docs/filters.md @@ -0,0 +1,73 @@ +# Filters + +Each filter is dedicated to removing *bad* trees from the data, e.g. trees that are too big. +Moreover, each filter works only for certain levels of granularity. +Here we describe all filters provided by `astminer`. +Each description contains the corresponding YAML config. + +Filters can be specific to a language or a parser. +Therefore, some languages or parsers may not support the needed filter +(`FunctionInfoPropertyNotImplementedException` appears). +To handle this, the user might manually add specific logic of parsing AST to get the desired information about function or code at all. + +Filter config classes are defined in [FilterConfigs.kt](../src/main/kotlin/astminer/config/FilterConfigs.kt). + +## Filter by tree size +**granularity**: files, functions + +Exclude ASTs that are too small or too big. + + ```yaml + name: 'by tree size' + minTreeSize: 1 + maxTreeSize: 100 + ``` + +## Filter by words count +**granularity**: files, functions + +Exclude ASTs that have too many words in any token. 
+ + ```yaml + name: by words number + maxTokenWordsNumber: 10 + ``` + +## Filter by function name length +**granularity**: functions + +Exclude functions that have too many words in their name. + + ```yaml + name: by function name length + maxWordsNumber: 10 + ``` + +## Exclude constructors +**granularity**: functions + +Exclude constructors + + ```yaml + name: no constructors + ``` + +## Filter by annotation +**granularity**: functions + +Exclude functions that have certain annotations (e.g. `@Override`) + + ```yaml + name: by annotations + annotations: [ override ] + ``` + +## Filter by modifiers +**granularity**: functions + +Exclude functions with certain modifiers (e.g. `private` functions) + + ```yaml + name: by modifiers + modifiers: [ private ] + ``` diff --git a/docs/label_extractors.md b/docs/label_extractors.md new file mode 100644 index 00000000..fbad4f8e --- /dev/null +++ b/docs/label_extractors.md @@ -0,0 +1,36 @@ +# Label extractors + +Label extractors are required for correct extraction of labels from raw ASTs. +Internally, they extract labels from the tree and process the tree to avoid data leaks. +Also, label extractors define the granularity level for the whole pipeline. + +Label extractor config classes are defined in [LabelExtractorConfigs.kt](../src/main/kotlin/astminer/config/LabelExtractorConfigs.kt). + +## file name +**granularity**: files + +Use file name of source file as a label. + + ```yaml + name: file name + ``` + +## folder name +**granularity**: files + +Use the name of the parent folder of source file as a label. +May be useful for code classification datasets, e.g., POJ-104. + + ```yaml + name: folder name + ``` + +## function name +**granularity**: functions + +Use name of each function as a label. +This label extractor will also hide the function name in the AST and all recursive calls to prevent data leaks. 
+ + ```yaml + name: function name + ``` diff --git a/docs/parsers.md b/docs/parsers.md new file mode 100644 index 00000000..42bc9d7b --- /dev/null +++ b/docs/parsers.md @@ -0,0 +1,56 @@ +# Parsers + +`astminer` supports multiple parsers for various programming languages. +Here we describe the integrated parsers and their peculiarities. + +## ANTLR + +[ANTLR](https://www.antlr.org) provides an infrastructure to generate lexers and parsers for languages based on grammars. +For now, `astminer` supports ANTLR-based parsers for Java, Python, JS, and PHP. + +## GumTree + +[GumTree](https://github.com/GumTreeDiff/gumtree) +is a framework to work with source code as trees and to compute differences of trees between different versions of code. +It also builds language-agnostic representations of code. +For now, `astminer` supports GumTree-based parsers for Java and Python. + +### python-parser + +Running GumTree with Python requires `python-parser`. +It can be set up through the following steps: +1. Download sources from [GitHub](https://github.com/JetBrains-Research/pythonparser/blob/master/) +2. Install dependencies +```shell +pip install -r requirements.txt +``` +3. Make the `python-parser` script executable +```shell +chmod +x src/main/python/pythonparser/pythonparser_3.py +``` +4. Add python-parser to `PATH` +```shell +cp src/main/python/pythonparser/pythonparser_3.py src/main/python/pythonparser/pythonparser +export PATH="/src/main/python/pythonparser/pythonparser:${PATH}" +``` + +## Fuzzy + +Originally [fuzzyc2cpg](https://github.com/ShiftLeftSecurity/fuzzyc2cpg), Fuzzy is +now part of [codepropertygraph](https://github.com/ShiftLeftSecurity/codepropertygraph/). +`astminer` uses it to parse C/C++ code. `g++` is required for this parser. + +## Other languages and parsers + +Support for a new programming language can be implemented in a few simple steps. + +If there is an ANTLR grammar for the language: +1. 
Add the corresponding [ANTLR4 grammar file](https://github.com/antlr/grammars-v4) to the `antlr` directory; +2. Run the `generateGrammarSource` Gradle task to generate the parser; +3. Implement a small wrapper around the generated parser. + See [JavaParser](../src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt) or [PythonParser](../src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt) for an example of a wrapper. + +If the language has a parsing tool that is available as a Java library: +1. Add the library as a dependency in [build.gradle.kts](/build.gradle.kts); +2. Implement a wrapper for the parsing tool. + See [FuzzyCppParser](../src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt) for an example of a wrapper. diff --git a/docs/storages.md b/docs/storages.md new file mode 100644 index 00000000..fd9a74bc --- /dev/null +++ b/docs/storages.md @@ -0,0 +1,85 @@ +# Storages + +The storage defines how the ASTs should be saved on disk. +For now, `astminer` supports tree-based and path-based storage formats. + +`Astminer` also knows how to find the structure of the dataset and can +save trees or path contexts in the appropriate holdout folders (`train`, `val` and `test`). If the data is not structured, +all trees will be saved in the `data` folder. Description files for trees or paths will be +saved along with holdouts in the same `outputPath` directory. + +Storage config classes are defined in [StorageConfigs.kt](../src/main/kotlin/astminer/config/StorageConfigs.kt). + +## Tree formats + +### CSV + +Saves the trees with labels to a comma-separated file. +Each tree is encoded to a single line using parentheses sequences. + + ```yaml + name: csv AST + ``` + +### Dot + +Saves each tree in a separate file using [dot](https://graphviz.org/doc/info/lang.html) syntax. +Along with dot files, this storage also saves `description.csv` with mapping between files, source files, and labels. 
+ + + ```yaml + name: dot AST + ``` + +### Json lines + +Saves each tree with its label in the Json Lines format. +Json format of AST inspired by the [150k Python](https://www.sri.inf.ethz.ch/py150) dataset. + + ```yaml + name: json AST + ``` + +## Path-based representations + +Path-based representation was introduced by [Alon et al.](https://arxiv.org/abs/1803.09544). +It is used in popular code representation models such as `code2vec` and `code2seq`. + +### Code2vec + +Extract paths from each AST. Output is 4 files: +1. `node_types.csv` contains numeric ids and corresponding node types with directions (up/down, as described in [paper](https://arxiv.org/pdf/1803.09544.pdf)); +2. `tokens.csv` contains numeric ids and corresponding tokens; +3. `paths.csv` contains numeric ids and AST paths in form of space-separated sequences of node type ids; +4. `path_contexts.c2s` contains the labels and sequences of path-contexts (each representing two tokens and a path between them). + This file will be generated for every holdout. + +Each line in `path_contexts.c2s` starts with a label, followed by a sequence of space-separated triples. Each triple contains start token id, path id, end token id, separated with commas. + + ```yaml + name: code2vec + maxPathLength: 10 + maxPathWidth: 2 + maxTokens: 1000 # can be omitted + maxPaths: 1000 # can be omitted + maxPathContextsPerEntity: 200 # can be omitted + ``` + + +### Code2seq + +Extract paths from each AST and save in the code2seq format. +The output is `path_context.c2s` file, which will be generated for every holdout. +Each line starts with a label, followed by a sequence of space-separated triples. +Each triple contains the start token, path node types, and end token id, separated with commas. + +To reduce memory usage, you can enable `nodesToNumber` option. +If `nodesToNumber` is set to `true`, all types are converted into numbers and `node_types.csv` is added to output files. 
+ + ```yaml + name: code2seq + maxPathLength: 10 + maxPathWidth: 2 + maxPathContextsPerEntity: 200 # can be omitted + nodesToNumber: true # can be omitted + ``` diff --git a/gradle.properties b/gradle.properties index a725de05..7fc6f1ff 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,2 +1 @@ kotlin.code.style=official -org.gradle.jvmargs=-Xmx32768M \ No newline at end of file diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 33682bbb..442d9132 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-6.6.1-all.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-6.8.3-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/scripts/fuzzy/convert.sh b/scripts/fuzzy/convert.sh deleted file mode 100755 index fd79e243..00000000 --- a/scripts/fuzzy/convert.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -if [[ $# -ne 2 || ! 
-f $1 ]] ; then - echo "Invalid args / Check file " - exit -fi - -file_name=$1 -output_name=$2 - -grep '^\s*#\s*include' $file_name > /tmp/include.c -grep -Pv '^\s*#\s*include\b' $file_name > /tmp/code.c -gcc -E /tmp/code.c | grep -v ^# > /tmp/preprocessed.c -mkdir -p $output_name -cat /tmp/include.c > $output_name/$file_name -cat /tmp/preprocessed.c >> $output_name/$file_name \ No newline at end of file diff --git a/settings.gradle.kts b/settings.gradle.kts new file mode 100644 index 00000000..8086a9bb --- /dev/null +++ b/settings.gradle.kts @@ -0,0 +1,3 @@ +rootProject.name = "astminer" + +include("examples") diff --git a/src/examples/java/astminer/GumTreeJavaPaths.java b/src/examples/java/astminer/GumTreeJavaPaths.java new file mode 100644 index 00000000..78d4f8f4 --- /dev/null +++ b/src/examples/java/astminer/GumTreeJavaPaths.java @@ -0,0 +1,54 @@ +package astminer; + +import astminer.common.model.LabeledResult; +import astminer.parse.gumtree.GumTreeNode; +import astminer.parse.gumtree.java.GumTreeJavaParser; +import astminer.storage.path.Code2VecPathStorage; +import astminer.storage.path.PathBasedStorage; +import astminer.storage.path.PathBasedStorageConfig; + +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.*; +import java.nio.file.attribute.BasicFileAttributes; + +// Retrieve paths from Java files, using a GumTree parser. 
+public class GumTreeJavaPaths { + private static final String INPUT_FOLDER = "src/test/resources/examples"; + private static final String OUTPUT_FOLDER = "examples_output/gumtree_java_paths_java_api"; + + public static void runExample() { + final PathBasedStorageConfig config = new PathBasedStorageConfig(5, 5, null, null, null); + final PathBasedStorage code2vecStorage = new Code2VecPathStorage(OUTPUT_FOLDER, config); + + final Path inputFolder = Paths.get(INPUT_FOLDER); + + FileVisitor fileVisitor = new SimpleFileVisitor<>() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) throws IOException { + if (!file.getFileName().toString().endsWith(".java")) { + return FileVisitResult.CONTINUE; + } + GumTreeNode fileTree = new GumTreeJavaParser().parseInputStream(new FileInputStream(file.toFile())); + String filePath = file.toAbsolutePath().toString(); + + LabeledResult labeledResult = new LabeledResult<>(fileTree, filePath, filePath); + code2vecStorage.store(labeledResult); + + return FileVisitResult.CONTINUE; + } + }; + + try { + Files.walkFileTree(inputFolder, fileVisitor); + } catch (IOException e) { + System.out.println("Error while processing files: " + e.getMessage()); + } finally { + code2vecStorage.close(); + } + } + + public static void main(String[] args) { + runExample(); + } +} diff --git a/src/examples/kotlin/astminer/antlrJavaAsts.kt b/src/examples/kotlin/astminer/antlrJavaAsts.kt new file mode 100644 index 00000000..9d825d80 --- /dev/null +++ b/src/examples/kotlin/astminer/antlrJavaAsts.kt @@ -0,0 +1,23 @@ +package astminer + +import astminer.config.* +import astminer.pipeline.Pipeline + +/** + * Retrieve ASTs from Java files, using ANTLR parser and save them in JSON format. 
+ */ +fun antlrJavaAsts() { + val config = PipelineConfig( + inputDir = "src/test/resources/examples/", + outputDir = "examples_output/antlr_java_asts_json_storage", + parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Java)), + labelExtractor = FileNameExtractorConfig(), + storage = JsonAstStorageConfig(), + ) + + Pipeline(config).run() +} + +fun main() { + antlrJavaAsts() +} diff --git a/src/examples/kotlin/astminer/antlrJavaMethodPaths.kt b/src/examples/kotlin/astminer/antlrJavaMethodPaths.kt new file mode 100644 index 00000000..d9870ddc --- /dev/null +++ b/src/examples/kotlin/astminer/antlrJavaMethodPaths.kt @@ -0,0 +1,43 @@ +package astminer + +import astminer.common.forFilesWithSuffix +import astminer.common.model.FunctionInfo +import astminer.common.model.LabeledResult +import astminer.parse.antlr.AntlrNode +import astminer.parse.antlr.java.JavaFunctionSplitter +import astminer.parse.antlr.java.JavaParser +import astminer.storage.path.Code2VecPathStorage +import astminer.storage.path.PathBasedStorageConfig +import java.io.File + +/** + * Retrieve paths from Java files using ANTLR parser. 
+ */ +fun antlrJavaMethodPaths() { + val inputDir = "src/test/resources/examples/" + val outputDir = "examples_output/antlr_java_method_paths" + + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) + + File(inputDir).forFilesWithSuffix(".java") { file -> + val node = JavaParser().parseInputStream(file.inputStream()) + + val functions: List<FunctionInfo<AntlrNode>> = JavaFunctionSplitter() + .splitIntoFunctions(node, file.path) + .map { + val parametersStr = + it.parameters.joinToString(" | ") { param -> "${param.name} ${param.type}" } + println("${it.name} ${it.returnType} ${it.enclosingElement?.name} [$parametersStr]") + it + } + functions.forEach { + storage.store(LabeledResult(it.root, it.name ?: "", file.path)) + } + } + + storage.close() +} + +fun main() { + antlrJavaMethodPaths() +} diff --git a/src/examples/kotlin/astminer/antlrJavaScriptPaths.kt b/src/examples/kotlin/astminer/antlrJavaScriptPaths.kt new file mode 100644 index 00000000..1f4b9473 --- /dev/null +++ b/src/examples/kotlin/astminer/antlrJavaScriptPaths.kt @@ -0,0 +1,23 @@ +package astminer + +import astminer.config.* +import astminer.pipeline.Pipeline + +/** + * Retrieve paths from all JavaScript files using ANTLR parser. 
+ */ +fun antlrJavaScriptPaths() { + val config = PipelineConfig( + inputDir = "src/test/resources/examples", + outputDir = "examples_output/antlr_javascript_paths", + parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.JavaScript)), + labelExtractor = FileNameExtractorConfig(), + storage = Code2VecPathStorageConfig(5, 5) + ) + + Pipeline(config).run() +} + +fun main() { + antlrJavaScriptPaths() +} diff --git a/src/examples/kotlin/astminer/antlrPythonPaths.kt b/src/examples/kotlin/astminer/antlrPythonPaths.kt new file mode 100644 index 00000000..ae8eb138 --- /dev/null +++ b/src/examples/kotlin/astminer/antlrPythonPaths.kt @@ -0,0 +1,23 @@ +package astminer + +import astminer.config.* +import astminer.pipeline.Pipeline + +/** + * Retrieve paths from all Python files using ANTLR parser + */ +fun antlrPythonPaths() { + val config = PipelineConfig( + inputDir = "src/test/resources/examples", + outputDir = "examples_output/antlr_python_paths", + parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Python)), + labelExtractor = FileNameExtractorConfig(), + storage = Code2VecPathStorageConfig(5, 5) + ) + + Pipeline(config).run() +} + +fun main() { + antlrPythonPaths() +} diff --git a/src/examples/kotlin/astminer/collectFeatures.kt b/src/examples/kotlin/astminer/collectFeatures.kt new file mode 100644 index 00000000..ab673fdc --- /dev/null +++ b/src/examples/kotlin/astminer/collectFeatures.kt @@ -0,0 +1,37 @@ +package astminer + +import astminer.common.forFilesWithSuffix +import astminer.common.numberOfLines +import astminer.featureextraction.* +import astminer.parse.gumtree.java.GumTreeJavaParser +import java.io.File + +/** + * Collect different features/statistics from parsed trees. + * Target language is Java, using GumTree parser. 
+ */ +fun collectFeatures() { + val parser = GumTreeJavaParser() + val features: List> = + listOf(Depth, NumberOfNodes, BranchingFactor, CompressiblePathLengths, Tokens, NodeTypes) + + val inputDir = "src/test/resources/featureextraction" + val outputDir = "examples_output/collected_features" + + val storage = TreeFeatureValueStorage(",") + storage.storeFeatures(features) + + File(inputDir).forFilesWithSuffix("java") { fileInput -> + val fileName = fileInput.name + val nol = numberOfLines(fileInput) + + val tree = ParsedTree(parser.className(), parser.parseInputStream(fileInput.inputStream()), fileName, nol) + storage.storeParsedTree(tree) + } + + storage.save(outputDir) +} + +fun main() { + collectFeatures() +} diff --git a/src/examples/kotlin/astminer/fuzzyCppPathsWithPreprocessing.kt b/src/examples/kotlin/astminer/fuzzyCppPathsWithPreprocessing.kt new file mode 100644 index 00000000..3c4dd15a --- /dev/null +++ b/src/examples/kotlin/astminer/fuzzyCppPathsWithPreprocessing.kt @@ -0,0 +1,27 @@ +package astminer + +import astminer.config.* +import astminer.pipeline.Pipeline + +/** + * Preprocess .cpp files and retrieve paths from them, using a fuzzyc2cpg parser. 
+ */ +fun fuzzyCppPathsWithPreprocessing() { + val inputDir = "src/test/resources/examples" + val outputDir = "examples_output/fuzzy_cpp_paths" + + // Pipeline will handle preprocessing automatically + val config = PipelineConfig( + inputDir = inputDir, + outputDir = outputDir, + parser = ParserConfig(ParserType.Fuzzy, listOf(FileExtension.Cpp)), + labelExtractor = FileNameExtractorConfig(), + storage = Code2VecPathStorageConfig(5, 5) + ) + + Pipeline(config).run() +} + +fun main() { + fuzzyCppPathsWithPreprocessing() +} \ No newline at end of file diff --git a/src/examples/kotlin/astminer/gumTreeJavaMethodPaths.kt b/src/examples/kotlin/astminer/gumTreeJavaMethodPaths.kt new file mode 100644 index 00000000..36a615ce --- /dev/null +++ b/src/examples/kotlin/astminer/gumTreeJavaMethodPaths.kt @@ -0,0 +1,53 @@ +package astminer + +import astminer.common.forFilesWithSuffix +import astminer.common.model.FunctionInfo +import astminer.common.model.LabeledResult +import astminer.parse.gumtree.GumTreeNode +import astminer.parse.gumtree.java.GumTreeJavaFunctionSplitter +import astminer.parse.gumtree.java.GumTreeJavaParser +import astminer.storage.path.Code2VecPathStorage +import astminer.storage.path.PathBasedStorageConfig +import java.io.File + +private fun getCsvFriendlyMethodId(functionInfo: FunctionInfo): String { + val className = functionInfo.enclosingElement?.name ?: "" + val methodName = functionInfo.name + val parameterTypes = functionInfo.parameters.joinToString("|") { it.name } + return "$className.$methodName($parameterTypes)" +} + +/** + * Retrieve paths from all Java files, using a GumTree parser. + * GumTreeMethodSplitter is used to extract individual method nodes from the compilation unit tree. 
+ */ +fun gumTreeJavaMethodPaths() { + val inputDir = "src/test/resources/examples" + val outputDir = "examples_output/gumtree_java_method_paths" + + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) + + File(inputDir).forFilesWithSuffix(".java") { file -> + // parse file + val fileNode = GumTreeJavaParser().parseInputStream(file.inputStream()) + + // extract method nodes + val methodNodes = GumTreeJavaFunctionSplitter().splitIntoFunctions(fileNode, file.path) + + methodNodes.forEach { methodInfo -> + // Retrieve a method identifier + println("Method name: ${methodInfo.name}, " + + "modifiers: ${methodInfo.modifiers}, " + + "annotations: ${methodInfo.annotations}") + val entityId = "${file.path}::${getCsvFriendlyMethodId(methodInfo)}" + val labelingResult = LabeledResult(methodInfo.root, entityId, file.path) + storage.store(labelingResult) + } + } + + storage.close() +} + +fun main() { + gumTreeJavaMethodPaths() +} diff --git a/src/examples/kotlin/astminer/gumTreeJavaPaths.kt b/src/examples/kotlin/astminer/gumTreeJavaPaths.kt new file mode 100644 index 00000000..05a0633f --- /dev/null +++ b/src/examples/kotlin/astminer/gumTreeJavaPaths.kt @@ -0,0 +1,22 @@ +package astminer + +import astminer.config.* +import astminer.pipeline.Pipeline + +/** + * Retrieve paths from Java files, using a GumTree parser. 
+ */ +fun gumTreeJavaPaths() { + val config = PipelineConfig( + inputDir = "src/test/resources/examples", + outputDir = "examples_output/gumtree_java_paths_kotlin_api", + parser = ParserConfig(ParserType.GumTree, listOf(FileExtension.Java)), + labelExtractor = FileNameExtractorConfig(), + storage = Code2VecPathStorageConfig(5, 5) + ) + Pipeline(config).run() +} + +fun main() { + gumTreeJavaPaths() +} diff --git a/src/examples/kotlin/astminer/gumTreePythonMethodPaths.kt b/src/examples/kotlin/astminer/gumTreePythonMethodPaths.kt new file mode 100644 index 00000000..a56e5538 --- /dev/null +++ b/src/examples/kotlin/astminer/gumTreePythonMethodPaths.kt @@ -0,0 +1,50 @@ +package astminer + +import astminer.common.forFilesWithSuffix +import astminer.common.model.FunctionInfo +import astminer.common.model.LabeledResult +import astminer.parse.gumtree.GumTreeNode +import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter +import astminer.parse.gumtree.python.GumTreePythonParser +import astminer.storage.path.Code2VecPathStorage +import astminer.storage.path.PathBasedStorageConfig +import java.io.File + +private fun getCsvFriendlyMethodId(functionInfo: FunctionInfo): String { + val className = functionInfo.enclosingElement?.name ?: "" + val methodName = functionInfo.name + val parameterTypes = functionInfo.parameters.joinToString("|") { it.name } + return "$className.$methodName($parameterTypes)" +} + +/** + * Retrieve paths from all Python methods in files using GumTree parser + */ +fun gumTreePythonMethodPaths() { + val inputDir = "src/test/resources/gumTreeMethodSplitter" + val outputDir = "examples_output/gumtree_python_methods_paths" + + val storage = Code2VecPathStorage(outputDir, PathBasedStorageConfig(5, 5)) + + File(inputDir).forFilesWithSuffix(".py") { file -> + // parse file + val fileNode = GumTreePythonParser().parseInputStream(file.inputStream()) + + // extract method nodes + val methodNodes = GumTreePythonFunctionSplitter().splitIntoFunctions(fileNode, 
file.path) + + methodNodes.forEach { methodInfo -> + // Retrieve a method identifier + val entityId = "${file.path}::${getCsvFriendlyMethodId(methodInfo)}" + val labelingResult = LabeledResult(methodInfo.root, entityId, file.path) + // Retrieve paths from each method individually and store them + storage.store(labelingResult) + } + } + + storage.close() +} + +fun main() { + gumTreePythonMethodPaths() +} \ No newline at end of file diff --git a/src/examples/kotlin/astminer/methodNamePredictionPipeline.kt b/src/examples/kotlin/astminer/methodNamePredictionPipeline.kt new file mode 100644 index 00000000..107b9391 --- /dev/null +++ b/src/examples/kotlin/astminer/methodNamePredictionPipeline.kt @@ -0,0 +1,30 @@ +package astminer + +import astminer.config.* +import astminer.pipeline.Pipeline + +/** + * Prepare data for training code2vec model for method name prediction task. + * Target language is Java, using ANTLR parser. + */ +fun methodNamePredictionPipeline() { + val inputDir = "src/test/resources/examples" + val outputDir = "examples_output/method_name_prediction_code2vec" + + val pipelineConfig = PipelineConfig( + inputDir = inputDir, + outputDir = outputDir, + parser = ParserConfig(ParserType.Antlr, listOf(FileExtension.Java)), + labelExtractor = FunctionNameExtractorConfig(), + storage = Code2VecPathStorageConfig( + maxPathLength = 5, + maxPathWidth = 5 + ) + ) + + Pipeline(pipelineConfig).run() +} + +fun main() { + methodNamePredictionPipeline() +} diff --git a/src/examples/kotlin/astminer/runAllExamples.kt b/src/examples/kotlin/astminer/runAllExamples.kt new file mode 100644 index 00000000..2c9f1bcb --- /dev/null +++ b/src/examples/kotlin/astminer/runAllExamples.kt @@ -0,0 +1,22 @@ +package astminer + +import astminer.parse.ParsingException + +/** + * Run all examples in one place. 
+ */ +fun main() { + // Java api example + GumTreeJavaPaths.runExample() + // Kotlin api examples + antlrJavaAsts() + antlrJavaMethodPaths() + antlrJavaScriptPaths() + antlrPythonPaths() + collectFeatures() + fuzzyCppPathsWithPreprocessing() + gumTreeJavaMethodPaths() + gumTreeJavaPaths() + try { gumTreePythonMethodPaths() } catch (ex: ParsingException) { println("No python parser to run this example") } + methodNamePredictionPipeline() +} \ No newline at end of file diff --git a/src/jmh/kotlin/cli/BenchmarksSetup.kt b/src/jmh/kotlin/cli/BenchmarksSetup.kt deleted file mode 100644 index adf98bfe..00000000 --- a/src/jmh/kotlin/cli/BenchmarksSetup.kt +++ /dev/null @@ -1,50 +0,0 @@ -package cli - -import java.io.File - - -// How to start benchmark: -// 1. gradle daemons should be stopped before, so execute ./gradlew --stop -// 2. jmh plugin is unable to compile code incrementally, so execute ./gradlew clean -// 3. to run benchmarks execute ./gradlew jmh - -open class BenchmarksSetup() { - - private val cliPath = BenchmarksSetup::class.java.protectionDomain.codeSource.location.path.split("/build")[0] - val simpleProjectPath: String = "$cliPath/src/jmh/resources/gradle" - val simpleProjectResultsPath: String = "$cliPath/build/results/simpleProject" - val longFilePath: String = "$cliPath/src/jmh/resources/LongJavaFile.java" - val longFileResultsPath: String = "$cliPath/build/results/LongJavaFile" - val bigProjectPath: String = "$cliPath/src/jmh/resources/intellij-community" - val bigProjectResultsPath: String = "$cliPath/build/results/bigProject" - - fun setup() { - val resourcesPath = "$cliPath/src/jmh/resources" - if (isDirectoryEmpty(simpleProjectPath)) { - println("Gradle project is downloading for benchmark...") - val exitCode = cloneGitProject("v6.3.0", "https://github.com/gradle/gradle", resourcesPath) - if (exitCode != 0) { - throw DownloadException("Error with downloading Gradle project!") - } - } - if (isDirectoryEmpty(bigProjectPath)) { - println("Intellij 
IDEA project is downloading for benchmark...") - val exitCode = cloneGitProject("idea/193.7288.8", "https://github.com/JetBrains/intellij-community", resourcesPath) - if (exitCode != 0) { - throw DownloadException("Error with downloading Intellij IDEA project!") - } - } - } - - private fun cloneGitProject(tag: String, projectLink: String, directory: String) : Int { - val processBuilder = ProcessBuilder() - processBuilder.command("git", "clone", "--depth", "1", "-b", tag, projectLink) - .directory(File(directory)) - return processBuilder.start().waitFor() - } - - private fun isDirectoryEmpty(path: String) : Boolean { - val directory = File(path) - return !directory.isDirectory || directory.list()?.isEmpty() ?: false - } -} \ No newline at end of file diff --git a/src/jmh/kotlin/cli/Code2VecExtractorBenchmarks.kt b/src/jmh/kotlin/cli/Code2VecExtractorBenchmarks.kt deleted file mode 100644 index c5da396c..00000000 --- a/src/jmh/kotlin/cli/Code2VecExtractorBenchmarks.kt +++ /dev/null @@ -1,37 +0,0 @@ -package cli - -import org.openjdk.jmh.annotations.* -import astminer.cli.* - -@State(Scope.Benchmark) -open class Code2VecExtractorBenchmarks { - - @Setup - fun pathsSetup() { - BenchmarksSetup().setup() - } - - @Benchmark - fun simpleProject() { - val args = listOf("--project", BenchmarksSetup().simpleProjectPath, - "--output", BenchmarksSetup().simpleProjectResultsPath, - "--lang", "java") - Code2VecExtractor().main(args) - } - - @Benchmark - fun longFileProject() { - val args = listOf("--project", BenchmarksSetup().longFilePath, - "--output", BenchmarksSetup().longFileResultsPath, - "--lang", "java") - Code2VecExtractor().main(args) - } - - @Benchmark - fun bigProject() { - val args = listOf("--project", BenchmarksSetup().bigProjectPath, - "--output", BenchmarksSetup().bigProjectResultsPath, - "--lang", "java") - Code2VecExtractor().main(args) - } -} \ No newline at end of file diff --git a/src/jmh/kotlin/cli/DownloadException.kt 
b/src/jmh/kotlin/cli/DownloadException.kt deleted file mode 100644 index 69251cca..00000000 --- a/src/jmh/kotlin/cli/DownloadException.kt +++ /dev/null @@ -1,5 +0,0 @@ -package cli - -import java.lang.RuntimeException - -class DownloadException(message: String): RuntimeException(message) {} \ No newline at end of file diff --git a/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt b/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt deleted file mode 100644 index 90b1e67f..00000000 --- a/src/jmh/kotlin/cli/PathContextsExtractorBenchmarks.kt +++ /dev/null @@ -1,34 +0,0 @@ -package cli - -import org.openjdk.jmh.annotations.* -import astminer.cli.* - -@State(Scope.Benchmark) -open class PathContextsExtractorBenchmarks { - - @Setup - fun pathsSetup() { - BenchmarksSetup().setup() - } - - @Benchmark - fun simpleProject() { - val args = listOf("--project", BenchmarksSetup().simpleProjectPath, - "--output", BenchmarksSetup().simpleProjectResultsPath) - PathContextsExtractor().main(args) - } - - @Benchmark - fun longFileProject() { - val args = listOf("--project", BenchmarksSetup().longFilePath, - "--output", BenchmarksSetup().longFileResultsPath) - PathContextsExtractor().main(args) - } - - @Benchmark - fun bigProject() { - val args = listOf("--project", BenchmarksSetup().bigProjectPath, - "--output", BenchmarksSetup().bigProjectResultsPath) - PathContextsExtractor().main(args) - } -} \ No newline at end of file diff --git a/src/jmh/kotlin/cli/ProjectParserCsvBenchmarks.kt b/src/jmh/kotlin/cli/ProjectParserCsvBenchmarks.kt deleted file mode 100644 index 0e7ad5c3..00000000 --- a/src/jmh/kotlin/cli/ProjectParserCsvBenchmarks.kt +++ /dev/null @@ -1,34 +0,0 @@ -package cli - -import org.openjdk.jmh.annotations.* -import astminer.cli.* - -@State(Scope.Benchmark) -open class ProjectParserCsvBenchmarks { - - @Setup - fun pathsSetup() { - BenchmarksSetup().setup() - } - - @Benchmark - fun simpleProject() { - val args = listOf("--project", 
BenchmarksSetup().simpleProjectPath, - "--output", BenchmarksSetup().simpleProjectResultsPath) - ProjectParser().main(args) - } - - @Benchmark - fun longFileProject() { - val args = listOf("--project", BenchmarksSetup().longFilePath, - "--output", BenchmarksSetup().longFileResultsPath) - ProjectParser().main(args) - } - - @Benchmark - fun bigProject() { - val args = listOf("--project", BenchmarksSetup().bigProjectPath, - "--output", BenchmarksSetup().bigProjectResultsPath) - ProjectParser().main(args) - } -} \ No newline at end of file diff --git a/src/jmh/kotlin/cli/ProjectParserDotBenchmarks.kt b/src/jmh/kotlin/cli/ProjectParserDotBenchmarks.kt deleted file mode 100644 index 2b204f96..00000000 --- a/src/jmh/kotlin/cli/ProjectParserDotBenchmarks.kt +++ /dev/null @@ -1,37 +0,0 @@ -package cli - -import org.openjdk.jmh.annotations.* -import astminer.cli.* - -@State(Scope.Benchmark) -open class ProjectParserDotBenchmarks { - - @Setup - fun pathsSetup() { - BenchmarksSetup().setup() - } - - @Benchmark - fun simpleProject() { - val args = listOf("--project", BenchmarksSetup().simpleProjectPath, - "--output", BenchmarksSetup().simpleProjectResultsPath, - "--storage", "dot") - ProjectParser().main(args) - } - - @Benchmark - fun longFileProject() { - val args = listOf("--project", BenchmarksSetup().longFilePath, - "--output", BenchmarksSetup().longFileResultsPath, - "--storage", "dot") - ProjectParser().main(args) - } - - @Benchmark - fun bigProject() { - val args = listOf("--project", BenchmarksSetup().bigProjectPath, - "--output", BenchmarksSetup().bigProjectResultsPath, - "--storage", "dot") - ProjectParser().main(args) - } -} \ No newline at end of file diff --git a/src/jmh/resources/LongJavaFile.java b/src/jmh/resources/LongJavaFile.java deleted file mode 100644 index f5308edb..00000000 --- a/src/jmh/resources/LongJavaFile.java +++ /dev/null @@ -1,5110 +0,0 @@ -import java.lang.reflect.Array; -import java.math.BigInteger; -import java.util.*; -import 
java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.IntStream; - -import static java.util.stream.Collectors.toList; -import static java.util.stream.Collectors.toMap; -/** - CODE FROM https://github.com/TheAlgorithms/Java - USES ONLY FOR BENCHMARK - */ -public class LongJavaFileProject { - - static boolean swap(T[] array, int idx, int idy) { - T swap = array[idx]; - array[idx] = array[idy]; - array[idy] = swap; - return true; - } - - static > boolean less(T v, T w) { - return v.compareTo(w) < 0; - } - - static void print(List toPrint) { - toPrint.stream() - .map(Object::toString) - .map(str -> str + " ") - .forEach(System.out::print); - - System.out.println(); - } - - static void print(Object[] toPrint) { - System.out.println(Arrays.toString(toPrint)); - } - - static > void flip(T[] array, int left, int right) { - while (left <= right) { - swap(array, left++, right--); - } - } - private static final Random random = new Random(); - - - private static class BogoSort { - - private static final Random random = new Random(); - - - private static > boolean isSorted(T[] array) { - for (int i = 0; i < array.length - 1; i++) { - if (less(array[i + 1], array[i])) return false; - } - return true; - } - - // Randomly shuffles the array - private static void nextPermutation(T[] array) { - int length = array.length; - - for (int i = 0; i < array.length; i++) { - int randomIndex = i + random.nextInt(length - i); - swap(array, randomIndex, i); - } - } - - public > T[] sort(T[] array) { - while (!isSorted(array)) { - nextPermutation(array); - } - return array; - } - - // Driver Program - public static void main(String[] args) { - // Integer Input - Integer[] integers = {4, 23, 6, 78, 1, 54, 231, 9, 12}; - - BogoSort bogoSort = new BogoSort(); - - // print a sorted array - print(bogoSort.sort(integers)); - - // String Input - String[] strings = {"c", "a", "e", "b", "d"}; - - print(bogoSort.sort(strings)); - } - } - - class BubbleSort { - - public > T[] sort(T[] array) 
{ - for (int i = 0, size = array.length; i < size - 1; ++i) { - boolean swapped = false; - for (int j = 0; j < size - 1 - i; ++j) { - if (less(array[j], array[j + 1])) { - swap(array, j, j + 1); - swapped = true; - } - } - if (!swapped) { - break; - } - } - return array; - } - } - - class CocktailShakerSort { - - public > T[] sort(T[] array) { - - int length = array.length; - int left = 0; - int right = length - 1; - int swappedLeft, swappedRight; - while (left < right) { - // front - swappedRight = 0; - for (int i = left; i < right; i++) { - if (less(array[i + 1], array[i])) { - swap(array, i, i + 1); - swappedRight = i; - } - } - // back - right = swappedRight; - swappedLeft = length - 1; - for (int j = right; j > left; j--) { - if (less(array[j], array[j - 1])) { - swap(array, j - 1, j); - swappedLeft = j; - } - } - left = swappedLeft; - } - return array; - - } - } - - class CombSort { - - // To find gap between elements - private int nextGap(int gap) { - // Shrink gap by Shrink factor - gap = (gap * 10) / 13; - return (gap < 1) ? 
1 : gap; - } - - public > T[] sort(T[] arr) { - int size = arr.length; - - // initialize gap - int gap = size; - - // Initialize swapped as true to make sure that loop runs - boolean swapped = true; - - // Keep running while gap is more than 1 and last iteration caused a swap - while (gap != 1 || swapped) { - // Find next gap - gap = nextGap(gap); - - // Initialize swapped as false so that we can check if swap happened or not - swapped = false; - - // Compare all elements with current gap - for (int i = 0; i < size - gap; i++) { - if (less(arr[i + gap], arr[i])) { - // Swap arr[i] and arr[i+gap] - swapped = swap(arr, i, i + gap); - } - } - } - return arr; - } - } - - public > T[] sort(T[] unsorted) { - return sort(Arrays.asList(unsorted)).toArray(unsorted); - } - - public > List sort(List list) { - - Map frequency = new TreeMap<>(); - // The final output array - List sortedArray = new ArrayList<>(list.size()); - - // Counting the frequency of @param array elements - list.forEach(v -> frequency.put(v, frequency.getOrDefault(v, 0) + 1)); - - // Filling the sortedArray - for (Map.Entry element : frequency.entrySet()) { - for (int j = 0; j < element.getValue(); j++) { - sortedArray.add(element.getKey()); - } - } - - return sortedArray; - } - - private static > List streamSort(List list) { - return list.stream() - .collect(toMap(k -> k, v -> 1, (v1, v2) -> v1 + v2, TreeMap::new)) - .entrySet() - .stream() - .flatMap(entry -> IntStream.rangeClosed(1, entry.getValue()).mapToObj(t -> entry.getKey())) - .collect(toList()); - } - - class CycleSort { - - public > T[] sort(T[] arr) { - int n = arr.length; - - // traverse array elements - for (int j = 0; j <= n - 2; j++) { - // initialize item as starting point - T item = arr[j]; - - // Find position where we put the item. 
- int pos = j; - for (int i = j + 1; i < n; i++) - if (less(arr[i], item)) pos++; - - // If item is already in correct position - if (pos == j) continue; - - // ignore all duplicate elements - while (item.compareTo(arr[pos]) == 0) - pos += 1; - - // put the item to it's right position - if (pos != j) { - item = replace(arr, pos, item); - } - - // Rotate rest of the cycle - while (pos != j) { - pos = j; - - // Find position where we put the element - for (int i = j + 1; i < n; i++) - if (less(arr[i], item)) { - pos += 1; - } - - - // ignore all duplicate elements - while (item.compareTo(arr[pos]) == 0) - pos += 1; - - // put the item to it's right position - if (item != arr[pos]) { - item = replace(arr, pos, item); - } - } - } - - return arr; - } - - private > T replace(T[] arr, int pos, T item) { - T temp = item; - item = arr[pos]; - arr[pos] = temp; - return item; - } - } - - class GnomeSort { - - public > T[] sort(T[] arr) { - int i = 1; - int j = 2; - while (i < arr.length) { - if (less(arr[i - 1], arr[i])) i = j++; - else { - swap(arr, i - 1, i); - if (--i == 0) { - i = j++; - } - } - } - - return null; - } - } - - public class HeapSort { - - - private class Heap> { - - private T[] heap; - - public Heap(T[] heap) { - this.heap = heap; - } - - private void heapSubtree(int rootIndex, int lastChild) { - int leftIndex = rootIndex * 2 + 1; - int rightIndex = rootIndex * 2 + 2; - T root = heap[rootIndex]; - if (rightIndex <= lastChild) { // if has right and left children - T left = heap[leftIndex]; - T right = heap[rightIndex]; - if (less(left, right) && less(left, root)) { - swap(heap, leftIndex, rootIndex); - heapSubtree(leftIndex, lastChild); - } else if (less(right, root)) { - swap(heap, rightIndex, rootIndex); - heapSubtree(rightIndex, lastChild); - } - } else if (leftIndex <= lastChild) { // if no right child, but has left child - T left = heap[leftIndex]; - if (less(left, root)) { - swap(heap, leftIndex, rootIndex); - heapSubtree(leftIndex, lastChild); - } - } 
- } - - - private void makeMinHeap(int root) { - int leftIndex = root * 2 + 1; - int rightIndex = root * 2 + 2; - boolean hasLeftChild = leftIndex < heap.length; - boolean hasRightChild = rightIndex < heap.length; - if (hasRightChild) { //if has left and right - makeMinHeap(leftIndex); - makeMinHeap(rightIndex); - heapSubtree(root, heap.length - 1); - } else if (hasLeftChild) { - heapSubtree(root, heap.length - 1); - } - } - - private T getRoot(int size) { - swap(heap, 0, size); - heapSubtree(0, size - 1); - return heap[size]; // return old root - } - - - } - - public > T[] sort(T[] unsorted) { - return sort(Arrays.asList(unsorted)).toArray(unsorted); - } - - public > List sort(List unsorted) { - int size = unsorted.size(); - - @SuppressWarnings("unchecked") - Heap heap = new Heap<>(unsorted.toArray((T[]) new Comparable[unsorted.size()])); - - heap.makeMinHeap(0); // make min heap using index 0 as root. - List sorted = new ArrayList<>(size); - while (size > 0) { - T min = heap.getRoot(--size); - sorted.add(min); - } - - return sorted; - } - } - - class MergeSort { - - public > T[] sort(T[] unsorted) { - doSort(unsorted, 0, unsorted.length - 1); - return unsorted; - } - - private > void doSort(T[] arr, int left, int right) { - if (left < right) { - int mid = left + (right - left) / 2; - doSort(arr, left, mid); - doSort(arr, mid + 1, right); - merge(arr, left, mid, right); - } - - } - - private > void merge(T[] arr, int left, int mid, int right) { - int length = right - left + 1; - T[] temp = (T[]) new Comparable[length]; - int i = left; - int j = mid + 1; - int k = 0; - - while (i <= mid && j <= right) { - if (arr[i].compareTo(arr[j]) <= 0) { - temp[k++] = arr[i++]; - } else { - temp[k++] = arr[j++]; - } - } - - while (i <= mid) { - temp[k++] = arr[i++]; - } - - while (j <= right) { - temp[k++] = arr[j++]; - } - - System.arraycopy(temp, 0, arr, left, length); - } - } - - class QuickSort { - - public > T[] sort(T[] array) { - doSort(array, 0, array.length - 1); - 
return array; - } - - private > void doSort(T[] array, int left, int right) { - if (left < right) { - int pivot = randomPartition(array, left, right); - doSort(array, left, pivot - 1); - doSort(array, pivot, right); - } - } - - private > int randomPartition(T[] array, int left, int right) { - int randomIndex = left + (int) (Math.random() * (right - left + 1)); - swap(array, randomIndex, right); - return partition(array, left, right); - } - - private > int partition(T[] array, int left, int right) { - int mid = (left + right) / 2; - T pivot = array[mid]; - - while (left <= right) { - while (less(array[left], pivot)) { - ++left; - } - while (less(pivot, array[right])) { - --right; - } - if (left <= right) { - swap(array, left, right); - ++left; - --right; - } - } - return left; - } - } - - class RadixSort { - - private int getMax(int[] arr, int n) { - int mx = arr[0]; - for (int i = 1; i < n; i++) - if (arr[i] > mx) - mx = arr[i]; - return mx; - } - - private void countSort(int[] arr, int n, int exp) { - int[] output = new int[n]; - int i; - int[] count = new int[10]; - Arrays.fill(count, 0); - - for (i = 0; i < n; i++) - count[(arr[i] / exp) % 10]++; - - for (i = 1; i < 10; i++) - count[i] += count[i - 1]; - - for (i = n - 1; i >= 0; i--) { - output[count[(arr[i] / exp) % 10] - 1] = arr[i]; - count[(arr[i] / exp) % 10]--; - } - - for (i = 0; i < n; i++) - arr[i] = output[i]; - } - - private void radixsort(int[] arr, int n) { - - int m = getMax(arr, n); - - - for (int exp = 1; m / exp > 0; exp *= 10) - countSort(arr, n, exp); - } - - - void print(int[] arr, int n) { - for (int i = 0; i < n; i++) - System.out.print(arr[i] + " "); - } - } - - public class AnyBaseToAnyBase { - - static final int MINIMUM_BASE = 2; - static final int MAXIMUM_BASE = 36; - - public void main(String[] args) { - Scanner in = new Scanner(System.in); - String n; - int b1, b2; - while (true) { - try { - System.out.print("Enter number: "); - n = in.next(); - System.out.print("Enter beginning base 
(between " + MINIMUM_BASE + " and " + MAXIMUM_BASE + "): "); - b1 = in.nextInt(); - if (b1 > MAXIMUM_BASE || b1 < MINIMUM_BASE) { - System.out.println("Invalid base!"); - continue; - } - if (!validForBase(n, b1)) { - System.out.println("The number is invalid for this base!"); - continue; - } - System.out.print("Enter end base (between " + MINIMUM_BASE + " and " + MAXIMUM_BASE + "): "); - b2 = in.nextInt(); - if (b2 > MAXIMUM_BASE || b2 < MINIMUM_BASE) { - System.out.println("Invalid base!"); - continue; - } - break; - } catch (InputMismatchException e) { - System.out.println("Invalid input."); - in.next(); - } - } - System.out.println(base2base(n, b1, b2)); - in.close(); - } - - public boolean validForBase(String n, int base) { - char[] validDigits = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', - 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', - 'W', 'X', 'Y', 'Z'}; - // digitsForBase contains all the valid digits for the base given - char[] digitsForBase = Arrays.copyOfRange(validDigits, 0, base); - - // Convert character array into set for convenience of contains() method - HashSet digitsList = new HashSet<>(); - for (int i = 0; i < digitsForBase.length; i++) - digitsList.add(digitsForBase[i]); - - // Check that every digit in n is within the list of valid digits for that base. 
- for (char c : n.toCharArray()) - if (!digitsList.contains(c)) - return false; - - return true; - } - - public String base2base(String n, int b1, int b2) { - int decimalValue = 0, charB2; - char charB1; - String output = ""; - // Go through every character of n - for (int i = 0; i < n.length(); i++) { - // store the character in charB1 - charB1 = n.charAt(i); - // if it is a non-number, convert it to a decimal value >9 and store it in charB2 - if (charB1 >= 'A' && charB1 <= 'Z') - charB2 = 10 + (charB1 - 'A'); - // Else, store the integer value in charB2 - else - charB2 = charB1 - '0'; - // Convert the digit to decimal and add it to the - // decimalValue of n - decimalValue = decimalValue * b1 + charB2; - } - while (decimalValue != 0) { - // If the remainder is a digit < 10, simply add it to - // the left side of the new number. - if (decimalValue % b2 < 10) - output = Integer.toString(decimalValue % b2) + output; - else - output = (char) ((decimalValue % b2) + 55) + output; - // Divide by the new base again - decimalValue /= b2; - } - return output; - } - } - - public class RomanToInteger { - - private Map map = new HashMap() { - /** - * - */ - private final long serialVersionUID = 87605733047260530L; - - { - put('I', 1); - put('V', 5); - put('X', 10); - put('L', 50); - put('C', 100); - put('D', 500); - put('M', 1000); - } - }; - - public int romanToInt(String A) { - - char prev = ' '; - - int sum = 0; - - int newPrev = 0; - for (int i = A.length() - 1; i >= 0; i--) { - char c = A.charAt(i); - - if (prev != ' ') { - // checking current Number greater then previous or not - newPrev = map.get(prev) > newPrev ? 
map.get(prev) : newPrev; - } - - int currentNum = map.get(c); - - // if current number greater then prev max previous then add - if (currentNum >= newPrev) { - sum += currentNum; - } else { - // subtract upcoming number until upcoming number not greater then prev max - sum -= currentNum; - } - - prev = c; - } - - return sum; - } - } - - public class Bag implements Iterable { - - private Node firstElement; // first element of the bag - private int size; // size of bag - - private class Node { - private Element content; - private Node nextElement; - } - - /** - * Create an empty bag - */ - public Bag() { - firstElement = null; - size = 0; - } - - /** - * @return true if this bag is empty, false otherwise - */ - public boolean isEmpty() { - return firstElement == null; - } - - /** - * @return the number of elements - */ - public int size() { - return size; - } - - /** - * @param element - the element to add - */ - public void add(Element element) { - Node oldfirst = firstElement; - firstElement = new Node<>(); - firstElement.content = element; - firstElement.nextElement = oldfirst; - size++; - } - - /** - * Checks if the bag contains a specific element - * - * @param element which you want to look for - * @return true if bag contains element, otherwise false - */ - public boolean contains(Element element) { - Iterator iterator = this.iterator(); - while (iterator.hasNext()) { - if (iterator.next().equals(element)) { - return true; - } - } - return false; - } - - /** - * @return an iterator that iterates over the elements in this bag in arbitrary order - */ - public Iterator iterator() { - return new ListIterator<>(firstElement); - } - - @SuppressWarnings("hiding") - private class ListIterator implements Iterator { - private Node currentElement; - - public ListIterator(Node firstElement) { - currentElement = firstElement; - } - - public boolean hasNext() { - return currentElement != null; - } - - /** - * remove is not allowed in a bag - */ - @Override - public void 
remove() { - throw new UnsupportedOperationException(); - } - - public Element next() { - if (!hasNext()) - throw new NoSuchElementException(); - Element element = currentElement.content; - currentElement = currentElement.nextElement; - return element; - } - } - } - - public class CircularBuffer { - private char[] _buffer; - public final int _buffer_size; - private int _write_index = 0; - private int _read_index = 0; - private AtomicInteger _readable_data = new AtomicInteger(0); - - public CircularBuffer(int buffer_size) { - if (!IsPowerOfTwo(buffer_size)) { - throw new IllegalArgumentException(); - } - this._buffer_size = buffer_size; - _buffer = new char[buffer_size]; - } - - private boolean IsPowerOfTwo(int i) { - return (i & (i - 1)) == 0; - } - - private int getTrueIndex(int i) { - return i % _buffer_size; - } - - - public Character readOutChar() { - Character result = null; - - - //if we have data to read - if (_readable_data.get() > 0) { - - result = Character.valueOf(_buffer[getTrueIndex(_read_index)]); - _readable_data.decrementAndGet(); - _read_index++; - } - - return result; - } - - public boolean writeToCharBuffer(char c) { - boolean result = false; - - //if we can write to the buffer - if (_readable_data.get() < _buffer_size) { - //write to buffer - _buffer[getTrueIndex(_write_index)] = c; - _readable_data.incrementAndGet(); - _write_index++; - result = true; - } - - return result; - } - - private class TestWriteWorker implements Runnable { - String _alphabet = "abcdefghijklmnopqrstuvwxyz0123456789"; - Random _random = new Random(); - CircularBuffer _buffer; - - public TestWriteWorker(CircularBuffer cb) { - this._buffer = cb; - } - - private char getRandomChar() { - return _alphabet.charAt(_random.nextInt(_alphabet.length())); - } - - public void run() { - while (!Thread.interrupted()) { - if (!_buffer.writeToCharBuffer(getRandomChar())) { - Thread.yield(); - try { - Thread.sleep(10); - } catch (InterruptedException e) { - return; - } - } - } - } - } - 
- private class TestReadWorker implements Runnable { - CircularBuffer _buffer; - - public TestReadWorker(CircularBuffer cb) { - this._buffer = cb; - } - - public void run() { - System.out.println("Printing Buffer:"); - while (!Thread.interrupted()) { - Character c = _buffer.readOutChar(); - if (c != null) { - System.out.print(c.charValue()); - } else { - Thread.yield(); - try { - Thread.sleep(10); - } catch (InterruptedException e) { - System.out.println(); - return; - } - } - } - } - } - } - - class BellmanFord { - int vertex,edge; - private Edge edges[]; - private int index=0; - BellmanFord(int v,int e) - { - vertex=v; - edge=e; - edges=new Edge[e]; - } - class Edge - { - int u,v; - int w; - Edge(int a,int b,int c) - { - u=a; - v=b; - w=c; - } - } - - void printPath(int p[],int i) - { - if(p[i]==-1)//Found the path back to parent - return; - printPath(p,p[i]); - System.out.print(i+" "); - } - public void main(String args[]) - { - BellmanFord obj=new BellmanFord(0,0);//Dummy object to call nonstatic variables - obj.go(); - } - public void go()//Interactive run for understanding the class first time. Assumes source vertex is 0 and shows distaance to all vertices - { - Scanner sc=new Scanner(System.in);//Grab scanner object for user input - int i,v,e,u,ve,w,j,neg=0; - System.out.println("Enter no. 
of vertices and edges please"); - v=sc.nextInt(); - e=sc.nextInt(); - Edge arr[]=new Edge[e];//Array of edges - System.out.println("Input edges"); - for(i=0;idist[arr[j].u]+arr[j].w) - { - dist[arr[j].v]=dist[arr[j].u]+arr[j].w;//Update - p[arr[j].v]=arr[j].u; - } - } - } - //Final cycle for negative checking - for(j=0;jdist[arr[j].u]+arr[j].w) - { - neg=1; - System.out.println("Negative cycle"); - break; - } - if(neg==0)//Go ahead and show results of computaion - { - System.out.println("Distances are: "); - for(i=0;idist[arr[j].u]+arr[j].w) - { - dist[arr[j].v]=dist[arr[j].u]+arr[j].w;//Update - p[arr[j].v]=arr[j].u; - } - } - } - //Final cycle for negative checking - for(j=0;jdist[arr[j].u]+arr[j].w) - { - neg=1; - System.out.println("Negative cycle"); - break; - } - if(neg==0)//Go ahead and show results of computaion - { - System.out.println("Distance is: "+dist[end]); - System.out.println("Path followed:"); - System.out.print(source+" "); - printPath(p,end); - System.out.println(); - } - } - /** - *@param x Source Vertex - * @param y End vertex - * @param z Weight - */ - public void addEdge(int x,int y,int z)//Adds unidirectionl Edge - { - edges[index++]=new Edge(x,y,z); - } - public Edge[] getEdgeArray() - { - return edges; - } - } - - class Graph> { - - class Node { - E name; - - public Node(E name) { - this.name = name; - } - } - - class Edge { - Node startNode, endNode; - - public Edge(Node startNode, Node endNode) { - this.startNode = startNode; - this.endNode = endNode; - } - } - - ArrayList edgeList; - ArrayList nodeList; - - public Graph() { - edgeList = new ArrayList(); - nodeList = new ArrayList(); - } - - public void addEdge(E startNode, E endNode) { - Node start = null, end = null; - for (Node node : nodeList) { - if (startNode.compareTo(node.name) == 0) { - start = node; - } else if (endNode.compareTo(node.name) == 0) { - end = node; - } - } - if (start == null) { - start = new Node(startNode); - nodeList.add(start); - } - if (end == null) { - end 
= new Node(endNode); - nodeList.add(end); - } - - edgeList.add(new Edge(start, end)); - } - - public int countGraphs() { - int count = 0; - Set markedNodes = new HashSet(); - - for (Node n : nodeList) { - if (!markedNodes.contains(n)) { - markedNodes.add(n); - markedNodes.addAll(depthFirstSearch(n, new ArrayList())); - count++; - } - } - - return count; - } - - public ArrayList depthFirstSearch(Node n, ArrayList visited) { - visited.add(n); - for (Edge e : edgeList) { - if (e.startNode.equals(n) && !visited.contains(e.endNode)) { - depthFirstSearch(e.endNode, visited); - } - } - return visited; - } - } - - class Cycle { - - private int nodes, edges; - private int[][] adjacencyMatrix; - private boolean[] visited; - ArrayList> cycles = new ArrayList>(); - - - public Cycle() { - Scanner in = new Scanner(System.in); - System.out.print("Enter the no. of nodes: "); - nodes = in.nextInt(); - System.out.print("Enter the no. of Edges: "); - edges = in.nextInt(); - - adjacencyMatrix = new int[nodes][nodes]; - visited = new boolean[nodes]; - - for (int i = 0; i < nodes; i++) { - visited[i] = false; - } - - System.out.println("Enter the details of each edges "); - - for (int i = 0; i < edges; i++) { - int start, end; - start = in.nextInt(); - end = in.nextInt(); - adjacencyMatrix[start][end] = 1; - } - in.close(); - - } - - public void start() { - for (int i = 0; i < nodes; i++) { - ArrayList temp = new ArrayList<>(); - dfs(i, i, temp); - for (int j = 0; j < nodes; j++) { - adjacencyMatrix[i][j] = 0; - adjacencyMatrix[j][i] = 0; - } - } - } - - private void dfs(Integer start, Integer curr, ArrayList temp) { - temp.add(curr); - visited[curr] = true; - for (int i = 0; i < nodes; i++) { - if (adjacencyMatrix[curr][i] == 1) { - if (i == start) { - cycles.add(new ArrayList(temp)); - } else { - if (!visited[i]) { - dfs(start, i, temp); - } - } - } - } - - if (temp.size() > 0) { - temp.remove(temp.size() - 1); - } - visited[curr] = false; - } - - public void printAll() { - for (int i 
= 0; i < cycles.size(); i++) { - for (int j = 0; j < cycles.get(i).size(); j++) { - System.out.print(cycles.get(i).get(j) + " -> "); - } - System.out.println(cycles.get(i).get(0)); - System.out.println(); - } - - } - - } - - public class Cycles { - public void main(String[] args) { - Cycle c = new Cycle(); - c.start(); - c.printAll(); - } - } - - public class MatrixGraphs { - - public void main(String args[]) { - AdjacencyMatrixGraph graph = new AdjacencyMatrixGraph(10); - graph.addEdge(1, 2); - graph.addEdge(1, 5); - graph.addEdge(2, 5); - graph.addEdge(1, 2); - graph.addEdge(2, 3); - graph.addEdge(3, 4); - graph.addEdge(4, 1); - graph.addEdge(2, 3); - System.out.println(graph); - } - - } - - class AdjacencyMatrixGraph { - private int _numberOfVertices; - private int _numberOfEdges; - private int[][] _adjacency; - - static final int EDGE_EXIST = 1; - static final int EDGE_NONE = 0; - - public AdjacencyMatrixGraph(int givenNumberOfVertices) { - this.setNumberOfVertices(givenNumberOfVertices); - this.setNumberOfEdges(0); - this.setAdjacency(new int[givenNumberOfVertices][givenNumberOfVertices]); - for (int i = 0; i < givenNumberOfVertices; i++) { - for (int j = 0; j < givenNumberOfVertices; j++) { - this.adjacency()[i][j] = AdjacencyMatrixGraph.EDGE_NONE; - } - } - } - - private void setNumberOfVertices(int newNumberOfVertices) { - this._numberOfVertices = newNumberOfVertices; - } - - public int numberOfVertices() { - return this._numberOfVertices; - } - - private void setNumberOfEdges(int newNumberOfEdges) { - this._numberOfEdges = newNumberOfEdges; - } - - public int numberOfEdges() { - return this._numberOfEdges; - } - - private void setAdjacency(int[][] newAdjacency) { - this._adjacency = newAdjacency; - } - - private int[][] adjacency() { - return this._adjacency; - } - - private boolean adjacencyOfEdgeDoesExist(int from, int to) { - return (this.adjacency()[from][to] != AdjacencyMatrixGraph.EDGE_NONE); - } - - public boolean vertexDoesExist(int aVertex) { - if 
(aVertex >= 0 && aVertex < this.numberOfVertices()) { - return true; - } else { - return false; - } - } - - public boolean edgeDoesExist(int from, int to) { - if (this.vertexDoesExist(from) && this.vertexDoesExist(to)) { - return (this.adjacencyOfEdgeDoesExist(from, to)); - } - - return false; - } - - public boolean addEdge(int from, int to) { - if (this.vertexDoesExist(from) && this.vertexDoesExist(to)) { - if (!this.adjacencyOfEdgeDoesExist(from, to)) { - this.adjacency()[from][to] = AdjacencyMatrixGraph.EDGE_EXIST; - this.adjacency()[to][from] = AdjacencyMatrixGraph.EDGE_EXIST; - this.setNumberOfEdges(this.numberOfEdges() + 1); - return true; - } - } - - return false; - } - - public boolean removeEdge(int from, int to) { - if (!this.vertexDoesExist(from) || !this.vertexDoesExist(to)) { - if (this.adjacencyOfEdgeDoesExist(from, to)) { - this.adjacency()[from][to] = AdjacencyMatrixGraph.EDGE_NONE; - this.adjacency()[to][from] = AdjacencyMatrixGraph.EDGE_NONE; - this.setNumberOfEdges(this.numberOfEdges() - 1); - return true; - } - } - return false; - } - - public String toString() { - String s = new String(); - s = " "; - for (int i = 0; i < this.numberOfVertices(); i++) { - s = s + String.valueOf(i) + " "; - } - s = s + " \n"; - - for (int i = 0; i < this.numberOfVertices(); i++) { - s = s + String.valueOf(i) + " : "; - for (int j = 0; j < this.numberOfVertices(); j++) { - s = s + String.valueOf(this._adjacency[i][j]) + " "; - } - s = s + "\n"; - } - return s; - } - } - - public class FordFulkerson { - final static int INF = 987654321; - // edges - int V; - int[][] capacity, flow; - - public void main(String[] args) { - System.out.println("V : 6"); - V = 6; - capacity = new int[V][V]; - - capacity[0][1] = 12; - capacity[0][3] = 13; - capacity[1][2] = 10; - capacity[2][3] = 13; - capacity[2][4] = 3; - capacity[2][5] = 15; - capacity[3][2] = 7; - capacity[3][4] = 15; - capacity[4][5] = 17; - - System.out.println("Max capacity in networkFlow : " + networkFlow(0, 5)); 
- } - - private int networkFlow(int source, int sink) { - flow = new int[V][V]; - int totalFlow = 0; - while (true) { - Vector parent = new Vector<>(V); - for (int i = 0; i < V; i++) - parent.add(-1); - Queue q = new LinkedList<>(); - parent.set(source, source); - q.add(source); - while (!q.isEmpty() && parent.get(sink) == -1) { - int here = q.peek(); - q.poll(); - for (int there = 0; there < V; ++there) - if (capacity[here][there] - flow[here][there] > 0 && parent.get(there) == -1) { - q.add(there); - parent.set(there, here); - } - } - if (parent.get(sink) == -1) - break; - - int amount = INF; - String printer = "path : "; - StringBuilder sb = new StringBuilder(); - for (int p = sink; p != source; p = parent.get(p)) { - amount = Math.min(capacity[parent.get(p)][p] - flow[parent.get(p)][p], amount); - sb.append(p + "-"); - } - sb.append(source); - for (int p = sink; p != source; p = parent.get(p)) { - flow[parent.get(p)][p] += amount; - flow[p][parent.get(p)] -= amount; - } - totalFlow += amount; - printer += sb.reverse() + " / max flow : " + totalFlow; - System.out.println(printer); - } - - return totalFlow; - } - } - - class LongestCommonSubsequence { - - public String getLCS(String str1, String str2) { - - //At least one string is null - if (str1 == null || str2 == null) - return null; - - //At least one string is empty - if (str1.length() == 0 || str2.length() == 0) - return ""; - - String[] arr1 = str1.split(""); - String[] arr2 = str2.split(""); - - //lcsMatrix[i][j] = LCS of first i elements of arr1 and first j characters of arr2 - int[][] lcsMatrix = new int[arr1.length + 1][arr2.length + 1]; - - for (int i = 0; i < arr1.length + 1; i++) - lcsMatrix[i][0] = 0; - for (int j = 1; j < arr2.length + 1; j++) - lcsMatrix[0][j] = 0; - for (int i = 1; i < arr1.length + 1; i++) { - for (int j = 1; j < arr2.length + 1; j++) { - if (arr1[i - 1].equals(arr2[j - 1])) { - lcsMatrix[i][j] = lcsMatrix[i - 1][j - 1] + 1; - } else { - lcsMatrix[i][j] = lcsMatrix[i - 1][j] > 
lcsMatrix[i][j - 1] ? lcsMatrix[i - 1][j] : lcsMatrix[i][j - 1]; - } - } - } - return lcsString(str1, str2, lcsMatrix); - } - - public String lcsString(String str1, String str2, int[][] lcsMatrix) { - StringBuilder lcs = new StringBuilder(); - int i = str1.length(), - j = str2.length(); - while (i > 0 && j > 0) { - if (str1.charAt(i - 1) == str2.charAt(j - 1)) { - lcs.append(str1.charAt(i - 1)); - i--; - j--; - } else if (lcsMatrix[i - 1][j] > lcsMatrix[i][j - 1]) { - i--; - } else { - j--; - } - } - return lcs.reverse().toString(); - } - - public void main(String[] args) { - String str1 = "DSGSHSRGSRHTRD"; - String str2 = "DATRGAGTSHS"; - String lcs = getLCS(str1, str2); - - //Print LCS - if (lcs != null) { - System.out.println("String 1: " + str1); - System.out.println("String 2: " + str2); - System.out.println("LCS: " + lcs); - System.out.println("LCS length: " + lcs.length()); - } - } - } - - public final class ClosestPair { - - - /** - * Number of points - */ - int numberPoints = 0; - /** - * Input data, maximum 10000. - */ - private Location[] array; - /** - * Minimum point coordinate. - */ - Location point1 = null; - /** - * Minimum point coordinate. - */ - Location point2 = null; - /** - * Minimum point length. 
- */ - private double minNum = Double.MAX_VALUE; - - private int secondCount = 0; - - ClosestPair(int points) { - numberPoints = points; - array = new Location[numberPoints]; - } - - public class Location { - - double x = 0; - double y = 0; - - Location(final double xpar, final double ypar) { //Save x, y coordinates - this.x = xpar; - this.y = ypar; - } - - } - - public Location[] createLocation(int numberValues) { - return new Location[numberValues]; - - } - - public Location buildLocation(double x, double y) { - return new Location(x, y); - } - - public int xPartition( - final Location[] a, final int first, final int last) { - - Location pivot = a[last]; // pivot - int pIndex = last; - int i = first - 1; - Location temp; // Temporarily store value for position transformation - for (int j = first; j <= last - 1; j++) { - if (a[j].x <= pivot.x) { // Less than or less than pivot - i++; - temp = a[i]; // array[i] <-> array[j] - a[i] = a[j]; - a[j] = temp; - } - } - i++; - temp = a[i]; // array[pivot] <-> array[i] - a[i] = a[pIndex]; - a[pIndex] = temp; - return i; // pivot index - } - - public int yPartition( - final Location[] a, final int first, final int last) { - - Location pivot = a[last]; // pivot - int pIndex = last; - int i = first - 1; - Location temp; // Temporarily store value for position transformation - for (int j = first; j <= last - 1; j++) { - if (a[j].y <= pivot.y) { // Less than or less than pivot - i++; - temp = a[i]; // array[i] <-> array[j] - a[i] = a[j]; - a[j] = temp; - } - } - i++; - temp = a[i]; // array[pivot] <-> array[i] - a[i] = a[pIndex]; - a[pIndex] = temp; - return i; // pivot index - } - - public void xQuickSort( - final Location[] a, final int first, final int last) { - - if (first < last) { - int q = xPartition(a, first, last); // pivot - xQuickSort(a, first, q - 1); // Left - xQuickSort(a, q + 1, last); // Right - } - } - - public void yQuickSort( - final Location[] a, final int first, final int last) { - - if (first < last) { - 
int q = yPartition(a, first, last); // pivot - yQuickSort(a, first, q - 1); // Left - yQuickSort(a, q + 1, last); // Right - } - } - public double closestPair(final Location[] a, final int indexNum) { - - Location[] divideArray = new Location[indexNum]; - System.arraycopy(a, 0, divideArray, 0, indexNum); // Copy previous array - int totalNum = indexNum; // number of coordinates in the divideArray - int divideX = indexNum / 2; // Intermediate value for divide - Location[] leftArray = new Location[divideX]; //divide - left array - //divide-right array - Location[] rightArray = new Location[totalNum - divideX]; - if (indexNum <= 3) { // If the number of coordinates is 3 or less - return bruteForce(divideArray); - } - //divide-left array - System.arraycopy(divideArray, 0, leftArray, 0, divideX); - //divide-right array - System.arraycopy( - divideArray, divideX, rightArray, 0, totalNum - divideX); - - double minLeftArea = 0; //Minimum length of left array - double minRightArea = 0; //Minimum length of right array - double minValue = 0; //Minimum lengt - - minLeftArea = closestPair(leftArray, divideX); // recursive closestPair - minRightArea = closestPair(rightArray, totalNum - divideX); - // window size (= minimum length) - minValue = Math.min(minLeftArea, minRightArea); - - // Create window. 
Set the size for creating a window - // and creating a new array for the coordinates in the window - for (int i = 0; i < totalNum; i++) { - double xGap = Math.abs(divideArray[divideX].x - divideArray[i].x); - if (xGap < minValue) { - secondCount++; // size of the array - } else { - if (divideArray[i].x > divideArray[divideX].x) { - break; - } - } - } - // new array for coordinates in window - Location[] firstWindow = new Location[secondCount]; - int k = 0; - for (int i = 0; i < totalNum; i++) { - double xGap = Math.abs(divideArray[divideX].x - divideArray[i].x); - if (xGap < minValue) { // if it's inside a window - firstWindow[k] = divideArray[i]; // put in an array - k++; - } else { - if (divideArray[i].x > divideArray[divideX].x) { - break; - } - } - } - yQuickSort(firstWindow, 0, secondCount - 1); // Sort by y coordinates - /* Coordinates in Window */ - double length = 0; - // size comparison within window - for (int i = 0; i < secondCount - 1; i++) { - for (int j = (i + 1); j < secondCount; j++) { - double xGap = Math.abs(firstWindow[i].x - firstWindow[j].x); - double yGap = Math.abs(firstWindow[i].y - firstWindow[j].y); - if (yGap < minValue) { - length = Math.sqrt(Math.pow(xGap, 2) + Math.pow(yGap, 2)); - // If measured distance is less than current min distance - if (length < minValue) { - // Change minimum distance to current distance - minValue = length; - // Conditional for registering final coordinate - if (length < minNum) { - minNum = length; - point1 = firstWindow[i]; - point2 = firstWindow[j]; - } - } - } else { - break; - } - } - } - secondCount = 0; - return minValue; - } - - public double bruteForce(final Location[] arrayParam) { - - double minValue = Double.MAX_VALUE; // minimum distance - double length = 0; - double xGap = 0; // Difference between x coordinates - double yGap = 0; // Difference between y coordinates - double result = 0; - - if (arrayParam.length == 2) { - // Difference between x coordinates - xGap = (arrayParam[0].x - 
arrayParam[1].x); - // Difference between y coordinates - yGap = (arrayParam[0].y - arrayParam[1].y); - // distance between coordinates - length = Math.sqrt(Math.pow(xGap, 2) + Math.pow(yGap, 2)); - // Conditional statement for registering final coordinate - if (length < minNum) { - minNum = length; - - } - point1 = arrayParam[0]; - point2 = arrayParam[1]; - result = length; - } - if (arrayParam.length == 3) { - for (int i = 0; i < arrayParam.length - 1; i++) { - for (int j = (i + 1); j < arrayParam.length; j++) { - // Difference between x coordinates - xGap = (arrayParam[i].x - arrayParam[j].x); - // Difference between y coordinates - yGap = (arrayParam[i].y - arrayParam[j].y); - // distance between coordinates - length = - Math.sqrt(Math.pow(xGap, 2) + Math.pow(yGap, 2)); - // If measured distance is less than current min distance - if (length < minValue) { - // Change minimum distance to current distance - minValue = length; - if (length < minNum) { - // Registering final coordinate - minNum = length; - point1 = arrayParam[i]; - point2 = arrayParam[j]; - } - } - } - } - result = minValue; - - } - return result; // If only one point returns 0. 
- } - - public void main(final String[] args) { - - //Input data consists of one x-coordinate and one y-coordinate - - ClosestPair cp = new ClosestPair(12); - cp.array[0] = cp.buildLocation(2, 3); - cp.array[1] = cp.buildLocation(2, 16); - cp.array[2] = cp.buildLocation(3, 9); - cp.array[3] = cp.buildLocation(6, 3); - cp.array[4] = cp.buildLocation(7, 7); - cp.array[5] = cp.buildLocation(19, 4); - cp.array[6] = cp.buildLocation(10, 11); - cp.array[7] = cp.buildLocation(15, 2); - cp.array[8] = cp.buildLocation(15, 19); - cp.array[9] = cp.buildLocation(16, 11); - cp.array[10] = cp.buildLocation(17, 13); - cp.array[11] = cp.buildLocation(9, 12); - - System.out.println("Input data"); - System.out.println("Number of points: " + cp.array.length); - for (int i = 0; i < cp.array.length; i++) { - System.out.println("x: " + cp.array[i].x + ", y: " + cp.array[i].y); - } - - cp.xQuickSort(cp.array, 0, cp.array.length - 1); // Sorting by x value - - double result; // minimum distance - - result = cp.closestPair(cp.array, cp.array.length); - System.out.println("Output Data"); - System.out.println("(" + cp.point1.x + ", " + cp.point1.y + ")"); - System.out.println("(" + cp.point2.x + ", " + cp.point2.y + ")"); - System.out.println("Minimum Distance : " + result); - - } - } - - public class SkylineAlgorithm { - private ArrayList points; - public SkylineAlgorithm() { - points = new ArrayList<>(); - } - - public ArrayList getPoints() { - return points; - } - - public ArrayList produceSubSkyLines(ArrayList list) { - - // part where function exits flashback - int size = list.size(); - if (size == 1) { - return list; - } else if (size == 2) { - if (list.get(0).dominates(list.get(1))) { - list.remove(1); - } else { - if (list.get(1).dominates(list.get(0))) { - list.remove(0); - } - } - return list; - } - - // recursive part of the function - ArrayList leftHalf = new ArrayList<>(); - ArrayList rightHalf = new ArrayList<>(); - for (int i = 0; i < list.size(); i++) { - if (i < list.size() 
/ 2) { - leftHalf.add(list.get(i)); - } else { - rightHalf.add(list.get(i)); - } - } - ArrayList leftSubSkyLine = produceSubSkyLines(leftHalf); - ArrayList rightSubSkyLine = produceSubSkyLines(rightHalf); - - // skyline is produced - return produceFinalSkyLine(leftSubSkyLine, rightSubSkyLine); - } - - public ArrayList produceFinalSkyLine(ArrayList left, ArrayList right) { - - // dominated points of ArrayList left are removed - for (int i = 0; i < left.size() - 1; i++) { - if (left.get(i).x == left.get(i + 1).x && left.get(i).y > left.get(i + 1).y) { - left.remove(i); - i--; - } - } - - // minimum y-value is found - int min = left.get(0).y; - for (int i = 1; i < left.size(); i++) { - if (min > left.get(i).y) { - min = left.get(i).y; - if (min == 1) { - i = left.size(); - } - } - } - - // dominated points of ArrayList right are removed - for (int i = 0; i < right.size(); i++) { - if (right.get(i).y >= min) { - right.remove(i); - i--; - } - } - - // final skyline found and returned - left.addAll(right); - return left; - } - - - public class Point { - private int x; - private int y; - - public Point(int x, int y) { - this.x = x; - this.y = y; - } - - /** - * @return x, the x-value - */ - public int getX() { - return x; - } - - /** - * @return y, the y-value - */ - public int getY() { - return y; - } - - public boolean dominates(Point p1) { - // checks if p1 is dominated - return (this.x < p1.x && this.y <= p1.y) || (this.x <= p1.x && this.y < p1.y); - } - } - - class XComparator implements Comparator { - @Override - public int compare(Point a, Point b) { - return Integer.compare(a.x, b.x); - } - } - } - - public class AES { - - private final int[] RCON = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, - 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, - 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, - 0xe8, 0xcb, 0x8d, 
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, - 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, - 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, - 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, - 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, - 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, - 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, - 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, - 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, - 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, - 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, - 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d }; - - private final int[] SBOX = { 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, - 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, - 0xC0, 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, 0x04, - 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, - 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED, 0x20, - 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, - 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 
0x9D, 0x38, 0xF5, 0xBC, - 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, - 0x3D, 0x64, 0x5D, 0x19, 0x73, 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, - 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, - 0x79, 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, 0xBA, - 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, - 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11, 0x69, - 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, - 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 }; - - private final int[] INVERSE_SBOX = { 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, - 0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, - 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E, - 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, - 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50, - 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84, 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, - 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, - 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, - 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73, 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, - 0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, - 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 
0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4, - 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, - 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D, - 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, - 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D }; - - private final int[] MULT2 = { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, - 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, - 0x3e, 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, 0x60, - 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, - 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, - 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, - 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, - 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, - 0x0d, 0x03, 0x01, 0x07, 0x05, 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, - 0x21, 0x27, 0x25, 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, - 0x45, 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, 0x9b, - 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, 0xbb, 0xb9, 0xbf, - 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, - 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 
0xf7, - 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 }; - - private final int[] MULT3 = { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, - 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, - 0x21, 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, 0x50, - 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, 0xc0, 0xc3, 0xc6, - 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, - 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, - 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, - 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, - 0x86, 0x8f, 0x8c, 0x89, 0x8a, 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, - 0xbc, 0xb9, 0xba, 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, - 0xea, 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, 0x5b, - 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, 0x6b, 0x68, 0x6d, - 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, 0x3b, 0x38, 0x3d, 0x3e, 0x37, - 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, - 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a }; - - private final int[] MULT9 = { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, - 0x65, 0x7e, 0x77, 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, - 0xe7, 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, 0xab, - 
0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, 0x76, 0x7f, 0x64, - 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, 0xe6, 0xef, 0xf4, 0xfd, 0xc2, - 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, - 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, - 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, - 0xbf, 0x80, 0x89, 0x92, 0x9b, 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, - 0x19, 0x02, 0x0b, 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, - 0xa0, 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, 0x9a, - 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, 0x0a, 0x03, 0x18, - 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, 0xa1, 0xa8, 0xb3, 0xba, 0x85, - 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, - 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46 }; - - private final int[] MULT11 = { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, - 0x7f, 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, - 0xd9, 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, 0xcb, - 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, 0xf6, 0xfd, 0xe0, - 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, 0x46, 0x4d, 0x50, 0x5b, 0x6a, - 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, - 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, 0x3d, 0x36, 
0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, - 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, - 0xb2, 0x83, 0x88, 0x95, 0x9e, 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, - 0x38, 0x25, 0x2e, 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, - 0xe5, 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, 0x01, - 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, 0xb1, 0xba, 0xa7, - 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, 0x7a, 0x71, 0x6c, 0x67, 0x56, - 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, - 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3 }; - - private final int[] MULT13 = { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, - 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, - 0x9b, 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, 0x6b, - 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, 0x6d, 0x60, 0x77, - 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, 0xbd, 0xb0, 0xa7, 0xaa, 0x89, - 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, - 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, - 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, - 0xa5, 0x86, 0x8b, 0x9c, 0x91, 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, - 0x5b, 0x4c, 0x41, 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, - 0x2a, 0xb1, 0xbc, 0xab, 
0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, 0xb7, - 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, 0x67, 0x6a, 0x7d, - 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0x0c, 0x01, 0x16, 0x1b, 0x38, - 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, - 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97 }; - - private final int[] MULT14 = { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, - 0x46, 0x54, 0x5a, 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, - 0xba, 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, 0x3b, - 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, 0xad, 0xa3, 0xb1, - 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, 0x4d, 0x43, 0x51, 0x5f, 0x75, - 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, - 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, - 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, - 0x23, 0x09, 0x07, 0x15, 0x1b, 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, - 0xe7, 0xf5, 0xfb, 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, - 0xc0, 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, 0xec, - 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, 0x0c, 0x02, 0x10, - 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, 0x37, 0x39, 0x2b, 0x25, 0x0f, - 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, 0xd7, 0xd9, 0xcb, 0xc5, 
0xef, 0xe1, 0xf3, - 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d }; - - public BigInteger scheduleCore(BigInteger t, int rconCounter) { - String rBytes = t.toString(16); - - // Add zero padding - while (rBytes.length() < 8) { - rBytes = "0" + rBytes; - } - - // rotate the first 16 bits to the back - String rotatingBytes = rBytes.substring(0, 2); - String fixedBytes = rBytes.substring(2); - - rBytes = fixedBytes + rotatingBytes; - - // apply S-Box to all 8-Bit Substrings - for (int i = 0; i < 4; i++) { - String currentByteBits = rBytes.substring(i * 2, (i + 1) * 2); - - int currentByte = Integer.parseInt(currentByteBits, 16); - currentByte = SBOX[currentByte]; - - // add the current RCON value to the first byte - if (i == 0) { - currentByte = currentByte ^ RCON[rconCounter]; - } - - currentByteBits = Integer.toHexString(currentByte); - - // Add zero padding - - while (currentByteBits.length() < 2) { - currentByteBits = '0' + currentByteBits; - } - - // replace bytes in original string - rBytes = rBytes.substring(0, i * 2) + currentByteBits + rBytes.substring((i + 1) * 2); - } - - // t = new BigInteger(rBytes, 16); - // return t; - return new BigInteger(rBytes, 16); - } - - public BigInteger[] keyExpansion(BigInteger initialKey) { - BigInteger[] roundKeys = { initialKey, new BigInteger("0"), new BigInteger("0"), new BigInteger("0"), - new BigInteger("0"), new BigInteger("0"), new BigInteger("0"), new BigInteger("0"), new BigInteger("0"), - new BigInteger("0"), new BigInteger("0"), }; - - // initialize rcon iteration - int rconCounter = 1; - - for (int i = 1; i < 11; i++) { - - // get the previous 32 bits the key - BigInteger t = roundKeys[i - 1].remainder(new BigInteger("100000000", 16)); - - // split previous key into 8-bit segments - BigInteger[] prevKey = { roundKeys[i - 1].remainder(new BigInteger("100000000", 16)), - roundKeys[i - 1].remainder(new BigInteger("10000000000000000", 16)) - .divide(new BigInteger("100000000", 16)), - roundKeys[i - 
1].remainder(new BigInteger("1000000000000000000000000", 16)) - .divide(new BigInteger("10000000000000000", 16)), - roundKeys[i - 1].divide(new BigInteger("1000000000000000000000000", 16)), }; - - // run schedule core - t = scheduleCore(t, rconCounter); - rconCounter += 1; - - // Calculate partial round key - BigInteger t0 = t.xor(prevKey[3]); - BigInteger t1 = t0.xor(prevKey[2]); - BigInteger t2 = t1.xor(prevKey[1]); - BigInteger t3 = t2.xor(prevKey[0]); - - // Join round key segments - t2 = t2.multiply(new BigInteger("100000000", 16)); - t1 = t1.multiply(new BigInteger("10000000000000000", 16)); - t0 = t0.multiply(new BigInteger("1000000000000000000000000", 16)); - roundKeys[i] = t0.add(t1).add(t2).add(t3); - - } - return roundKeys; - } - - public int[] splitBlockIntoCells(BigInteger block) { - - int[] cells = new int[16]; - String blockBits = block.toString(2); - - // Append leading 0 for full "128-bit" string - while (blockBits.length() < 128) { - blockBits = '0' + blockBits; - } - - // split 128 to 8 bit cells - for (int i = 0; i < cells.length; i++) { - String cellBits = blockBits.substring(8 * i, 8 * (i + 1)); - cells[i] = Integer.parseInt(cellBits, 2); - } - - return cells; - } - - public BigInteger mergeCellsIntoBlock(int[] cells) { - - String blockBits = ""; - for (int i = 0; i < 16; i++) { - String cellBits = Integer.toBinaryString(cells[i]); - - // Append leading 0 for full "8-bit" strings - while (cellBits.length() < 8) { - cellBits = '0' + cellBits; - } - - blockBits += cellBits; - } - - return new BigInteger(blockBits, 2); - } - - public BigInteger addRoundKey(BigInteger ciphertext, BigInteger key) { - return ciphertext.xor(key); - } - - public BigInteger subBytes(BigInteger ciphertext) { - - int[] cells = splitBlockIntoCells(ciphertext); - - for (int i = 0; i < 16; i++) { - cells[i] = SBOX[cells[i]]; - } - - return mergeCellsIntoBlock(cells); - } - - public BigInteger subBytesDec(BigInteger ciphertext) { - - int[] cells = 
splitBlockIntoCells(ciphertext); - - for (int i = 0; i < 16; i++) { - cells[i] = INVERSE_SBOX[cells[i]]; - } - - return mergeCellsIntoBlock(cells); - } - - public BigInteger shiftRows(BigInteger ciphertext) { - int[] cells = splitBlockIntoCells(ciphertext); - int[] output = new int[16]; - - // do nothing in the first row - output[0] = cells[0]; - output[4] = cells[4]; - output[8] = cells[8]; - output[12] = cells[12]; - - // shift the second row backwards by one cell - output[1] = cells[5]; - output[5] = cells[9]; - output[9] = cells[13]; - output[13] = cells[1]; - - // shift the third row backwards by two cell - output[2] = cells[10]; - output[6] = cells[14]; - output[10] = cells[2]; - output[14] = cells[6]; - - // shift the forth row backwards by tree cell - output[3] = cells[15]; - output[7] = cells[3]; - output[11] = cells[7]; - output[15] = cells[11]; - - return mergeCellsIntoBlock(output); - } - - public BigInteger shiftRowsDec(BigInteger ciphertext) { - int[] cells = splitBlockIntoCells(ciphertext); - int[] output = new int[16]; - - // do nothing in the first row - output[0] = cells[0]; - output[4] = cells[4]; - output[8] = cells[8]; - output[12] = cells[12]; - - // shift the second row forwards by one cell - output[1] = cells[13]; - output[5] = cells[1]; - output[9] = cells[5]; - output[13] = cells[9]; - - // shift the third row forwards by two cell - output[2] = cells[10]; - output[6] = cells[14]; - output[10] = cells[2]; - output[14] = cells[6]; - - // shift the forth row forwards by tree cell - output[3] = cells[7]; - output[7] = cells[11]; - output[11] = cells[15]; - output[15] = cells[3]; - - return mergeCellsIntoBlock(output); - } - - public BigInteger mixColumns(BigInteger ciphertext) { - - int[] cells = splitBlockIntoCells(ciphertext); - int[] outputCells = new int[16]; - - for (int i = 0; i < 4; i++) { - int[] row = { cells[i * 4], cells[i * 4 + 1], cells[i * 4 + 2], cells[i * 4 + 3] }; - - outputCells[i * 4] = MULT2[row[0]] ^ MULT3[row[1]] ^ row[2] 
^ row[3]; - outputCells[i * 4 + 1] = row[0] ^ MULT2[row[1]] ^ MULT3[row[2]] ^ row[3]; - outputCells[i * 4 + 2] = row[0] ^ row[1] ^ MULT2[row[2]] ^ MULT3[row[3]]; - outputCells[i * 4 + 3] = MULT3[row[0]] ^ row[1] ^ row[2] ^ MULT2[row[3]]; - } - return mergeCellsIntoBlock(outputCells); - } - - public BigInteger mixColumnsDec(BigInteger ciphertext) { - - int[] cells = splitBlockIntoCells(ciphertext); - int[] outputCells = new int[16]; - - for (int i = 0; i < 4; i++) { - int[] row = { cells[i * 4], cells[i * 4 + 1], cells[i * 4 + 2], cells[i * 4 + 3] }; - - outputCells[i * 4] = MULT14[row[0]] ^ MULT11[row[1]] ^ MULT13[row[2]] ^ MULT9[row[3]]; - outputCells[i * 4 + 1] = MULT9[row[0]] ^ MULT14[row[1]] ^ MULT11[row[2]] ^ MULT13[row[3]]; - outputCells[i * 4 + 2] = MULT13[row[0]] ^ MULT9[row[1]] ^ MULT14[row[2]] ^ MULT11[row[3]]; - outputCells[i * 4 + 3] = MULT11[row[0]] ^ MULT13[row[1]] ^ MULT9[row[2]] ^ MULT14[row[3]]; - } - return mergeCellsIntoBlock(outputCells); - } - - public BigInteger encrypt(BigInteger plainText, BigInteger key) { - BigInteger[] roundKeys = keyExpansion(key); - - // Initial round - plainText = addRoundKey(plainText, roundKeys[0]); - - // Main rounds - for (int i = 1; i < 10; i++) { - plainText = subBytes(plainText); - plainText = shiftRows(plainText); - plainText = mixColumns(plainText); - plainText = addRoundKey(plainText, roundKeys[i]); - } - - // Final round - plainText = subBytes(plainText); - plainText = shiftRows(plainText); - plainText = addRoundKey(plainText, roundKeys[10]); - - return plainText; - } - - public BigInteger decrypt(BigInteger cipherText, BigInteger key) { - - BigInteger[] roundKeys = keyExpansion(key); - - // Invert final round - cipherText = addRoundKey(cipherText, roundKeys[10]); - cipherText = shiftRowsDec(cipherText); - cipherText = subBytesDec(cipherText); - - // Invert main rounds - for (int i = 9; i > 0; i--) { - cipherText = addRoundKey(cipherText, roundKeys[i]); - cipherText = mixColumnsDec(cipherText); - cipherText 
= shiftRowsDec(cipherText); - cipherText = subBytesDec(cipherText); - } - - // Invert initial round - cipherText = addRoundKey(cipherText, roundKeys[0]); - - return cipherText; - } - - public void main(String[] args) { - - try (Scanner input = new Scanner(System.in)) { - System.out.println("Enter (e) letter for encrpyt or (d) letter for decrypt :"); - char choice = input.nextLine().charAt(0); - String in; - switch (choice) { - case 'E': - case 'e': - System.out.println("Choose a plaintext block (128-Bit Integer in base 16):"); - in = input.nextLine(); - BigInteger plaintext = new BigInteger(in, 16); - System.out.println("Choose a Key (128-Bit Integer in base 16):"); - in = input.nextLine(); - BigInteger encryptionKey = new BigInteger(in, 16); - System.out.println("The encrypted message is: \n" + encrypt(plaintext, encryptionKey).toString(16)); - break; - case 'D': - case 'd': - System.out.println("Enter your ciphertext block (128-Bit Integer in base 16):"); - in = input.nextLine(); - BigInteger ciphertext = new BigInteger(in, 16); - System.out.println("Choose a Key (128-Bit Integer in base 16):"); - in = input.nextLine(); - BigInteger decryptionKey = new BigInteger(in, 16); - System.out.println("The deciphered message is:\n" + decrypt(ciphertext, decryptionKey).toString(16)); - break; - default: - System.out.println("** End **"); - } - } - } - } - - public static class ColumnarTranspositionCipher { - - private static String keyword; - private static Object[][] table; - private static String abecedarium; - public static final String ABECEDARIUM = "abcdefghijklmnopqrstuvwxyzABCDEFG" - + "HIJKLMNOPQRSTUVWXYZ0123456789,.;:-@"; - private static final String ENCRYPTION_FIELD = "≈"; - private static final char ENCRYPTION_FIELD_CHAR = '≈'; - - public static String encrpyter(String word, String keyword) { - ColumnarTranspositionCipher.keyword = keyword; - abecedariumBuilder(500); - table = tableBuilder(word); - Object[][] sortedTable = sortTable(table); - String 
wordEncrypted = ""; - for (int i = 0; i < sortedTable[i].length; i++) { - for (int j = 1; j < sortedTable.length; j++) { - wordEncrypted += sortedTable[j][i]; - } - } - return wordEncrypted; - } - - public static String encrpyter(String word, String keyword, String abecedarium) { - ColumnarTranspositionCipher.keyword = keyword; - if (abecedarium != null) { - ColumnarTranspositionCipher.abecedarium = abecedarium; - } else { - ColumnarTranspositionCipher.abecedarium = ABECEDARIUM; - } - table = tableBuilder(word); - Object[][] sortedTable = sortTable(table); - String wordEncrypted = ""; - for (int i = 0; i < sortedTable[0].length; i++) { - for (int j = 1; j < sortedTable.length; j++) { - wordEncrypted += sortedTable[j][i]; - } - } - return wordEncrypted; - } - - public static String decrypter() { - String wordDecrypted = ""; - for (int i = 1; i < table.length; i++) { - for (Object item : table[i]) { - wordDecrypted += item; - } - } - return wordDecrypted.replaceAll(ENCRYPTION_FIELD, ""); - } - - private static Object[][] tableBuilder(String word) { - Object[][] table = new Object[numberOfRows(word) + 1][keyword.length()]; - char[] wordInChards = word.toCharArray(); - //Fils in the respective numbers - table[0] = findElements(); - int charElement = 0; - for (int i = 1; i < table.length; i++) { - for (int j = 0; j < table[i].length; j++) { - if (charElement < wordInChards.length) { - table[i][j] = wordInChards[charElement]; - charElement++; - } else { - table[i][j] = ENCRYPTION_FIELD_CHAR; - } - } - } - return table; - } - - private static int numberOfRows(String word) { - if ((double) word.length() / keyword.length() > word.length() / keyword.length()) { - return (word.length() / keyword.length()) + 1; - } else { - return word.length() / keyword.length(); - } - } - - private static Object[] findElements() { - Object[] charValues = new Object[keyword.length()]; - for (int i = 0; i < charValues.length; i++) { - int charValueIndex = 
abecedarium.indexOf(keyword.charAt(i)); - charValues[i] = charValueIndex > -1 ? charValueIndex : null; - } - return charValues; - } - - private static Object[][] sortTable(Object[][] table) { - Object[][] tableSorted = new Object[table.length][table[0].length]; - for (int i = 0; i < tableSorted.length; i++) { - System.arraycopy(table[i], 0, tableSorted[i], 0, tableSorted[i].length); - } - for (int i = 0; i < tableSorted[0].length; i++) { - for (int j = i + 1; j < tableSorted[0].length; j++) { - if ((int) tableSorted[0][i] > (int) table[0][j]) { - Object[] column = getColumn(tableSorted, tableSorted.length, i); - switchColumns(tableSorted, j, i, column); - } - } - } - return tableSorted; - } - - private static Object[] getColumn(Object[][] table, int rows, int column) { - Object[] columnArray = new Object[rows]; - for (int i = 0; i < rows; i++) { - columnArray[i] = table[i][column]; - } - return columnArray; - } - - private static void switchColumns(Object[][] table, int firstColumnIndex, - int secondColumnIndex, Object[] columnToSwitch) { - for (int i = 0; i < table.length; i++) { - table[i][secondColumnIndex] = table[i][firstColumnIndex]; - table[i][firstColumnIndex] = columnToSwitch[i]; - } - } - - private static void abecedariumBuilder(int value) { - abecedarium = ""; - for (int i = 0; i < value; i++) { - abecedarium += (char) i; - } - } - - private static void showTable() { - for (Object[] table1 : table) { - for (Object item : table1) { - System.out.print(item + " "); - } - System.out.println(); - } - } - - public void main(String[] args) { - String keywordForExample = "asd215"; - String wordBeingEncrypted = "This is a test of the Columnar Transposition Cipher"; - System.out.println("### Example of Columnar Transposition Cipher ###\n"); - System.out.println("Word being encryped ->>> " + wordBeingEncrypted); - System.out.println("Word encrypted ->>> " + ColumnarTranspositionCipher - .encrpyter(wordBeingEncrypted, keywordForExample)); - System.out.println("Word 
decryped ->>> " + ColumnarTranspositionCipher - .decrypter()); - System.out.println("\n### Encrypted Table ###"); - showTable(); - } - } - - public final class ClosestPairs { - - - /** - * Number of points - */ - int numberPoints = 0; - /** - * Input data, maximum 10000. - */ - private Location[] array; - - Location point1 = null; - - Location point2 = null; - - private double minNum = Double.MAX_VALUE; - - private int secondCount = 0; - - ClosestPairs(int points) { - numberPoints = points; - array = new Location[numberPoints]; - } - - - public class Location { - - double x = 0; - double y = 0; - - - Location(final double xpar, final double ypar) { //Save x, y coordinates - this.x = xpar; - this.y = ypar; - } - - } - - public Location[] createLocation(int numberValues) { - return new Location[numberValues]; - - } - - public Location buildLocation(double x, double y) { - return new Location(x, y); - } - - public int xPartition( - final Location[] a, final int first, final int last) { - - Location pivot = a[last]; // pivot - int pIndex = last; - int i = first - 1; - Location temp; // Temporarily store value for position transformation - for (int j = first; j <= last - 1; j++) { - if (a[j].x <= pivot.x) { // Less than or less than pivot - i++; - temp = a[i]; // array[i] <-> array[j] - a[i] = a[j]; - a[j] = temp; - } - } - i++; - temp = a[i]; // array[pivot] <-> array[i] - a[i] = a[pIndex]; - a[pIndex] = temp; - return i; // pivot index - } - - public int yPartition( - final Location[] a, final int first, final int last) { - - Location pivot = a[last]; // pivot - int pIndex = last; - int i = first - 1; - Location temp; // Temporarily store value for position transformation - for (int j = first; j <= last - 1; j++) { - if (a[j].y <= pivot.y) { // Less than or less than pivot - i++; - temp = a[i]; // array[i] <-> array[j] - a[i] = a[j]; - a[j] = temp; - } - } - i++; - temp = a[i]; // array[pivot] <-> array[i] - a[i] = a[pIndex]; - a[pIndex] = temp; - return i; // pivot 
index - } - - public void xQuickSort( - final Location[] a, final int first, final int last) { - - if (first < last) { - int q = xPartition(a, first, last); // pivot - xQuickSort(a, first, q - 1); // Left - xQuickSort(a, q + 1, last); // Right - } - } - - public void yQuickSort( - final Location[] a, final int first, final int last) { - - if (first < last) { - int q = yPartition(a, first, last); // pivot - yQuickSort(a, first, q - 1); // Left - yQuickSort(a, q + 1, last); // Right - } - } - - public double closestPair(final Location[] a, final int indexNum) { - - Location[] divideArray = new Location[indexNum]; - System.arraycopy(a, 0, divideArray, 0, indexNum); // Copy previous array - int totalNum = indexNum; // number of coordinates in the divideArray - int divideX = indexNum / 2; // Intermediate value for divide - Location[] leftArray = new Location[divideX]; //divide - left array - //divide-right array - Location[] rightArray = new Location[totalNum - divideX]; - if (indexNum <= 3) { // If the number of coordinates is 3 or less - return bruteForce(divideArray); - } - //divide-left array - System.arraycopy(divideArray, 0, leftArray, 0, divideX); - //divide-right array - System.arraycopy( - divideArray, divideX, rightArray, 0, totalNum - divideX); - - double minLeftArea = 0; //Minimum length of left array - double minRightArea = 0; //Minimum length of right array - double minValue = 0; //Minimum lengt - - minLeftArea = closestPair(leftArray, divideX); // recursive closestPair - minRightArea = closestPair(rightArray, totalNum - divideX); - // window size (= minimum length) - minValue = Math.min(minLeftArea, minRightArea); - - // Create window. 
Set the size for creating a window - // and creating a new array for the coordinates in the window - for (int i = 0; i < totalNum; i++) { - double xGap = Math.abs(divideArray[divideX].x - divideArray[i].x); - if (xGap < minValue) { - secondCount++; // size of the array - } else { - if (divideArray[i].x > divideArray[divideX].x) { - break; - } - } - } - // new array for coordinates in window - Location[] firstWindow = new Location[secondCount]; - int k = 0; - for (int i = 0; i < totalNum; i++) { - double xGap = Math.abs(divideArray[divideX].x - divideArray[i].x); - if (xGap < minValue) { // if it's inside a window - firstWindow[k] = divideArray[i]; // put in an array - k++; - } else { - if (divideArray[i].x > divideArray[divideX].x) { - break; - } - } - } - yQuickSort(firstWindow, 0, secondCount - 1); // Sort by y coordinates - /* Coordinates in Window */ - double length = 0; - // size comparison within window - for (int i = 0; i < secondCount - 1; i++) { - for (int j = (i + 1); j < secondCount; j++) { - double xGap = Math.abs(firstWindow[i].x - firstWindow[j].x); - double yGap = Math.abs(firstWindow[i].y - firstWindow[j].y); - if (yGap < minValue) { - length = Math.sqrt(Math.pow(xGap, 2) + Math.pow(yGap, 2)); - // If measured distance is less than current min distance - if (length < minValue) { - // Change minimum distance to current distance - minValue = length; - // Conditional for registering final coordinate - if (length < minNum) { - minNum = length; - point1 = firstWindow[i]; - point2 = firstWindow[j]; - } - } - } else { - break; - } - } - } - secondCount = 0; - return minValue; - } - - public double bruteForce(final Location[] arrayParam) { - - double minValue = Double.MAX_VALUE; // minimum distance - double length = 0; - double xGap = 0; // Difference between x coordinates - double yGap = 0; // Difference between y coordinates - double result = 0; - - if (arrayParam.length == 2) { - // Difference between x coordinates - xGap = (arrayParam[0].x - 
arrayParam[1].x); - // Difference between y coordinates - yGap = (arrayParam[0].y - arrayParam[1].y); - // distance between coordinates - length = Math.sqrt(Math.pow(xGap, 2) + Math.pow(yGap, 2)); - // Conditional statement for registering final coordinate - if (length < minNum) { - minNum = length; - - } - point1 = arrayParam[0]; - point2 = arrayParam[1]; - result = length; - } - if (arrayParam.length == 3) { - for (int i = 0; i < arrayParam.length - 1; i++) { - for (int j = (i + 1); j < arrayParam.length; j++) { - // Difference between x coordinates - xGap = (arrayParam[i].x - arrayParam[j].x); - // Difference between y coordinates - yGap = (arrayParam[i].y - arrayParam[j].y); - // distance between coordinates - length = - Math.sqrt(Math.pow(xGap, 2) + Math.pow(yGap, 2)); - // If measured distance is less than current min distance - if (length < minValue) { - // Change minimum distance to current distance - minValue = length; - if (length < minNum) { - // Registering final coordinate - minNum = length; - point1 = arrayParam[i]; - point2 = arrayParam[j]; - } - } - } - } - result = minValue; - - } - return result; // If only one point returns 0. 
- } - - public void main(final String[] args) { - - //Input data consists of one x-coordinate and one y-coordinate - - ClosestPair cp = new ClosestPair(12); - cp.array[0] = cp.buildLocation(2, 3); - cp.array[1] = cp.buildLocation(2, 16); - cp.array[2] = cp.buildLocation(3, 9); - cp.array[3] = cp.buildLocation(6, 3); - cp.array[4] = cp.buildLocation(7, 7); - cp.array[5] = cp.buildLocation(19, 4); - cp.array[6] = cp.buildLocation(10, 11); - cp.array[7] = cp.buildLocation(15, 2); - cp.array[8] = cp.buildLocation(15, 19); - cp.array[9] = cp.buildLocation(16, 11); - cp.array[10] = cp.buildLocation(17, 13); - cp.array[11] = cp.buildLocation(9, 12); - - System.out.println("Input data"); - System.out.println("Number of points: " + cp.array.length); - for (int i = 0; i < cp.array.length; i++) { - System.out.println("x: " + cp.array[i].x + ", y: " + cp.array[i].y); - } - - cp.xQuickSort(cp.array, 0, cp.array.length - 1); // Sorting by x value - - double result; // minimum distance - - result = cp.closestPair(cp.array, cp.array.length); - // ClosestPair start - // minimum distance coordinates and distance output - System.out.println("Output Data"); - System.out.println("(" + cp.point1.x + ", " + cp.point1.y + ")"); - System.out.println("(" + cp.point2.x + ", " + cp.point2.y + ")"); - System.out.println("Minimum Distance : " + result); - - } - } - - public class ColumnarTranspositionCiphers { - - private String keyword; - private Object[][] table; - private String abecedarium; - public static final String ABECEDARIUM = "abcdefghijklmnopqrstuvwxyzABCDEFG" - + "HIJKLMNOPQRSTUVWXYZ0123456789,.;:-@"; - private static final String ENCRYPTION_FIELD = "≈"; - private static final char ENCRYPTION_FIELD_CHAR = '≈'; - - public String encrpyter(String word, String keyword) { - ColumnarTranspositionCipher.keyword = keyword; - abecedariumBuilder(500); - table = tableBuilder(word); - Object[][] sortedTable = sortTable(table); - String wordEncrypted = ""; - for (int i = 0; i < 
sortedTable[i].length; i++) { - for (int j = 1; j < sortedTable.length; j++) { - wordEncrypted += sortedTable[j][i]; - } - } - return wordEncrypted; - } - - public String encrpyter(String word, String keyword, - String abecedarium) { - ColumnarTranspositionCipher.keyword = keyword; - if (abecedarium != null) { - ColumnarTranspositionCipher.abecedarium = abecedarium; - } else { - ColumnarTranspositionCipher.abecedarium = ABECEDARIUM; - } - table = tableBuilder(word); - Object[][] sortedTable = sortTable(table); - String wordEncrypted = ""; - for (int i = 0; i < sortedTable[0].length; i++) { - for (int j = 1; j < sortedTable.length; j++) { - wordEncrypted += sortedTable[j][i]; - } - } - return wordEncrypted; - } - - public String decrypter() { - String wordDecrypted = ""; - for (int i = 1; i < table.length; i++) { - for (Object item : table[i]) { - wordDecrypted += item; - } - } - return wordDecrypted.replaceAll(ENCRYPTION_FIELD, ""); - } - - private Object[][] tableBuilder(String word) { - Object[][] table = new Object[numberOfRows(word) + 1][keyword.length()]; - char[] wordInChards = word.toCharArray(); - //Fils in the respective numbers - table[0] = findElements(); - int charElement = 0; - for (int i = 1; i < table.length; i++) { - for (int j = 0; j < table[i].length; j++) { - if (charElement < wordInChards.length) { - table[i][j] = wordInChards[charElement]; - charElement++; - } else { - table[i][j] = ENCRYPTION_FIELD_CHAR; - } - } - } - return table; - } - - private int numberOfRows(String word) { - if ((double) word.length() / keyword.length() > word.length() / keyword.length()) { - return (word.length() / keyword.length()) + 1; - } else { - return word.length() / keyword.length(); - } - } - - /** - * - * @return charValues - */ - private Object[] findElements() { - Object[] charValues = new Object[keyword.length()]; - for (int i = 0; i < charValues.length; i++) { - int charValueIndex = abecedarium.indexOf(keyword.charAt(i)); - charValues[i] = charValueIndex > 
-1 ? charValueIndex : null; - } - return charValues; - } - - private Object[][] sortTable(Object[][] table) { - Object[][] tableSorted = new Object[table.length][table[0].length]; - for (int i = 0; i < tableSorted.length; i++) { - System.arraycopy(table[i], 0, tableSorted[i], 0, tableSorted[i].length); - } - for (int i = 0; i < tableSorted[0].length; i++) { - for (int j = i + 1; j < tableSorted[0].length; j++) { - if ((int) tableSorted[0][i] > (int) table[0][j]) { - Object[] column = getColumn(tableSorted, tableSorted.length, i); - switchColumns(tableSorted, j, i, column); - } - } - } - return tableSorted; - } - - private Object[] getColumn(Object[][] table, int rows, int column) { - Object[] columnArray = new Object[rows]; - for (int i = 0; i < rows; i++) { - columnArray[i] = table[i][column]; - } - return columnArray; - } - - private void switchColumns(Object[][] table, int firstColumnIndex, - int secondColumnIndex, Object[] columnToSwitch) { - for (int i = 0; i < table.length; i++) { - table[i][secondColumnIndex] = table[i][firstColumnIndex]; - table[i][firstColumnIndex] = columnToSwitch[i]; - } - } - - private void abecedariumBuilder(int value) { - abecedarium = ""; - for (int i = 0; i < value; i++) { - abecedarium += (char) i; - } - } - - private void showTable() { - for (Object[] table1 : table) { - for (Object item : table1) { - System.out.print(item + " "); - } - System.out.println(); - } - } - - public void main(String[] args) { - String keywordForExample = "asd215"; - String wordBeingEncrypted = "This is a test of the Columnar Transposition Cipher"; - System.out.println("### Example of Columnar Transposition Cipher ###\n"); - System.out.println("Word being encryped ->>> " + wordBeingEncrypted); - System.out.println("Word encrypted ->>> " + ColumnarTranspositionCipher - .encrpyter(wordBeingEncrypted, keywordForExample)); - System.out.println("Word decryped ->>> " + ColumnarTranspositionCipher - .decrypter()); - System.out.println("\n### Encrypted Table 
###"); - showTable(); - } - } - - public class BinarySearchTree> implements Collection, Iterable { - private Node root = null; - private int size = 0; - - /** - * @return number of elements in the collection - */ - @Override - public int size() { - return this.size; - } - - /** - * @return if collection is empty - */ - @Override - public boolean isEmpty() { - return size == 0; - } - - /** - * @param element to check for contains - * @return if the object contains in collection - */ - @Override - @SuppressWarnings("unchecked") - public boolean contains(Object element) { - return !isEmpty() && root.contains((T) element); - } - - /** - * @return tree iterator - */ - @Override - public Iterator iterator() { - return new BSTIterator(); - } - - /** - * Method adding elements from the collection to array - * @param array to add elements - * @param parameter of array elements - * @return array with elements from the collection - */ - @Override - @SuppressWarnings("unchecked") - public T1[] toArray(T1[] array) { - ArrayList result = new ArrayList<>(); - for (T tmp : this) { - result.add((T1) tmp); - } - return result.toArray(array); - } - - /** - * @return elements of collection as array of Objects in increasing order - */ - @Override - public Object[] toArray() { - return toArray(new Object[size]); - } - - /** - * Method adding element to the collection - * @param value element to add - * @return if the element will be added - */ - @Override - public boolean add(T value) { - if (root == null) { - root = new Node(value); - ++size; - return true; - } - return root.add(value); - } - - /** - * Method removing element from the collection - * @param value to remove from collection - * @return if the element will be removed - */ - @Override - @SuppressWarnings("unchecked") - public boolean remove(Object value) { - return !isEmpty() && root.remove((T) value); - } - - /** - * @param collection to check for contains - * @return if Tree contains every element from the collection - */ 
- @Override - public boolean containsAll(Collection collection) { - boolean result = true; - for (Object tmp : collection) { - result &= contains(tmp); - } - return result; - } - - /** - * @param collection with elements to add - * @return if all elements will be added - */ - @Override - public boolean addAll(Collection collection) { - boolean result = true; - for (T tmp : collection) { - result &= add(tmp); - } - return result; - } - - /** - * @param collection with elements to remove - * @return if all elements will be removed - */ - @Override - public boolean removeAll(Collection collection) { - boolean result = true; - for (Object tmp : collection) { - result &= remove(tmp); - } - return result; - } - - /** - * @param collection with elements to retain - * @return if this collection will be changed - */ - @Override - public boolean retainAll(Collection collection) { - boolean result = false; - for (Object tmp : collection) { - if (contains(tmp)) { - remove(tmp); - result = true; - } - } - return result; - } - - /** - * Clear the collection - */ - @Override - public void clear() { - root = null; - size = 0; - } - - /** - * @return String representation of the collection - */ - @Override - public String toString() { - return isEmpty() ? 
"null" : root.toString(); - } - - /** - * Methods gets all Nodes from current subtree to the List - * @param node subtree to get Nodes - * @param elements List to add elements - * @return List with elements - */ - private ArrayList getAll(Node node, ArrayList elements) { - if (node.left != null) { - getAll(node.left, elements); - } - elements.add(node); - if (node.right != null) { - getAll(node.right, elements); - } - return elements; - } - - /** Class realizing the Binary Search Tree Iterator */ - private class BSTIterator implements Iterator { - private ArrayList elements = new ArrayList<>(); - - private BSTIterator() { - if (!isEmpty()) { - BinarySearchTree.this.getAll(root, elements); - } - } - - /** - * @return if the next element exist - */ - @Override - public boolean hasNext() { - return !elements.isEmpty() && treeContainsAtLeastOneElement(); - } - - /** - * @return if the List contains at least one element from the tree - */ - private boolean treeContainsAtLeastOneElement() { - for (Node tmp : elements) { - if (BinarySearchTree.this.contains(tmp.value)) { - return true; - } - } - return false; - } - - /** - * @return value of next element, null if it does not exist - */ - @Override - public T next() { - if (elements.isEmpty()) { - return null; - } - if (!root.contains(elements.get(0).value)) { - elements.remove(0); - return next(); - } - return elements.remove(0).value; - } - } - - /** - * Class describes Node of the Binary Tree - */ - private class Node { - private T value; - private Node parent; - private Node left = null; - private Node right = null; - - private Node(T value) { - this.value = value; - this.parent = null; - } - - private Node(T value, Node parent) { - this.value = value; - this.parent = parent; - } - - /** - * Method adds an element with a specified value - * @param value of new element - */ - private boolean add(T value) { - if (value.compareTo(this.value) < 0) { - if (left == null) { - left = new Node(value, this); - ++size; - return 
true; - } - left.add(value); - } else if (value.compareTo(this.value) > 0) { - if (right == null) { - right = new Node(value, this); - ++size; - return true; - } - right.add(value); - } - return false; - } - - /** - * Method removes an element with a specified value - * @param value of element to remove - */ - private boolean remove(T value) { - boolean result = false; - if (value.compareTo(this.value) < 0) { - result = left != null && left.remove(value); - } else if (value.compareTo(this.value) > 0) { - result = right != null && right.remove(value); - } else { - --size; - this.remove(); - } - return result; - } - - /** - * Method removing current Node - */ - private void remove() { - if (left != null && right != null) { - Node newNode = this.findMinimalInRightSubtree(); - value = newNode.value; - changeNode(newNode); - } else if (left != null) { - changeNode(left); - } else if (right != null) { - changeNode(right); - } else { - changeNode(null); - } - } - - /** - * @return Node with minimum value in the subtree - */ - private Node findMinimalInRightSubtree() { - Node current = this.right; - while (current.left != null) { - current = current.left; - } - return current; - } - - private void changeNode(Node newNode) { - if (newNode == null) { - if (parent == null) { - root = null; - } else { - if (equals(parent.left)) { - parent.left = null; - } else { - parent.right = null; - } - } - return; - } - value = newNode.value; - if (newNode.equals(newNode.parent.left)) { - newNode.parent.left = newNode.left; - } else { - newNode.parent.right = newNode.right; - } - } - - /** - * @param element to check for contains - * @return if the object contains in the subtree - */ - private boolean contains(T element) { - if (value.equals(element)) { - return true; - } - if (value.compareTo(element) > 0) { - return left != null && left.contains(element); - } - return right != null && right.contains(element); - } - - /** - * @return String representation of the subtree - */ - @Override 
- public String toString() { - StringBuilder result = new StringBuilder(); - result.append("(").append(value.toString()).append(" "); - result.append(left == null ? "null" : left.toString()).append(" "); - result.append(right == null ? "null" : right.toString()).append(")"); - return result.toString(); - } - - } - } - - public final class Point2D implements Comparable { - - /** - * Compares two points by x-coordinate. - */ - public final Comparator X_ORDER = new XOrder(); - - /** - * Compares two points by y-coordinate. - */ - public final Comparator Y_ORDER = new YOrder(); - - /** - * Compares two points by polar radius. - */ - public final Comparator R_ORDER = new ROrder(); - - private final double x; // x coordinate - private final double y; // y coordinate - - /** - * Initializes a new point (x, y). - * @param x the x-coordinate - * @param y the y-coordinate - * @throws IllegalArgumentException if either {@code x} or {@code y} - * is {@code Double.NaN}, {@code Double.POSITIVE_INFINITY} or - * {@code Double.NEGATIVE_INFINITY} - */ - public Point2D(double x, double y) { - if (Double.isInfinite(x) || Double.isInfinite(y)) - throw new IllegalArgumentException("Coordinates must be finite"); - if (Double.isNaN(x) || Double.isNaN(y)) - throw new IllegalArgumentException("Coordinates cannot be NaN"); - if (x == 0.0) this.x = 0.0; // convert -0.0 to +0.0 - else this.x = x; - - if (y == 0.0) this.y = 0.0; // convert -0.0 to +0.0 - else this.y = y; - } - - /** - * Returns the x-coordinate. 
- * @return the x-coordinate - */ - public double x() { - return x; - } - - public double y() { - return y; - } - - public double r() { - return Math.sqrt(x*x + y*y); - } - - public double theta() { - return Math.atan2(y, x); - } - - private double angleTo(Point2D that) { - double dx = that.x - this.x; - double dy = that.y - this.y; - return Math.atan2(dy, dx); - } - - public int ccw(Point2D a, Point2D b, Point2D c) { - double area2 = (b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x); - if (area2 < 0) return -1; - else if (area2 > 0) return +1; - else return 0; - } - - public double area2(Point2D a, Point2D b, Point2D c) { - return (b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x); - } - - public double distanceTo(Point2D that) { - double dx = this.x - that.x; - double dy = this.y - that.y; - return Math.sqrt(dx*dx + dy*dy); - } - - public double distanceSquaredTo(Point2D that) { - double dx = this.x - that.x; - double dy = this.y - that.y; - return dx*dx + dy*dy; - } - - public int compareTo(Point2D that) { - if (this.y < that.y) return -1; - if (this.y > that.y) return +1; - if (this.x < that.x) return -1; - if (this.x > that.x) return +1; - return 0; - } - - public Comparator polarOrder() { - return new PolarOrder(); - } - - public Comparator atan2Order() { - return new Atan2Order(); - } - - public Comparator distanceToOrder() { - return new DistanceToOrder(); - } - - // compare points according to their x-coordinate - private class XOrder implements Comparator { - public int compare(Point2D p, Point2D q) { - if (p.x < q.x) return -1; - if (p.x > q.x) return +1; - return 0; - } - } - - // compare points according to their y-coordinate - private class YOrder implements Comparator { - public int compare(Point2D p, Point2D q) { - if (p.y < q.y) return -1; - if (p.y > q.y) return +1; - return 0; - } - } - - // compare points according to their polar radius - private class ROrder implements Comparator { - public int compare(Point2D p, Point2D q) { - double delta = (p.x*p.x + 
p.y*p.y) - (q.x*q.x + q.y*q.y); - if (delta < 0) return -1; - if (delta > 0) return +1; - return 0; - } - } - - // compare other points relative to atan2 angle (bewteen -pi/2 and pi/2) they make with this Point - private class Atan2Order implements Comparator { - public int compare(Point2D q1, Point2D q2) { - double angle1 = angleTo(q1); - double angle2 = angleTo(q2); - if (angle1 < angle2) return -1; - else if (angle1 > angle2) return +1; - else return 0; - } - } - - // compare other points relative to polar angle (between 0 and 2pi) they make with this Point - private class PolarOrder implements Comparator { - public int compare(Point2D q1, Point2D q2) { - double dx1 = q1.x - x; - double dy1 = q1.y - y; - double dx2 = q2.x - x; - double dy2 = q2.y - y; - - if (dy1 >= 0 && dy2 < 0) return -1; // q1 above; q2 below - else if (dy2 >= 0 && dy1 < 0) return +1; // q1 below; q2 above - else if (dy1 == 0 && dy2 == 0) { // 3-collinear and horizontal - if (dx1 >= 0 && dx2 < 0) return -1; - else if (dx2 >= 0 && dx1 < 0) return +1; - else return 0; - } - else return -ccw(Point2D.this, q1, q2); // both above or below - - // Note: ccw() recomputes dx1, dy1, dx2, and dy2 - } - } - - // compare points according to their distance to this point - private class DistanceToOrder implements Comparator { - public int compare(Point2D p, Point2D q) { - double dist1 = distanceSquaredTo(p); - double dist2 = distanceSquaredTo(q); - if (dist1 < dist2) return -1; - else if (dist1 > dist2) return +1; - else return 0; - } - } - - @Override - public boolean equals(Object other) { - if (other == this) return true; - if (other == null) return false; - if (other.getClass() != this.getClass()) return false; - Point2D that = (Point2D) other; - return this.x == that.x && this.y == that.y; - } - - @Override - public String toString() { - return "(" + x + ", " + y + ")"; - } - - @Override - public int hashCode() { - int hashX = ((Double) x).hashCode(); - int hashY = ((Double) y).hashCode(); - return 
31*hashX + hashY; - } - } - - public class RedBlackBST, Value> { - - private static final boolean RED = true; - private static final boolean BLACK = false; - - private Node root; // root of the BST - - // BST helper node data type - private class Node { - private Key key; // key - private Value val; // associated data - private Node left, right; // links to left and right subtrees - private boolean color; // color of parent link - private int size; // subtree count - - public Node(Key key, Value val, boolean color, int size) { - this.key = key; - this.val = val; - this.color = color; - this.size = size; - } - } - - /** - * Initializes an empty symbol table. - */ - public RedBlackBST() { - } - - /*************************************************************************** - * Node helper methods. - ***************************************************************************/ - // is node x red; false if x is null ? - private boolean isRed(Node x) { - if (x == null) return false; - return x.color == RED; - } - - // number of node in subtree rooted at x; 0 if x is null - private int size(Node x) { - if (x == null) return 0; - return x.size; - } - - - /** - * Returns the number of key-value pairs in this symbol table. - * @return the number of key-value pairs in this symbol table - */ - public int size() { - return size(root); - } - - /** - * Is this symbol table empty? - * @return {@code true} if this symbol table is empty and {@code false} otherwise - */ - public boolean isEmpty() { - return root == null; - } - - - /*************************************************************************** - * Standard BST search. - ***************************************************************************/ - - /** - * Returns the value associated with the given key. 
- * @param key the key - * @return the value associated with the given key if the key is in the symbol table - * and {@code null} if the key is not in the symbol table - * @throws IllegalArgumentException if {@code key} is {@code null} - */ - public Value get(Key key) { - if (key == null) throw new IllegalArgumentException("argument to get() is null"); - return get(root, key); - } - - // value associated with the given key in subtree rooted at x; null if no such key - private Value get(Node x, Key key) { - while (x != null) { - int cmp = key.compareTo(x.key); - if (cmp < 0) x = x.left; - else if (cmp > 0) x = x.right; - else return x.val; - } - return null; - } - - /** - * Does this symbol table contain the given key? - * @param key the key - * @return {@code true} if this symbol table contains {@code key} and - * {@code false} otherwise - * @throws IllegalArgumentException if {@code key} is {@code null} - */ - public boolean contains(Key key) { - return get(key) != null; - } - - /*************************************************************************** - * Red-black tree insertion. - ***************************************************************************/ - - /** - * Inserts the specified key-value pair into the symbol table, overwriting the old - * value with the new value if the symbol table already contains the specified key. - * Deletes the specified key (and its associated value) from this symbol table - * if the specified value is {@code null}. 
- * - * @param key the key - * @param val the value - * @throws IllegalArgumentException if {@code key} is {@code null} - */ - public void put(Key key, Value val) { - if (key == null) throw new IllegalArgumentException("first argument to put() is null"); - if (val == null) { - delete(key); - return; - } - - root = put(root, key, val); - root.color = BLACK; - // assert check(); - } - - // insert the key-value pair in the subtree rooted at h - private Node put(Node h, Key key, Value val) { - if (h == null) return new Node(key, val, RED, 1); - - int cmp = key.compareTo(h.key); - if (cmp < 0) h.left = put(h.left, key, val); - else if (cmp > 0) h.right = put(h.right, key, val); - else h.val = val; - - // fix-up any right-leaning links - if (isRed(h.right) && !isRed(h.left)) h = rotateLeft(h); - if (isRed(h.left) && isRed(h.left.left)) h = rotateRight(h); - if (isRed(h.left) && isRed(h.right)) flipColors(h); - h.size = size(h.left) + size(h.right) + 1; - - return h; - } - - /*************************************************************************** - * Red-black tree deletion. - ***************************************************************************/ - - /** - * Removes the smallest key and associated value from the symbol table. - * @throws NoSuchElementException if the symbol table is empty - */ - public void deleteMin() { - if (isEmpty()) throw new NoSuchElementException("BST underflow"); - - // if both children of root are black, set root to red - if (!isRed(root.left) && !isRed(root.right)) - root.color = RED; - - root = deleteMin(root); - if (!isEmpty()) root.color = BLACK; - // assert check(); - } - - // delete the key-value pair with the minimum key rooted at h - private Node deleteMin(Node h) { - if (h.left == null) - return null; - - if (!isRed(h.left) && !isRed(h.left.left)) - h = moveRedLeft(h); - - h.left = deleteMin(h.left); - return balance(h); - } - - - /** - * Removes the largest key and associated value from the symbol table. 
- * @throws NoSuchElementException if the symbol table is empty - */ - public void deleteMax() { - if (isEmpty()) throw new NoSuchElementException("BST underflow"); - - // if both children of root are black, set root to red - if (!isRed(root.left) && !isRed(root.right)) - root.color = RED; - - root = deleteMax(root); - if (!isEmpty()) root.color = BLACK; - // assert check(); - } - - // delete the key-value pair with the maximum key rooted at h - private Node deleteMax(Node h) { - if (isRed(h.left)) - h = rotateRight(h); - - if (h.right == null) - return null; - - if (!isRed(h.right) && !isRed(h.right.left)) - h = moveRedRight(h); - - h.right = deleteMax(h.right); - - return balance(h); - } - - /** - * Removes the specified key and its associated value from this symbol table - * (if the key is in this symbol table). - * - * @param key the key - * @throws IllegalArgumentException if {@code key} is {@code null} - */ - public void delete(Key key) { - if (key == null) throw new IllegalArgumentException("argument to delete() is null"); - if (!contains(key)) return; - - // if both children of root are black, set root to red - if (!isRed(root.left) && !isRed(root.right)) - root.color = RED; - - root = delete(root, key); - if (!isEmpty()) root.color = BLACK; - // assert check(); - } - - // delete the key-value pair with the given key rooted at h - private Node delete(Node h, Key key) { - // assert get(h, key) != null; - - if (key.compareTo(h.key) < 0) { - if (!isRed(h.left) && !isRed(h.left.left)) - h = moveRedLeft(h); - h.left = delete(h.left, key); - } - else { - if (isRed(h.left)) - h = rotateRight(h); - if (key.compareTo(h.key) == 0 && (h.right == null)) - return null; - if (!isRed(h.right) && !isRed(h.right.left)) - h = moveRedRight(h); - if (key.compareTo(h.key) == 0) { - Node x = min(h.right); - h.key = x.key; - h.val = x.val; - // h.val = get(h.right, min(h.right).key); - // h.key = min(h.right).key; - h.right = deleteMin(h.right); - } - else h.right = 
delete(h.right, key); - } - return balance(h); - } - - private Node rotateRight(Node h) { - // assert (h != null) && isRed(h.left); - Node x = h.left; - h.left = x.right; - x.right = h; - x.color = x.right.color; - x.right.color = RED; - x.size = h.size; - h.size = size(h.left) + size(h.right) + 1; - return x; - } - - // make a right-leaning link lean to the left - private Node rotateLeft(Node h) { - // assert (h != null) && isRed(h.right); - Node x = h.right; - h.right = x.left; - x.left = h; - x.color = x.left.color; - x.left.color = RED; - x.size = h.size; - h.size = size(h.left) + size(h.right) + 1; - return x; - } - - // flip the colors of a node and its two children - private void flipColors(Node h) { - h.color = !h.color; - h.left.color = !h.left.color; - h.right.color = !h.right.color; - } - - // Assuming that h is red and both h.left and h.left.left - // are black, make h.left or one of its children red. - private Node moveRedLeft(Node h) { - // assert (h != null); - // assert isRed(h) && !isRed(h.left) && !isRed(h.left.left); - - flipColors(h); - if (isRed(h.right.left)) { - h.right = rotateRight(h.right); - h = rotateLeft(h); - flipColors(h); - } - return h; - } - - // Assuming that h is red and both h.right and h.right.left - // are black, make h.right or one of its children red. 
- private Node moveRedRight(Node h) { - // assert (h != null); - // assert isRed(h) && !isRed(h.right) && !isRed(h.right.left); - flipColors(h); - if (isRed(h.left.left)) { - h = rotateRight(h); - flipColors(h); - } - return h; - } - - // restore red-black tree invariant - private Node balance(Node h) { - // assert (h != null); - - if (isRed(h.right)) h = rotateLeft(h); - if (isRed(h.left) && isRed(h.left.left)) h = rotateRight(h); - if (isRed(h.left) && isRed(h.right)) flipColors(h); - - h.size = size(h.left) + size(h.right) + 1; - return h; - } - - public int height() { - return height(root); - } - private int height(Node x) { - if (x == null) return -1; - return 1 + Math.max(height(x.left), height(x.right)); - } - - public Key min() { - if (isEmpty()) throw new NoSuchElementException("calls min() with empty symbol table"); - return min(root).key; - } - - // the smallest key in subtree rooted at x; null if no such key - private Node min(Node x) { - // assert x != null; - if (x.left == null) return x; - else return min(x.left); - } - - public Key max() { - if (isEmpty()) throw new NoSuchElementException("calls max() with empty symbol table"); - return max(root).key; - } - - // the largest key in the subtree rooted at x; null if no such key - private Node max(Node x) { - // assert x != null; - if (x.right == null) return x; - else return max(x.right); - } - - public Key floor(Key key) { - if (key == null) throw new IllegalArgumentException("argument to floor() is null"); - if (isEmpty()) throw new NoSuchElementException("calls floor() with empty symbol table"); - Node x = floor(root, key); - if (x == null) throw new NoSuchElementException("argument to floor() is too small"); - else return x.key; - } - - // the largest key in the subtree rooted at x less than or equal to the given key - private Node floor(Node x, Key key) { - if (x == null) return null; - int cmp = key.compareTo(x.key); - if (cmp == 0) return x; - if (cmp < 0) return floor(x.left, key); - Node t = 
floor(x.right, key); - if (t != null) return t; - else return x; - } - - public Key ceiling(Key key) { - if (key == null) throw new IllegalArgumentException("argument to ceiling() is null"); - if (isEmpty()) throw new NoSuchElementException("calls ceiling() with empty symbol table"); - Node x = ceiling(root, key); - if (x == null) throw new NoSuchElementException("argument to ceiling() is too small"); - else return x.key; - } - - // the smallest key in the subtree rooted at x greater than or equal to the given key - private Node ceiling(Node x, Key key) { - if (x == null) return null; - int cmp = key.compareTo(x.key); - if (cmp == 0) return x; - if (cmp > 0) return ceiling(x.right, key); - Node t = ceiling(x.left, key); - if (t != null) return t; - else return x; - } - - public Key select(int rank) { - if (rank < 0 || rank >= size()) { - throw new IllegalArgumentException("argument to select() is invalid: " + rank); - } - return select(root, rank); - } - - private Key select(Node x, int rank) { - if (x == null) return null; - int leftSize = size(x.left); - if (leftSize > rank) return select(x.left, rank); - else if (leftSize < rank) return select(x.right, rank - leftSize - 1); - else return x.key; - } - - public int rank(Key key) { - if (key == null) throw new IllegalArgumentException("argument to rank() is null"); - return rank(key, root); - } - - // number of keys less than key in the subtree rooted at x - private int rank(Key key, Node x) { - if (x == null) return 0; - int cmp = key.compareTo(x.key); - if (cmp < 0) return rank(key, x.left); - else if (cmp > 0) return 1 + size(x.left) + rank(key, x.right); - else return size(x.left); - } - - public Iterable keys() { - if (isEmpty()) return new PriorityQueue(); - return keys(min(), max()); - } - - public Iterable keys(Key lo, Key hi) { - if (lo == null) throw new IllegalArgumentException("first argument to keys() is null"); - if (hi == null) throw new IllegalArgumentException("second argument to keys() is null"); 
- - Queue queue = new PriorityQueue(); - // if (isEmpty() || lo.compareTo(hi) > 0) return queue; - keys(root, queue, lo, hi); - return queue; - } - - private void keys(Node x, Queue queue, Key lo, Key hi) { - if (x == null) return; - int cmplo = lo.compareTo(x.key); - int cmphi = hi.compareTo(x.key); - if (cmplo < 0) keys(x.left, queue, lo, hi); - if (cmphi > 0) keys(x.right, queue, lo, hi); - } - - public int size(Key lo, Key hi) { - if (lo == null) throw new IllegalArgumentException("first argument to size() is null"); - if (hi == null) throw new IllegalArgumentException("second argument to size() is null"); - - if (lo.compareTo(hi) > 0) return 0; - if (contains(hi)) return rank(hi) - rank(lo) + 1; - else return rank(hi) - rank(lo); - } - - - private boolean check() { - return isBST() && isSizeConsistent() && isRankConsistent() && is23() && isBalanced(); - } - - private boolean isBST() { - return isBST(root, null, null); - } - - private boolean isBST(Node x, Key min, Key max) { - if (x == null) return true; - if (min != null && x.key.compareTo(min) <= 0) return false; - if (max != null && x.key.compareTo(max) >= 0) return false; - return isBST(x.left, min, x.key) && isBST(x.right, x.key, max); - } - - // are the size fields correct? - private boolean isSizeConsistent() { return isSizeConsistent(root); } - private boolean isSizeConsistent(Node x) { - if (x == null) return true; - if (x.size != size(x.left) + size(x.right) + 1) return false; - return isSizeConsistent(x.left) && isSizeConsistent(x.right); - } - - // check that ranks are consistent - private boolean isRankConsistent() { - for (int i = 0; i < size(); i++) - if (i != rank(select(i))) return false; - for (Key key : keys()) - if (key.compareTo(select(rank(key))) != 0) return false; - return true; - } - - // Does the tree have no red right links, and at most one (left) - // red links in a row on any path? 
- private boolean is23() { return is23(root); } - private boolean is23(Node x) { - if (x == null) return true; - if (isRed(x.right)) return false; - if (x != root && isRed(x) && isRed(x.left)) - return false; - return is23(x.left) && is23(x.right); - } - - // do all paths from root to leaf have same number of black edges? - private boolean isBalanced() { - int black = 0; // number of black links on path from root to min - Node x = root; - while (x != null) { - if (!isRed(x)) black++; - x = x.left; - } - return isBalanced(root, black); - } - - // does every path from the root to a leaf have the given number of black links? - private boolean isBalanced(Node x, int black) { - if (x == null) return black == 0; - if (!isRed(x)) black--; - return isBalanced(x.left, black) && isBalanced(x.right, black); - } - } - - public class BinomialMinPQ implements Iterable { - private Node head; //head of the list of roots - private final Comparator comp; //Comparator over the keys - - //Represents a Node of a Binomial Tree - private class Node { - Key key; //Key contained by the Node - int order; //The order of the Binomial Tree rooted by this Node - Node child, sibling; //child and sibling of this Node - } - - public BinomialMinPQ() { - comp = new MyComparator(); - } - - public BinomialMinPQ(Comparator C) { - comp = C; - } - - public BinomialMinPQ(Key[] a) { - comp = new MyComparator(); - for (Key k : a) insert(k); - } - - public BinomialMinPQ(Comparator C, Key[] a) { - comp = C; - for (Key k : a) insert(k); - } - - public boolean isEmpty() { - return head == null; - } - - public int size() { - int result = 0, tmp; - for (Node node = head; node != null; node = node.sibling) { - if (node.order > 30) { throw new ArithmeticException("The number of elements cannot be evaluated, but the priority queue is still valid."); } - tmp = 1 << node.order; - result |= tmp; - } - return result; - } - - public void insert(Key key) { - Node x = new Node(); - x.key = key; - x.order = 0; - 
BinomialMinPQ H = new BinomialMinPQ(); //The Comparator oh the H heap is not used - H.head = x; - this.head = this.union(H).head; - } - - public Key minKey() { - if (isEmpty()) throw new NoSuchElementException("Priority queue is empty"); - Node min = head; - Node current = head; - while (current.sibling != null) { - min = (greater(min.key, current.sibling.key)) ? current : min; - current = current.sibling; - } - return min.key; - } - - public Key delMin() { - if(isEmpty()) throw new NoSuchElementException("Priority queue is empty"); - Node min = eraseMin(); - Node x = (min.child == null) ? min : min.child; - if (min.child != null) { - min.child = null; - Node prevx = null, nextx = x.sibling; - while (nextx != null) { - x.sibling = prevx; - prevx = x; - x = nextx;nextx = nextx.sibling; - } - x.sibling = prevx; - BinomialMinPQ H = new BinomialMinPQ(); - H.head = x; - head = union(H).head; - } - return min.key; - } - - public BinomialMinPQ union(BinomialMinPQ heap) { - if (heap == null) throw new IllegalArgumentException("Cannot merge a Binomial Heap with null"); - this.head = merge(new Node(), this.head, heap.head).sibling; - Node x = this.head; - Node prevx = null, nextx = x.sibling; - while (nextx != null) { - if (x.order < nextx.order || - (nextx.sibling != null && nextx.sibling.order == x.order)) { - prevx = x; x = nextx; - } else if (greater(nextx.key, x.key)) { - x.sibling = nextx.sibling; - link(nextx, x); - } else { - if (prevx == null) { this.head = nextx; } - else { prevx.sibling = nextx; } - link(x, nextx); - x = nextx; - } - nextx = x.sibling; - } - return this; - } - - private boolean greater(Key n, Key m) { - if (n == null) return false; - if (m == null) return true; - return comp.compare(n, m) > 0; - } - - //Assuming root1 holds a greater key than root2, root2 becomes the new root - private void link(Node root1, Node root2) { - root1.sibling = root2.child; - root2.child = root1; - root2.order++; - } - - //Deletes and return the node containing the 
minimum key - private Node eraseMin() { - Node min = head; - Node previous = null; - Node current = head; - while (current.sibling != null) { - if (greater(min.key, current.sibling.key)) { - previous = current; - min = current.sibling; - } - current = current.sibling; - } - previous.sibling = min.sibling; - if (min == head) head = min.sibling; - return min; - } - - private Node merge(Node h, Node x, Node y) { - if (x == null && y == null) return h; - else if (x == null) h.sibling = merge(y, null, y.sibling); - else if (y == null) h.sibling = merge(x, x.sibling, null); - else if (x.order < y.order) h.sibling = merge(x, x.sibling, y); - else h.sibling = merge(y, x, y.sibling); - return h; - } - - public Iterator iterator() { - return new MyIterator(); - } - - private class MyIterator implements Iterator { - BinomialMinPQ data; - - //Constructor clones recursively the elements in the queue - //It takes linear time - public MyIterator() { - data = new BinomialMinPQ(comp); - data.head = clone(head, null); - } - - private Node clone(Node x, Node parent) { - if (x == null) return null; - Node node = new Node(); - node.key = x.key; - node.sibling = clone(x.sibling, parent); - node.child = clone(x.child, node); - return node; - } - - public boolean hasNext() { - return !data.isEmpty(); - } - - public Key next() { - if (!hasNext()) throw new NoSuchElementException(); - return data.delMin(); - } - - public void remove() { - throw new UnsupportedOperationException(); - } - } - - private class MyComparator implements Comparator { - @Override - public int compare(Key key1, Key key2) { - return ((Comparable) key1).compareTo(key2); - } - } - } - - public class SegmentTree { - - private Node[] heap; - private int[] array; - private int size; - - /** - * Time-Complexity: O(n*log(n)) - * - * @param array the Initialization array - */ - public SegmentTree(int[] array) { - this.array = Arrays.copyOf(array, array.length); - //The max size of this array is about 2 * 2 ^ log2(n) + 1 - 
size = (int) (2 * Math.pow(2.0, Math.floor((Math.log((double) array.length) / Math.log(2.0)) + 1))); - heap = new Node[size]; - build(1, 0, array.length); - } - - - public int size() { - return array.length; - } - - //Initialize the Nodes of the Segment tree - private void build(int v, int from, int size) { - heap[v] = new Node(); - heap[v].from = from; - heap[v].to = from + size - 1; - - if (size == 1) { - heap[v].sum = array[from]; - heap[v].min = array[from]; - } else { - //Build childs - build(2 * v, from, size / 2); - build(2 * v + 1, from + size / 2, size - size / 2); - - heap[v].sum = heap[2 * v].sum + heap[2 * v + 1].sum; - //min = min of the children - heap[v].min = Math.min(heap[2 * v].min, heap[2 * v + 1].min); - } - } - - /** - * Range Sum Query - * - * Time-Complexity: O(log(n)) - * - * @param from from index - * @param to to index - * @return sum - */ - public int rsq(int from, int to) { - return rsq(1, from, to); - } - - private int rsq(int v, int from, int to) { - Node n = heap[v]; - - //If you did a range update that contained this node, you can infer the Sum without going down the tree - if (n.pendingVal != null && contains(n.from, n.to, from, to)) { - return (to - from + 1) * n.pendingVal; - } - - if (contains(from, to, n.from, n.to)) { - return heap[v].sum; - } - - if (intersects(from, to, n.from, n.to)) { - propagate(v); - int leftSum = rsq(2 * v, from, to); - int rightSum = rsq(2 * v + 1, from, to); - - return leftSum + rightSum; - } - - return 0; - } - - /** - * Range Min Query - * - * Time-Complexity: O(log(n)) - * - * @param from from index - * @param to to index - * @return min - */ - public int rMinQ(int from, int to) { - return rMinQ(1, from, to); - } - - private int rMinQ(int v, int from, int to) { - Node n = heap[v]; - - - //If you did a range update that contained this node, you can infer the Min value without going down the tree - if (n.pendingVal != null && contains(n.from, n.to, from, to)) { - return n.pendingVal; - } - - if 
(contains(from, to, n.from, n.to)) { - return heap[v].min; - } - - if (intersects(from, to, n.from, n.to)) { - propagate(v); - int leftMin = rMinQ(2 * v, from, to); - int rightMin = rMinQ(2 * v + 1, from, to); - - return Math.min(leftMin, rightMin); - } - - return Integer.MAX_VALUE; - } - - - /** - * Range Update Operation. - * With this operation you can update either one position or a range of positions with a given number. - * The update operations will update the less it can to update the whole range (Lazy Propagation). - * The values will be propagated lazily from top to bottom of the segment tree. - * This behavior is really useful for updates on portions of the array - *

- * Time-Complexity: O(log(n)) - * - * @param from from index - * @param to to index - * @param value value - */ - public void update(int from, int to, int value) { - update(1, from, to, value); - } - - private void update(int v, int from, int to, int value) { - - //The Node of the heap tree represents a range of the array with bounds: [n.from, n.to] - Node n = heap[v]; - - /** - * If the updating-range contains the portion of the current Node We lazily update it. - * This means We do NOT update each position of the vector, but update only some temporal - * values into the Node; such values into the Node will be propagated down to its children only when they need to. - */ - if (contains(from, to, n.from, n.to)) { - change(n, value); - } - - if (n.size() == 1) return; - - if (intersects(from, to, n.from, n.to)) { - /** - * Before keeping going down to the tree We need to propagate the - * the values that have been temporally/lazily saved into this Node to its children - * So that when We visit them the values are properly updated - */ - propagate(v); - - update(2 * v, from, to, value); - update(2 * v + 1, from, to, value); - - n.sum = heap[2 * v].sum + heap[2 * v + 1].sum; - n.min = Math.min(heap[2 * v].min, heap[2 * v + 1].min); - } - } - - //Propagate temporal values to children - private void propagate(int v) { - Node n = heap[v]; - - if (n.pendingVal != null) { - change(heap[2 * v], n.pendingVal); - change(heap[2 * v + 1], n.pendingVal); - n.pendingVal = null; //unset the pending propagation value - } - } - - //Save the temporal values that will be propagated lazily - private void change(Node n, int value) { - n.pendingVal = value; - n.sum = n.size() * value; - n.min = value; - array[n.from] = value; - - } - - //Test if the range1 contains range2 - private boolean contains(int from1, int to1, int from2, int to2) { - return from2 >= from1 && to2 <= to1; - } - - //check inclusive intersection, test if range1[from1, to1] intersects range2[from2, to2] - private 
boolean intersects(int from1, int to1, int from2, int to2) { - return from1 <= from2 && to1 >= from2 // (.[..)..] or (.[...]..) - || from1 >= from2 && from1 <= to2; // [.(..]..) or [..(..).. - } - - //The Node class represents a partition range of the array. - class Node { - int sum; - int min; - //Here We store the value that will be propagated lazily - Integer pendingVal = null; - int from; - int to; - - int size() { - return to - from + 1; - } - - } - - public void main(String[] args) { - - - SegmentTree st = null; - - String cmd = "cmp"; - while (true) { - String[] line = new String[0]; - - if (line[0].equals("exit")) break; - - int arg1 = 0, arg2 = 0, arg3 = 0; - - if (line.length > 1) { - arg1 = Integer.parseInt(line[1]); - } - if (line.length > 2) { - arg2 = Integer.parseInt(line[2]); - } - if (line.length > 3) { - arg3 = Integer.parseInt(line[3]); - } - - if ((!line[0].equals("set") && !line[0].equals("init")) && st == null) { - continue; - } - int array[]; - if (line[0].equals("set")) { - array = new int[line.length - 1]; - for (int i = 0; i < line.length - 1; i++) { - array[i] = Integer.parseInt(line[i + 1]); - } - st = new SegmentTree(array); - } - else if (line[0].equals("init")) { - array = new int[arg1]; - Arrays.fill(array, arg2); - st = new SegmentTree(array); - - for (int i = 0; i < st.size(); i++) { - - } - } - - else if (line[0].equals("up")) { - st.update(arg1, arg2, arg3); - for (int i = 0; i < st.size(); i++) { - - } - - } - else if (line[0].equals("rsq")) { - - } - else if (line[0].equals("rmq")) { - - } - else { - - } - - } - } - } - - public class GaussJordanElimination { - private static final double EPSILON = 1e-8; - - private final int n; // n-by-n system - private double[][] a; // n-by-(n+1) augmented matrix - - // Gauss-Jordan elimination with partial pivoting - /** - * Solves the linear system of equations Ax = b, - * where A is an n-by-n matrix and b - * is a length n vector. 
- * - * @param A the n-by-n constraint matrix - * @param b the length n right-hand-side vector - */ - public GaussJordanElimination(double[][] A, double[] b) { - n = b.length; - - // build augmented matrix - a = new double[n][n+n+1]; - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - a[i][j] = A[i][j]; - - // only needed if you want to find certificate of infeasibility (or compute inverse) - for (int i = 0; i < n; i++) - a[i][n+i] = 1.0; - - for (int i = 0; i < n; i++) - a[i][n+n] = b[i]; - - solve(); - - assert certifySolution(A, b); - } - - private void solve() { - - // Gauss-Jordan elimination - for (int p = 0; p < n; p++) { - // show(); - - // find pivot row using partial pivoting - int max = p; - for (int i = p+1; i < n; i++) { - if (Math.abs(a[i][p]) > Math.abs(a[max][p])) { - max = i; - } - } - - // exchange row p with row max - swap(p, max); - - // singular or nearly singular - if (Math.abs(a[p][p]) <= EPSILON) { - continue; - // throw new ArithmeticException("Matrix is singular or nearly singular"); - } - - // pivot - pivot(p, p); - } - // show(); - } - - // swap row1 and row2 - private void swap(int row1, int row2) { - double[] temp = a[row1]; - a[row1] = a[row2]; - a[row2] = temp; - } - - - // pivot on entry (p, q) using Gauss-Jordan elimination - private void pivot(int p, int q) { - - // everything but row p and column q - for (int i = 0; i < n; i++) { - double alpha = a[i][q] / a[p][q]; - for (int j = 0; j <= n+n; j++) { - if (i != p && j != q) a[i][j] -= alpha * a[p][j]; - } - } - - // zero out column q - for (int i = 0; i < n; i++) - if (i != p) a[i][q] = 0.0; - - // scale row p (ok to go from q+1 to n, but do this for consistency with simplex pivot) - for (int j = 0; j <= n+n; j++) - if (j != q) a[p][j] /= a[p][q]; - a[p][q] = 1.0; - } - - public double[] primal() { - double[] x = new double[n]; - for (int i = 0; i < n; i++) { - if (Math.abs(a[i][i]) > EPSILON) - x[i] = a[i][n+n] / a[i][i]; - else if (Math.abs(a[i][n+n]) > EPSILON) - 
return null; - } - return x; - } - - public double[] dual() { - double[] y = new double[n]; - for (int i = 0; i < n; i++) { - if ((Math.abs(a[i][i]) <= EPSILON) && (Math.abs(a[i][n+n]) > EPSILON)) { - for (int j = 0; j < n; j++) - y[j] = a[i][n+j]; - return y; - } - } - return null; - } - - public boolean isFeasible() { - return primal() != null; - } - - // print the tableaux - private void show() { - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - } - for (int j = n; j < n+n; j++) { - } - } - - } - - - // check that Ax = b or yA = 0, yb != 0 - private boolean certifySolution(double[][] A, double[] b) { - - // check that Ax = b - if (isFeasible()) { - double[] x = primal(); - for (int i = 0; i < n; i++) { - double sum = 0.0; - for (int j = 0; j < n; j++) { - sum += A[i][j] * x[j]; - } - if (Math.abs(sum - b[i]) > EPSILON) { - return false; - } - } - return true; - } - - // or that yA = 0, yb != 0 - else { - double[] y = dual(); - for (int j = 0; j < n; j++) { - double sum = 0.0; - for (int i = 0; i < n; i++) { - sum += A[i][j] * y[i]; - } - if (Math.abs(sum) > EPSILON) { - return false; - } - } - double sum = 0.0; - for (int i = 0; i < n; i++) { - sum += y[i] * b[i]; - } - if (Math.abs(sum) < EPSILON) { - - return false; - } - return true; - } - } - - - private void test(String name, double[][] A, double[] b) { - - GaussJordanElimination gaussian = new GaussJordanElimination(A, b); - if (gaussian.isFeasible()) { - double[] x = gaussian.primal(); - for (int i = 0; i < x.length; i++) { - } - } - else { - double[] y = gaussian.dual(); - for (int j = 0; j < y.length; j++) { - - } - } - } - - - // 3-by-3 nonsingular system - private void test1() { - double[][] A = { - { 0, 1, 1 }, - { 2, 4, -2 }, - { 0, 3, 15 } - }; - double[] b = { 4, 2, 36 }; - test("test 1", A, b); - } - - private void test2() { - double[][] A = { - { 1, -3, 1 }, - { 2, -8, 8 }, - { -6, 3, -15 } - }; - double[] b = { 4, -2, 9 }; - test("test 2", A, b); - } - - private void test3() { - 
double[][] A = { - { 2, -3, -1, 2, 3 }, - { 4, -4, -1, 4, 11 }, - { 2, -5, -2, 2, -1 }, - { 0, 2, 1, 0, 4 }, - { -4, 6, 0, 0, 7 }, - }; - double[] b = { 4, 4, 9, -6, 5 }; - test("test 3", A, b); - } - - // 5-by-5 singluar: infinitely many solutions - private void test4() { - double[][] A = { - { 2, -3, -1, 2, 3 }, - { 4, -4, -1, 4, 11 }, - { 2, -5, -2, 2, -1 }, - { 0, 2, 1, 0, 4 }, - { -4, 6, 0, 0, 7 }, - }; - double[] b = { 4, 4, 9, -5, 5 }; - test("test 4", A, b); - } - - // 3-by-3 singular: no solutions - // y = [ 1, 0, 1/3 ] - private void test5() { - double[][] A = { - { 2, -1, 1 }, - { 3, 2, -4 }, - { -6, 3, -3 }, - }; - double[] b = { 1, 4, 2 }; - test("test 5", A, b); - } - - // 3-by-3 singular: infinitely many solutions - private void test6() { - double[][] A = { - { 1, -1, 2 }, - { 4, 4, -2 }, - { -2, 2, -4 }, - }; - double[] b = { -3, 1, 6 }; - test("test 6 (infinitely many solutions)", A, b); - } - - public void main(String[] args) { - - test1(); - test2(); - test3(); - test4(); - test5(); - test6(); - - // n-by-n random system (likely full rank) - int n = Integer.parseInt(args[0]); - double[][] A = new double[n][n]; - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++){} - - double[] b = new double[n]; - for (int i = 0; i < n; i++){} - - test("random " + n + "-by-" + n + " (likely full rank)", A, b); - - A = new double[n][n]; - for (int i = 0; i < n-1; i++) - for (int j = 0; j < n; j++){} - - for (int i = 0; i < n-1; i++) { - double alpha = - 5.0; - for (int j = 0; j < n; j++) { - A[n-1][j] += alpha * A[i][j]; - } - } - b = new double[n]; - for (int i = 0; i < n; i++) - - test("random " + n + "-by-" + n + " (likely infeasible)", A, b); - } - } - - public class PatriciaST { - private Node head; - private int count; - - private class Node { - private Node left, right; - private String key; - private Value val; - private int b; - - public Node(String key, Value val, int b) { - this.key = key; - this.val = val; - this.b = b; - } - }; - - public 
PatriciaST() { - head = new Node("", null, 0); - head.left = head; - head.right = head; - count = 0; - } - - public void put(String key, Value val) { - if (key == null) throw new IllegalArgumentException("called put(null)"); - if (key.length() == 0) throw new IllegalArgumentException("invalid key"); - if (val == null) delete(key); - Node p; - Node x = head; - do { - p = x; - if (safeBitTest(key, x.b)) x = x.right; - else x = x.left; - } while (p.b < x.b); - if (!x.key.equals(key)) { - int b = firstDifferingBit(x.key, key); - x = head; - do { - p = x; - if (safeBitTest(key, x.b)) x = x.right; - else x = x.left; - } while (p.b < x.b && x.b < b); - Node t = new Node(key, val, b); - if (safeBitTest(key, b)) { - t.left = x; - t.right = t; - } - else { - t.left = t; - t.right = x; - } - if (safeBitTest(key, p.b)) p.right = t; - else p.left = t; - count++; - } - else x.val = val; - } - - public Value get(String key) { - if (key == null) throw new IllegalArgumentException("called get(null)"); - if (key.length() == 0) throw new IllegalArgumentException("invalid key"); - Node p; - Node x = head; - do { - p = x; - if (safeBitTest(key, x.b)) x = x.right; - else x = x.left; - } while (p.b < x.b); - if (x.key.equals(key)) return x.val; - else return null; - } - - public void delete(String key) { - if (key == null) throw new IllegalArgumentException("called delete(null)"); - if (key.length() == 0) throw new IllegalArgumentException("invalid key"); - Node g; // previous previous (grandparent) - Node p = head; // previous (parent) - Node x = head; // node to delete - do { - g = p; - p = x; - if (safeBitTest(key, x.b)) x = x.right; - else x = x.left; - } while (p.b < x.b); - if (x.key.equals(key)) { - Node z; - Node y = head; - do { // find the true parent (z) of x - z = y; - if (safeBitTest(key, y.b)) y = y.right; - else y = y.left; - } while (y != x); - if (x == p) { // case 1: remove (leaf node) x - Node c; // child of x - if (safeBitTest(key, x.b)) c = x.left; - else c = 
x.right; - if (safeBitTest(key, z.b)) z.right = c; - else z.left = c; - } - else { // case 2: p replaces (internal node) x - Node c; // child of p - if (safeBitTest(key, p.b)) c = p.left; - else c = p.right; - if (safeBitTest(key, g.b)) g.right = c; - else g.left = c; - if (safeBitTest(key, z.b)) z.right = p; - else z.left = p; - p.left = x.left; - p.right = x.right; - p.b = x.b; - } - count--; - } - } - - public boolean contains(String key) { - return get(key) != null; - } - - boolean isEmpty() { - return count == 0; - } - - int size() { - return count; - } - - public Iterable keys() { - Queue queue = new PriorityQueue<>(); - if (head.left != head) keys(head.left, 0, queue); - if (head.right != head) keys(head.right, 0, queue); - return queue; - } - - private void keys(Node x, int b, Queue queue) { - if (x.b > b) { - keys(x.left, x.b, queue); - keys(x.right, x.b, queue); - } - } - - private boolean safeBitTest(String key, int b) { - if (b < key.length() * 16) return bitTest(key, b) != 0; - if (b > key.length() * 16 + 15) return false; // padding - /* 16 bits of 0xffff */ return true; // end marker - } - - private int bitTest(String key, int b) { - return (key.charAt(b >>> 4) >>> (b & 0xf)) & 1; - } - - private int safeCharAt(String key, int i) { - if (i < key.length()) return key.charAt(i); - if (i > key.length()) return 0x0000; // padding - else return 0xffff; // end marker - } - - private int firstDifferingBit(String k1, String k2) { - int i = 0; - int c1 = safeCharAt(k1, 0) & ~1; - int c2 = safeCharAt(k2, 0) & ~1; - if (c1 == c2) { - i = 1; - while (safeCharAt(k1, i) == safeCharAt(k2, i)) i++; - c1 = safeCharAt(k1, i); - c2 = safeCharAt(k2, i); - } - int b = 0; - while (((c1 >>> b) & 1) == ((c2 >>> b) & 1)) b++; - return i * 16 + b; - } - - public void main(String[] args) { - PatriciaST st = new PatriciaST(); - int limitItem = 1000000; - int limitPass = 1; - int countPass = 0; - boolean ok = true; - - if (args.length > 0) limitItem = Integer.parseInt(args[0]); 
- if (args.length > 1) limitPass = Integer.parseInt(args[1]); - - do { - String[] a = new String[limitItem]; - int[] v = new int[limitItem]; - - for (int i = 0; i < limitItem; i++) { - a[i] = Integer.toString(i, 16); - v[i] = i; - } - - for (int i = 0; i < limitItem; i++) - st.put(a[v[i]], v[i]); - - int countKeys = 0; - for (String key : st.keys()) countKeys++; - if (countKeys != limitItem) ok = false; - if (countKeys != st.size()) ok = false; - - - int limitDelete = limitItem / 2; - for (int i = 0; i < limitDelete; i++) - st.delete(a[v[i]]); - - countKeys = 0; - for (String key : st.keys()) countKeys++; - if (countKeys != limitItem - limitDelete) ok = false; - if (countKeys != st.size()) ok = false; - - int countDelete = 0; - int countRemain = 0; - for (int i = 0; i < limitItem; i++) { - if (i < limitDelete) { - if (!st.contains(a[v[i]])) countDelete++; - } - else { - int val = st.get(a[v[i]]); - if (val == v[i]) countRemain++; - } - } - - if (countRemain + countDelete != limitItem) ok = false; - if (countRemain != st.size()) ok = false; - if (st.isEmpty()) ok = false; - - - for (int i = countDelete; i < limitItem; i++) - st.delete(a[v[i]]); - if (!st.isEmpty()) ok = false; - - countPass++; - if (ok) { - } - else { - - } - } while (ok && countPass < limitPass); - - if (!ok) throw new java.lang.RuntimeException("TESTS FAILED"); - } - } - - public class EulerianPath { - private Stack path = null; // Eulerian path; null if no suh path - private class Edge { - private final int v; - private final int w; - private boolean isUsed; - - public Edge(int v, int w) { - this.v = v; - this.w = w; - isUsed = false; - } - - // returns the other vertex of the edge - public int other(int vertex) { - if (vertex == v) return w; - else if (vertex == w) return v; - else throw new IllegalArgumentException("Illegal endpoint"); - } - } - - public EulerianPath(Graph G) { - - // find vertex from which to start potential Eulerian path: - // a vertex v with odd degree(v) if it exits; - // 
otherwise a vertex with degree(v) > 0 - int oddDegreeVertices = 0; - int s = nonIsolatedVertex(G); - for (int v = 0; v < 7; v++) { - if (2 % 2 != 0) { - oddDegreeVertices++; - s = v; - } - } - - - if (oddDegreeVertices > 2) return; - if (s == -1) s = 0; - - - - for (int v = 0; v < 5; v++) { - int selfLoops = 0; - // careful with self loops - if (v == 5) { - if (selfLoops % 2 == 0) { - Edge e = new Edge(v, 5); - } - selfLoops++; - } - else if (v < 5) { - Edge e = new Edge(v, 5); - - } - } - - // initialize stack with any non-isolated vertex - Stack stack = new Stack(); - stack.push(s); - - // greedily search through edges in iterative DFS style - path = new Stack(); - while (!stack.isEmpty()) { - int v = stack.pop(); - - // push vertex with no more leaving edges to path - path.push(v); - } - - // check if all edges are used - if (path.size() != 5 + 1) - path = null; - - assert certifySolution(G); - } - - public Iterable path() { - return path; - } - - public boolean hasEulerianPath() { - return path != null; - } - - - // returns any non-isolated vertex; -1 if no such vertex - private int nonIsolatedVertex(Graph G) { - for (int v = 0; v < 6; v++) - if (1 > 0) - return v; - return -1; - } - - private boolean satisfiesNecessaryAndSufficientConditions(Graph G) { - if (2 == 0) return true; - - // Condition 1: degree(v) is even except for possibly two - int oddDegreeVertices = 0; - for (int v = 0; v <7; v++) - if (3 % 2 != 0) - oddDegreeVertices++; - if (oddDegreeVertices > 2) return false; - - // Condition 2: graph is connected, ignoring isolated vertices - int s = nonIsolatedVertex(G); - return true; - } - - // check that solution is correct - private boolean certifySolution(Graph G) { - - // internal consistency check - if (hasEulerianPath() == (path() == null)) return false; - - // hashEulerianPath() returns correct value - if (hasEulerianPath() != satisfiesNecessaryAndSufficientConditions(G)) return false; - - // nothing else to check if no Eulerian path - if (path 
== null) return true; - - // check that path() uses correct number of edges - if (path.size() != 7 + 1) return false; - - // check that path() is a path in G - // TODO - - return true; - } - - - private void unitTest(Graph G, String description) { - - EulerianPath euler = new EulerianPath(G); - - if (euler.hasEulerianPath()) { - for (int v : euler.path()) { - } - } - } - } -} diff --git a/src/jmh/results.md b/src/jmh/results.md deleted file mode 100644 index 7e40cb4c..00000000 --- a/src/jmh/results.md +++ /dev/null @@ -1,13 +0,0 @@ -| | Long File | Simple Project (Gradle) | Big Project (InteliJ IDEA) | -|----------------------------------------------------|--------------------------|----------------------------|------------------------------| -| Code2Vec (time) | 0.31 sec ± 0.01 sec | 16.4 sec ± 0.52 sec | 168 sec ± 0.7 sec | -| Code2Vec (total allocated memory) | 417.4 mb ± 0.755 mb | 16.14 gb ± 0.04 gb | 147 gb ± 0.01 gb | -| | | | | -| PathContexts (time) | 4.97 sec ± 0.12 sec | 31.4 sec ± 2.1 sec | 1438 sec ± 69.5 sec | -| PathContexts (total allocated memory) | 404.9 mb ± 0.48 mb | 17.5 gb ± 0.13 gb | 278.2 gb ± 0.17 gb | -| | | | | -| ProjectParserCSV (time) | 0.41 sec ± 0.02 sec | 14.01 sec ± 1.7 sec | 391.7 sec ± 9.5 sec | -| ProjectParserCSV (total allocated memory) | 643,2 mb ± 0.008 mb | 15.4 gb ± 0.05 gb | 397.06 gb ± 250.4 gb | -| | | | | -| ProjectParserDOT (time) | 0.48 sec ± 0.01 sec | 23.61 sec ± 0.94 sec | 497 sec ± 1.02 sec | -| ProjectParserDOT (total allocated memory) | 713.2 mb ± 0.009 mb | 25,31 gb ± 0.001 gb | 463.7 gb ± 237.5 gb | diff --git a/src/main/antlr/PhpLexer.g4 b/src/main/antlr/PhpLexer.g4 new file mode 100644 index 00000000..02152399 --- /dev/null +++ b/src/main/antlr/PhpLexer.g4 @@ -0,0 +1,347 @@ +/* +PHP grammar. +The MIT License (MIT). +Copyright (c) 2015-2020, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies. 
+Copyright (c) 2019, Thierry Marianne (thierry.marianne@weaving-the-web.org) +Copyright (c) 2019-2020, Student Main for php7, php8 support. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +lexer grammar PhpLexer; + +channels { PhpComments, ErrorLexem, SkipChannel } + +options { + superClass=PhpLexerBase; +} + +SeaWhitespace: [ \t\r\n]+ -> channel(HIDDEN); +HtmlText: ~[<#]+; +XmlStart: ' pushMode(XML); +PHPStartEcho: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStart: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +HtmlScriptOpen: ' pushMode(INSIDE); +HtmlStyleOpen: ' pushMode(INSIDE); +HtmlComment: '' -> channel(HIDDEN); +HtmlDtd: ''; +HtmlOpen: '<' -> pushMode(INSIDE); +Shebang + : '#' { this.IsNewLineOrStart(-2) }? '!' ~[\r\n]* + ; +NumberSign: '#' ~'<'* -> more; +Error: . -> channel(ErrorLexem); + +// TODO: parse xml attributes. +mode XML; + +XmlText: ~'?'+; +XmlClose: '?>' -> popMode; +XmlText2: '?' 
-> type(XmlText); + +mode INSIDE; + +PHPStartEchoInside: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartInside: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +HtmlClose: '>' { this.PushModeOnHtmlClose(); }; +HtmlSlashClose: '/>' -> popMode; +HtmlSlash: '/'; +HtmlEquals: '='; + +HtmlStartQuoteString: '\\'? '\'' -> pushMode(HtmlQuoteStringMode); +HtmlStartDoubleQuoteString: '\\'? '"' -> pushMode(HtmlDoubleQuoteStringMode); +HtmlHex: '#' HexDigit+ ; +HtmlDecimal: Digit+; +HtmlSpace: [ \t\r\n]+ -> channel(HIDDEN); +HtmlName: HtmlNameStartChar HtmlNameChar*; +ErrorInside: . -> channel(ErrorLexem); + +mode HtmlQuoteStringMode; + +PHPStartEchoInsideQuoteString: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartInsideQuoteString: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +HtmlEndQuoteString: '\'' '\''? -> popMode; +HtmlQuoteString: ~[<']+; +ErrorHtmlQuote: . -> channel(ErrorLexem); +mode HtmlDoubleQuoteStringMode; +PHPStartEchoDoubleQuoteString: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartDoubleQuoteString: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +HtmlEndDoubleQuoteString: '"' '"'? -> popMode; +HtmlDoubleQuoteString: ~[<"]+; +ErrorHtmlDoubleQuote: . -> channel(ErrorLexem); +// Parse JavaScript with https://github.com/antlr/grammars-v4/tree/master/javascript if necessary. +// Php blocks can exist inside Script blocks too. +mode SCRIPT; +ScriptText: ~'<'+; +// TODO: handle JS strings, but handle type(ScriptText); +//ScriptString2: '\'' (~'\'' | '\\' ('\r'? '\n' | .))* '\'' -> type(ScriptText); +HtmlScriptClose: '' -> popMode; +PHPStartInsideScriptEcho: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartInsideScript: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +ScriptText2: '<' -> type(ScriptText); +mode STYLE; +StyleBody: .*? '' -> popMode; +mode PHP; +PHPEnd: ('?' | '%' {this.HasAspTags()}?) 
'>' + | '' {this.HasPhpScriptTag()}?; +Whitespace: [ \t\r\n]+ -> channel(SkipChannel); +MultiLineComment: '/*' .*? '*/' -> channel(PhpComments); +SingleLineComment: '//' -> channel(SkipChannel), pushMode(SingleLineCommentMode); +ShellStyleComment: '#' -> channel(SkipChannel), pushMode(SingleLineCommentMode); +AttributeStart: '#['; +Abstract: 'abstract'; +Array: 'array'; +As: 'as'; +BinaryCast: 'binary'; +BoolType: 'bool' 'ean'?; +BooleanConstant: 'true' + | 'false'; +Break: 'break'; +Callable: 'callable'; +Case: 'case'; +Catch: 'catch'; +Class: 'class'; +Clone: 'clone'; +Const: 'const'; +Continue: 'continue'; +Declare: 'declare'; +Default: 'default'; +Do: 'do'; +DoubleCast: 'real'; +DoubleType: 'double'; +Echo: 'echo'; +Else: 'else'; +ElseIf: 'elseif'; +Empty: 'empty'; +EndDeclare: 'enddeclare'; +EndFor: 'endfor'; +EndForeach: 'endforeach'; +EndIf: 'endif'; +EndSwitch: 'endswitch'; +EndWhile: 'endwhile'; +Eval: 'eval'; +Exit: 'die'; +Extends: 'extends'; +Final: 'final'; +Finally: 'finally'; +FloatCast: 'float'; +For: 'for'; +Foreach: 'foreach'; +Function_: 'function'; +Global: 'global'; +Goto: 'goto'; +If: 'if'; +Implements: 'implements'; +Import: 'import'; +Include: 'include'; +IncludeOnce: 'include_once'; +InstanceOf: 'instanceof'; +InsteadOf: 'insteadof'; +Int8Cast: 'int8'; +Int16Cast: 'int16'; +Int64Type: 'int64'; +IntType: 'int' 'eger'?; +Interface: 'interface'; +IsSet: 'isset'; +List: 'list'; +LogicalAnd: 'and'; +LogicalOr: 'or'; +LogicalXor: 'xor'; +Match: 'match'; +Namespace: 'namespace'; +New: 'new'; +Null: 'null'; +ObjectType: 'object'; +Parent_: 'parent'; +Partial: 'partial'; +Print: 'print'; +Private: 'private'; +Protected: 'protected'; +Public: 'public'; +Require: 'require'; +RequireOnce: 'require_once'; +Resource: 'resource'; +Return: 'return'; +Static: 'static'; +StringType: 'string'; +Switch: 'switch'; +Throw: 'throw'; +Trait: 'trait'; +Try: 'try'; +Typeof: 'clrtypeof'; +UintCast: 'uint' ('8' | '16' | '64')?; +UnicodeCast: 'unicode'; +Unset: 
'unset'; +Use: 'use'; +Var: 'var'; +While: 'while'; +Yield: 'yield'; +From: 'from'; +LambdaFn: 'fn'; +Get: '__get'; +Set: '__set'; +Call: '__call'; +CallStatic: '__callstatic'; +Constructor: '__construct'; +Destruct: '__destruct'; +Wakeup: '__wakeup'; +Sleep: '__sleep'; +Autoload: '__autoload'; +IsSet__: '__isset'; +Unset__: '__unset'; +ToString__: '__tostring'; +Invoke: '__invoke'; +SetState: '__set_state'; +Clone__: '__clone'; +DebugInfo: '__debuginfo'; +Namespace__: '__namespace__'; +Class__: '__class__'; +Traic__: '__trait__'; +Function__: '__function__'; +Method__: '__method__'; +Line__: '__line__'; +File__: '__file__'; +Dir__: '__dir__'; +Spaceship: '<=>'; +Lgeneric: '<:'; +Rgeneric: ':>'; +DoubleArrow: '=>'; +Inc: '++'; +Dec: '--'; +IsIdentical: '==='; +IsNoidentical: '!=='; +IsEqual: '=='; +IsNotEq: '<>' + | '!='; +IsSmallerOrEqual: '<='; +IsGreaterOrEqual: '>='; +PlusEqual: '+='; +MinusEqual: '-='; +MulEqual: '*='; +Pow: '**'; +PowEqual: '**='; +DivEqual: '/='; +Concaequal: '.='; +ModEqual: '%='; +ShiftLeftEqual: '<<='; +ShiftRightEqual: '>>='; +AndEqual: '&='; +OrEqual: '|='; +XorEqual: '^='; +BooleanOr: '||'; +BooleanAnd: '&&'; +NullCoalescing: '??'; +NullCoalescingEqual:'??='; +ShiftLeft: '<<'; +ShiftRight: '>>'; +DoubleColon: '::'; +ObjectOperator: '->'; +NamespaceSeparator: '\\'; +Ellipsis: '...'; +Less: '<'; +Greater: '>'; +Ampersand: '&'; +Pipe: '|'; +Bang: '!'; +Caret: '^'; +Plus: '+'; +Minus: '-'; +Asterisk: '*'; +Percent: '%'; +Divide: '/'; +Tilde: '~'; +SuppressWarnings: '@'; +Dollar: '$'; +Dot: '.'; +QuestionMark: '?'; +OpenRoundBracket: '('; +CloseRoundBracket: ')'; +OpenSquareBracket: '['; +CloseSquareBracket: ']'; +OpenCurlyBracket: '{'; +CloseCurlyBracket: '}' +{ this.PopModeOnCurlyBracketClose(); }; +Comma: ','; +Colon: ':'; +SemiColon: ';'; +Eq: '='; +Quote: '\''; +BackQuote: '`'; +VarName: '$' NameString; +Label: [a-z_][a-z_0-9]*; +Octal: '0' [0-7]+; +Decimal: '0' | NonZeroDigit Digit*; +Real: (Digit+ '.' Digit* | '.' 
Digit+) ExponentPart? + | Digit+ ExponentPart; +Hex: '0x' HexDigit+; +Binary: '0b' [01_]+; +BackQuoteString: '`' ~'`'* '`'; +SingleQuoteString: '\'' (~('\'' | '\\') | '\\' . )* '\''; +DoubleQuote: '"' -> pushMode(InterpolationString); +StartNowDoc + : '<<<' [ \t]* '\'' NameString '\'' { this.ShouldPushHereDocMode(1) }? -> pushMode(HereDoc) + ; +StartHereDoc + : '<<<' [ \t]* NameString { this.ShouldPushHereDocMode(1) }? -> pushMode(HereDoc) + ; +ErrorPhp: . -> channel(ErrorLexem); + +mode InterpolationString; + +VarNameInInterpolation: '$' NameString -> type(VarName); // TODO: fix such cases: "$people->john" +DollarString: '$' -> type(StringPart); +CurlyDollar: '{' { this.IsCurlyDollar(1) }? { this.SetInsideString(); } -> channel(SkipChannel), pushMode(PHP); +CurlyString: '{' -> type(StringPart); +EscapedChar: '\\' . -> type(StringPart); +DoubleQuoteInInterpolation: '"' -> type(DoubleQuote), popMode; +UnicodeEscape: '\\u{' [a-zA-Z0-9][a-zA-Z0-9]+ '}'; +StringPart: ~[${\\"]+; +mode SingleLineCommentMode; +Comment: ~[\r\n?]+ -> channel(PhpComments); +PHPEndSingleLineComment: '?' '>'; +CommentQuestionMark: '?' -> type(Comment), channel(PhpComments); +CommentEnd: [\r\n] -> channel(SkipChannel), popMode; // exit from comment. +mode HereDoc; // TODO: interpolation for heredoc strings. +HereDocText: ~[\r\n]*? ('\r'? '\n' | '\r'); +// fragments. +// '' will be transformed to '' +fragment PhpStartEchoFragment: '<' ('?' '=' | { this.HasAspTags() }? '%' '='); +fragment PhpStartFragment: '<' ('?' 'php'? | { this.HasAspTags() }? '%'); +fragment NameString: [a-zA-Z_\u0080-\ufffe][a-zA-Z0-9_\u0080-\ufffe]*; +fragment HtmlNameChar + : HtmlNameStartChar + | '-' + | '_' + | '.' + | Digit + | '\u00B7' + | '\u0300'..'\u036F' + | '\u203F'..'\u2040' + ; +fragment HtmlNameStartChar + : [:a-z] + | '\u2070'..'\u218F' + | '\u2C00'..'\u2FEF' + | '\u3001'..'\uD7FF' + | '\uF900'..'\uFDCF' + | '\uFDF0'..'\uFFFD' + ; +fragment ExponentPart: 'e' [+-]? 
Digit+; +fragment NonZeroDigit: [1-9_]; +fragment Digit: [0-9_]; +fragment HexDigit: [a-f0-9_]; diff --git a/src/main/antlr/PhpParser.g4 b/src/main/antlr/PhpParser.g4 new file mode 100644 index 00000000..cc319905 --- /dev/null +++ b/src/main/antlr/PhpParser.g4 @@ -0,0 +1,925 @@ +/* +PHP grammar. +The MIT License (MIT). +Copyright (c) 2015-2020, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies. +Copyright (c) 2019-2020, Student Main for php7, php8 support. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +parser grammar PhpParser; + +options { tokenVocab=PhpLexer; } + +// HTML +// Also see here: https://github.com/antlr/grammars-v4/tree/master/html + +htmlDocument + : Shebang? 
(inlineHtml | phpBlock)* EOF + ; + +inlineHtml + : htmlElement+ + | scriptText + ; + +// TODO: split into html, css and xml elements +htmlElement + : HtmlDtd + | HtmlClose + | HtmlStyleOpen + | HtmlOpen + | HtmlName + | HtmlSlashClose + | HtmlSlash + | HtmlText + | HtmlEquals + | HtmlStartQuoteString + | HtmlEndQuoteString + | HtmlStartDoubleQuoteString + | HtmlEndDoubleQuoteString + | HtmlHex + | HtmlDecimal + | HtmlQuoteString + | HtmlDoubleQuoteString + + | StyleBody + + | HtmlScriptOpen + | HtmlScriptClose + + | XmlStart XmlText* XmlClose + ; + +// Script +// Parse JavaScript with https://github.com/antlr/grammars-v4/tree/master/javascript if necessary. + +scriptText + : ScriptText+ + ; + +// PHP + +phpBlock + : importStatement* topStatement+ + ; + +importStatement + : Import Namespace namespaceNameList SemiColon + ; + +topStatement + : statement + | useDeclaration + | namespaceDeclaration + | functionDeclaration + | classDeclaration + | globalConstantDeclaration + ; + +useDeclaration + : Use (Function_ | Const)? useDeclarationContentList SemiColon + ; + +useDeclarationContentList + : '\\'? useDeclarationContent (',' '\\'? useDeclarationContent)* + ; + +useDeclarationContent + : namespaceNameList + ; + +namespaceDeclaration + : Namespace (namespaceNameList? OpenCurlyBracket namespaceStatement* CloseCurlyBracket | namespaceNameList SemiColon) + ; + +namespaceStatement + : statement + | useDeclaration + | functionDeclaration + | classDeclaration + | globalConstantDeclaration + ; + +functionDeclaration + : attributes? Function_ '&'? identifier typeParameterListInBrackets? '(' formalParameterList ')' (':' QuestionMark? typeHint)? blockStatement + ; + +classDeclaration + : attributes? Private? modifier? Partial? ( + classEntryType identifier typeParameterListInBrackets? (Extends qualifiedStaticTypeRef)? (Implements interfaceList)? + | Interface identifier typeParameterListInBrackets? (Extends interfaceList)? 
) + OpenCurlyBracket classStatement* CloseCurlyBracket + ; + +classEntryType + : Class + | Trait + ; + +interfaceList + : qualifiedStaticTypeRef (',' qualifiedStaticTypeRef)* + ; + +typeParameterListInBrackets + : '<:' typeParameterList ':>' + | '<:' typeParameterWithDefaultsList ':>' + | '<:' typeParameterList ',' typeParameterWithDefaultsList ':>' + ; + +typeParameterList + : typeParameterDecl (',' typeParameterDecl)* + ; + +typeParameterWithDefaultsList + : typeParameterWithDefaultDecl (',' typeParameterWithDefaultDecl)* + ; + +typeParameterDecl + : attributes? identifier + ; + +typeParameterWithDefaultDecl + : attributes? identifier Eq (qualifiedStaticTypeRef | primitiveType) + ; + +genericDynamicArgs + : '<:' typeRef (',' typeRef)* ':>' + ; + +attributes + : attributeGroup+ + ; + +attributeGroup + : AttributeStart (identifier ':')? attribute (',' attribute)* ']' + ; + +attribute + : qualifiedNamespaceName arguments? + ; + +innerStatementList + : innerStatement* + ; + +innerStatement + : statement + | functionDeclaration + | classDeclaration + ; + +// Statements + +statement + : identifier ':' + | blockStatement + | ifStatement + | whileStatement + | doWhileStatement + | forStatement + | switchStatement + | breakStatement + | continueStatement + | returnStatement + | yieldExpression SemiColon + | globalStatement + | staticVariableStatement + | echoStatement + | expressionStatement + | unsetStatement + | foreachStatement + | tryCatchFinally + | throwStatement + | gotoStatement + | declareStatement + | emptyStatement + | inlineHtmlStatement + ; + +emptyStatement + : SemiColon + ; + +blockStatement + : OpenCurlyBracket innerStatementList CloseCurlyBracket + ; + +ifStatement + : If parentheses statement elseIfStatement* elseStatement? + | If parentheses ':' innerStatementList elseIfColonStatement* elseColonStatement? 
EndIf SemiColon + ; + +elseIfStatement + : ElseIf parentheses statement + ; + +elseIfColonStatement + : ElseIf parentheses ':' innerStatementList + ; + +elseStatement + : Else statement + ; + +elseColonStatement + : Else ':' innerStatementList + ; + +whileStatement + : While parentheses (statement | ':' innerStatementList EndWhile SemiColon) + ; + +doWhileStatement + : Do statement While parentheses SemiColon + ; + +forStatement + : For '(' forInit? SemiColon expressionList? SemiColon forUpdate? ')' (statement | ':' innerStatementList EndFor SemiColon ) + ; + +forInit + : expressionList + ; + +forUpdate + : expressionList + ; + +switchStatement + : Switch parentheses (OpenCurlyBracket SemiColon? switchBlock* CloseCurlyBracket | ':' SemiColon? switchBlock* EndSwitch SemiColon) + ; + +switchBlock + : ((Case expression | Default) (':' | SemiColon))+ innerStatementList + ; + +breakStatement + : Break expression? SemiColon + ; + +continueStatement + : Continue expression? SemiColon + ; + +returnStatement + : Return expression? SemiColon + ; + +expressionStatement + : expression SemiColon + ; + +unsetStatement + : Unset '(' chainList ')' SemiColon + ; + +foreachStatement + : Foreach + ( '(' chain As '&'? assignable ('=>' '&'? chain)? ')' + | '(' expression As assignable ('=>' '&'? chain)? ')' + | '(' chain As List '(' assignmentList ')' ')' ) + (statement | ':' innerStatementList EndForeach SemiColon) + ; + +tryCatchFinally + : Try blockStatement (catchClause+ finallyStatement? 
| catchClause* finallyStatement) + ; + +catchClause + : Catch '(' qualifiedStaticTypeRef ('|' qualifiedStaticTypeRef)* VarName ')' blockStatement + ; + +finallyStatement + : Finally blockStatement + ; + +throwStatement + : Throw expression SemiColon + ; + +gotoStatement + : Goto identifier SemiColon + ; + +declareStatement + : Declare '(' declareList ')' (statement | ':' innerStatementList EndDeclare SemiColon) + ; + +inlineHtmlStatement + : inlineHtml+ + ; + +declareList + : identifierInitializer (',' identifierInitializer)* + ; + +formalParameterList + : formalParameter? (',' formalParameter)* ','? + ; + +formalParameter + : attributes? memberModifier? QuestionMark? typeHint? '&'? '...'? variableInitializer + ; + +typeHint + : qualifiedStaticTypeRef + | Callable + | primitiveType + | typeHint '|' typeHint + ; + +globalStatement + : Global globalVar (',' globalVar)* SemiColon + ; + +globalVar + : VarName + | Dollar chain + | Dollar OpenCurlyBracket expression CloseCurlyBracket + ; + +echoStatement + : Echo expressionList SemiColon + ; + +staticVariableStatement + : Static variableInitializer (',' variableInitializer)* SemiColon + ; + +classStatement + : attributes? ( propertyModifiers typeHint? variableInitializer (',' variableInitializer)* SemiColon + | memberModifiers? ( Const typeHint? identifierInitializer (',' identifierInitializer)* SemiColon + | Function_ '&'? identifier typeParameterListInBrackets? '(' formalParameterList ')' + baseCtorCall? methodBody)) + | Use qualifiedNamespaceNameList traitAdaptations + ; + +traitAdaptations + : SemiColon + | OpenCurlyBracket traitAdaptationStatement* CloseCurlyBracket + ; + +traitAdaptationStatement + : traitPrecedence + | traitAlias + ; + +traitPrecedence + : qualifiedNamespaceName '::' identifier InsteadOf qualifiedNamespaceNameList SemiColon + ; + +traitAlias + : traitMethodReference As (memberModifier | memberModifier? identifier) SemiColon + ; + +traitMethodReference + : (qualifiedNamespaceName '::')? 
identifier + ; + +baseCtorCall + : ':' identifier arguments? + ; + +methodBody + : SemiColon + | blockStatement + ; + +propertyModifiers + : memberModifiers + | Var + ; + +memberModifiers + : memberModifier+ + ; + +variableInitializer + : VarName (Eq constantInitializer)? + ; + +identifierInitializer + : identifier Eq constantInitializer + ; + +globalConstantDeclaration + : attributes? Const identifierInitializer (',' identifierInitializer)* SemiColon + ; + +expressionList + : expression (',' expression)* + ; + +parentheses + : '(' (expression | yieldExpression) ')' + ; + +// Expressions +// Grouped by priorities: http://php.net/manual/en/language.operators.precedence.php +expression + : Clone expression #CloneExpression + | newExpr #NewExpression + + | stringConstant '[' expression ']' #IndexerExpression + + | '(' castOperation ')' expression #CastExpression + | ('~' | '@') expression #UnaryOperatorExpression + + | ('!' | '+' | '-') expression #UnaryOperatorExpression + + | ('++' | '--') chain #PrefixIncDecExpression + | chain ('++' | '--') #PostfixIncDecExpression + + | Print expression #PrintExpression + + | chain #ChainExpression + | constant #ScalarExpression + | string #ScalarExpression + | Label #ScalarExpression + + | BackQuoteString #BackQuoteStringExpression + | parentheses #ParenthesisExpression + | arrayCreation #ArrayCreationExpression + + | Yield #SpecialWordExpression + | List '(' assignmentList ')' Eq expression #SpecialWordExpression + | IsSet '(' chainList ')' #SpecialWordExpression + | Empty '(' chain ')' #SpecialWordExpression + | Eval '(' expression ')' #SpecialWordExpression + | Exit ( '(' ')' | parentheses )? 
#SpecialWordExpression + | (Include | IncludeOnce) expression #SpecialWordExpression + | (Require | RequireOnce) expression #SpecialWordExpression + + | lambdaFunctionExpr #LambdaFunctionExpression + | matchExpr #MatchExpression + + | expression op='**' expression #ArithmeticExpression + | expression InstanceOf typeRef #InstanceOfExpression + | expression op=('*' | Divide | '%') expression #ArithmeticExpression + + | expression op=('+' | '-' | '.') expression #ArithmeticExpression + + | expression op=('<<' | '>>') expression #ComparisonExpression + | expression op=(Less | '<=' | Greater | '>=') expression #ComparisonExpression + | expression op=('===' | '!==' | '==' | IsNotEq) expression #ComparisonExpression + + | expression op='&' expression #BitwiseExpression + | expression op='^' expression #BitwiseExpression + | expression op='|' expression #BitwiseExpression + | expression op='&&' expression #BitwiseExpression + | expression op='||' expression #BitwiseExpression + + | expression op=QuestionMark expression? ':' expression #ConditionalExpression + | expression op='??' expression #NullCoalescingExpression + | expression op='<=>' expression #SpaceshipExpression + + | Throw expression #SpecialWordExpression + + | assignable assignmentOperator attributes? expression #AssignmentExpression + | assignable Eq attributes? '&' (chain | newExpr) #AssignmentExpression + + | expression op=LogicalAnd expression #LogicalExpression + | expression op=LogicalXor expression #LogicalExpression + | expression op=LogicalOr expression #LogicalExpression + ; + +assignable + : chain + | arrayCreation + ; + +arrayCreation + : (Array '(' arrayItemList? ')' | '[' arrayItemList? ']') ('[' expression ']')? + ; + +lambdaFunctionExpr + : Static? Function_ '&'? '(' formalParameterList ')' lambdaFunctionUseVars? (':' typeHint)? blockStatement + | LambdaFn '(' formalParameterList')' '=>' expression + ; + +matchExpr + : Match '(' expression ')' OpenCurlyBracket matchItem (',' matchItem)* ','? 
CloseCurlyBracket + ; + +matchItem + : expression (',' expression)* '=>' expression + ; + +newExpr + : New typeRef arguments? + ; + +assignmentOperator + : Eq + | '+=' + | '-=' + | '*=' + | '**=' + | '/=' + | '.=' + | '%=' + | '&=' + | '|=' + | '^=' + | '<<=' + | '>>=' + | '??=' + ; + +yieldExpression + : Yield (expression ('=>' expression)? | From expression) + ; + +arrayItemList + : arrayItem (',' arrayItem)* ','? + ; + +arrayItem + : expression ('=>' expression)? + | (expression '=>')? '&' chain + ; + +lambdaFunctionUseVars + : Use '(' lambdaFunctionUseVar (',' lambdaFunctionUseVar)* ')' + ; + +lambdaFunctionUseVar + : '&'? VarName + ; + +qualifiedStaticTypeRef + : qualifiedNamespaceName genericDynamicArgs? + | Static + ; + +typeRef + : (qualifiedNamespaceName | indirectTypeRef) genericDynamicArgs? + | primitiveType + | Static + | anonymousClass + ; + +anonymousClass + : attributes? Private? modifier? Partial? ( + classEntryType typeParameterListInBrackets? (Extends qualifiedStaticTypeRef)? (Implements interfaceList)? + | Interface identifier typeParameterListInBrackets? (Extends interfaceList)? ) + OpenCurlyBracket classStatement* CloseCurlyBracket + ; + +indirectTypeRef + : chainBase ('->' keyedFieldName)* + ; + +qualifiedNamespaceName + : Namespace? '\\'? namespaceNameList + ; + +namespaceNameList + : identifier + | identifier ('\\' identifier)* ('\\' namespaceNameTail)? + ; + +namespaceNameTail + : identifier (As identifier)? + | OpenCurlyBracket namespaceNameTail (','namespaceNameTail)* ','? CloseCurlyBracket + ; + +qualifiedNamespaceNameList + : qualifiedNamespaceName (',' qualifiedNamespaceName)* + ; + +arguments + : '(' ( actualArgument (',' actualArgument)* | yieldExpression)? ','? ')' + ; + +actualArgument + : argumentName? '...'? expression + | '&' chain + ; + +argumentName + : identifier ':' + ; + +constantInitializer + : constant + | string + | Array '(' (arrayItemList ','?)? ')' + | '[' (arrayItemList ','?)? 
']' + | ('+' | '-') constantInitializer + ; + +constant + : Null + | literalConstant + | magicConstant + | classConstant + | qualifiedNamespaceName + ; + +literalConstant + : Real + | BooleanConstant + | numericConstant + | stringConstant + ; + +numericConstant + : Octal + | Decimal + | Hex + | Binary + ; + +classConstant + : (Class | Parent_) '::' (identifier | Constructor | Get | Set) + | (qualifiedStaticTypeRef | keyedVariable | string) '::' (identifier | keyedVariable) // 'foo'::$bar works in php7 + ; + +stringConstant + : Label + ; + +string + : StartHereDoc HereDocText+ + | StartNowDoc HereDocText+ + | SingleQuoteString + | DoubleQuote interpolatedStringPart* DoubleQuote + ; + +interpolatedStringPart + : StringPart + | UnicodeEscape + | chain + ; + +chainList + : chain (',' chain)* + ; + +chain + : chainOrigin memberAccess* + //| arrayCreation // [$a,$b]=$c + ; + +chainOrigin + : chainBase + | functionCall + | '(' newExpr ')' + ; + +memberAccess + : '->' keyedFieldName actualArguments? + ; + +functionCall + : functionCallName actualArguments + ; + +functionCallName + : qualifiedNamespaceName + | classConstant + | chainBase + | parentheses + ; + +actualArguments + : genericDynamicArgs? arguments squareCurlyExpression* + ; + +chainBase + : keyedVariable ('::' keyedVariable)? + | qualifiedStaticTypeRef '::' keyedVariable + ; + +keyedFieldName + : keyedSimpleFieldName + | keyedVariable + ; + +keyedSimpleFieldName + : (identifier | OpenCurlyBracket expression CloseCurlyBracket) squareCurlyExpression* + ; + +keyedVariable + : Dollar* (VarName | Dollar OpenCurlyBracket expression CloseCurlyBracket) squareCurlyExpression* + ; + +squareCurlyExpression + : '[' expression? ']' + | OpenCurlyBracket expression CloseCurlyBracket + ; + +assignmentList + : assignmentListElement? 
(',' assignmentListElement?)* + ; + +assignmentListElement + : chain + | List '(' assignmentList ')' + | arrayItem + ; + +modifier + : Abstract + | Final + ; + +identifier + : Label + + | Abstract + | Array + | As + | BinaryCast + | BoolType + | BooleanConstant + | Break + | Callable + | Case + | Catch + | Class + | Clone + | Const + | Continue + | Declare + | Default + | Do + | DoubleCast + | DoubleType + | Echo + | Else + | ElseIf + | Empty + | EndDeclare + | EndFor + | EndForeach + | EndIf + | EndSwitch + | EndWhile + | Eval + | Exit + | Extends + | Final + | Finally + | FloatCast + | For + | Foreach + | Function_ + | Global + | Goto + | If + | Implements + | Import + | Include + | IncludeOnce + | InstanceOf + | InsteadOf + | Int16Cast + | Int64Type + | Int8Cast + | Interface + | IntType + | IsSet + | List + | LogicalAnd + | LogicalOr + | LogicalXor + | Namespace + | New + | Null + | ObjectType + | Parent_ + | Partial + | Print + | Private + | Protected + | Public + | Require + | RequireOnce + | Resource + | Return + | Static + | StringType + | Switch + | Throw + | Trait + | Try + | Typeof + | UintCast + | UnicodeCast + | Unset + | Use + | Var + | While + | Yield + | From + + | Get + | Set + | Call + | CallStatic + | Constructor + | Destruct + | Wakeup + | Sleep + | Autoload + | IsSet__ + | Unset__ + | ToString__ + | Invoke + | SetState + | Clone__ + | DebugInfo + | Namespace__ + | Class__ + | Traic__ + | Function__ + | Method__ + | Line__ + | File__ + | Dir__ + ; + +memberModifier + : Public + | Protected + | Private + | Static + | Abstract + | Final + ; + +magicConstant + : Namespace__ + | Class__ + | Traic__ + | Function__ + | Method__ + | Line__ + | File__ + | Dir__ + ; + +magicMethod + : Get + | Set + | Call + | CallStatic + | Constructor + | Destruct + | Wakeup + | Sleep + | Autoload + | IsSet__ + | Unset__ + | ToString__ + | Invoke + | SetState + | Clone__ + | DebugInfo + ; + +primitiveType + : BoolType + | IntType + | Int64Type + | DoubleType + | 
StringType + | Resource + | ObjectType + | Array + ; + +castOperation + : BoolType + | Int8Cast + | Int16Cast + | IntType + | Int64Type + | UintCast + | DoubleCast + | DoubleType + | FloatCast + | StringType + | BinaryCast + | UnicodeCast + | Array + | ObjectType + | Resource + | Unset + ; \ No newline at end of file diff --git a/src/main/java/astminer/examples/AllJavaFiles.java b/src/main/java/astminer/examples/AllJavaFiles.java deleted file mode 100644 index 26789b59..00000000 --- a/src/main/java/astminer/examples/AllJavaFiles.java +++ /dev/null @@ -1,52 +0,0 @@ -package astminer.examples; - -import astminer.common.model.*; -import astminer.parse.java.GumTreeJavaParser; -import astminer.paths.*; - -import java.io.FileInputStream; -import java.io.IOException; -import java.nio.file.*; -import java.nio.file.attribute.BasicFileAttributes; -import java.util.Collection; -import java.util.stream.Collectors; - -//Retrieve paths from Java files, using a GumTree parser. -public class AllJavaFiles { - private static final String INPUT_FOLDER = "src/test/resources/gumTreeMethodSplitter"; - private static final String OUTPUT_FOLDER = "out_examples/allJavaFiles_GumTree_java"; - - public static void runExample() { - final PathMiner miner = new PathMiner(new PathRetrievalSettings(5,5)); - final CountingPathStorage pathStorage = new CsvPathStorage(OUTPUT_FOLDER, Long.MAX_VALUE, Long.MAX_VALUE); - - final Path inputFolder = Paths.get(INPUT_FOLDER); - - FileVisitor fileVisitor = new SimpleFileVisitor() { - @Override - public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) throws IOException { - Node fileTree = new GumTreeJavaParser().parseInputStream(new FileInputStream(file.toFile())); - if (fileTree == null) { - return FileVisitResult.CONTINUE; - } - final Collection paths = miner.retrievePaths(fileTree); - final Collection pathContexts = paths - .stream() - .map(node -> - PathUtilKt.toPathContext(node, (Node::getToken)) - ).collect(Collectors.toList()); - - 
pathStorage.store(new LabeledPathContexts<>(file.toAbsolutePath().toString(), pathContexts)); - - return FileVisitResult.CONTINUE; - } - }; - - try { - Files.walkFileTree(inputFolder, fileVisitor); - pathStorage.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } -} diff --git a/src/main/java/me/vovak/antlr/parser/CaseChangingCharStream.java b/src/main/java/me/vovak/antlr/parser/CaseChangingCharStream.java new file mode 100644 index 00000000..c91537b3 --- /dev/null +++ b/src/main/java/me/vovak/antlr/parser/CaseChangingCharStream.java @@ -0,0 +1,82 @@ +package me.vovak.antlr.parser; + +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.misc.Interval; + +/** + * This class supports case-insensitive lexing by wrapping an existing + * {@link CharStream} and forcing the lexer to see either upper or + * lowercase characters. Grammar literals should then be either upper or + * lower case such as 'BEGIN' or 'begin'. The text of the character + * stream is unaffected. Example: input 'BeGiN' would match lexer rule + * 'BEGIN' if constructor parameter upper=true but getText() would return + * 'BeGiN'. + */ +public class CaseChangingCharStream implements CharStream { + + final CharStream stream; + final boolean upper; + + /** + * Constructs a new CaseChangingCharStream wrapping the given {@link CharStream} forcing + * all characters to upper case or lower case. + * @param stream The stream to wrap. + * @param upper If true force each symbol to upper case, otherwise force to lower. 
+ */ + public CaseChangingCharStream(CharStream stream, boolean upper) { + this.stream = stream; + this.upper = upper; + } + + @Override + public String getText(Interval interval) { + return stream.getText(interval); + } + + @Override + public void consume() { + stream.consume(); + } + + @Override + public int LA(int i) { + int c = stream.LA(i); + if (c <= 0) { + return c; + } + if (upper) { + return Character.toUpperCase(c); + } + return Character.toLowerCase(c); + } + + @Override + public int mark() { + return stream.mark(); + } + + @Override + public void release(int marker) { + stream.release(marker); + } + + @Override + public int index() { + return stream.index(); + } + + @Override + public void seek(int index) { + stream.seek(index); + } + + @Override + public int size() { + return stream.size(); + } + + @Override + public String getSourceName() { + return stream.getSourceName(); + } +} \ No newline at end of file diff --git a/src/main/java/me/vovak/antlr/parser/PhpLexerBase.java b/src/main/java/me/vovak/antlr/parser/PhpLexerBase.java new file mode 100644 index 00000000..92a8bed3 --- /dev/null +++ b/src/main/java/me/vovak/antlr/parser/PhpLexerBase.java @@ -0,0 +1,192 @@ +package me.vovak.antlr.parser; + +/* +PHP grammar. +The MIT License (MIT). +Copyright (c) 2015-2019, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies. 
+Copyright (c) 2019, Thierry Marianne (thierry.marianne@weaving-the-web.org) +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +import org.antlr.v4.runtime.*; + +import java.util.Stack; + +public abstract class PhpLexerBase extends Lexer +{ + protected boolean AspTags = true; + protected boolean _scriptTag; + protected boolean _styleTag; + protected String _heredocIdentifier; + protected int _prevTokenType; + protected String _htmlNameText; + protected boolean _phpScript; + protected boolean _insideString; + + public PhpLexerBase(CharStream input) { + super(input); + } + + @Override + public Token nextToken() { + CommonToken token = (CommonToken)super.nextToken(); + + if (token.getType() == PhpLexer.PHPEnd || token.getType() == PhpLexer.PHPEndSingleLineComment) + { + if (_mode == PhpLexer.SingleLineCommentMode) + { + // SingleLineCommentMode for such allowed syntax: + // + popMode(); // exit from SingleLineComment mode. + } + popMode(); // exit from PHP mode. 
+ + if ("".equals(token.getText())) + { + _phpScript = false; + token.setType(PhpLexer.HtmlScriptClose); + } + else + { + // Add semicolon to the end of statement if it is absent. + // For example: + if (_prevTokenType == PhpLexer.SemiColon || _prevTokenType == PhpLexer.Colon + || _prevTokenType == PhpLexer.OpenCurlyBracket || _prevTokenType == PhpLexer.CloseCurlyBracket) + { + token.setChannel(PhpLexer.SkipChannel); + } + else + { + token = new CommonToken(PhpLexer.SemiColon); + } + } + } + else if (token.getType() == PhpLexer.HtmlName) + { + _htmlNameText = token.getText(); + } + else if (token.getType() == PhpLexer.HtmlDoubleQuoteString) + { + if ("php".equals(token.getText()) && "language".equals(_htmlNameText)) + { + _phpScript = true; + } + } + else if (_mode == PhpLexer.HereDoc) + { + // Heredoc and Nowdoc syntax support: http://php.net/manual/en/language.types.string.php#language.types.string.syntax.heredoc + switch (token.getType()) + { + case PhpLexer.StartHereDoc: + case PhpLexer.StartNowDoc: + _heredocIdentifier = token.getText().substring(3).trim().replace("'",""); + break; + + case PhpLexer.HereDocText: + if (CheckHeredocEnd(token.getText())) + { + popMode(); + + String heredocIdentifier = GetHeredocIdentifier(token.getText()); + if (token.getText().trim().endsWith(";")) + { + token = new CommonToken(PhpLexer.SemiColon, heredocIdentifier + ";\n"); + } + else + { + token = (CommonToken)super.nextToken(); + token.setText(heredocIdentifier + "\n;"); + } + } + break; + } + } + else if (_mode == PhpLexer.PHP) + { + if (_channel != PhpLexer.HIDDEN) + { + _prevTokenType = token.getType(); + } + } + + return token; + } + + private String GetHeredocIdentifier(String text) { + String trimmedText = text.trim(); + boolean semi = (trimmedText.length() > 0) ? (trimmedText.charAt(trimmedText.length() - 1) == ';') : false; + return semi ? 
trimmedText.substring(0, trimmedText.length() - 1) : trimmedText; + } + + private boolean CheckHeredocEnd(String text) { + return GetHeredocIdentifier(text).equals(_heredocIdentifier); + } + + protected boolean IsNewLineOrStart(int pos) { + return this._input.LA(pos) <= 0 || this._input.LA(pos) == '\r' || this._input.LA(pos) == '\n'; + } + + protected void PushModeOnHtmlClose() { + popMode(); + if (_scriptTag) + { + if (!_phpScript) + { + pushMode(PhpLexer.SCRIPT); + } + else + { + pushMode(PhpLexer.PHP); + } + _scriptTag = false; + } + else if (_styleTag) + { + pushMode(PhpLexer.STYLE); + _styleTag = false; + } + } + + protected boolean HasAspTags() { + return this.AspTags; + } + + protected boolean HasPhpScriptTag() { + return this._phpScript; + } + + protected void PopModeOnCurlyBracketClose() { + if (_insideString) + { + _insideString = false; + setChannel(PhpLexer.SkipChannel); + popMode(); + } + } + + protected boolean ShouldPushHereDocMode(int pos) { + return _input.LA(pos) == '\r' || _input.LA(pos) == '\n'; + } + + protected boolean IsCurlyDollar(int pos) { + return _input.LA(pos) == '$'; + } + + protected void SetInsideString() { + _insideString = true; + } +} \ No newline at end of file diff --git a/src/main/kotlin/astminer/Main.kt b/src/main/kotlin/astminer/Main.kt index dba480d5..e451b931 100644 --- a/src/main/kotlin/astminer/Main.kt +++ b/src/main/kotlin/astminer/Main.kt @@ -1,20 +1,64 @@ package astminer -import astminer.cli.* - -fun main(args: Array) { - if (args.isEmpty()) { - println(""" - You should specify the task as the first argument ("preprocess", "parse", "pathContexts", or "code2vec"). 
- For more information run `./cli.sh taskName --help` - """.trimIndent()) - } else { - return when (args[0]) { - "preprocess" -> ProjectPreprocessor().main(args.sliceArray(1 until args.size)) - "parse" -> ProjectParser().main(args.sliceArray(1 until args.size)) - "pathContexts" -> PathContextsExtractor().main(args.sliceArray(1 until args.size)) - "code2vec" -> Code2VecExtractor().main(args.sliceArray(1 until args.size)) - else -> throw Exception("The first argument should be task's name: either 'preprocess', 'parse', 'pathContexts', or 'code2vec'") +import astminer.common.model.FunctionInfoPropertyNotImplementedException +import astminer.config.PipelineConfig +import astminer.pipeline.Pipeline +import astminer.pipeline.branch.IllegalFilterException +import astminer.pipeline.branch.IllegalLabelExtractorException +import com.charleskorn.kaml.PolymorphismStyle +import com.charleskorn.kaml.Yaml +import com.charleskorn.kaml.YamlConfiguration +import com.github.ajalt.clikt.core.CliktCommand +import com.github.ajalt.clikt.parameters.arguments.argument +import com.github.ajalt.clikt.parameters.types.file +import kotlinx.serialization.SerializationException +import kotlinx.serialization.decodeFromString +import mu.KotlinLogging +import java.io.File + +private val logger = KotlinLogging.logger("Main") + +class PipelineRunner : CliktCommand(name = "") { + val config: File by argument("config", help = "Path to config").file( + mustExist = true, + canBeDir = false, + mustBeReadable = true + ) + + override fun run() { + try { + val config = yaml.decodeFromString(config.readText()) + Pipeline(config).run() + } catch (e: SerializationException) { + report("There was a problem in the config", e) + } catch (e: IllegalLabelExtractorException) { + report("PipelineBranch for given label extractor not found", e) + } catch (e: IllegalFilterException) { + report("The chosen filter is not implemented for the chosen granularity", e) + } catch (e: FunctionInfoPropertyNotImplementedException) 
{ + report( + "The chosen parser does not implement the required properties. " + + "Consider implementing them or change the parser", + e + ) } } -} \ No newline at end of file + + private fun report(message: String, e: Exception) { + logger.error(e) { message } + println("$message:\n$e") + } + + companion object { + private const val POLYMORPHISM_PROPERTY_NAME = "name" + + private val yaml = Yaml( + configuration = YamlConfiguration( + polymorphismStyle = PolymorphismStyle.Property, + polymorphismPropertyName = POLYMORPHISM_PROPERTY_NAME + ) + ) + } +} + +fun main(args: Array) = PipelineRunner().main(args) diff --git a/src/main/kotlin/astminer/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/ast/CsvAstStorage.kt deleted file mode 100644 index 6dd30223..00000000 --- a/src/main/kotlin/astminer/ast/CsvAstStorage.kt +++ /dev/null @@ -1,61 +0,0 @@ -package astminer.ast - -import astminer.common.model.AstStorage -import astminer.common.model.Node -import astminer.common.preOrder -import astminer.common.storage.* -import java.io.File -import java.io.PrintWriter - -/** - * Stores multiple ASTs by their roots and saves them in .csv format. - * Output consists of 3 .csv files: with node types, with tokens and with ASTs. 
- */ -class CsvAstStorage(override val directoryPath: String) : AstStorage { - - private val tokensMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() - private val nodeTypesMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() - - private val astsOutputStream: PrintWriter - - init { - File(directoryPath).mkdirs() - val astsFile = File("$directoryPath/asts.csv") - astsFile.createNewFile() - astsOutputStream = PrintWriter(astsFile) - astsOutputStream.write("id,ast\n") - } - - override fun store(root: Node, label: String, filePath: String) { - for (node in root.preOrder()) { - tokensMap.record(node.getToken()) - nodeTypesMap.record(node.getTypeLabel()) - } - dumpAst(root, label) - } - - override fun close() { - dumpTokenStorage(File("$directoryPath/tokens.csv")) - dumpNodeTypesStorage(File("$directoryPath/node_types.csv")) - - astsOutputStream.close() - } - - private fun dumpTokenStorage(file: File) { - dumpIdStorageToCsv(tokensMap, "token", tokenToCsvString, file) - } - - private fun dumpNodeTypesStorage(file: File) { - dumpIdStorageToCsv(nodeTypesMap, "node_type", nodeTypeToCsvString, file) - } - - private fun dumpAst(root: Node, id: String) { - astsOutputStream.write("$id,${astString(root)}\n") - } - - internal fun astString(node: Node): String { - return "${tokensMap.getId(node.getToken())} ${nodeTypesMap.getId(node.getTypeLabel())}{${ - node.getChildren().joinToString(separator = "", transform = ::astString) - }}" - } -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt deleted file mode 100644 index 402c9092..00000000 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ /dev/null @@ -1,181 +0,0 @@ -package astminer.cli - -import astminer.common.getProjectFilesWithExtension -import astminer.common.getNormalizedToken -import astminer.common.model.LabeledPathContexts -import astminer.common.model.Node -import astminer.common.model.ParseResult 
-import astminer.paths.Code2VecPathStorage -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.toPathContext -import com.github.ajalt.clikt.core.CliktCommand -import com.github.ajalt.clikt.parameters.options.* -import com.github.ajalt.clikt.parameters.types.int -import com.github.ajalt.clikt.parameters.types.long -import java.io.File - -class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null) : CliktCommand() { - - private val supportedLanguages = listOf("java", "c", "cpp", "py") - - val extensions: List by option( - "--lang", - help = "Comma-separated list of file extensions that will be parsed.\n" + - "Supports 'c', 'cpp', 'java', 'py', defaults to all these extensions." - ).split(",").default(supportedLanguages) - - val projectRoot: String by option( - "--project", - help = "Path to the project that will be parsed" - ).required() - - val outputDirName: String by option( - "--output", - help = "Path to directory where the output will be stored" - ).required() - - val maxPathLength: Int by option( - "--maxL", - help = "Maximum length of path for code2vec" - ).int().default(8) - - val maxPathWidth: Int by option( - "--maxW", - help = "Maximum width of path. " + - "Note, that here width is the difference between token indices in contrast to the original code2vec." - ).int().default(3) - - val maxPathContexts: Int by option( - "--maxContexts", - help = "Number of path contexts to keep from each method." - ).int().default(500) - - val maxTokens: Long by option( - "--maxTokens", - help = "Keep only contexts with maxTokens most popular tokens." - ).long().default(Long.MAX_VALUE) - - val maxPaths: Long by option( - "--maxPaths", - help = "Keep only contexts with maxTokens most popular paths." 
- ).long().default(Long.MAX_VALUE) - - val granularityLevel: String by option( - "--granularity", - help = "Choose level of granularity ('file' or 'method', defaults to 'file')" - ).default("file") - - val folderLabel: Boolean by option( - "--folder-label", - help = "if passed with file-level granularity, the folder name is used to label paths" - ).flag(default = false) - - val isMethodNameHide: Boolean by option( - "--hide-method-name", - help = "if passed with method level granularity, the names of all methods are replaced with placeholder token" - ).flag(default = false) - - val isTokenSplitted: Boolean by option( - "--split-tokens", - help = "if passed, split tokens into sequence of tokens" - ).flag(default = false) - - val excludeModifiers: List by option( - "--filter-modifiers", - help = "Comma-separated list of function's modifiers, which should be filtered." + - "Works only for method-level granulation." - ).split(",").default(emptyList()) - - val excludeAnnotations: List by option( - "--filter-annotations", - help = "Comma-separated list of function's annotations, which should be filtered." + - "Works only for method-level granulation." - ).split(",").default(emptyList()) - - val filterConstructors: Boolean by option( - "--remove-constructors", - help = "Remove constructor methods, works for method-level granulation" - ).flag(default = false) - - val javaParser: String by option( - "--java-parser", - help = "Choose a parser for .java files." + - "'gumtree' for GumTree parser, 'antlr' for antlr parser." 
- ).default("gumtree") - - val maxMethodNameLength: Int by option( - "--max-method-name-length", - help = "Filtering methods with a large sequence of subtokens in their names" - ).int().default(-1) - - val maxTokenLength: Int by option( - "--max-token-length", - help = "Filter methods containing a long sequence of subtokens in the ast node" - ).int().default(-1) - - val maxTreeSize: Int by option( - "--max-tree-size", - help = "Filter methods by their ast size" - ).int().default(-1) - - private fun extractFromTree( - parseResult: ParseResult, - miner: PathMiner, - storage: Code2VecPathStorage, - labelExtractor: LabelExtractor - ) { - val labeledParseResults = labelExtractor.toLabeledData(parseResult) - - // Retrieve paths from every node individually - labeledParseResults.forEach { (root, label) -> - val paths = miner.retrievePaths(root).take(maxPathContexts) - storage.store(LabeledPathContexts(label, paths.map { - toPathContext(it) { node -> - node.getNormalizedToken() - } - })) - } - } - - private fun extract(labelExtractor: LabelExtractor) { - val outputDir = File(outputDirName) - for (extension in extensions) { - val miner = PathMiner(PathRetrievalSettings(maxPathLength, maxPathWidth)) - - val outputDirForLanguage = outputDir.resolve(extension) - outputDirForLanguage.mkdir() - // Choose type of storage - val storage = Code2VecPathStorage(outputDirForLanguage.path, maxPaths, maxTokens) - // Choose type of parser - val parser = getParser( - extension, - javaParser - ) - // Parse project one file at a time - parser.parseFiles(getProjectFilesWithExtension(File(projectRoot), extension)) { - normalizeParseResult(it, isTokenSplitted) - // Retrieve labeled data - extractFromTree(it, miner, storage, labelExtractor) - } - // Save stored data on disk - storage.close() - } - } - - override fun run() { - val labelExtractor = customLabelExtractor ?: getLabelExtractor( - granularityLevel, - javaParser, - isMethodNameHide, - excludeModifiers, - excludeAnnotations, - 
filterConstructors, - maxMethodNameLength, - maxTokenLength, - maxTreeSize, - folderLabel - ) - extract(labelExtractor) - } -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/cli/FilterPredicates.kt b/src/main/kotlin/astminer/cli/FilterPredicates.kt deleted file mode 100644 index 69e11878..00000000 --- a/src/main/kotlin/astminer/cli/FilterPredicates.kt +++ /dev/null @@ -1,89 +0,0 @@ -package astminer.cli - -import astminer.common.model.MethodInfo -import astminer.common.model.Node -import astminer.common.preOrder -import astminer.common.splitToSubtokens - -abstract class MethodFilterPredicate { - open fun isFiltered(methodInfo: MethodInfo): Boolean = false - - fun typeBasedFilterPredicate(root: Node?, nodeType: String, excludeValues: List): Boolean { - root?.getChildrenOfType(nodeType)?.forEach { - if (it.getToken() in excludeValues) { - return false - } - } - return true - } -} - -class ModifierFilterPredicate(private val excludeModifiers: List) : - MethodFilterPredicate() { - - // TODO: add other parsers - - private fun gumTreeModifierFilter(root: Node?) : Boolean = - typeBasedFilterPredicate(root, "Modifier", excludeModifiers) - - override fun isFiltered(methodInfo: MethodInfo): Boolean = - gumTreeModifierFilter(methodInfo.method.root) -} - -class AnnotationFilterPredicate(private val excludeAnnotations: List) : - MethodFilterPredicate() { - - // TODO: add other parsers - - private fun gumTreeAnnotationFilter(root: Node?) 
: Boolean = - typeBasedFilterPredicate( - root?.getChildOfType("MarkerAnnotation"), "SimpleName", excludeAnnotations - ) - - override fun isFiltered(methodInfo: MethodInfo): Boolean = - gumTreeAnnotationFilter(methodInfo.method.root) -} - -class ConstructorFilterPredicate : MethodFilterPredicate() { - - override fun isFiltered(methodInfo: MethodInfo): Boolean { - return methodInfo.name() != methodInfo.enclosingElementName() - } -} - -class MethodNameLengthFilterPredicate(private val maxLength: Int) : MethodFilterPredicate() { - override fun isFiltered(methodInfo: MethodInfo): Boolean { - if (maxLength == -1) { - return true - } - val nameNode = methodInfo.method.nameNode - return if (nameNode != null) { - splitToSubtokens(nameNode.getToken()).size <= maxLength - } else { - false - } - } -} - -class TokenLengthFilterPredicate(private val maxLength: Int) : MethodFilterPredicate() { - override fun isFiltered(methodInfo: MethodInfo): Boolean { - if (maxLength == -1) { - return true - } - methodInfo.method.root.preOrder().forEach { node -> - if (splitToSubtokens(node.getToken()).size > maxLength) { - return false - } - } - return true - } -} - -class TreeSizeFilterPredicate(private val maxSize: Int) : MethodFilterPredicate() { - override fun isFiltered(methodInfo: MethodInfo): Boolean { - if (maxSize == -1) { - return true - } - return methodInfo.method.root.preOrder().size <= maxSize - } -} diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt deleted file mode 100644 index 1d195449..00000000 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ /dev/null @@ -1,127 +0,0 @@ -package astminer.cli - -import astminer.common.model.MethodInfo -import astminer.common.model.Node -import astminer.common.model.ParseResult -import astminer.common.preOrder -import astminer.common.setNormalizedToken -import astminer.parse.antlr.SimpleNode -import astminer.parse.antlr.java.JavaMethodSplitter -import 
astminer.parse.antlr.python.PythonMethodSplitter -import astminer.parse.cpp.FuzzyMethodSplitter -import astminer.parse.cpp.FuzzyNode -import astminer.parse.java.GumTreeJavaNode -import astminer.parse.java.GumTreeMethodSplitter -import java.io.File - - -data class LabeledParseResult(val root: T, val label: String) - - -interface LabelExtractor { - fun toLabeledData(parseResult: ParseResult): List> -} - -abstract class FileLabelExtractor : LabelExtractor { - - override fun toLabeledData( - parseResult: ParseResult - ): List> { - val (root, filePath) = parseResult - return if (root == null) { - emptyList() - } else { - val label = extractLabel(root, filePath) ?: return emptyList() - listOf(LabeledParseResult(root, label)) - } - } - - abstract fun extractLabel(root: Node, filePath: String): String? -} - -abstract class MethodLabelExtractor( - open val filterPredicates: Collection = emptyList(), - open val javaParser: String = "gumtree" -) : LabelExtractor { - - override fun toLabeledData( - parseResult: ParseResult - ): List> { - val (root, filePath) = parseResult - if (root == null) { - return emptyList() - } - val fileExtension = File(filePath).extension - val methodInfos = when (fileExtension) { - "c", "cpp" -> { - val methodSplitter = FuzzyMethodSplitter() - methodSplitter.splitIntoMethods(root as FuzzyNode) - } - "java" -> { - when (javaParser) { - "gumtree" -> { - val methodSplitter = GumTreeMethodSplitter() - methodSplitter.splitIntoMethods(root as GumTreeJavaNode) - } - "antlr" -> { - val methodSplitter = JavaMethodSplitter() - methodSplitter.splitIntoMethods(root as SimpleNode) - } - else -> { - throw UnsupportedOperationException("Unsupported parser $javaParser") - } - } - } - "py" -> { - val methodSplitter = PythonMethodSplitter() - methodSplitter.splitIntoMethods(root as SimpleNode) - } - else -> throw UnsupportedOperationException("Unsupported extension $fileExtension") - }.filter { methodInfo -> - filterPredicates.all { predicate -> - 
predicate.isFiltered(methodInfo) - } - } - return methodInfos.mapNotNull { - val label = extractLabel(it, filePath) ?: return@mapNotNull null - LabeledParseResult(it.method.root, label) - } - } - - abstract fun extractLabel(methodInfo: MethodInfo, filePath: String): String? -} - -class FilePathExtractor : FileLabelExtractor() { - override fun extractLabel(root: Node, filePath: String): String? { - return filePath - } -} - -class FolderExtractor : FileLabelExtractor() { - override fun extractLabel(root: Node, filePath: String): String? { - return File(filePath).parentFile.name - } -} - -class MethodNameExtractor( - val hideMethodNames: Boolean = false, - override val filterPredicates: Collection = emptyList(), - override val javaParser: String = "gumtree" -) : MethodLabelExtractor(filterPredicates, javaParser) { - - override fun extractLabel(methodInfo: MethodInfo, filePath: String): String? { - val methodNameNode = methodInfo.method.nameNode ?: return null - val methodRoot = methodInfo.method.root - val methodName = methodInfo.name() ?: return null - - if (hideMethodNames) { - methodRoot.preOrder().forEach { node -> - if (node.getToken() == methodName) { - node.setNormalizedToken("SELF") - } - } - methodNameNode.setNormalizedToken("METHOD_NAME") - } - return methodName - } -} diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt deleted file mode 100644 index 7cd53b9d..00000000 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ /dev/null @@ -1,135 +0,0 @@ -package astminer.cli - -import astminer.common.getNormalizedToken -import astminer.common.getProjectFilesWithExtension -import astminer.common.model.* -import astminer.parse.antlr.java.JavaParser -import astminer.parse.antlr.python.PythonParser -import astminer.parse.cpp.FuzzyCppParser -import astminer.parse.java.GumTreeJavaParser -import astminer.paths.Code2VecPathStorage -import astminer.paths.PathMiner -import 
astminer.paths.PathRetrievalSettings -import astminer.paths.toPathContext -import com.github.ajalt.clikt.core.CliktCommand -import com.github.ajalt.clikt.parameters.options.* -import com.github.ajalt.clikt.parameters.types.int -import com.github.ajalt.clikt.parameters.types.long -import java.io.File -import java.lang.IllegalArgumentException - -class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? = null) : CliktCommand() { - - /** - * @param parser class that implements parsing - * @param extension file extension to choose files for parsing - */ - private data class SupportedLanguage(val parser: Parser, val extension: String) - - /** - * List of supported language extensions and corresponding parsers. - */ - private val supportedLanguages = listOf( - SupportedLanguage(GumTreeJavaParser(), "java"), - SupportedLanguage(FuzzyCppParser(), "c"), - SupportedLanguage(FuzzyCppParser(), "cpp"), - SupportedLanguage(PythonParser(), "py") - ) - - val extensions: List by option( - "--lang", - help = "File extensions that will be parsed" - ).split(",").default(supportedLanguages.map { it.extension }) - - val projectRoot: String by option( - "--project", - help = "Path to the project that will be parsed" - ).required() - - val outputDirName: String by option( - "--output", - help = "Path to directory where the output will be stored" - ).required() - - val maxPathLength: Int by option( - "--maxL", - help = "Maximum length of path for code2vec" - ).int().default(8) - - val maxPathWidth: Int by option( - "--maxW", - help = "Maximum width of path. " + - "Note, that here width is the difference between token indices in contrast to the original code2vec." - ).int().default(3) - - val maxPathContexts: Int by option( - "--maxContexts", - help = "Number of path contexts to keep from each method." - ).int().default(500) - - val maxTokens: Long by option( - "--maxTokens", - help = "Keep only contexts with maxTokens most popular tokens." 
- ).long().default(Long.MAX_VALUE) - - val maxPaths: Long by option( - "--maxPaths", - help = "Keep only contexts with maxTokens most popular paths." - ).long().default(Long.MAX_VALUE) - - val javaParser: String by option( - "--java-parser", - help = "Choose a parser for .java files." + - "'gumtree' for GumTree parser, 'antlr' for antlr parser." - ).default("gumtree") - - private fun getParser(extension: String): Parser { - if (extension == "java") { - return when(javaParser) { - "gumtree" -> GumTreeJavaParser() - "antlr" -> JavaParser() - else -> throw IllegalArgumentException("javaParser should be `antlr` or `gumtree`, not $javaParser") - } - } - for (language in supportedLanguages) { - if (extension == language.extension) { - return language.parser - } - } - throw UnsupportedOperationException("Unsupported extension $extension") - } - - private fun extractPathContexts(labelExtractor: LabelExtractor) { - val outputDir = File(outputDirName) - for (extension in extensions) { - val miner = PathMiner(PathRetrievalSettings(maxPathLength, maxPathWidth)) - val parser = getParser(extension) - - val outputDirForLanguage = outputDir.resolve(extension) - outputDirForLanguage.mkdir() - val storage = Code2VecPathStorage(outputDirForLanguage.path, maxPaths, maxTokens) - - val files = getProjectFilesWithExtension(File(projectRoot), extension) - parser.parseFiles(files) { parseResult -> - normalizeParseResult(parseResult, splitTokens = true) - val labeledParseResults = labelExtractor.toLabeledData(parseResult) - labeledParseResults.forEach { (root, label) -> - val paths = miner.retrievePaths(root).take(maxPathContexts) - storage.store(LabeledPathContexts(label, paths.map { astPath -> - toPathContext(astPath) { node -> - node.getNormalizedToken() - } - })) - } - } - - // Save stored data on disk - storage.close() - } - } - - override fun run() { - val labelExtractor = customLabelExtractor ?: FilePathExtractor() - extractPathContexts(labelExtractor) - } -} \ No newline at end of 
file diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt deleted file mode 100644 index 4e161ecc..00000000 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ /dev/null @@ -1,160 +0,0 @@ -package astminer.cli - -import astminer.ast.CsvAstStorage -import astminer.ast.DotAstStorage -import astminer.common.getProjectFilesWithExtension -import astminer.common.model.AstStorage -import astminer.common.preOrder -import com.github.ajalt.clikt.core.CliktCommand -import com.github.ajalt.clikt.parameters.options.* -import com.github.ajalt.clikt.parameters.types.int -import java.io.File - -class ProjectParser(private val customLabelExtractor: LabelExtractor? = null) : CliktCommand() { - - private val supportedLanguages = listOf("java", "c", "cpp", "py") - - val extensions: List by option( - "--lang", - help = "Comma-separated list of file extensions that will be parsed.\n" + - "Supports 'c', 'cpp', 'java', 'py', defaults to all these extensions." 
- ).split(",").default(supportedLanguages) - - val projectRoot: String by option( - "--project", - help = "Path to the project that will be parsed" - ).required() - - val outputDirName: String by option( - "--output", - help = "Path to directory where the output will be stored" - ).required() - - val astStorageType: String by option( - "--storage", - help = "AST storage type ('dot' or 'csv', defaults to 'csv')" - ).default("csv") - - val granularityLevel: String by option( - "--granularity", - help = "Choose level of granularity ('file' or 'method', defaults to 'file')" - ).default("file") - - val isMethodNameHide: Boolean by option( - "--hide-method-name", - help = "if passed with method level granularity, the names of all methods are replaced with placeholder token" - ).flag(default = false) - - val isTokenSplitted: Boolean by option( - "--split-tokens", - help = "if passed, split tokens into sequence of tokens" - ).flag(default = false) - - val excludeModifiers: List by option( - "--filter-modifiers", - help = "Comma-separated list of function's modifiers, which should be filtered." + - "Works only for method-level granulation." - ).split(",").default(emptyList()) - - val excludeAnnotations: List by option( - "--filter-annotations", - help = "Comma-separated list of function's annotations, which should be filtered." + - "Works only for method-level granulation." - ).split(",").default(emptyList()) - - val filterConstructors: Boolean by option( - "--remove-constructors", - help = "Remove constructor methods, works for method-level granulation" - ).flag(default = false) - - val excludeNodes: List by option( - "--remove-nodes", - help = "Comma-separated list of node types, which must be removed from asts." - ).split(",").default(emptyList()) - - val javaParser: String by option( - "--java-parser", - help = "Choose a parser for .java files." + - "'gumtree' for GumTree parser, 'antlr' for antlr parser." 
- ).default("gumtree") - - val maxMethodNameLength: Int by option( - "--max-method-name-length", - help = "Filtering methods with a large sequence of subtokens in their names" - ).int().default(-1) - - val maxTokenLength: Int by option( - "--max-token-length", - help = "Filter methods containing a long sequence of subtokens in the ast node" - ).int().default(-1) - - val maxTreeSize: Int by option( - "--max-tree-size", - help = "Filter methods by their ast size" - ).int().default(-1) - - val folderLabel: Boolean by option( - "--folder-label", - help = "if passed with file-level granularity, the folder name is used to label paths" - ).flag(default = false) - - - private fun getStorage(storageType: String, directoryPath: String): AstStorage { - return when (storageType) { - "csv" -> CsvAstStorage(directoryPath) - "dot" -> DotAstStorage(directoryPath) - else -> { - throw UnsupportedOperationException("Unsupported AST storage $storageType") - } - } - } - - private fun parsing(labelExtractor: LabelExtractor) { - val outputDir = File(outputDirName) - for (extension in extensions) { - // Create directory for current extension - val outputDirForLanguage = outputDir.resolve(extension) - // Choose type of storage - val storage = getStorage(astStorageType, outputDirForLanguage.path) - // Choose type of parser - val parser = getParser( - extension, - javaParser - ) - // Parse project - val filesToParse = getProjectFilesWithExtension(File(projectRoot), extension) - parser.parseFiles(filesToParse) { parseResult -> - normalizeParseResult(parseResult, isTokenSplitted) - val labeledParseResults = labelExtractor.toLabeledData(parseResult) - labeledParseResults.forEach { (root, label) -> - root.preOrder().forEach { node -> - excludeNodes.forEach { node.removeChildrenOfType(it) } - } - root.apply { - // Save AST as it is or process it to extract features / path-based representations - storage.store(root, label, parseResult.filePath) - } - } - } - // Save stored data on disk - 
storage.close() - } - - } - - override fun run() { - val labelExtractor = customLabelExtractor ?: getLabelExtractor( - granularityLevel, - javaParser, - isMethodNameHide, - excludeModifiers, - excludeAnnotations, - filterConstructors, - maxMethodNameLength, - maxTokenLength, - maxTreeSize, - folderLabel - ) - parsing(labelExtractor) - } -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/cli/ProjectPreprocessor.kt b/src/main/kotlin/astminer/cli/ProjectPreprocessor.kt deleted file mode 100644 index 52b6f5f2..00000000 --- a/src/main/kotlin/astminer/cli/ProjectPreprocessor.kt +++ /dev/null @@ -1,33 +0,0 @@ -package astminer.cli - -import astminer.parse.cpp.FuzzyCppParser -import com.github.ajalt.clikt.core.CliktCommand -import com.github.ajalt.clikt.parameters.options.option -import com.github.ajalt.clikt.parameters.options.required -import java.io.File - -/** - * Preprocess C/C++ project located in [projectRoot] and save the preprocessed files in [preprocessDir], replicating - * structure of the original project. 
- */ -class ProjectPreprocessor : CliktCommand() { - - val projectRoot: String by option( - "--project", - help = "Path to the project that will be parsed" - ).required() - - val preprocessDir: String by option( - "--output", - help = "Path to directory where the preprocessed data will be stored" - ).required() - - private fun preprocessing() { - val parser = FuzzyCppParser() - parser.preprocessProject(File(projectRoot), File(preprocessDir)) - } - - override fun run() { - preprocessing() - } -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/cli/utils.kt b/src/main/kotlin/astminer/cli/utils.kt deleted file mode 100644 index edf36664..00000000 --- a/src/main/kotlin/astminer/cli/utils.kt +++ /dev/null @@ -1,86 +0,0 @@ -package astminer.cli - -import astminer.parse.antlr.java.JavaParser -import astminer.parse.antlr.python.PythonParser -import astminer.parse.cpp.FuzzyCppParser -import astminer.parse.java.GumTreeJavaParser -import astminer.common.model.Node -import astminer.common.model.ParseResult -import astminer.common.model.Parser -import astminer.common.preOrder -import astminer.common.setNormalizedToken -import astminer.common.splitToSubtokens - -fun getParser( - extension: String, - javaParser: String -): Parser { - return when (extension) { - "java" -> { - when (javaParser) { - "gumtree" -> GumTreeJavaParser() - "antlr" -> JavaParser() - else -> { - throw UnsupportedOperationException("Unsupported parser for java extension $javaParser") - } - } - } - "c" -> FuzzyCppParser() - "cpp" -> FuzzyCppParser() - "py" -> PythonParser() - else -> { - throw UnsupportedOperationException("Unsupported extension $extension") - } - } -} - -fun separateToken(token: String, separator: CharSequence = "|"): String { - return splitToSubtokens(token).joinToString(separator) -} - -fun processNodeToken(node: Node, splitToken: Boolean) { - if (splitToken) { - node.setNormalizedToken(separateToken(node.getToken())) - } else { - node.setNormalizedToken() - } -} - -fun 
normalizeParseResult(parseResult: ParseResult, splitTokens: Boolean) { - parseResult.root?.preOrder()?.forEach { node -> processNodeToken(node, splitTokens) } -} - -fun getLabelExtractor( - granularityLevel: String, - javaParser: String, - hideMethodNames: Boolean, - excludeModifiers: List, - excludeAnnotations: List, - filterConstructors: Boolean, - maxMethodNameLength: Int, - maxTokenLength: Int, - maxTreeSize: Int, - useFolderName: Boolean -): LabelExtractor { - when (granularityLevel) { - "file" -> { - return if (useFolderName) { - FolderExtractor() - } else { - FilePathExtractor() - } - } - "method" -> { - val filterPredicates = mutableListOf( - ModifierFilterPredicate(excludeModifiers), AnnotationFilterPredicate(excludeAnnotations), - MethodNameLengthFilterPredicate(maxMethodNameLength), TokenLengthFilterPredicate(maxTokenLength), - TreeSizeFilterPredicate(maxTreeSize) - ) - if (filterConstructors) { - filterPredicates.add(ConstructorFilterPredicate()) - } - return MethodNameExtractor(hideMethodNames, filterPredicates, javaParser) - } - } - throw UnsupportedOperationException("Unsupported granularity level $granularityLevel") -} diff --git a/src/main/kotlin/astminer/common/FileUtil.kt b/src/main/kotlin/astminer/common/FileUtil.kt index badcc4d6..09471b68 100644 --- a/src/main/kotlin/astminer/common/FileUtil.kt +++ b/src/main/kotlin/astminer/common/FileUtil.kt @@ -11,9 +11,7 @@ import java.io.File * @param file file in which the number of lines is counted * @return number of lines in a given file */ -fun numberOfLines(file: File): Int { - return file.readLines().filter { it != "" }.size -} +fun numberOfLines(file: File): Int = file.readLines().filter { it != "" }.size /** * Changes extension of a given file to the new one. 
@@ -38,7 +36,7 @@ fun addClassWrapper(file: File, className: String) { } /** - * Checks if java file has any syntax errors, that can be identified via [Java8Parser][me.vovak.antlr.parser.Java8Parser] + * Checks if java file has any syntax errors that can be identified via [Java8Parser][me.vovak.antlr.parser.Java8Parser] * @param javaFile file which is checked for correct syntax * @return true if there are syntax errors and false otherwise */ @@ -56,4 +54,12 @@ fun getProjectFiles(projectRoot: File, filter: (File) -> Boolean = { true }) = p .toList() fun getProjectFilesWithExtension(projectRoot: File, extension: String): List = - getProjectFiles(projectRoot) { it.isFile && it.extension == extension } \ No newline at end of file + getProjectFiles(projectRoot) { it.isFile && it.extension == extension } + +fun iterateFiles(dir: File, condition: (File) -> Boolean, action: (File) -> Unit) { + dir.walkTopDown().filter { it.isFile && condition(it) }.forEach { action.invoke(it) } +} + +fun File.forFilesWithSuffix(extension: String, action: (File) -> Unit) { + iterateFiles(this, { file: File -> file.path.endsWith(extension) }, action) +} diff --git a/src/main/kotlin/astminer/common/TreeUtil.kt b/src/main/kotlin/astminer/common/TreeUtil.kt index 53d2127e..1a53158d 100644 --- a/src/main/kotlin/astminer/common/TreeUtil.kt +++ b/src/main/kotlin/astminer/common/TreeUtil.kt @@ -1,81 +1,32 @@ package astminer.common -import astminer.common.model.Node -import java.util.ArrayList - - -fun Node.postOrderIterator(): Iterator { - //TODO implement properly - return postOrder().listIterator() -} - -fun Node.preOrderIterator(): Iterator { - return preOrder().listIterator() -} - -fun doTraversePostOrder(node: Node, resultList: MutableList) { - node.getChildren().forEach { doTraversePostOrder(it, resultList) } - resultList.add(node) -} - -fun doTraversePreOrder(node: Node, resultList: MutableList) { - resultList.add(node) - node.getChildren().forEach { doTraversePreOrder(it, resultList) } -} 
- -fun Node.postOrder(): List { - val result: MutableList = ArrayList() - doTraversePostOrder(this, result) - return result -} - -fun Node.preOrder(): List { - val result: MutableList = ArrayList() - doTraversePreOrder(this, result) - return result -} - -const val NORMALIZED_TOKEN_KEY = "normalized_token" -const val DEFAULT_TOKEN = "EMPTY_TOKEN" - -/** - * Set normalized token for a node with default normalizing function. - */ -fun Node.setNormalizedToken() { - setMetadata(NORMALIZED_TOKEN_KEY, normalizeToken(getToken(), DEFAULT_TOKEN)) -} - -/** - * Set normalized token to a custom value. - */ -fun Node.setNormalizedToken(normalizedToken: String) { - setMetadata(NORMALIZED_TOKEN_KEY, normalizedToken) -} - -fun Node.getNormalizedToken(): String = getMetadata(NORMALIZED_TOKEN_KEY)?.toString() ?: DEFAULT_TOKEN +const val EMPTY_TOKEN = "EMPTY" /** * The function was adopted from the original code2vec implementation in order to match their behavior: * https://github.com/tech-srl/code2vec/blob/master/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java */ + +val newLineReg = "\\\\n".toRegex() +val whitespaceReg = "//s+".toRegex() +val quotesApostrophesCommasReg = "[\"',]".toRegex() +val unicodeWeirdCharReg = "\\P{Print}".toRegex() +val notALetterReg = "[^A-Za-z]".toRegex() + fun normalizeToken(token: String, defaultToken: String): String { - val cleanToken = token.toLowerCase() - .replace("\\\\n".toRegex(), "") // escaped new line - .replace("//s+".toRegex(), "") // whitespaces - .replace("[\"',]".toRegex(), "") // quotes, apostrophies, commas - .replace("\\P{Print}".toRegex(), "") // unicode weird characters + val cleanToken = token.lowercase() + .replace(newLineReg, "") // escaped new line + .replace(whitespaceReg, "") // whitespaces + .replace(quotesApostrophesCommasReg, "") // quotes, apostrophes, commas + .replace(unicodeWeirdCharReg, "") // unicode weird characters - val stripped = cleanToken.replace("[^A-Za-z]".toRegex(), "") + val stripped = 
cleanToken.replace(notALetterReg, "") - return if (stripped.isEmpty()) { + return stripped.ifEmpty { val carefulStripped = cleanToken.replace(" ", "_") - if (carefulStripped.isEmpty()) { + carefulStripped.ifEmpty { defaultToken - } else { - carefulStripped } - } else { - stripped } } @@ -83,9 +34,12 @@ fun normalizeToken(token: String, defaultToken: String): String { * The function was adopted from the original code2vec implementation in order to match their behavior: * https://github.com/tech-srl/code2vec/blob/master/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java */ + +val splitRegex = "(?<=[a-z])(?=[A-Z])|_|[0-9]|(?<=[A-Z])(?=[A-Z][a-z])|\\s+".toRegex() + fun splitToSubtokens(token: String) = token - .trim() - .split("(?<=[a-z])(?=[A-Z])|_|[0-9]|(?<=[A-Z])(?=[A-Z][a-z])|\\s+".toRegex()) - .map { s -> normalizeToken(s, "") } - .filter { it.isNotEmpty() } - .toList() + .trim() + .split(splitRegex) + .map { s -> normalizeToken(s, "") } + .filter { it.isNotEmpty() } + .toList() diff --git a/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt new file mode 100644 index 00000000..cacdd94c --- /dev/null +++ b/src/main/kotlin/astminer/common/model/FunctionInfoModel.kt @@ -0,0 +1,48 @@ +package astminer.common.model + +interface TreeFunctionSplitter { + fun splitIntoFunctions(root: T, filePath: String): Collection> +} + +class FunctionInfoPropertyNotImplementedException(propertyName: String) : + UnsupportedOperationException( + "The property `$propertyName` of FunctionInfo for this language and parser type is not implemented yet. " + + "Consider implementing it." + ) + +private fun notImplemented(propertyName: String): Nothing = + throw FunctionInfoPropertyNotImplementedException(propertyName) + +interface FunctionInfo { + val nameNode: T? + get() = notImplemented("nameNode") + val name: String? 
+ get() = nameNode?.originalToken + val root: T + get() = notImplemented("root") + val filePath: String + get() = notImplemented("filePath") + val annotations: List? + get() = notImplemented("annotations") + val modifiers: List? + get() = notImplemented("modifiers") + val parameters: List? + get() = notImplemented("parameters") + val returnType: String? + get() = notImplemented("returnType") + val enclosingElement: EnclosingElement? + get() = notImplemented("enclosingElement") + val isConstructor: Boolean + get() = notImplemented("isConstructor") +} + +data class FunctionInfoParameter(val name: String, val type: String?) + +data class EnclosingElement(val type: EnclosingElementType, val name: String?, val root: T) + +enum class EnclosingElementType { + Class, + Function, + Method, + VariableDeclaration, +} diff --git a/src/main/kotlin/astminer/common/model/ParsingModel.kt b/src/main/kotlin/astminer/common/model/ParsingModel.kt index 84fa1f2b..7bcb453d 100644 --- a/src/main/kotlin/astminer/common/model/ParsingModel.kt +++ b/src/main/kotlin/astminer/common/model/ParsingModel.kt @@ -1,34 +1,61 @@ package astminer.common.model +import astminer.common.EMPTY_TOKEN +import astminer.common.splitToSubtokens import java.io.File import java.io.InputStream +import java.util.* +abstract class Node(val originalToken: String?) { + abstract val typeLabel: String + abstract val children: List + abstract val parent: Node? -interface Node { - fun getTypeLabel(): String - fun getChildren(): List - fun getParent(): Node? - fun getToken(): String - fun isLeaf(): Boolean + val normalizedToken: String = + originalToken?.let { + val subtokens = splitToSubtokens(it) + if (subtokens.isEmpty()) EMPTY_TOKEN else subtokens.joinToString(TOKEN_DELIMITER) + } ?: EMPTY_TOKEN - fun getMetadata(key: String): Any? - fun setMetadata(key: String, value: Any) + var technicalToken: String? 
= null + val token: String + get() = technicalToken ?: normalizedToken + + val metadata: MutableMap = HashMap() + fun isLeaf() = children.isEmpty() + + override fun toString(): String = "$typeLabel : $token" fun prettyPrint(indent: Int = 0, indentSymbol: String = "--") { repeat(indent) { print(indentSymbol) } - print(getTypeLabel()) - if (getToken().isNotEmpty()) { - println(" : ${getToken()}") - } else { - println() - } - getChildren().forEach { it.prettyPrint(indent + 1, indentSymbol) } + println(this) + children.forEach { it.prettyPrint(indent + 1, indentSymbol) } + } + + open fun getChildrenOfType(typeLabel: String) = children.filter { it.typeLabel == typeLabel } + open fun getChildOfType(typeLabel: String) = getChildrenOfType(typeLabel).firstOrNull() + + abstract fun removeChildrenOfType(typeLabel: String) + + private fun doTraversePreOrder(resultList: MutableList) { + resultList.add(this) + children.forEach { it.doTraversePreOrder(resultList) } + } + + fun preOrderIterator(): Iterator = preOrder().listIterator() + open fun preOrder(): List = mutableListOf().also { doTraversePreOrder(it) } + + private fun doTraversePostOrder(resultList: MutableList) { + children.forEach { it.doTraversePostOrder(resultList) } + resultList.add(this) } - fun getChildrenOfType(typeLabel: String) = getChildren().filter { it.getTypeLabel() == typeLabel } - fun getChildOfType(typeLabel: String) = getChildrenOfType(typeLabel).firstOrNull() + fun postOrderIterator(): Iterator = postOrder().listIterator() + open fun postOrder(): List = mutableListOf().also { doTraversePostOrder(it) } - fun removeChildrenOfType(typeLabel: String) + companion object { + const val TOKEN_DELIMITER = "|" + } } interface Parser { @@ -37,31 +64,17 @@ interface Parser { * @param content input stream to parse * @return root of the AST */ - fun parseInputStream(content: InputStream): T? + fun parseInputStream(content: InputStream): T /** * Parse file into an AST. 
* @param file file to parse - * @return ParseResult instance + * @return ParseResult instance */ - fun parseFile(file: File) = ParseResult(parseInputStream(file.inputStream()), file.path) - - /** - * Parse list of files. - * @param files files to parse - * @return list of ParseResult instances, one for each parsed file - */ - @Deprecated("Please use parseFiles (List, (ParseResult) -> Any) to avoid clogging memory") - fun parseFiles(files: List): List> = files.map { ParseResult(parseInputStream(it.inputStream()), it.path) } - - /** - * Parse list of files. - * @param files files to parse - * @param handleResult handler to invoke on each file parse result - */ - fun parseFiles(files: List, handleResult: (ParseResult) -> Any) { - files.forEach { handleResult(parseFile(it)) } - } + fun parseFile(file: File) = parseInputStream(file.inputStream()) } -data class ParseResult(val root: T?, val filePath: String) +class ParserNotInstalledException(parser: String, language: String, val e: Exception) : Exception() { + override val message: String = "Tools for parsing $language with $parser were not properly installed" + override val cause: Throwable = e +} diff --git a/src/main/kotlin/astminer/common/model/ParsingResultModel.kt b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt new file mode 100644 index 00000000..b4652590 --- /dev/null +++ b/src/main/kotlin/astminer/common/model/ParsingResultModel.kt @@ -0,0 +1,82 @@ +package astminer.common.model + +import astminer.parse.ParsingException +import mu.KotlinLogging +import java.io.File +import kotlin.concurrent.thread +import kotlin.math.ceil + +private val logger = KotlinLogging.logger("HandlerFactory") + +interface ParsingResultFactory { + fun parse(file: File): ParsingResult + + fun parseFiles( + files: List, + action: (ParsingResult) -> T + ): List { + val results = mutableListOf() + files.map { file -> + try { + results.add(action(parse(file))) + } catch (parsingException: ParsingException) { + 
logger.error(parsingException) { "Failed to parse file ${file.path}" } + results.add(null) + } + } + return results + } + + fun parseFilesInThreads( + files: List, + numOfThreads: Int, + action: (ParsingResult) -> T + ): List { + val results = mutableListOf() + val threads = mutableListOf() + + if (files.isEmpty()) { return emptyList() } + + synchronized(results) { + files.chunked(ceil(files.size.toDouble() / numOfThreads).toInt()).filter { it.isNotEmpty() } + .map { chunk -> + threads.add(thread { results.addAll(parseFiles(chunk, action)) }) + } + } + threads.map { it.join() } + return results + } +} + +interface PreprocessingParsingResultFactory : ParsingResultFactory { + fun preprocess(file: File, outputDir: File? = null): File + + /** + * Run preprocessing and parsing for all files. + * @param files list of files to be parsed with preprocessing + * @param action action to do with parsed files (e.g. save on the disk) + */ + override fun parseFiles( + files: List, + action: (ParsingResult) -> T + ) = + files.map { file -> + try { + val preprocessedFile = preprocess(file) + val result = action(parse(preprocessedFile)) + preprocessedFile.delete() + result + } catch (parsingException: ParsingException) { + logger.error(parsingException) { "Failed to parse file ${file.path}" } + null + } + } +} + +abstract class ParsingResult(internal val file: File) { + abstract val root: T + protected abstract val splitter: TreeFunctionSplitter + + fun splitIntoFunctions(): Collection> = + splitter.splitIntoFunctions(root, file.path) +} diff --git a/src/main/kotlin/astminer/common/model/PathContextsModel.kt b/src/main/kotlin/astminer/common/model/PathContextsModel.kt index 6ca55927..7c133226 100644 --- a/src/main/kotlin/astminer/common/model/PathContextsModel.kt +++ b/src/main/kotlin/astminer/common/model/PathContextsModel.kt @@ -1,6 +1,5 @@ package astminer.common.model - data class ASTPath(val upwardNodes: List, val topNode: Node, val downwardNodes: List) enum class Direction { 
UP, DOWN, TOP } diff --git a/src/main/kotlin/astminer/common/model/PipelineModel.kt b/src/main/kotlin/astminer/common/model/PipelineModel.kt new file mode 100644 index 00000000..476f81f3 --- /dev/null +++ b/src/main/kotlin/astminer/common/model/PipelineModel.kt @@ -0,0 +1,92 @@ +package astminer.common.model + +import java.io.Closeable +import java.io.File + +interface Filter + +interface LabelExtractor + +interface FileFilter : Filter { + fun validate(parseResult: ParsingResult): Boolean +} + +interface FunctionFilter : Filter { + fun validate(functionInfo: FunctionInfo): Boolean +} + +interface FileLabelExtractor : LabelExtractor { + fun process(parseResult: ParsingResult): LabeledResult? +} + +interface FunctionLabelExtractor : LabelExtractor { + fun process(functionInfo: FunctionInfo): LabeledResult? +} + +/** + * An AST subtree with a label and the path of the source file. + * @property root The root of the AST subtree. + * @property label Any label for this subtree. + * @property filePath The path to the source file where the AST is from. + */ +data class LabeledResult(val root: T, val label: String, val filePath: String) + +fun ParsingResult.labeledWith(label: String): LabeledResult = LabeledResult(root, label, file.path) + +/** + * Storage saved labeled results to disk in a specified format. + * Storage might extract any data from labeled result. 
+ * For instance, it might extract paths from trees + */ +interface Storage : Closeable { + val outputDirectoryPath: String + + fun store(labeledResult: LabeledResult, holdout: DatasetHoldout = DatasetHoldout.None) + + fun storeSynchronously(labeledResult: LabeledResult, holdout: DatasetHoldout = DatasetHoldout.None) { + synchronized(this) { + store(labeledResult, holdout) + } + } + + fun store(labeledResults: Iterable>, holdout: DatasetHoldout = DatasetHoldout.None) { + for (labeledResult in labeledResults) { + store(labeledResult, holdout) + } + } + + fun storeSynchronously( + labeledResults: Iterable>, + holdout: DatasetHoldout = DatasetHoldout.None + ) = synchronized(this) { + store(labeledResults, holdout) + } +} + +enum class DatasetHoldout(val dirName: String) { + Train("train"), + Validation("val"), + Test("test"), + None("data"); +} + +/** Returns map with three entries (keys: train data pool, validation data pool and test data pool; + * values: holdout directories) if dataset structure is present. + * One pool (None) otherwise.**/ +fun findDatasetHoldouts(inputDir: File): Map { + val trainDir = inputDir.resolve(DatasetHoldout.Train.dirName) + val valDir = inputDir.resolve(DatasetHoldout.Validation.dirName) + val testDir = inputDir.resolve(DatasetHoldout.Test.dirName) + + return if (trainDir.exists() && valDir.exists() && testDir.exists()) { + mapOf( + DatasetHoldout.Train to trainDir, + DatasetHoldout.Validation to valDir, + DatasetHoldout.Test to testDir + ) + } else { + mapOf( + DatasetHoldout.None to inputDir + ) + } +} diff --git a/src/main/kotlin/astminer/common/model/StorageModel.kt b/src/main/kotlin/astminer/common/model/StorageModel.kt deleted file mode 100644 index bd2436c2..00000000 --- a/src/main/kotlin/astminer/common/model/StorageModel.kt +++ /dev/null @@ -1,23 +0,0 @@ -package astminer.common.model - - -/** - * Stores path-contexts and their labels and saves them to directory. 
- */ -interface PathStorage { - val directoryPath: String - val tokensLimit: Long - val pathsLimit: Long - fun store(labeledPathContexts: LabeledPathContexts) - fun close() -} - -/** - * Stores ASTs in form of their root and saves them to directory. - */ -interface AstStorage { - val directoryPath: String - fun store(root: Node, label: String) = store(root, label, "") - fun store(root: Node, label: String, filePath: String) - fun close() -} diff --git a/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt b/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt deleted file mode 100644 index c75bfb9d..00000000 --- a/src/main/kotlin/astminer/common/model/TreeSplittingModel.kt +++ /dev/null @@ -1,41 +0,0 @@ -package astminer.common.model - -interface TreeMethodSplitter { - fun splitIntoMethods(root: T): Collection> -} - -class MethodInfo( - val method: MethodNode, - val enclosingElement: ElementNode, - val methodParameters: List> -) { - fun name() = method.name() - fun returnType() = method.returnType() - - fun enclosingElementName() = enclosingElement.name() -} - -class MethodNode( - val root: T, - val returnTypeNode: T?, - val nameNode: T? -) { - fun name() = nameNode?.getToken() - fun returnType() = returnTypeNode?.getToken() -} - -class ElementNode( - val root: T?, - val nameNode: T? -) { - fun name() = nameNode?.getToken() -} - -data class ParameterNode( - val root: T, - val returnTypeNode: T?, - val nameNode: T? 
-) { - fun name() = nameNode?.getToken() - fun returnType() = returnTypeNode?.getToken() -} diff --git a/src/main/kotlin/astminer/common/storage/CsvFileUtil.kt b/src/main/kotlin/astminer/common/storage/CsvFileUtil.kt index 46702d18..0751e274 100644 --- a/src/main/kotlin/astminer/common/storage/CsvFileUtil.kt +++ b/src/main/kotlin/astminer/common/storage/CsvFileUtil.kt @@ -3,17 +3,19 @@ package astminer.common.storage import astminer.common.model.OrientedNodeType import java.io.File -fun dumpIdStorageToCsv(storage: RankedIncrementalIdStorage, - typeHeader: String, - csvSerializer: (T) -> String, - file: File, - limit: Long = Long.MAX_VALUE) { +fun dumpIdStorageToCsv( + storage: RankedIncrementalIdStorage, + typeHeader: String, + csvSerializer: (T) -> String, + file: File, + limit: Long? = null +) { file.printWriter().use { out -> out.println("id,$typeHeader") storage.idPerItem.forEach { val id = it.value val item = it.key - if (storage.getKeyRank(item) <= limit) { + if (limit == null || storage.getKeyRank(item) <= limit) { out.println("$id,${csvSerializer.invoke(item)}") } } diff --git a/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt b/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt index 0f372baf..6a20b0e2 100644 --- a/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt +++ b/src/main/kotlin/astminer/common/storage/RankedIncrementalIdStorage.kt @@ -1,53 +1,85 @@ package astminer.common.storage +typealias Id = Long + +/** + * This storage automatically assigns each item an id + * and records how many times each item has been recorded in the storage. + * It ranks items by the number of times they have been recorded. + */ class RankedIncrementalIdStorage { private var keyCounter = 0L - val idPerItem: MutableMap = HashMap() - private val idCountMap: MutableMap = HashMap() - private var idCountRanks: Map? 
= null + val idPerItem: MutableMap = HashMap() + private val idCountMap: MutableMap = HashMap() + private var idCountRanks: Map? = null private fun putAndIncrementKey(item: T): Long { idPerItem[item] = ++keyCounter return keyCounter } - private fun incrementIdCount(id: Long) { + private fun incrementIdCount(id: Id) { idCountMap[id] = idCountMap.getOrDefault(id, 0) + 1 } - fun record(item: T): Long { + /** + * Puts the item into the storage or increments the count of [item] in the storage if it is already present. + * @param item The item to be put in the storage + * @return The id of the recorded item + */ + fun record(item: T): Id { val id = idPerItem[item] ?: putAndIncrementKey(item) incrementIdCount(id) return id } + /** + * Returns the id of the item if the item was recorded with the record(item) method, returns 0 otherwise. + */ fun getId(item: T): Long = idPerItem[item] ?: 0 - fun getIdCount(id: Long) = idCountMap.getOrDefault(id, 0) + /** + * Returns the number of times the item with the provided [id] has been recorded in the storage. + */ + fun getIdCount(id: Id) = idCountMap.getOrDefault(id, 0) - fun lookUpValue(id: Long): T? { - return idPerItem.entries.firstOrNull { it.value == id }?.key - } + /** + * Returns the item by its [id] + */ + fun lookUpValue(id: Id): T? = idPerItem.entries.firstOrNull { it.value == id }?.key + /** + * Returns the rank of the [item] + * @see getIdRank + */ fun getKeyRank(item: T) = getIdRank(getId(item)) - fun getIdRank(id: Long): Long { + /** + * Returns the rank of the item with this [id]. + * The item that has been recorded in the storage the most times has the rank 1, + * the second most recorded item has the rank 2, and so on... 
+ */ + fun getIdRank(id: Id): Long { if (idCountRanks == null) { computeRanks() } return idCountRanks?.get(id) ?: 0 } + /** + * Computes the ranks + * @see getIdRank + */ fun computeRanks() { - val sortedEntries = idCountMap.entries - .sortedBy { it.value } - .reversed() - .map { it.key } - .toList() - val idRankMap = mutableMapOf() - for (i in sortedEntries.indices) { - idRankMap[sortedEntries[i]] = (i + 1).toLong() + val sortedIds = idCountMap.entries + .sortedBy { it.value } + .reversed() + .map { it.key } + .toList() + val idRankMap = mutableMapOf() + for ((index, id) in sortedIds.withIndex()) { + idRankMap[id] = (index + 1).toLong() } idCountRanks = idRankMap } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/config/FilterConfigs.kt b/src/main/kotlin/astminer/config/FilterConfigs.kt new file mode 100644 index 00000000..0d0760bd --- /dev/null +++ b/src/main/kotlin/astminer/config/FilterConfigs.kt @@ -0,0 +1,75 @@ +package astminer.config + +import astminer.common.model.Filter +import astminer.filters.* +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable +import kotlinx.serialization.Transient + +/** + * Base class for all filter configs. See below + */ +@Serializable +sealed class FilterConfig { + abstract val filterImpl: Filter +} + +/** + * @see TreeSizeFilter + */ +@Serializable +@SerialName("by tree size") +data class TreeSizeFilterConfig(val minTreeSize: Int = 0, val maxTreeSize: Int? 
= null) : FilterConfig() { + @Transient + override val filterImpl = TreeSizeFilter(minTreeSize, maxTreeSize) +} + +/** + * @see ModifierFilter + */ +@Serializable +@SerialName("by modifiers") +data class ModifierFilterConfig(val modifiers: List) : FilterConfig() { + @Transient + override val filterImpl = ModifierFilter(modifiers) +} + +/** + * @see AnnotationFilter + */ +@Serializable +@SerialName("by annotations") +data class AnnotationFilterConfig(val annotations: List) : FilterConfig() { + @Transient + override val filterImpl = AnnotationFilter(annotations) +} + +/** + * @see ConstructorFilter + */ +@Serializable +@SerialName("no constructors") +object ConstructorFilterConfig : FilterConfig() { + @Transient + override val filterImpl = ConstructorFilter +} + +/** + * @see FunctionNameWordsNumberFilter + */ +@Serializable +@SerialName("by function name length") +data class FunctionNameWordsNumberFilterConfig(val maxWordsNumber: Int) : FilterConfig() { + @Transient + override val filterImpl = FunctionNameWordsNumberFilter(maxWordsNumber) +} + +/** + * @see WordsNumberFilter + */ +@Serializable +@SerialName("by words number") +data class WordsNumberFilterConfig(val maxTokenWordsNumber: Int) : FilterConfig() { + @Transient + override val filterImpl = WordsNumberFilter(maxTokenWordsNumber) +} diff --git a/src/main/kotlin/astminer/config/LabelExtractorConfigs.kt b/src/main/kotlin/astminer/config/LabelExtractorConfigs.kt new file mode 100644 index 00000000..679b46a9 --- /dev/null +++ b/src/main/kotlin/astminer/config/LabelExtractorConfigs.kt @@ -0,0 +1,42 @@ +package astminer.config + +import astminer.common.model.LabelExtractor +import astminer.labelextractor.* +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable +import kotlinx.serialization.Transient + +@Serializable +sealed class LabelExtractorConfig { + abstract val labelExtractorImpl: LabelExtractor +} + +/** + * @see FileNameExtractor + */ +@Serializable +@SerialName("file name") 
+class FileNameExtractorConfig : LabelExtractorConfig() { + @Transient + override val labelExtractorImpl = FileNameExtractor +} + +/** + * @see FolderNameExtractor + */ +@Serializable +@SerialName("folder name") +class FolderNameExtractorConfig : LabelExtractorConfig() { + @Transient + override val labelExtractorImpl = FolderNameExtractor +} + +/** + * @see FunctionNameLabelExtractor + */ +@Serializable +@SerialName("function name") +class FunctionNameExtractorConfig : LabelExtractorConfig() { + @Transient + override val labelExtractorImpl = FunctionNameLabelExtractor +} diff --git a/src/main/kotlin/astminer/config/ParserConfig.kt b/src/main/kotlin/astminer/config/ParserConfig.kt new file mode 100644 index 00000000..1cb924bd --- /dev/null +++ b/src/main/kotlin/astminer/config/ParserConfig.kt @@ -0,0 +1,50 @@ +package astminer.config + +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +/** + * This config is used to select the parsers that should be used. + * For example, given name = "antlr" and languages = ["py", "java"], + * 2 ANTLR parsers will be used (the Java ANTLR parser and the Python ANTLR parser). + * @param name Type of the parser + * @param languages File extensions that should be parsed + */ +@Serializable +data class ParserConfig( + val name: ParserType, + val languages: List +) + +@Serializable +enum class ParserType { + @SerialName("antlr") + Antlr, + + @SerialName("gumtree") + GumTree, + + @SerialName("fuzzy") + Fuzzy +} + +@Serializable +enum class FileExtension(val fileExtension: String) { + @SerialName("py") + Python("py"), + + @SerialName("java") + Java("java"), + + @SerialName("js") + JavaScript("js"), + + @SerialName("c") + C("c"), + + @SerialName("cpp") + Cpp("cpp"), + + @SerialName("php") + PHP("php") +} diff --git a/src/main/kotlin/astminer/config/PipelineConfig.kt b/src/main/kotlin/astminer/config/PipelineConfig.kt new file mode 100644 index 00000000..c0f78f54 --- /dev/null +++
b/src/main/kotlin/astminer/config/PipelineConfig.kt @@ -0,0 +1,26 @@ +package astminer.config + +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable +import kotlinx.serialization.SerializationException + +/** + * Config which defines the pipeline + * @see astminer.pipeline.Pipeline + */ +@Serializable +data class PipelineConfig( + val inputDir: String, + val outputDir: String, + val parser: ParserConfig, + val filters: List = emptyList(), + @SerialName("label") val labelExtractor: LabelExtractorConfig, + val storage: StorageConfig, + val numOfThreads: Int = 1 +) { + init { + if (numOfThreads <= 0) { + throw SerializationException("Number of threads must be a positive integer") + } + } +} diff --git a/src/main/kotlin/astminer/config/StorageConfigs.kt b/src/main/kotlin/astminer/config/StorageConfigs.kt new file mode 100644 index 00000000..42b0788b --- /dev/null +++ b/src/main/kotlin/astminer/config/StorageConfigs.kt @@ -0,0 +1,83 @@ +package astminer.config + +import astminer.common.model.Storage +import astminer.storage.ast.CsvAstStorage +import astminer.storage.ast.DotAstStorage +import astminer.storage.ast.JsonAstStorage +import astminer.storage.path.Code2SeqPathStorage +import astminer.storage.path.Code2VecPathStorage +import astminer.storage.path.PathBasedStorageConfig +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable +import kotlinx.serialization.Transient + +/** + * Config for a storage that saves the results to disk + */ +@Serializable +sealed class StorageConfig { + abstract fun createStorage(outputDirectoryPath: String): Storage +} + +/** + * @see astminer.storage.ast.CsvAstStorage + */ +@Serializable +@SerialName("csv AST") +class CsvAstStorageConfig : StorageConfig() { + override fun createStorage(outputDirectoryPath: String) = CsvAstStorage(outputDirectoryPath) +} + +/** + * @see astminer.storage.ast.DotAstStorage + */ +@Serializable +@SerialName("dot AST") +class DotAstStorageConfig :
StorageConfig() { + override fun createStorage(outputDirectoryPath: String) = DotAstStorage(outputDirectoryPath) +} + +/** + * @see JsonAstStorage + */ +@Serializable +@SerialName("json AST") +class JsonAstStorageConfig(private val withPaths: Boolean = false) : StorageConfig() { + override fun createStorage(outputDirectoryPath: String) = JsonAstStorage(outputDirectoryPath, withPaths) +} + +/** + * Config for [astminer.storage.path.Code2VecPathStorage] + */ +@Serializable +@SerialName("code2vec") +data class Code2VecPathStorageConfig( + val maxPathLength: Int, + val maxPathWidth: Int, + val maxTokens: Long? = null, + val maxPaths: Long? = null, + val maxPathContextsPerEntity: Int? = null, +) : StorageConfig() { + @Transient + private val pathBasedStorageConfig = + PathBasedStorageConfig(maxPathLength, maxPathWidth, maxTokens, maxPaths, maxPathContextsPerEntity) + + override fun createStorage(outputDirectoryPath: String) = + Code2VecPathStorage(outputDirectoryPath, pathBasedStorageConfig) +} + +@Serializable +@SerialName("code2seq") +data class Code2SeqPathStorageConfig( + @SerialName("length") val maxPathLength: Int, + @SerialName("width") val maxPathWidth: Int, + val maxPathContextsPerEntity: Int? 
= null, + val nodesToNumber: Boolean = true +) : StorageConfig() { + @Transient + private val pathBasedStorageConfig = + PathBasedStorageConfig(maxPathLength, maxPathWidth, maxPathContextsPerEntity = maxPathContextsPerEntity) + + override fun createStorage(outputDirectoryPath: String) = + Code2SeqPathStorage(outputDirectoryPath, pathBasedStorageConfig, nodesToNumber) +} diff --git a/src/main/kotlin/astminer/examples/AllCppFiles.kt b/src/main/kotlin/astminer/examples/AllCppFiles.kt deleted file mode 100644 index 9396f598..00000000 --- a/src/main/kotlin/astminer/examples/AllCppFiles.kt +++ /dev/null @@ -1,36 +0,0 @@ -@file:JvmName("CppExample") - -package astminer.examples - -import astminer.common.getProjectFilesWithExtension -import astminer.common.model.LabeledPathContexts -import astminer.parse.cpp.FuzzyCppParser -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.CsvPathStorage -import astminer.paths.toPathContext -import java.io.File - -// Retrieve paths from .cpp preprocessed files, using a fuzzyc2cpg parser. 
-fun allCppFiles() { - val inputDir = File("src/test/resources/examples/cpp") - - val miner = PathMiner(PathRetrievalSettings(5, 5)) - val outputDir = "out_examples/allCppFiles" - val storage = CsvPathStorage(outputDir) - val parser = FuzzyCppParser() - val preprocOutputFolder = File("preprocessed") - - parser.preprocessProject(inputDir, preprocOutputFolder) - - val files = getProjectFilesWithExtension(preprocOutputFolder, "cpp") - - parser.parseFiles(files) { parseResult -> - if (parseResult.root != null) { - val paths = miner.retrievePaths(parseResult.root) - storage.store(LabeledPathContexts(parseResult.filePath, paths.map { toPathContext(it) })) - } - } - - storage.close() -} diff --git a/src/main/kotlin/astminer/examples/AllJavaAst.kt b/src/main/kotlin/astminer/examples/AllJavaAst.kt deleted file mode 100644 index 42c4f91a..00000000 --- a/src/main/kotlin/astminer/examples/AllJavaAst.kt +++ /dev/null @@ -1,19 +0,0 @@ -package astminer.examples - -import astminer.ast.CsvAstStorage -import astminer.parse.antlr.java.JavaParser -import java.io.File - -// Retrieve ASTs from Java files, using a generated parser. 
-fun allJavaAsts() { - val folder = "src/test/resources/examples/" - - val storage = CsvAstStorage("out_examples/allJavaAstsAntlr") - - File(folder).forFilesWithSuffix(".java") { file -> - val node = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix - storage.store(node, label = file.path) - } - - storage.close() -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/examples/AllJavaFiles.kt b/src/main/kotlin/astminer/examples/AllJavaFiles.kt deleted file mode 100644 index e51f95bf..00000000 --- a/src/main/kotlin/astminer/examples/AllJavaFiles.kt +++ /dev/null @@ -1,36 +0,0 @@ -package astminer.examples - -import astminer.common.model.LabeledPathContexts -import astminer.parse.antlr.java.JavaMethodSplitter -import astminer.parse.antlr.java.JavaParser -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.CsvPathStorage -import astminer.paths.toPathContext -import java.io.File - -//Retrieve paths from Java files, using a generated parser. 
-fun allJavaFiles() { - val inputDir = "src/test/resources/examples/" - - val miner = PathMiner(PathRetrievalSettings(5, 5)) - val outputDir = "out_examples/allJavaFilesAntlr" - val storage = CsvPathStorage(outputDir) - - File(inputDir).forFilesWithSuffix("11.java") { file -> - val node = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix - val paths = miner.retrievePaths(node) - node.prettyPrint() - JavaMethodSplitter().splitIntoMethods(node).forEach { - println(it.name()) - println(it.returnType()) - println(it.enclosingElementName()) - it.methodParameters.forEach { parameters -> - println("${parameters.name()} ${parameters.returnType()}") - } - } - storage.store(LabeledPathContexts(file.path, paths.map { toPathContext(it) })) - } - - storage.close() -} diff --git a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt b/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt deleted file mode 100644 index 0c34108b..00000000 --- a/src/main/kotlin/astminer/examples/AllJavaFilesGumTree.kt +++ /dev/null @@ -1,27 +0,0 @@ -package astminer.examples - -import astminer.common.model.LabeledPathContexts -import astminer.parse.java.GumTreeJavaParser -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.CsvPathStorage -import astminer.paths.toPathContext -import java.io.File - -//Retrieve paths from Java files, using a GumTree parser. 
-fun allJavaFilesGumTree() { - val inputDir = "src/test/resources/gumTreeMethodSplitter/" - - val miner = PathMiner(PathRetrievalSettings(5, 5)) - val outputDir = "out_examples/allJavaFilesGumTree" - val storage = CsvPathStorage(outputDir) - - File(inputDir).forFilesWithSuffix(".java") { file -> - val node = GumTreeJavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix - val paths = miner.retrievePaths(node) - - storage.store(LabeledPathContexts(file.path, paths.map { toPathContext(it) })) - } - - storage.close() -} diff --git a/src/main/kotlin/astminer/examples/AllJavaMethods.kt b/src/main/kotlin/astminer/examples/AllJavaMethods.kt deleted file mode 100644 index 19ca4f23..00000000 --- a/src/main/kotlin/astminer/examples/AllJavaMethods.kt +++ /dev/null @@ -1,49 +0,0 @@ -package astminer.examples - -import astminer.common.model.LabeledPathContexts -import astminer.common.model.MethodInfo -import astminer.parse.java.GumTreeJavaNode -import astminer.parse.java.GumTreeJavaParser -import astminer.parse.java.GumTreeMethodSplitter -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.CsvPathStorage -import astminer.paths.toPathContext -import java.io.File - - -private fun getCsvFriendlyMethodId(methodInfo: MethodInfo): String { - val className = methodInfo.enclosingElementName() ?: "" - val methodName = methodInfo.name() ?: "unknown_method" - val parameterTypes = methodInfo.methodParameters.joinToString("|") { it.name() ?: "_" } - return "$className.$methodName($parameterTypes)" -} - - -//Retrieve paths from all Java files, using a GumTree parser. -//GumTreeMethodSplitter is used to extract individual method nodes from the compilation unit tree. 
-fun allJavaMethods() { - val inputDir = "src/test/resources/gumTreeMethodSplitter" - - val miner = PathMiner(PathRetrievalSettings(5, 5)) - val outputDir = "out_examples/allJavaMethods" - val storage = CsvPathStorage(outputDir) - - File(inputDir).forFilesWithSuffix(".java") { file -> - //parse file - val fileNode = GumTreeJavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix - - //extract method nodes - val methodNodes = GumTreeMethodSplitter().splitIntoMethods(fileNode) - - methodNodes.forEach { methodInfo -> - //Retrieve paths from every node individually - val paths = miner.retrievePaths(methodInfo.method.root) - //Retrieve a method identifier - val entityId = "${file.path}::${getCsvFriendlyMethodId(methodInfo)}" - storage.store(LabeledPathContexts(entityId, paths.map { toPathContext(it) })) - } - } - - storage.close() -} diff --git a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt b/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt deleted file mode 100644 index 9b9a23c5..00000000 --- a/src/main/kotlin/astminer/examples/AllJavaScriptFiles.kt +++ /dev/null @@ -1,26 +0,0 @@ -package astminer.examples - -import astminer.common.model.LabeledPathContexts -import astminer.parse.antlr.javascript.JavaScriptParser -import astminer.paths.CsvPathStorage -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.toPathContext -import java.io.File - -fun allJavaScriptFiles() { - val folder = "src/test/resources/examples" - val outputDir = "out_examples/allJavaScriptFilesAntlr" - - val miner = PathMiner(PathRetrievalSettings(5, 5)) - val storage = CsvPathStorage(outputDir) - - File(folder).forFilesWithSuffix(".js") {file -> - val node = JavaScriptParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix - val paths = miner.retrievePaths(node) - - storage.store(LabeledPathContexts(file.path, paths.map { toPathContext(it) })) - } - - storage.close() -} \ No newline at end of file 
diff --git a/src/main/kotlin/astminer/examples/AllPythonFiles.kt b/src/main/kotlin/astminer/examples/AllPythonFiles.kt deleted file mode 100644 index 01e1e111..00000000 --- a/src/main/kotlin/astminer/examples/AllPythonFiles.kt +++ /dev/null @@ -1,27 +0,0 @@ -package astminer.examples - -import astminer.common.model.LabeledPathContexts -import astminer.parse.antlr.python.PythonParser -import astminer.paths.PathMiner -import astminer.paths.PathRetrievalSettings -import astminer.paths.CsvPathStorage -import astminer.paths.toPathContext -import java.io.File - - -fun allPythonFiles() { - val inputDir = "src/test/resources/examples/" - - val miner = PathMiner(PathRetrievalSettings(5, 5)) - val outputDir = "out_examples/allPythonFiles" - val storage = CsvPathStorage(outputDir) - - File(inputDir).forFilesWithSuffix(".py") { file -> - val node = PythonParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix - val paths = miner.retrievePaths(node) - - storage.store(LabeledPathContexts(file.path, paths.map { toPathContext(it) })) - } - - storage.close() -} diff --git a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt b/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt deleted file mode 100644 index ffc0f565..00000000 --- a/src/main/kotlin/astminer/examples/Code2VecJavaMethods.kt +++ /dev/null @@ -1,42 +0,0 @@ -package astminer.examples - -import astminer.common.* -import astminer.common.model.LabeledPathContexts -import astminer.parse.antlr.java.JavaMethodSplitter -import astminer.parse.antlr.java.JavaParser -import astminer.paths.* -import java.io.File - - -//Retrieve paths from all Java files, using a GumTree parser. -//GumTreeMethodSplitter is used to extract individual method nodes from the compilation unit tree. 
-fun code2vecJavaMethods() { - val folder = "src/test/resources/code2vecPathMining" - val outputDir = "out_examples/code2vecPathMining" - - val miner = PathMiner(PathRetrievalSettings(5, 5)) - - val storage = Code2VecPathStorage(outputDir) - - File(folder).forFilesWithSuffix(".java") { file -> - //parse file - val fileNode = JavaParser().parseInputStream(file.inputStream()) ?: return@forFilesWithSuffix - - //extract method nodes - val methods = JavaMethodSplitter().splitIntoMethods(fileNode) - - methods.forEach { methodInfo -> - val methodNameNode = methodInfo.method.nameNode ?: return@forEach - val methodRoot = methodInfo.method.root - val label = splitToSubtokens(methodNameNode.getToken()).joinToString("|") - methodRoot.preOrder().forEach { it.setNormalizedToken() } - methodNameNode.setNormalizedToken("METHOD_NAME") - - // Retrieve paths from every node individually - val paths = miner.retrievePaths(methodRoot) - storage.store(LabeledPathContexts(label, paths.map { toPathContext(it) { node -> node.getNormalizedToken() } })) - } - } - - storage.close() -} diff --git a/src/main/kotlin/astminer/examples/Common.kt b/src/main/kotlin/astminer/examples/Common.kt deleted file mode 100644 index 402a9a13..00000000 --- a/src/main/kotlin/astminer/examples/Common.kt +++ /dev/null @@ -1,11 +0,0 @@ -package astminer.examples - -import java.io.File - -fun iterateFiles(dir: File, condition: (File) -> Boolean, action: (File) -> Unit) { - dir.walkTopDown().filter { it.isFile && condition(it) }.forEach { action.invoke(it) } -} - -fun File.forFilesWithSuffix(extension: String, action: (File) -> Unit) { - iterateFiles(this, ({ file: File -> file.path.endsWith(extension) }), action) -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/examples/FeatureExtraction.kt b/src/main/kotlin/astminer/examples/FeatureExtraction.kt deleted file mode 100644 index 318088ff..00000000 --- a/src/main/kotlin/astminer/examples/FeatureExtraction.kt +++ /dev/null @@ -1,32 +0,0 @@ -package 
astminer.examples - -import astminer.common.numberOfLines -import astminer.featureextraction.* -import astminer.parse.java.GumTreeJavaParser -import java.io.File - - -fun parseAndCollectFeatures() { - val parser = GumTreeJavaParser() - val features : List> = listOf(Depth, NumberOfNodes, BranchingFactor, CompressiblePathLengths, Tokens, NodeTypes) - - val folderInput = "./testData/featureextraction" - val folderOutput = "out_examples/featureextraction" - - val storage = TreeFeatureValueStorage(",") - storage.storeFeatures(features) - - File(folderInput).forFilesWithSuffix("java") { fileInput -> - val fileName = fileInput.name - val nol = numberOfLines(fileInput) - - val tree = ParsedTree(parser.className(), parser.parseInputStream(fileInput.inputStream()) ?: return@forFilesWithSuffix, fileName, nol) - storage.storeParsedTree(tree) - } - - storage.save(folderOutput) -} - -fun main() { - parseAndCollectFeatures() -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt index 7a7401f0..9a76c7fc 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeature.kt +++ b/src/main/kotlin/astminer/featureextraction/TreeFeature.kt @@ -1,7 +1,6 @@ package astminer.featureextraction import astminer.common.model.Node -import astminer.common.preOrder /** * Interface that describes tree feature. @@ -13,7 +12,7 @@ interface TreeFeature { * @param tree tree for which this feature is computed * @return computed feature */ - fun compute(tree: Node) : T + fun compute(tree: Node): T } /** @@ -21,7 +20,7 @@ interface TreeFeature { */ object Depth : TreeFeature { override fun compute(tree: Node): Int { - val max = tree.getChildren().map { compute(it) }.max() ?: 0 + val max = tree.children.map { compute(it) }.maxOrNull() ?: 0 return max + 1 } } @@ -47,22 +46,18 @@ object BranchingFactor : TreeFeature { * Tree feature for computing the number of nodes in a given tree. 
*/ object NumberOfNodes : TreeFeature { - override fun compute(tree: Node): Int { - return tree.getChildren().map { compute(it) }.sum() + 1 - } + override fun compute(tree: Node): Int = tree.children.sumOf { compute(it) } + 1 } /** * Tree feature for computing list of all node tokens from a given tree. */ object Tokens : TreeFeature> { - override fun compute(tree: Node): List { - return findTokens(tree, ArrayList()) - } + override fun compute(tree: Node): List = findTokens(tree, ArrayList()) private fun findTokens(node: Node, tokensList: MutableList): List { - node.getChildren().forEach { findTokens(it, tokensList) } - tokensList.add(node.getToken()) + node.children.forEach { findTokens(it, tokensList) } + tokensList.add(node.token) return tokensList } } @@ -71,13 +66,11 @@ object Tokens : TreeFeature> { * Tree feature for computing list of all node types from a given tree. */ object NodeTypes : TreeFeature> { - override fun compute(tree: Node): List { - return findNodeTypes(tree, ArrayList()) - } + override fun compute(tree: Node): List = findNodeTypes(tree, ArrayList()) private fun findNodeTypes(node: Node, nodeTypesList: MutableList): List { - node.getChildren().forEach { findNodeTypes(it, nodeTypesList) } - nodeTypesList.add(node.getTypeLabel()) + node.children.forEach { findNodeTypes(it, nodeTypesList) } + nodeTypesList.add(node.typeLabel) return nodeTypesList } } @@ -93,20 +86,18 @@ object CompressiblePathLengths : TreeFeature> { return pathLengths } - private fun Node.isStartingNode() : Boolean { - return this.hasOneChild() && !(this.getParent()?.hasOneChild() ?: false) - } + private fun Node.isStartingNode(): Boolean = this.hasOneChild() && !(this.parent?.hasOneChild() ?: false) - private fun Node.hasOneChild() : Boolean = getChildren().size == 1 + private fun Node.hasOneChild(): Boolean = children.size == 1 - private fun findPathLengthFromStartingNode(node: Node) : Int { + private fun findPathLengthFromStartingNode(node: Node): Int { var length = 1 - var 
next = node.getChildren().first() + var next = node.children.first() while (next.hasOneChild()) { length++ - next = next.getChildren().first() + next = next.children.first() } return length } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/featureextraction/TreeFeatureValueStorage.kt b/src/main/kotlin/astminer/featureextraction/TreeFeatureValueStorage.kt index ffc5ab22..2239c7ce 100644 --- a/src/main/kotlin/astminer/featureextraction/TreeFeatureValueStorage.kt +++ b/src/main/kotlin/astminer/featureextraction/TreeFeatureValueStorage.kt @@ -11,17 +11,16 @@ import java.io.File * @property fileName name of parsed file * @property numberOfLines number of lines in parsed file */ -data class ParsedTree(val parserName : String, val tree: Node, val fileName : String, val numberOfLines: Int) +data class ParsedTree(val parserName: String, val tree: Node, val fileName: String, val numberOfLines: Int) /** * Gets simple name of Any. */ -fun Any.className() : String { - return this::class.java.simpleName -} +fun Any.className(): String = this::class.java.simpleName /** - * Class for store and save [tree features][astminer.featureextraction.TreeFeature] for [parsed trees][astminer.featureextraction.ParsedTree]. + * Class for store and save [tree features][astminer.featureextraction.TreeFeature] + * for [parsed trees][astminer.featureextraction.ParsedTree]. 
* @property separator separator which is used in resulting file to separate with tree features values */ class TreeFeatureValueStorage(private val separator: String) { @@ -32,10 +31,10 @@ class TreeFeatureValueStorage(private val separator: String) { private val idField = Field("Id") { parsedTrees.indexOf(it).toString() } private val parserField = Field("ParserName") { it.parserName } private val fileNameField = Field("FileName") { it.fileName } - private val NOLField = Field("NumberOfLines") { it.numberOfLines.toString() } + private val numOfLinesField = Field("NumberOfLines") { it.numberOfLines.toString() } private val fileName = "features.csv" - private val fields: List = listOf(idField, parserField, fileNameField, NOLField) + private val fields: List = listOf(idField, parserField, fileNameField, numOfLinesField) /** * Data class for additional fields that should be in resulting file with features. @@ -48,7 +47,7 @@ class TreeFeatureValueStorage(private val separator: String) { * Stores new tree feature to compute for stored parsed trees. * @param feature feature to store */ - fun storeFeature(feature : TreeFeature) { + fun storeFeature(feature: TreeFeature) { features.add(feature) } @@ -70,7 +69,8 @@ class TreeFeatureValueStorage(private val separator: String) { /** * Computes all stored features for all stored parsed trees and saves them in a given directory. - * @param directoryPath path to directory where tree features is saved. If this directory does not exist the new one creates. + * @param directoryPath path to directory where tree features are saved. + * If this directory does not exist, a new one is created.
*/ fun save(directoryPath: String) { File(directoryPath).mkdirs() @@ -79,21 +79,20 @@ class TreeFeatureValueStorage(private val separator: String) { val lines = ArrayList() val csvHeaders = fields.joinToString(separator = separator) { it.header } - lines.add(features.map { it.className() }.fold(csvHeaders) { c, f -> "$c$separator$f" } ) + lines.add(features.map { it.className() }.fold(csvHeaders) { c, f -> "$c$separator$f" }) parsedTrees.forEach { t -> val csvFields = fields.joinToString(separator = separator) { it.value(t) } - lines.add(features.map { toCsvString(it.compute(t.tree)) }.fold(csvFields) { c, f -> "$c$separator$f" } ) + lines.add(features.map { toCsvString(it.compute(t.tree)) }.fold(csvFields) { c, f -> "$c$separator$f" }) } writeLinesToFile(lines, file) } - private fun toCsvString(a : Any?) : String { + private fun toCsvString(a: Any?): String { if (a is List<*>) { return "\"${a.joinToString { toCsvString(it) }.replace("\"","\"\"")}\"" } return a.toString() } - -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/filters/CommonFilters.kt b/src/main/kotlin/astminer/filters/CommonFilters.kt new file mode 100644 index 00000000..a0f47848 --- /dev/null +++ b/src/main/kotlin/astminer/filters/CommonFilters.kt @@ -0,0 +1,32 @@ +package astminer.filters + +import astminer.common.model.* +import astminer.featureextraction.NumberOfNodes + +/** + * Filter that excludes trees which do not satisfy [minSize] <= tree size <= [maxSize]. + * @param minSize The minimum size of trees that pass the filter. + * @param maxSize The maximum size of trees that pass the filter. Set it to null if there should be no upper bound. + */ +class TreeSizeFilter(private val minSize: Int = 0, private val maxSize: Int? 
= null) : FileFilter, FunctionFilter { + private fun Node.treeSize() = NumberOfNodes.compute(this) + + private fun validateTree(root: Node): Boolean = + minSize <= root.treeSize() && (maxSize == null || root.treeSize() <= maxSize) + + override fun validate(functionInfo: FunctionInfo): Boolean = validateTree(functionInfo.root) + + override fun validate(parseResult: ParsingResult): Boolean = validateTree(parseResult.root) +} + +/** + * Filter that excludes trees that have more words than [maxWordsNumber] in any token of their node. + */ +class WordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter, FileFilter { + private fun validateTree(root: Node) = + !root.preOrder().any { node -> node.token.split(Node.TOKEN_DELIMITER).size > maxWordsNumber } + + override fun validate(functionInfo: FunctionInfo) = validateTree(functionInfo.root) + + override fun validate(parseResult: ParsingResult) = validateTree(parseResult.root) +} diff --git a/src/main/kotlin/astminer/filters/FunctionFilters.kt b/src/main/kotlin/astminer/filters/FunctionFilters.kt new file mode 100644 index 00000000..d449131d --- /dev/null +++ b/src/main/kotlin/astminer/filters/FunctionFilters.kt @@ -0,0 +1,43 @@ +package astminer.filters + +import astminer.common.model.FunctionFilter +import astminer.common.model.FunctionInfo +import astminer.common.model.Node +import astminer.common.splitToSubtokens + +/** + * Filter that excludes functions that have at least one of modifiers from the [excludeModifiers] list. + */ +class ModifierFilter(private val excludeModifiers: List) : FunctionFilter { + override fun validate(functionInfo: FunctionInfo): Boolean { + val functionModifiers = checkNotNull(functionInfo.modifiers) { "Modifiers weren't properly parsed" } + return functionModifiers.none { modifier -> modifier in excludeModifiers } + } +} + +/** + * Filter that excludes functions that have at least one of annotations from the [excludeAnnotations] list. 
+ */ +class AnnotationFilter(private val excludeAnnotations: List) : FunctionFilter { + override fun validate(functionInfo: FunctionInfo): Boolean { + val functionAnnotations = checkNotNull(functionInfo.annotations) { "Annotations weren't properly parsed" } + return functionAnnotations.none { annotation -> annotation in excludeAnnotations } + } +} + +/** + * Filter that excludes constructors. + */ +object ConstructorFilter : FunctionFilter { + override fun validate(functionInfo: FunctionInfo) = !functionInfo.isConstructor +} + +/** + * Filter that excludes functions that have more than [maxWordsNumber] words in their names. + */ +class FunctionNameWordsNumberFilter(private val maxWordsNumber: Int) : FunctionFilter { + override fun validate(functionInfo: FunctionInfo): Boolean { + val name = functionInfo.name + return name != null && splitToSubtokens(name).size <= maxWordsNumber + } +} diff --git a/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt b/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt new file mode 100644 index 00000000..b7c2b6d8 --- /dev/null +++ b/src/main/kotlin/astminer/labelextractor/FileLabelExtractors.kt @@ -0,0 +1,21 @@ +package astminer.labelextractor + +import astminer.common.model.* + +/** + * Labels files with their file names + */ +object FileNameExtractor : FileLabelExtractor { + override fun process(parseResult: ParsingResult): LabeledResult = + parseResult.labeledWith(parseResult.file.name) +} + +/** + * Labels files with folder names + */ +object FolderNameExtractor : FileLabelExtractor { + override fun process(parseResult: ParsingResult): LabeledResult?
{ + val folderName = parseResult.file.parentFile?.name ?: return null + return parseResult.labeledWith(folderName) + } +} diff --git a/src/main/kotlin/astminer/labelextractor/FunctionNameLabelExtractor.kt b/src/main/kotlin/astminer/labelextractor/FunctionNameLabelExtractor.kt new file mode 100644 index 00000000..fa4dd5fe --- /dev/null +++ b/src/main/kotlin/astminer/labelextractor/FunctionNameLabelExtractor.kt @@ -0,0 +1,26 @@ +package astminer.labelextractor + +import astminer.common.model.FunctionInfo +import astminer.common.model.FunctionLabelExtractor +import astminer.common.model.LabeledResult +import astminer.common.model.Node + +/** + * Labels functions with their names. + * Hides the name of the function in the subtree and also in all the recursive calls. + */ +object FunctionNameLabelExtractor : FunctionLabelExtractor { + private const val HIDDEN_METHOD_NAME_TOKEN = "METHOD_NAME" + private const val RECURSIVE_CALL_TOKEN = "SELF" + + override fun process(functionInfo: FunctionInfo): LabeledResult? { + val normalizedName = functionInfo.nameNode?.normalizedToken ?: return null + functionInfo.root.preOrder().forEach { node -> + if (node.originalToken == functionInfo.nameNode?.originalToken) { + node.technicalToken = RECURSIVE_CALL_TOKEN + } + } + functionInfo.nameNode?.technicalToken = HIDDEN_METHOD_NAME_TOKEN + return LabeledResult(functionInfo.root, normalizedName, functionInfo.filePath) + } +} diff --git a/src/main/kotlin/astminer/parse/FindingUtils.kt b/src/main/kotlin/astminer/parse/FindingUtils.kt new file mode 100644 index 00000000..9ecd19bd --- /dev/null +++ b/src/main/kotlin/astminer/parse/FindingUtils.kt @@ -0,0 +1,11 @@ +package astminer.parse + +import astminer.common.model.Node + +inline fun T.findEnclosingElementBy(condition: (T) -> Boolean): T? { + var curNode = this.parent + while (!(curNode == null || condition(curNode as T))) { + curNode = curNode.parent + } + return curNode as?
T +} diff --git a/src/main/kotlin/astminer/parse/ParsingException.kt b/src/main/kotlin/astminer/parse/ParsingException.kt new file mode 100644 index 00000000..ab24473b --- /dev/null +++ b/src/main/kotlin/astminer/parse/ParsingException.kt @@ -0,0 +1,4 @@ +package astminer.parse + +class ParsingException(parserType: String, language: String, exc: Exception? = null) : + IllegalStateException("Parser $parserType had problems parsing $language: ${exc?.message ?: "Unknown error."}") diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt new file mode 100644 index 00000000..cc0899e3 --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/AntlrNode.kt @@ -0,0 +1,31 @@ +package astminer.parse.antlr + +import astminer.common.model.Node + +class AntlrNode( + override val typeLabel: String, + override var parent: AntlrNode?, + originalToken: String? +) : Node(originalToken) { + + override val children: MutableList = mutableListOf() + + fun replaceChildren(newChildren: List) { + children.clear() + newChildren.forEach { it.parent = this } + children.addAll(newChildren) + } + + override fun getChildrenOfType(typeLabel: String) = children.filter { + decompressTypeLabel(it.typeLabel).firstOrNull() == typeLabel + } + + override fun getChildOfType(typeLabel: String): AntlrNode? 
= + getChildrenOfType(typeLabel).firstOrNull() + + override fun removeChildrenOfType(typeLabel: String) { + children.removeIf { it.typeLabel == typeLabel } + } + + override fun preOrder(): List = super.preOrder().map { it as AntlrNode } +} diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrParsingResult.kt b/src/main/kotlin/astminer/parse/antlr/AntlrParsingResult.kt new file mode 100644 index 00000000..8a76397f --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/AntlrParsingResult.kt @@ -0,0 +1,48 @@ +package astminer.parse.antlr + +import astminer.common.model.* +import astminer.parse.antlr.java.JavaFunctionSplitter +import astminer.parse.antlr.java.JavaParser +import astminer.parse.antlr.javascript.JavaScriptFunctionSplitter +import astminer.parse.antlr.javascript.JavaScriptParser +import astminer.parse.antlr.php.PHPFunctionSplitter +import astminer.parse.antlr.php.PHPParser +import astminer.parse.antlr.python.PythonFunctionSplitter +import astminer.parse.antlr.python.PythonParser +import java.io.File + +object AntlrJavaParsingResultFactory : ParsingResultFactory { + override fun parse(file: File) = AntlrJavaParsingResult(file) + + class AntlrJavaParsingResult(file: File) : ParsingResult(file) { + override val root = JavaParser().parseFile(file) + override val splitter = JavaFunctionSplitter() + } +} + +object AntlrPythonParsingResultFactory : ParsingResultFactory { + override fun parse(file: File) = AntlrPythonParsingResult(file) + + class AntlrPythonParsingResult(file: File) : ParsingResult(file) { + override val root = PythonParser().parseFile(file) + override val splitter = PythonFunctionSplitter() + } +} + +object AntlrJavascriptParsingResultFactory : ParsingResultFactory { + override fun parse(file: File) = AntlrJavascriptParsingResult(file) + + class AntlrJavascriptParsingResult(file: File) : ParsingResult(file) { + override val root = JavaScriptParser().parseFile(file) + override val splitter = JavaScriptFunctionSplitter() + } +} + +object 
AntlrPHPParsingResultFactory : ParsingResultFactory { + override fun parse(file: File): ParsingResult = AntlrPHPParsingResult(file) + + class AntlrPHPParsingResult(file: File) : ParsingResult(file) { + override val root = PHPParser().parseFile(file) + override val splitter = PHPFunctionSplitter() + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt index 80cb6ab8..e9aea811 100644 --- a/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt +++ b/src/main/kotlin/astminer/parse/antlr/AntlrUtil.kt @@ -1,52 +1,50 @@ package astminer.parse.antlr +import astminer.common.EMPTY_TOKEN import astminer.common.model.Node import org.antlr.v4.runtime.ParserRuleContext import org.antlr.v4.runtime.Vocabulary import org.antlr.v4.runtime.tree.ErrorNode import org.antlr.v4.runtime.tree.TerminalNode -fun convertAntlrTree(tree: ParserRuleContext, ruleNames: Array, vocabulary: Vocabulary): SimpleNode { - return compressTree(convertRuleContext(tree, ruleNames, null, vocabulary)) -} +fun convertAntlrTree(tree: ParserRuleContext, ruleNames: Array, vocabulary: Vocabulary): AntlrNode = + compressTree(convertRuleContext(tree, ruleNames, null, vocabulary)) -private fun convertRuleContext(ruleContext: ParserRuleContext, ruleNames: Array, parent: Node?, vocabulary: Vocabulary): SimpleNode { +private fun convertRuleContext( + ruleContext: ParserRuleContext, + ruleNames: Array, + parent: AntlrNode?, + vocabulary: Vocabulary +): AntlrNode { val typeLabel = ruleNames[ruleContext.ruleIndex] - val currentNode = SimpleNode(typeLabel, parent, null) - val children: MutableList = ArrayList() + val currentNode = AntlrNode(typeLabel, parent, null) + val children: MutableList = ArrayList() ruleContext.children?.forEach { - if (it is TerminalNode) { - children.add(convertTerminal(it, currentNode, vocabulary)) - return@forEach - } - if (it is ErrorNode) { - children.add(convertErrorNode(it, currentNode)) - return@forEach + when (it) { + is 
TerminalNode -> children.add(convertTerminal(it, currentNode, vocabulary)) + is ErrorNode -> children.add(convertErrorNode(it, currentNode)) + else -> children.add(convertRuleContext(it as ParserRuleContext, ruleNames, currentNode, vocabulary)) } - children.add(convertRuleContext(it as ParserRuleContext, ruleNames, currentNode, vocabulary)) } - currentNode.setChildren(children) - + currentNode.replaceChildren(children) return currentNode } -private fun convertTerminal(terminalNode: TerminalNode, parent: Node?, vocabulary: Vocabulary): SimpleNode { - return SimpleNode(vocabulary.getSymbolicName(terminalNode.symbol.type), parent, terminalNode.symbol.text) -} +private fun convertTerminal(terminalNode: TerminalNode, parent: AntlrNode?, vocabulary: Vocabulary): AntlrNode = + AntlrNode(vocabulary.getSymbolicName(terminalNode.symbol.type), parent, terminalNode.symbol.text) -private fun convertErrorNode(errorNode: ErrorNode, parent: Node?): SimpleNode { - return SimpleNode("Error", parent, errorNode.text) -} +private fun convertErrorNode(errorNode: ErrorNode, parent: AntlrNode?): AntlrNode = + AntlrNode("Error", parent, errorNode.text) /** * Remove intermediate nodes that have a single child. */ -fun simplifyTree(tree: SimpleNode): SimpleNode { - return if (tree.getChildren().size == 1) { - simplifyTree(tree.getChildren().first() as SimpleNode) +fun simplifyTree(tree: AntlrNode): AntlrNode { + return if (tree.children.size == 1) { + simplifyTree(tree.children.first()) } else { - tree.setChildren(tree.getChildren().map { simplifyTree(it as SimpleNode) }.toMutableList()) + tree.replaceChildren(tree.children.map { simplifyTree(it) }.toMutableList()) tree } } @@ -54,21 +52,38 @@ fun simplifyTree(tree: SimpleNode): SimpleNode { /** * Compress paths of intermediate nodes that have a single child into individual nodes. 
*/ -fun compressTree(root: SimpleNode): SimpleNode { - return if (root.getChildren().size == 1) { - val child = compressTree(root.getChildren().first() as SimpleNode) - val compressedNode = SimpleNode( - root.getTypeLabel() + "|" + child.getTypeLabel(), - root.getParent(), - child.getToken() +fun compressTree(root: AntlrNode): AntlrNode { + return if (root.children.size == 1) { + val child = compressTree(root.children.first()) + val compressedNode = AntlrNode( + root.typeLabel + "|" + child.typeLabel, + root.parent, + child.originalToken ) - compressedNode.setChildren(child.getChildren()) + compressedNode.replaceChildren(child.children) compressedNode } else { - root.setChildren(root.getChildren().map { compressTree(it as SimpleNode) }.toMutableList()) + root.replaceChildren(root.children.map { compressTree(it) }.toMutableList()) root } } - fun decompressTypeLabel(typeLabel: String) = typeLabel.split("|") + +fun AntlrNode.lastLabel() = decompressTypeLabel(typeLabel).last() + +fun AntlrNode.firstLabel() = decompressTypeLabel(typeLabel).first() + +fun AntlrNode.hasLastLabel(label: String): Boolean = lastLabel() == label + +fun AntlrNode.lastLabelIn(labels: List): Boolean = labels.contains(lastLabel()) + +fun AntlrNode.hasFirstLabel(label: String): Boolean = firstLabel() == label + +fun AntlrNode.firstLabelIn(labels: List): Boolean = labels.contains(firstLabel()) + +fun Node.getTokensFromSubtree(): String = + if (isLeaf()) originalToken ?: EMPTY_TOKEN else children.joinToString(separator = "") { it.getTokensFromSubtree() } + +fun AntlrNode.getItOrChildrenOfType(typeLabel: String): List = + if (hasLastLabel(typeLabel)) listOf(this) else this.getChildrenOfType(typeLabel).map { it } diff --git a/src/main/kotlin/astminer/parse/antlr/SimpleNode.kt b/src/main/kotlin/astminer/parse/antlr/SimpleNode.kt deleted file mode 100644 index 3bf196b6..00000000 --- a/src/main/kotlin/astminer/parse/antlr/SimpleNode.kt +++ /dev/null @@ -1,59 +0,0 @@ -package astminer.parse.antlr - 
-import astminer.common.model.Node - -class SimpleNode(private val typeLabel: String, private var parent: Node?, private var token: String?) : Node { - private val metadata: MutableMap = HashMap() - - private var children: MutableList = mutableListOf() - - fun setChildren(newChildren: List) { - children = newChildren.toMutableList() - children.forEach { (it as SimpleNode).setParent(this) } - } - - fun setParent(newParent: Node?) { - parent = newParent - } - - override fun getTypeLabel(): String { - return typeLabel - } - - override fun getChildren(): MutableList { - return children - } - - override fun getParent(): Node? { - return parent - } - - override fun getToken(): String { - return token ?: "null" - } - - fun setToken(newToken: String) { - token = newToken - } - - override fun isLeaf(): Boolean { - return children.isEmpty() - } - - override fun getMetadata(key: String): Any? { - return metadata[key] - } - - override fun setMetadata(key: String, value: Any) { - metadata[key] = value - } - - override fun getChildrenOfType(typeLabel: String) = getChildren().filter { - decompressTypeLabel(it.getTypeLabel()).firstOrNull() == typeLabel - } - - override fun removeChildrenOfType(typeLabel: String) { - children.removeIf { it.getTypeLabel() == typeLabel } - } - -} diff --git a/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt new file mode 100644 index 00000000..1a79687d --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/java/AntlrJavaFunctionInfo.kt @@ -0,0 +1,73 @@ +package astminer.parse.antlr.java + +import astminer.common.model.* +import astminer.parse.antlr.* +import astminer.parse.findEnclosingElementBy +import mu.KotlinLogging + +private val logger = KotlinLogging.logger("Antlr-Java-function-info") + +class AntlrJavaFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { + override val nameNode: AntlrNode? 
= collectNameNode() + override val returnType: String? = collectReturnType() + override val enclosingElement: EnclosingElement? = collectEnclosingClass() + + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } + + private fun collectNameNode(): AntlrNode? = root.getChildOfType(METHOD_NAME_NODE) + + private fun collectReturnType(): String? { + val returnTypeNode = root.getChildOfType(METHOD_RETURN_TYPE_NODE) + return returnTypeNode?.getTokensFromSubtree() + } + + private fun collectEnclosingClass(): EnclosingElement? { + val enclosingClassNode = root.findEnclosingElementBy { it.hasLastLabel(CLASS_DECLARATION_NODE) } ?: return null + return EnclosingElement( + type = EnclosingElementType.Class, + name = enclosingClassNode.getChildOfType(CLASS_NAME_NODE)?.originalToken, + root = enclosingClassNode + ) + } + + private fun collectParameters(): List { + val parametersRoot = root.getChildOfType(METHOD_PARAMETER_NODE) + val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) ?: return emptyList() + + if (innerParametersRoot.lastLabelIn(METHOD_SINGLE_PARAMETER_NODES)) { + return listOf(getParameterInfo(innerParametersRoot)) + } + + return innerParametersRoot.children.filter { + it.firstLabelIn(METHOD_SINGLE_PARAMETER_NODES) + }.map { singleParameter -> getParameterInfo(singleParameter) } + } + + private fun getParameterInfo(parameterNode: AntlrNode): FunctionInfoParameter { + val returnTypeNode = parameterNode.getChildOfType(PARAMETER_RETURN_TYPE_NODE) + val returnTypeToken = returnTypeNode?.getTokensFromSubtree() + + val parameterName = parameterNode.getChildOfType(PARAMETER_NAME_NODE)?.getTokensFromSubtree() + ?: error("Parameter name wasn't found") + + return FunctionInfoParameter(parameterName, returnTypeToken) + } + + companion object { + private const val METHOD_RETURN_TYPE_NODE = "typeTypeOrVoid" + private const val METHOD_NAME_NODE = "IDENTIFIER" + + 
private const val CLASS_DECLARATION_NODE = "classDeclaration" + private const val CLASS_NAME_NODE = "IDENTIFIER" + + private const val METHOD_PARAMETER_NODE = "formalParameters" + private const val METHOD_PARAMETER_INNER_NODE = "formalParameterList" + private val METHOD_SINGLE_PARAMETER_NODES = listOf("formalParameter", "lastFormalParameter") + private const val PARAMETER_RETURN_TYPE_NODE = "typeType" + private const val PARAMETER_NAME_NODE = "variableDeclaratorId" + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt new file mode 100644 index 00000000..69099be6 --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaFunctionSplitter.kt @@ -0,0 +1,16 @@ +package astminer.parse.antlr.java + +import astminer.common.model.* +import astminer.parse.antlr.AntlrNode +import astminer.parse.antlr.hasLastLabel + +class JavaFunctionSplitter : TreeFunctionSplitter { + private val methodNodeType = "methodDeclaration" + + override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { + val methodRoots = root.preOrder().filter { + it.hasLastLabel(methodNodeType) + } + return methodRoots.map { AntlrJavaFunctionInfo(it, filePath) } + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt deleted file mode 100644 index 48b1577c..00000000 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaMethodSplitter.kt +++ /dev/null @@ -1,96 +0,0 @@ -package astminer.parse.antlr.java - -import astminer.common.* -import astminer.common.model.* -import astminer.parse.antlr.SimpleNode -import astminer.parse.antlr.decompressTypeLabel - -class JavaMethodSplitter : TreeMethodSplitter { - companion object { - private const val METHOD_NODE = "methodDeclaration" - private const val METHOD_RETURN_TYPE_NODE = "typeTypeOrVoid" - private const val METHOD_NAME_NODE = "IDENTIFIER" - - 
private const val CLASS_DECLARATION_NODE = "classDeclaration" - private const val CLASS_NAME_NODE = "IDENTIFIER" - - private const val METHOD_PARAMETER_NODE = "formalParameters" - private const val METHOD_PARAMETER_INNER_NODE = "formalParameterList" - private val METHOD_SINGLE_PARAMETER_NODE = listOf("formalParameter", "lastFormalParameter") - private const val PARAMETER_RETURN_TYPE_NODE = "typeType" - private const val PARAMETER_NAME_NODE = "variableDeclaratorId" - } - - override fun splitIntoMethods(root: SimpleNode): Collection> { - val methodRoots = root.preOrder().filter { - decompressTypeLabel(it.getTypeLabel()).last() == METHOD_NODE - } - return methodRoots.map { collectMethodInfo(it as SimpleNode) } - } - - private fun collectMethodInfo(methodNode: SimpleNode): MethodInfo { - val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) as? SimpleNode - val methodReturnTypeNode = methodNode.getChildOfType(METHOD_RETURN_TYPE_NODE) as? SimpleNode - methodReturnTypeNode?.setToken(collectParameterToken(methodReturnTypeNode)) - - val classRoot = getEnclosingClass(methodNode) - val className = classRoot?.getChildOfType(CLASS_NAME_NODE) as? SimpleNode - - val parametersRoot = methodNode.getChildOfType(METHOD_PARAMETER_NODE) as? SimpleNode - val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) as? SimpleNode - - val parametersList = when { - innerParametersRoot != null -> getListOfParameters(innerParametersRoot) - parametersRoot != null -> getListOfParameters(parametersRoot) - else -> emptyList() - } - - return MethodInfo( - MethodNode(methodNode, methodReturnTypeNode, methodName), - ElementNode(classRoot, className), - parametersList - ) - } - - private fun getEnclosingClass(node: SimpleNode): SimpleNode? { - if (decompressTypeLabel(node.getTypeLabel()).last() == CLASS_DECLARATION_NODE) { - return node - } - val parentNode = node.getParent() as? 
SimpleNode - if (parentNode != null) { - return getEnclosingClass(parentNode) - } - return null - } - - private fun getListOfParameters(parametersRoot: SimpleNode): List> { - if (METHOD_SINGLE_PARAMETER_NODE.contains(decompressTypeLabel(parametersRoot.getTypeLabel()).last())) { - return listOf(getParameterInfoFromNode(parametersRoot)) - } - return parametersRoot.getChildren().filter { - val firstType = decompressTypeLabel(it.getTypeLabel()).first() - METHOD_SINGLE_PARAMETER_NODE.contains(firstType) - }.map { - getParameterInfoFromNode(it as SimpleNode) - } - } - - private fun getParameterInfoFromNode(parameterRoot: SimpleNode): ParameterNode { - val returnTypeNode = parameterRoot.getChildOfType(PARAMETER_RETURN_TYPE_NODE) as? SimpleNode - returnTypeNode?.setToken(collectParameterToken(returnTypeNode)) - return ParameterNode( - parameterRoot, - returnTypeNode, - parameterRoot.getChildOfType(PARAMETER_NAME_NODE) as? SimpleNode - ) - } - - private fun collectParameterToken(parameterRoot: SimpleNode): String { - if (parameterRoot.isLeaf()) { - return parameterRoot.getToken() - } - return parameterRoot.getChildren().joinToString(separator = "") { child -> - collectParameterToken(child as SimpleNode) - } - } -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt b/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt index 2dac7c70..f9ccd945 100644 --- a/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/java/JavaParser.kt @@ -1,17 +1,18 @@ package astminer.parse.antlr.java import astminer.common.model.Parser -import astminer.parse.antlr.SimpleNode +import astminer.parse.ParsingException +import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.convertAntlrTree -import org.antlr.v4.runtime.CommonTokenStream import me.vovak.antlr.parser.Java8Lexer import me.vovak.antlr.parser.Java8Parser import org.antlr.v4.runtime.CharStreams +import 
org.antlr.v4.runtime.CommonTokenStream import java.io.InputStream import java.lang.Exception -class JavaParser : Parser { - override fun parseInputStream(content: InputStream): SimpleNode? { +class JavaParser : Parser { + override fun parseInputStream(content: InputStream): AntlrNode { return try { val lexer = Java8Lexer(CharStreams.fromStream(content)) lexer.removeErrorListeners() @@ -21,7 +22,7 @@ class JavaParser : Parser { val context = parser.compilationUnit() convertAntlrTree(context, Java8Parser.ruleNames, Java8Parser.VOCABULARY) } catch (e: Exception) { - null + throw ParsingException("ANTLR", "Java", e) } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt new file mode 100644 index 00000000..d5c12555 --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/javascript/AntlrJavaScriptElementInfo.kt @@ -0,0 +1,147 @@ +package astminer.parse.antlr.javascript + +import astminer.common.model.* +import astminer.parse.antlr.* +import astminer.parse.findEnclosingElementBy +import mu.KotlinLogging + +private val logger = KotlinLogging.logger("Antlr-Javascript-function-info") + +/** +Base class for describing JavaScript methods, functions or arrow functions. + */ +abstract class AntlrJavaScriptElementInfo(override val root: AntlrNode, override val filePath: String) : + FunctionInfo { + + protected fun collectEnclosingElement(): EnclosingElement? 
{ + val enclosingElement = root.findEnclosingElementBy { + it.containsLabelIn(ENCLOSING_ELEMENT_NODES) + } ?: return null + return EnclosingElement( + type = getEnclosingElementType(enclosingElement), + name = getEnclosingElementName(enclosingElement), + root = enclosingElement + ) + } + + private fun AntlrNode.containsLabelIn(labels: List): Boolean = + decompressTypeLabel(typeLabel).intersect(labels).isNotEmpty() + + private fun getEnclosingElementName(enclosingRoot: AntlrNode?): String? { + return enclosingRoot?.children?.firstOrNull { + it.hasLastLabel(ENCLOSING_ELEMENT_NAME_NODE) + }?.originalToken + } + + private fun getEnclosingElementType(enclosingRoot: AntlrNode): EnclosingElementType { + return when (decompressTypeLabel(enclosingRoot.typeLabel).last()) { + "functionDeclaration" -> EnclosingElementType.Function + "classDeclaration" -> EnclosingElementType.Class + "methodDefinition" -> EnclosingElementType.Method + "variableDeclaration" -> EnclosingElementType.VariableDeclaration + else -> error("Couldn't derive enclosing element type") + } + } + + protected fun collectParameters(): List { + val parametersRoot = getParametersRoot() + val parameterNameNodes = when { + // No parameters found + parametersRoot == null -> emptyList() + + // Have only one parameter, which is indicated only by its name + parametersRoot.hasLastLabel(PARAMETER_NAME_NODE) -> listOf(parametersRoot) + + // Have many parameters or one indicated not only by its name + else -> + parametersRoot + .getItOrChildrenOfType(SINGLE_PARAMETER_NODE) + .map { it.getChildOfType(PARAMETER_NAME_NODE) ?: it } + } + return parameterNameNodes.map { + check(it.originalToken != null) { "Parameter name wasn't found" } + FunctionInfoParameter(name = it.originalToken, type = null) + } + } + + abstract fun getParametersRoot(): AntlrNode?
+ + companion object { + private val ENCLOSING_ELEMENT_NODES = + listOf("functionDeclaration", "variableDeclaration", "classDeclaration", "methodDefinition") + private const val ENCLOSING_ELEMENT_NAME_NODE = "Identifier" + + private const val SINGLE_PARAMETER_NODE = "formalParameterArg" + private const val PARAMETER_NAME_NODE = "Identifier" + } +} + +class JavaScriptArrowInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { + + override val enclosingElement: EnclosingElement? = collectEnclosingElement() + override val nameNode: AntlrNode? = root.getChildOfType(ARROW_NAME_NODE) + + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } + + override fun getParametersRoot(): AntlrNode? { + val parameterRoot = root.getChildOfType(ARROW_PARAMETER_NODE) + return parameterRoot?.getChildOfType(ARROW_PARAMETER_INNER_NODE) ?: parameterRoot + } + + companion object { + private const val ARROW_NAME_NODE = "Identifier" + private const val ARROW_PARAMETER_NODE = "arrowFunctionParameters" + private const val ARROW_PARAMETER_INNER_NODE = "formalParameterList" + } +} + +class JavaScriptMethodInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { + + override val enclosingElement: EnclosingElement? = collectEnclosingElement() + override val nameNode: AntlrNode? = collectNameNode() + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } + + private fun collectNameNode(): AntlrNode? { + val methodNameParent = root.children.firstOrNull { + METHOD_GETTERS_SETTERS.contains(it.typeLabel) + } ?: root + + return methodNameParent.children.firstOrNull { + decompressTypeLabel(it.typeLabel).contains(METHOD_NAME_NODE) + } + } + + override fun getParametersRoot(): AntlrNode? 
= root.getChildOfType(METHOD_PARAMETER_NODE) + + companion object { + private val METHOD_GETTERS_SETTERS = listOf("getter", "setter") + private const val METHOD_NAME_NODE = "identifierName" + private const val METHOD_PARAMETER_NODE = "formalParameterList" + } +} + +class JavaScriptFunctionInfo(root: AntlrNode, filePath: String) : AntlrJavaScriptElementInfo(root, filePath) { + + override val enclosingElement: EnclosingElement? = collectEnclosingElement() + override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION_NAME_NODE) + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } + + override fun getParametersRoot(): AntlrNode? = root.getChildOfType(FUNCTION_PARAMETER_NODE) + + companion object { + private const val FUNCTION_NAME_NODE = "Identifier" + private const val FUNCTION_PARAMETER_NODE = "formalParameterList" + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt new file mode 100644 index 00000000..ed918519 --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitter.kt @@ -0,0 +1,32 @@ +package astminer.parse.antlr.javascript + +import astminer.common.model.* +import astminer.parse.antlr.AntlrNode +import astminer.parse.antlr.decompressTypeLabel + +/** + * Get all methods (in JavaScript they are divided into functions, arrow functions and methods) and information + * about their names, enclosing elements and parameters.
+ */ +class JavaScriptFunctionSplitter : TreeFunctionSplitter { + override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { + return root.preOrder().mapNotNull { node -> + when { + node.isArrowElement() -> JavaScriptArrowInfo(node, filePath) + node.isFunctionElement() -> JavaScriptFunctionInfo(node, filePath) + node.isMethodElement() -> JavaScriptMethodInfo(node, filePath) + else -> null + } + } + } + + private fun Node.isArrowElement() = this.getChildOfType(ARROW_NODE) != null + private fun Node.isFunctionElement() = this.getChildOfType(FUNCTION_NODE) != null + private fun Node.isMethodElement() = decompressTypeLabel(this.typeLabel).last() == METHOD_NODE + + companion object { + private const val METHOD_NODE = "methodDefinition" + private const val ARROW_NODE = "ARROW" + private const val FUNCTION_NODE = "Function" + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt deleted file mode 100644 index 58d74fbc..00000000 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitter.kt +++ /dev/null @@ -1,184 +0,0 @@ -package astminer.parse.antlr.javascript - -import astminer.common.model.* -import astminer.common.preOrder -import astminer.parse.antlr.SimpleNode -import astminer.parse.antlr.decompressTypeLabel - -/** - * Get all methods (in JavaScript there are divided into functions, arrow functions and methods) and information - * about their names, enclosing elements and parameters. 
- */ -class JavaScriptMethodSplitter : TreeMethodSplitter { - companion object { - private const val METHOD_NODE = "methodDefinition" - private const val ARROW_NODE = "ARROW" - private const val FUNCTION_NODE = "Function" - } - - override fun splitIntoMethods(root: SimpleNode): Collection> { - val methodRoots: List = root.preOrder().map { node -> - when { - node.isArrowElement() -> ArrowElement(node as SimpleNode) - node.isFunctionElement() -> FunctionElement(node as SimpleNode) - node.isMethodElement() -> MethodElement(node as SimpleNode) - else -> null - } - }.filterNotNull() - - return methodRoots.map { it.getElementInfo() } - } - - private fun Node.isArrowElement() = this.getChildOfType(ARROW_NODE) != null - private fun Node.isFunctionElement() = this.getChildOfType(FUNCTION_NODE) != null - private fun Node.isMethodElement() = decompressTypeLabel(this.getTypeLabel()).last() == METHOD_NODE -} - -/** - Base class for describing JavaScript methods, functions or arrow functions. - */ -abstract class JavaScriptElement(private val element: SimpleNode) { - companion object { - private val ENCLOSING_ELEMENT_NODES = listOf("functionDeclaration", "variableDeclaration", "classDeclaration", "methodDefinition") - private const val ENCLOSING_ELEMENT_NAME_NODE = "Identifier" - - private const val SINGLE_PARAMETER_NODE = "formalParameterArg" - private const val PARAMETER_NAME_NODE = "Identifier" - } - - /** - * Gets [element]'s information about its root, name, enclosing elements and list of parameters. - * @return element info - */ - fun getElementInfo() : MethodInfo { - val enclosingRoot = getEnclosingElementRoot(element.getParent() as SimpleNode) - return MethodInfo( - MethodNode(element, null, getElementName()), - ElementNode(enclosingRoot, getEnclosingElementName(enclosingRoot)), - getElementParametersList(getElementParametersRoot()) - ) - } - - /** - * Gets root of [element]'s enclosing element as first one with typeLabel from [ENCLOSING_ELEMENT_NAME_NODE]. 
- * @param node for checking if it is root of enclosing element - * @return root of enclosing element - */ - open fun getEnclosingElementRoot(node: SimpleNode?): SimpleNode? { - if (node == null || decompressTypeLabel(node.getTypeLabel()).intersect(ENCLOSING_ELEMENT_NODES).isNotEmpty()) { - return node - } - return getEnclosingElementRoot(node.getParent() as? SimpleNode) - } - - /** - * Gets name node of [element]'s enclosing element. - * @param enclosingRoot - root of enclosing element - * @return name node of enclosing element - */ - open fun getEnclosingElementName(enclosingRoot: SimpleNode?) : SimpleNode? { - return enclosingRoot?.getChildren()?.firstOrNull { - decompressTypeLabel(it.getTypeLabel()).last() == ENCLOSING_ELEMENT_NAME_NODE - } as? SimpleNode - } - - /** - * Gets list of [element]'s parameters by looking for them among [parameterRoot]'s children. - * @param parameterRoot - parent node of all parameter's nodes - * @return list of [element]'s parameters - */ - open fun getElementParametersList(parameterRoot: SimpleNode?): List> { - return when { - parameterRoot == null -> emptyList() - parameterRoot.hasLastLabel(PARAMETER_NAME_NODE) -> listOf(ParameterNode(parameterRoot, null, parameterRoot)) - else -> parameterRoot.getItOrChildrenOfType(SINGLE_PARAMETER_NODE).map { - ParameterNode(it, null, it.getItOrChildrenOfType(PARAMETER_NAME_NODE).firstOrNull()) - } - } - } - - private fun Node.hasLastLabel(typeLabel: String): Boolean { - return decompressTypeLabel(getTypeLabel()).last() == typeLabel - } - - private fun SimpleNode.getItOrChildrenOfType(typeLabel: String) : List { - return if (hasLastLabel(typeLabel)) { - listOf(this) - } else { - this.getChildrenOfType(typeLabel).mapNotNull { it as? SimpleNode } - } - } - - /** - * Gets name of [element]. - * @return [element]'s name node - */ - abstract fun getElementName(): SimpleNode? - - /** - * Gets parent node of all [element]'s parameter nodes. 
- * @return parameters' parent node - */ - abstract fun getElementParametersRoot(): SimpleNode? -} - - -class ArrowElement(private val element: SimpleNode) : JavaScriptElement(element) { - companion object { - private const val ARROW_NAME_NODE = "Identifier" - private const val ARROW_PARAMETER_NODE = "arrowFunctionParameters" - private const val ARROW_PARAMETER_INNER_NODE = "formalParameterList" - } - - override fun getElementName(): SimpleNode? { - return element.getChildren().firstOrNull { - it.getTypeLabel() == ARROW_NAME_NODE - } as? SimpleNode - } - - override fun getElementParametersRoot(): SimpleNode? { - val parameterRoot = element.getChildOfType(ARROW_PARAMETER_NODE) as? SimpleNode - return parameterRoot?.getChildOfType(ARROW_PARAMETER_INNER_NODE) as? SimpleNode ?: parameterRoot - } -} - - -class FunctionElement(private val element: SimpleNode) : JavaScriptElement(element) { - companion object { - private const val FUNCTION_NAME_NODE = "Identifier" - private const val FUNCTION_PARAMETER_NODE = "formalParameterList" - } - - override fun getElementName(): SimpleNode? { - return element.getChildren().firstOrNull { - it.getTypeLabel() == FUNCTION_NAME_NODE - } as? SimpleNode - } - - override fun getElementParametersRoot(): SimpleNode? { - return element.getChildOfType(FUNCTION_PARAMETER_NODE) as? SimpleNode - } -} - - -class MethodElement(private val element: SimpleNode) : JavaScriptElement(element) { - companion object { - private val METHOD_GETTERS_SETTERS = listOf("getter", "setter") - private const val METHOD_NAME_NODE = "identifierName" - private const val METHOD_PARAMETER_NODE = "formalParameterList" - } - - override fun getElementName(): SimpleNode? { - val methodNameParent = element.getChildren().firstOrNull { - METHOD_GETTERS_SETTERS.contains(it.getTypeLabel()) - } as? SimpleNode ?: element - - return methodNameParent.getChildren().firstOrNull { - decompressTypeLabel(it.getTypeLabel()).contains(METHOD_NAME_NODE) - } as? 
SimpleNode - } - - override fun getElementParametersRoot(): SimpleNode? { - return element.getChildOfType(METHOD_PARAMETER_NODE) as? SimpleNode - } -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt index 44ac5555..9c736ba1 100644 --- a/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/javascript/JavaScriptParser.kt @@ -1,7 +1,8 @@ package astminer.parse.antlr.javascript import astminer.common.model.Parser -import astminer.parse.antlr.SimpleNode +import astminer.parse.ParsingException +import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.convertAntlrTree import me.vovak.antlr.parser.JavaScriptLexer import me.vovak.antlr.parser.JavaScriptParser @@ -10,8 +11,8 @@ import org.antlr.v4.runtime.CommonTokenStream import java.io.InputStream import java.lang.Exception -class JavaScriptParser : Parser { - override fun parseInputStream(content: InputStream): SimpleNode? 
{ +class JavaScriptParser : Parser { + override fun parseInputStream(content: InputStream): AntlrNode { return try { val lexer = JavaScriptLexer(CharStreams.fromStream(content)) lexer.removeErrorListeners() @@ -21,7 +22,7 @@ class JavaScriptParser : Parser { val context = parser.program() convertAntlrTree(context, JavaScriptParser.ruleNames, JavaScriptParser.VOCABULARY) } catch (e: Exception) { - null + throw ParsingException("ANTLR", "JavaScript", e) } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt new file mode 100644 index 00000000..e7642cd2 --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionInfo.kt @@ -0,0 +1,140 @@ +package astminer.parse.antlr.php + +import astminer.common.model.EnclosingElement +import astminer.common.model.EnclosingElementType +import astminer.common.model.FunctionInfo +import astminer.common.model.FunctionInfoParameter +import astminer.parse.antlr.* +import astminer.parse.findEnclosingElementBy +import mu.KotlinLogging + +private val logger = KotlinLogging.logger("ANTLR-PHP-function-info") + +class ANTLRPHPFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { + override val returnType = getElementType(root) + override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION_NAME) + override val enclosingElement: EnclosingElement? = collectEnclosingElement() + override val parameters: List? 
= + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } + + private fun collectParameters(): List { + // Parameters in this grammar have the following structure (children order may be wrong): + // formal parameter list -> formal parameter -> Ampersand + // | -> type hint + // | -> ellipsis + // | -> var init -> var name + // | -> equal + // | -> default value + + // No parameters + val parameterList = root.getChildOfType(PARAMETERS_LIST) ?: return emptyList() + + // Check whether the function has only one parameter + // without ellipsis, type hint or default value + if (parameterList.hasLastLabel(PARAMETER_NAME) || parameterList.hasLastLabel(VAR_DECLARATION)) { + return listOf(assembleParameter(parameterList)) + } + + // Otherwise find all parameters + return parameterList + .getItOrChildrenOfType(PARAMETER) + .mapNotNull { + try { + assembleParameter(it) + } catch (e: IllegalStateException) { + logger.warn { "Error during collecting parameters for $name in $filePath: ${e.message}" } + null + } + } + } + + private fun assembleParameter(parameterNode: AntlrNode): FunctionInfoParameter { + return FunctionInfoParameter( + name = getParameterName(parameterNode), + type = getElementType(parameterNode) + ) + } + + private fun getParameterName(parameterNode: AntlrNode): String { + // "...$args" in PHP is equivalent to *args in Python + val isSplattedArg = parameterNode.getChildOfType(ELLIPSIS) != null + + val isPassedByReference = parameterNode.getChildOfType(REFERENCE) != null + + if (parameterNode.hasLastLabel(PARAMETER_NAME)) { + return parameterNode.originalToken ?: error("No name was found for a parameter") + } + + val varInit = parameterNode.getItOrChildrenOfType(VAR_DECLARATION).first() + + val name = varInit.getItOrChildrenOfType(PARAMETER_NAME).first().originalToken + ?: error("No name was found for a parameter") + + return (if (isPassedByReference) "&" else "") + (if (isSplattedArg) "..."
else "") + name + } + + private fun getElementType(element: AntlrNode): String? = element.getChildOfType(TYPE)?.originalToken + + private fun collectEnclosingElement(): EnclosingElement? { + val enclosing = root.findEnclosingElementBy { it.isPossibleEnclosing() } ?: return null + return try { + EnclosingElement( + root = enclosing, + name = getEnclosingElementName(enclosing), + type = getEnclosingType(enclosing) + ) + } catch (e: IllegalStateException) { + logger.warn { "Error during collecting enclosing element for $name in $filePath: ${e.message}" } + null + } + } + + private fun getEnclosingType(enclosing: AntlrNode): EnclosingElementType { + return when { + enclosing.isMethod() -> EnclosingElementType.Method + enclosing.isFunction() -> EnclosingElementType.Function + enclosing.isClass() -> EnclosingElementType.Class + enclosing.isAssignExpression() -> EnclosingElementType.VariableDeclaration + else -> error("No type can be associated") + } + } + + private fun getEnclosingElementName(enclosing: AntlrNode): String? 
{ + return when { + enclosing.isFunction() || enclosing.isClass() -> enclosing.getChildOfType(FUNCTION_NAME)?.originalToken + enclosing.isAssignExpression() -> enclosing.children.find { it.hasLastLabel(PARAMETER_NAME) }?.originalToken + else -> error("No type can be associated") + } + } + + // No check for method because method is a function + private fun AntlrNode.isPossibleEnclosing() = isFunction() || isClass() || isAssignExpression() + + private fun AntlrNode.isMethod() = isFunction() && hasFirstLabel(CLASS_MEMBER) + + private fun AntlrNode.isFunction() = getChildOfType(LAMBDA_TOKEN) != null || getChildOfType(FUNCTION_TOKEN) != null + + private fun AntlrNode.isAssignExpression() = hasFirstLabel(EXPRESSION) && getChildOfType(ASSIGN_OP) != null + + private fun AntlrNode.isClass(): Boolean = hasLastLabel(CLASS_DECLARATION) + + companion object { + const val PARAMETERS_LIST = "formalParameterList" + const val PARAMETER = "formalParameter" + const val TYPE = "typeHint" + const val PARAMETER_NAME = "VarName" + const val CLASS_MEMBER = "classStatement" + const val FUNCTION_NAME = "identifier" + const val CLASS_DECLARATION = "classDeclaration" + const val VAR_DECLARATION = "variableInitializer" + const val ELLIPSIS = "Ellipsis" + const val EXPRESSION = "expression" + const val ASSIGN_OP = "assignmentOperator" + const val LAMBDA_TOKEN = "LambdaFn" + const val FUNCTION_TOKEN = "Function_" + const val REFERENCE = "Ampersand" + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt new file mode 100644 index 00000000..cc640244 --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPFunctionSplitter.kt @@ -0,0 +1,18 @@ +package astminer.parse.antlr.php + +import astminer.common.model.FunctionInfo +import astminer.common.model.TreeFunctionSplitter +import astminer.parse.antlr.AntlrNode + +class PHPFunctionSplitter : TreeFunctionSplitter { + override fun 
splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { + return root.preOrder() + .filter { it.typeLabel == LAMBDA_TOKEN || it.typeLabel == FUNCTION_TOKEN } + .mapNotNull { node -> node.parent?.let { statement -> ANTLRPHPFunctionInfo(statement, filePath) } } + } + + companion object { + const val LAMBDA_TOKEN = "LambdaFn" + const val FUNCTION_TOKEN = "Function_" + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt new file mode 100644 index 00000000..6e9b97ba --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/php/PHPParser.kt @@ -0,0 +1,36 @@ +package astminer.parse.antlr.php + +import astminer.common.model.Parser +import astminer.parse.ParsingException +import astminer.parse.antlr.AntlrNode +import astminer.parse.antlr.convertAntlrTree +import me.vovak.antlr.parser.CaseChangingCharStream +import me.vovak.antlr.parser.PhpLexer +import me.vovak.antlr.parser.PhpParser +import org.antlr.v4.runtime.CharStreams +import org.antlr.v4.runtime.CommonTokenStream +import java.io.InputStream + +// Be aware that this parser can have some troubles with +// parsing function modifiers and string concatenation via dot +// (AST just falls apart when class field contain dot concatenation) +// More details can be found in corresponding issues +// https://github.com/antlr/grammars-v4/issues/1991 +class PHPParser : Parser { + override fun parseInputStream(content: InputStream): AntlrNode { + return try { + val stream = CharStreams.fromStream(content) + // Php keywords are case-insensitive, so case changing stream must be used + // Tokens won't be in lower case in resulting tree + val lexer = PhpLexer(CaseChangingCharStream(stream, false)) + lexer.removeErrorListeners() + val tokens = CommonTokenStream(lexer) + val parser = PhpParser(tokens) + parser.removeErrorListeners() + val context = parser.htmlDocument() + convertAntlrTree(context, PhpParser.ruleNames, PhpParser.VOCABULARY) + } catch 
(e: Exception) { + throw ParsingException("ANTLR", "PHP", e) + } + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt new file mode 100644 index 00000000..bfbb5600 --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/python/AntlrPythonFunctionInfo.kt @@ -0,0 +1,103 @@ +package astminer.parse.antlr.python + +import astminer.common.model.* +import astminer.parse.antlr.* +import astminer.parse.findEnclosingElementBy +import mu.KotlinLogging + +private val logger = KotlinLogging.logger("Antlr-python-function-info") + +class AntlrPythonFunctionInfo(override val root: AntlrNode, override val filePath: String) : FunctionInfo { + override val nameNode: AntlrNode? = collectNameNode() + override val enclosingElement: EnclosingElement? = collectEnclosingElement() + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } + + private fun collectNameNode(): AntlrNode? 
= root.getChildOfType(FUNCTION_NAME_NODE) + + private fun collectParameters(): List { + val parametersRoot = root.getChildOfType(METHOD_PARAMETER_NODE) + val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) ?: return emptyList() + + val methodHaveOnlyOneParameter = + innerParametersRoot.lastLabelIn(listOf(METHOD_SINGLE_PARAMETER_NODE, PARAMETER_NAME_NODE)) + if (methodHaveOnlyOneParameter) { + return listOf(assembleMethodInfoParameter(innerParametersRoot)) + } + + return innerParametersRoot.getChildrenOfType(METHOD_SINGLE_PARAMETER_NODE).map { node -> + assembleMethodInfoParameter(node) + } + } + + private fun assembleMethodInfoParameter(parameterNode: AntlrNode): FunctionInfoParameter { + val parameterHaveNoDefaultOrType = parameterNode.hasLastLabel(PARAMETER_NAME_NODE) + val parameterNameNode = + if (parameterHaveNoDefaultOrType) parameterNode else parameterNode.getChildOfType(PARAMETER_NAME_NODE) + val parameterName = parameterNameNode?.originalToken + require(parameterName != null) { "Method name was not found" } + + val parameterType = parameterNode.getChildOfType(PARAMETER_TYPE_NODE)?.getTokensFromSubtree() + + return FunctionInfoParameter( + name = parameterName, + type = parameterType + ) + } + + // TODO: refactor remove nested whens + private fun collectEnclosingElement(): EnclosingElement? 
{ + val enclosingNode = root.findEnclosingElementBy { it.lastLabelIn(POSSIBLE_ENCLOSING_ELEMENTS) } ?: return null + val type = when { + enclosingNode.hasLastLabel(CLASS_DECLARATION_NODE) -> EnclosingElementType.Class + enclosingNode.hasLastLabel(FUNCTION_NODE) -> + if (enclosingNode.isMethod()) EnclosingElementType.Method else EnclosingElementType.Function + else -> error("Enclosing node can only be function or class") + } + val name = when (type) { + EnclosingElementType.Class -> enclosingNode.getChildOfType(CLASS_NAME_NODE) + EnclosingElementType.Method, EnclosingElementType.Function -> + enclosingNode.getChildOfType(FUNCTION_NAME_NODE) + else -> error("Enclosing node can only be function or class") + }?.originalToken + return EnclosingElement( + type = type, + name = name, + root = enclosingNode + ) + } + + private fun Node.isMethod(): Boolean { + val outerBody = parent + if (outerBody?.typeLabel != BODY) return false + + val enclosingNode = outerBody.parent + require(enclosingNode != null) { "Found body without enclosing element" } + + val lastLabel = decompressTypeLabel(enclosingNode.typeLabel).last() + return lastLabel == CLASS_DECLARATION_NODE + } + + companion object { + private const val FUNCTION_NODE = "funcdef" + private const val FUNCTION_NAME_NODE = "NAME" + + private const val CLASS_DECLARATION_NODE = "classdef" + private const val CLASS_NAME_NODE = "NAME" + + private const val METHOD_PARAMETER_NODE = "parameters" + private const val METHOD_PARAMETER_INNER_NODE = "typedargslist" + private const val METHOD_SINGLE_PARAMETER_NODE = "tfpdef" + private const val PARAMETER_NAME_NODE = "NAME" + private const val PARAMETER_TYPE_NODE = "test" + // It seems strange, but it works because the actual type label will be + // test|or_test|and_test|not_test|comparison|expr|xor_expr...
+ // ..|and_expr|shift_expr|arith_expr|term|factor|power|atom_expr|atom|NAME + + private val POSSIBLE_ENCLOSING_ELEMENTS = listOf(CLASS_DECLARATION_NODE, FUNCTION_NODE) + private const val BODY = "suite" + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt new file mode 100644 index 00000000..73cbb3bf --- /dev/null +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonFunctionSplitter.kt @@ -0,0 +1,16 @@ +package astminer.parse.antlr.python + +import astminer.common.model.* +import astminer.parse.antlr.AntlrNode +import astminer.parse.antlr.hasLastLabel + +class PythonFunctionSplitter : TreeFunctionSplitter { + private val methodNode = "funcdef" + + override fun splitIntoFunctions(root: AntlrNode, filePath: String): Collection> { + val methodRoots = root.preOrder().filter { + it.hasLastLabel(methodNode) + } + return methodRoots.map { AntlrPythonFunctionInfo(it, filePath) } + } +} diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt deleted file mode 100644 index 746f52a2..00000000 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonMethodSplitter.kt +++ /dev/null @@ -1,76 +0,0 @@ -package astminer.parse.antlr.python - -import astminer.common.* -import astminer.common.model.* -import astminer.parse.antlr.SimpleNode -import astminer.parse.antlr.decompressTypeLabel - - -class PythonMethodSplitter : TreeMethodSplitter { - - companion object { - private const val METHOD_NODE = "funcdef" - private const val METHOD_NAME_NODE = "NAME" - - private const val CLASS_DECLARATION_NODE = "classdef" - private const val CLASS_NAME_NODE = "NAME" - - private const val METHOD_PARAMETER_NODE = "parameters" - private const val METHOD_PARAMETER_INNER_NODE = "typedargslist" - private const val METHOD_SINGLE_PARAMETER_NODE = "tfpdef" - private const val PARAMETER_NAME_NODE = 
"NAME" - } - - override fun splitIntoMethods(root: SimpleNode): Collection> { - val methodRoots = root.preOrder().filter { - decompressTypeLabel(it.getTypeLabel()).last() == METHOD_NODE - } - return methodRoots.map { collectMethodInfo(it as SimpleNode) } - } - - private fun collectMethodInfo(methodNode: SimpleNode): MethodInfo { - val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) as? SimpleNode - - val classRoot = getEnclosingClass(methodNode) - val className = classRoot?.getChildOfType(CLASS_NAME_NODE) as? SimpleNode - - val parametersRoot = methodNode.getChildOfType(METHOD_PARAMETER_NODE) as? SimpleNode - val innerParametersRoot = parametersRoot?.getChildOfType(METHOD_PARAMETER_INNER_NODE) as? SimpleNode - - val parametersList = when { - innerParametersRoot != null -> getListOfParameters(innerParametersRoot) - parametersRoot != null -> getListOfParameters(parametersRoot) - else -> emptyList() - } - - return MethodInfo( - MethodNode(methodNode, null, methodName), - ElementNode(classRoot, className), - parametersList - ) - } - - private fun getEnclosingClass(node: SimpleNode): SimpleNode? { - if (decompressTypeLabel(node.getTypeLabel()).last() == CLASS_DECLARATION_NODE) { - return node - } - val parentNode = node.getParent() as? 
SimpleNode - if (parentNode != null) { - return getEnclosingClass(parentNode) - } - return null - } - - private fun getListOfParameters(parameterRoot: SimpleNode): List> { - if (decompressTypeLabel(parameterRoot.getTypeLabel()).last() == PARAMETER_NAME_NODE) { - return listOf(ParameterNode(parameterRoot, null, parameterRoot)) - } - return parameterRoot.getChildrenOfType(METHOD_SINGLE_PARAMETER_NODE).map { - if (decompressTypeLabel(it.getTypeLabel()).last() == PARAMETER_NAME_NODE) { - ParameterNode(it as SimpleNode, null, it) - } else { - ParameterNode(it as SimpleNode, null, it.getChildOfType(PARAMETER_NAME_NODE) as SimpleNode) - } - } - } -} diff --git a/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt b/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt index 4c1e021a..66181fe1 100644 --- a/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt +++ b/src/main/kotlin/astminer/parse/antlr/python/PythonParser.kt @@ -1,17 +1,18 @@ package astminer.parse.antlr.python -import me.vovak.antlr.parser.Python3Lexer -import me.vovak.antlr.parser.Python3Parser import astminer.common.model.Parser -import astminer.parse.antlr.SimpleNode +import astminer.parse.ParsingException +import astminer.parse.antlr.AntlrNode import astminer.parse.antlr.convertAntlrTree +import me.vovak.antlr.parser.Python3Lexer +import me.vovak.antlr.parser.Python3Parser import org.antlr.v4.runtime.CharStreams import org.antlr.v4.runtime.CommonTokenStream import java.io.InputStream import java.lang.Exception -class PythonParser : Parser { - override fun parseInputStream(content: InputStream): SimpleNode? 
{ +class PythonParser : Parser { + override fun parseInputStream(content: InputStream): AntlrNode { return try { val lexer = Python3Lexer(CharStreams.fromStream(content)) lexer.removeErrorListeners() @@ -21,7 +22,7 @@ class PythonParser : Parser { val context = parser.file_input() convertAntlrTree(context, Python3Parser.ruleNames, Python3Parser.VOCABULARY) } catch (e: Exception) { - return null + throw ParsingException("ANTLR", "Python", e) } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt deleted file mode 100644 index 8dcc14d0..00000000 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyCppParser.kt +++ /dev/null @@ -1,188 +0,0 @@ -package astminer.parse.cpp - -import astminer.common.model.ParseResult -import astminer.common.model.Parser -import gremlin.scala.Key -import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.codepropertygraph.generated.EdgeTypes -import io.shiftleft.codepropertygraph.generated.NodeKeys -import io.shiftleft.codepropertygraph.generated.NodeTypes -import io.shiftleft.fuzzyc2cpg.FuzzyC2Cpg -import io.shiftleft.fuzzyc2cpg.output.inmemory.OutputModuleFactory -import org.apache.commons.io.FileUtils -import org.apache.tinkerpop.gremlin.structure.Edge -import org.apache.tinkerpop.gremlin.structure.Element -import org.apache.tinkerpop.gremlin.structure.Vertex -import java.io.File -import java.io.InputStream - -/** - * Parser of C/C++ files based on [FuzzyC2Cpg]. - * By default, it assumes that files have been preprocessed and skips all macroses. 
- */ -class FuzzyCppParser : Parser { - - companion object { - private val supportedExtensions = listOf("c", "cpp") - - data class ExpandableNodeKey( - val key: String, - val supportedNodeLabels: List, - val order: Int - ) - - private val expandableNodeKeys = listOf( - ExpandableNodeKey("NAME", listOf( - NodeTypes.TYPE, NodeTypes.TYPE_DECL, NodeTypes.TYPE_PARAMETER, NodeTypes.MEMBER, NodeTypes.TYPE_ARGUMENT, - NodeTypes.METHOD, NodeTypes.METHOD_PARAMETER_IN, NodeTypes.LOCAL, NodeTypes.MODIFIER, - NodeTypes.IDENTIFIER, NodeTypes.CALL, - NodeTypes.UNKNOWN - ), 0), - ExpandableNodeKey("TYPE_FULL_NAME", listOf( - NodeTypes.TYPE, - NodeTypes.METHOD_RETURN, NodeTypes.METHOD_PARAMETER_IN, NodeTypes.LOCAL, - NodeTypes.IDENTIFIER, - NodeTypes.UNKNOWN - ), 0), - ExpandableNodeKey("ALIAS_TYPE_FULL_NAME", listOf( - NodeTypes.TYPE_DECL, - NodeTypes.UNKNOWN - ), 0) - ) - - data class ReplaceableNodeKey(val key: String, val condition: (Vertex) -> Boolean) - - private val replaceableNodeKeys = listOf( - ReplaceableNodeKey("NAME") { v -> - v.keys().contains("NAME") && - v.value("NAME").startsWith("") - }, - ReplaceableNodeKey("PARSER_TYPE_NAME") { v -> - v.keys().contains("PARSER_TYPE_NAME") - } - ) - } - - /** - * Parse input stream and create an AST. - * If you already have a file with code you need to parse, better use [parseFiles] or [parseInputStream], - * otherwise temporary file for input stream will be created because of fuzzyc2cpg API. - * @param content to parse - * @return root of AST if content was parsed, null otherwise - */ - override fun parseInputStream(content: InputStream): FuzzyNode? 
{ - val file = File.createTempFile("fuzzy", ".cpp") - file.deleteOnExit() - FileUtils.copyInputStreamToFile(content, file) - val nodes = parseFiles(listOf(file)) - return nodes[0].root - } - - /** - * @see [Parser.parseInputStream] - */ - override fun parseFiles(files: List): List> { - val outputModuleFactory = OutputModuleFactory() - val paths = files.map { it.path } - FuzzyC2Cpg(outputModuleFactory).runAndOutput(paths.toTypedArray()) - val cpg = outputModuleFactory.internalGraph - return cpg2Nodes(cpg) - } - - /** - * Convert [cpg][io.shiftleft.codepropertygraph.Cpg] created by fuzzyc2cpg - * to list of [FuzzyNode][astminer.parse.cpp.FuzzyNode]. - * Cpg may contain graphs for several files, in that case several ASTs will be created. - * @param cpg to be converted - * @return list of AST roots - */ - private fun cpg2Nodes(cpg: Cpg): List> { - val g = cpg.graph().traversal() - val vertexToNode = HashMap() - g.E().hasLabel(EdgeTypes.AST).forEach { addNodesFromEdge(it, vertexToNode) } - return g.V().hasLabel(NodeTypes.FILE).toList().map { ParseResult(vertexToNode[it], it.value("NAME")) } - } - - /** - * Run g++ preprocessor (if [preprocessCommand] is set) on a given file excluding 'include' directives. - * The result of preprocessing is stored in created directory [outputDir] - * @param file file to preprocess - * @param outputDir directory where the preprocessed file will be stored - * @param preprocessCommand bash command that runs preprocessing, "g++ -E" by default - */ - fun preprocessFile(file: File, outputDir: File, preprocessCommand: String = "g++ -E") { - outputDir.mkdirs() - preprocessCppCode(file, outputDir, preprocessCommand).runCommand(file.absoluteFile.parentFile) - } - - /** - * Run preprocessing for all .c and .cpp files in the [project][projectRoot]. - * The preprocessed files will be stored in [outputDir], replicating file hierarchy of the original project. 
- * @param projectRoot root of the project that should be preprocessed - * @param outputDir directory where the preprocessed files will be stored - */ - fun preprocessProject(projectRoot: File, outputDir: File) { - val files = projectRoot.walkTopDown() - .filter { file -> supportedExtensions.contains(file.extension) } - files.forEach { file -> - val relativeFilePath = file.relativeTo(projectRoot) - val outputPath = outputDir.resolve(relativeFilePath.parent) - outputPath.mkdirs() - preprocessFile(file, outputPath) - } - } - - /** - * Create string from element with its label and all its properties. - * @param e - element for converting to string - * @return created string - */ - fun elementToString(e: Element) = with(StringBuilder()) { - append("${e.label()} | ") - e.keys().forEach { k -> append("$k:${e.value(k)} ") } - appendln() - toString() - } - - private fun addNodesFromEdge(e: Edge, map: HashMap) { - val parentNode = map.getOrPut(e.outVertex()) { createNodeFromVertex(e.outVertex()) } - val childNode = map.getOrPut(e.inVertex()) { createNodeFromVertex(e.inVertex()) } - parentNode.addChild(childNode) - } - - private fun createNodeFromVertex(v: Vertex): FuzzyNode { - val token: String? = v.getValueOrNull(NodeKeys.CODE) - val order: Int? 
= v.getValueOrNull(NodeKeys.ORDER) - - for (replaceableNodeKey in replaceableNodeKeys) { - if (replaceableNodeKey.condition(v)) { - val node = FuzzyNode(v.value(replaceableNodeKey.key), token, order) - v.keys().forEach { k -> - node.setMetadata(k, v.value(k)) - } - return node - } - } - - val node = FuzzyNode(v.label(), token, order) - v.keys().forEach { k -> - for (expandableNodeKey in expandableNodeKeys) { - if (expandableNodeKey.key == k && expandableNodeKey.supportedNodeLabels.contains(v.label())) { - val keyNode = FuzzyNode(k, v.value(k).toString(), expandableNodeKey.order) - node.addChild(keyNode) - return@forEach - } - } - node.setMetadata(k, v.value(k)) - } - return node - } - - private fun Vertex.getValueOrNull(key: Key): T? { - return try { - this.value(key.name()) - } catch (e: IllegalStateException) { - null - } - } -} diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyMethodSplitter.kt b/src/main/kotlin/astminer/parse/cpp/FuzzyMethodSplitter.kt deleted file mode 100644 index 9e351c72..00000000 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyMethodSplitter.kt +++ /dev/null @@ -1,62 +0,0 @@ -package astminer.parse.cpp - -import astminer.common.* -import astminer.common.model.* - -class FuzzyMethodSplitter : TreeMethodSplitter { - - companion object { - private const val METHOD_NODE = "METHOD" - private const val METHOD_NAME_NODE = "NAME" - private const val METHOD_RETURN_NODE = "METHOD_RETURN" - private const val METHOD_RETURN_TYPE_NODE = "TYPE_FULL_NAME" - - private const val CLASS_DECLARATION_NODE = "TYPE_DECL" - private const val CLASS_NAME_NODE = "NAME" - - private const val METHOD_PARAMETER_NODE = "METHOD_PARAMETER_IN" - private const val PARAMETER_NAME_NODE = "NAME" - private const val PARAMETER_TYPE_NODE = "TYPE_FULL_NAME" - } - - override fun splitIntoMethods(root: FuzzyNode): Collection> { - val methodRoots = root.preOrder().filter { it.getTypeLabel() == METHOD_NODE } - return methodRoots.map { collectMethodInfo(it as FuzzyNode) } - } - - 
private fun collectMethodInfo(methodNode: FuzzyNode): MethodInfo { - val methodReturnType = - methodNode.getChildOfType(METHOD_RETURN_NODE)?.getChildOfType(METHOD_RETURN_TYPE_NODE) as? FuzzyNode - val methodName = methodNode.getChildOfType(METHOD_NAME_NODE) as? FuzzyNode - - val classRoot = getEnclosingClass(methodNode) - val className = classRoot?.getChildOfType(CLASS_NAME_NODE) as? FuzzyNode - - val parameters = methodNode.getChildrenOfType(METHOD_PARAMETER_NODE) - val parameterNodes = parameters.map { node -> - val fuzzyNode = node as FuzzyNode - ParameterNode( - fuzzyNode, - fuzzyNode.getChildOfType(PARAMETER_TYPE_NODE) as? FuzzyNode, - fuzzyNode.getChildOfType(PARAMETER_NAME_NODE) as? FuzzyNode - ) - }.toList() - - return MethodInfo( - MethodNode(methodNode, methodReturnType, methodName), - ElementNode(classRoot, className), - parameterNodes - ) - } - - private fun getEnclosingClass(node: FuzzyNode): FuzzyNode? { - if (node.getTypeLabel() == CLASS_DECLARATION_NODE) { - return node - } - val parentNode = node.getParent() as? FuzzyNode - if (parentNode != null) { - return getEnclosingClass(parentNode) - } - return null - } -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/cpp/FuzzyNode.kt b/src/main/kotlin/astminer/parse/cpp/FuzzyNode.kt deleted file mode 100644 index 490803c9..00000000 --- a/src/main/kotlin/astminer/parse/cpp/FuzzyNode.kt +++ /dev/null @@ -1,65 +0,0 @@ -package astminer.parse.cpp - -import astminer.common.model.Node -import com.google.common.collect.TreeMultiset - -/** - * Node for AST, created by fuzzyc2cpg. - * @param typeLabel - node's label - * @param token - node's token - * @param order - node's order, which used to express the ordering of children in the AST when it matters - */ -class FuzzyNode(private val typeLabel: String, private val token: String?, order: Int?) : Node { - private val order = order ?: -1 - private val metadata: MutableMap = HashMap() - private var parent: Node? 
= null - private var children = TreeMultiset.create(compareBy( - { it.order }, - { System.identityHashCode(it) } - )) - - fun getOrder(): Int { - return order - } - - fun addChild(node: FuzzyNode) { - children.add(node) - node.setParent(this) - } - - override fun getTypeLabel(): String { - return typeLabel - } - - override fun getChildren(): List { - return children.toList() - } - - override fun getParent(): Node? { - return parent - } - - override fun getToken(): String { - return token ?: "null" - } - - override fun isLeaf(): Boolean { - return children.isEmpty() - } - - override fun getMetadata(key: String): Any? { - return metadata[key] - } - - override fun setMetadata(key: String, value: Any) { - metadata[key] = value - } - - private fun setParent(node: Node) { - parent = node - } - - override fun removeChildrenOfType(typeLabel: String) { - children.removeIf { it.getTypeLabel() == typeLabel } - } -} diff --git a/src/main/kotlin/astminer/parse/cpp/utils.kt b/src/main/kotlin/astminer/parse/cpp/utils.kt deleted file mode 100644 index 5eade8d4..00000000 --- a/src/main/kotlin/astminer/parse/cpp/utils.kt +++ /dev/null @@ -1,26 +0,0 @@ -package astminer.parse.cpp - -import java.io.File -import java.util.concurrent.TimeUnit - -fun String.runCommand(workingDir: File) { - ProcessBuilder("/bin/sh", "-c", this) - .directory(workingDir) - .redirectOutput(ProcessBuilder.Redirect.INHERIT) - .redirectError(ProcessBuilder.Redirect.INHERIT) - .start() - .waitFor(60, TimeUnit.MINUTES) -} - -fun preprocessCppCode(file: File, outputDir: File, preprocessCommand: String) = """ - grep '^\s*#\s*include' ${file.absolutePath} >__tmp_include.cpp - grep -v '^\s*#\s*include\b' ${file.absolutePath} >__tmp_code.cpp - touch __tmp_preprocessed.cpp - if [ -s __tmp_code.cpp ] - then - $preprocessCommand __tmp_code.cpp | grep -v ^# >__tmp_preprocessed.cpp - fi - cat __tmp_include.cpp >${outputDir.absolutePath}/${file.name} - cat __tmp_preprocessed.cpp >>${outputDir.absolutePath}/${file.name} - rm 
__tmp_*.cpp -""".trimIndent() \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/factory.kt b/src/main/kotlin/astminer/parse/factory.kt new file mode 100644 index 00000000..2ccdccf7 --- /dev/null +++ b/src/main/kotlin/astminer/parse/factory.kt @@ -0,0 +1,45 @@ +package astminer.parse + +import astminer.common.model.ParsingResultFactory +import astminer.config.FileExtension +import astminer.config.ParserType +import astminer.parse.antlr.AntlrJavaParsingResultFactory +import astminer.parse.antlr.AntlrJavascriptParsingResultFactory +import astminer.parse.antlr.AntlrPHPParsingResultFactory +import astminer.parse.antlr.AntlrPythonParsingResultFactory +import astminer.parse.fuzzy.FuzzyParsingResultFactory +import astminer.parse.gumtree.GumtreeJavaParsingResultFactory +import astminer.parse.gumtree.GumtreePythonParsingResultFactory + +fun getParsingResultFactory(extension: FileExtension, parserType: ParserType): ParsingResultFactory { + return when (parserType) { + ParserType.GumTree -> getGumtreeParsingResultFactory(extension) + ParserType.Antlr -> getAntlrParsingResultFactory(extension) + ParserType.Fuzzy -> getFuzzyParsingResultFactory(extension) + } +} + +private fun getGumtreeParsingResultFactory(extension: FileExtension): ParsingResultFactory { + return when (extension) { + FileExtension.Java -> GumtreeJavaParsingResultFactory + FileExtension.Python -> GumtreePythonParsingResultFactory + else -> throw UnsupportedOperationException() + } +} + +private fun getAntlrParsingResultFactory(extension: FileExtension): ParsingResultFactory { + return when (extension) { + FileExtension.Java -> AntlrJavaParsingResultFactory + FileExtension.JavaScript -> AntlrJavascriptParsingResultFactory + FileExtension.Python -> AntlrPythonParsingResultFactory + FileExtension.PHP -> AntlrPHPParsingResultFactory + else -> throw UnsupportedOperationException() + } +} + +private fun getFuzzyParsingResultFactory(extension: FileExtension): ParsingResultFactory { + return when 
(extension) { + FileExtension.C, FileExtension.Cpp -> FuzzyParsingResultFactory + else -> throw UnsupportedOperationException() + } +} diff --git a/src/main/kotlin/astminer/parse/fuzzy/FuzzyNode.kt b/src/main/kotlin/astminer/parse/fuzzy/FuzzyNode.kt new file mode 100644 index 00000000..af122b38 --- /dev/null +++ b/src/main/kotlin/astminer/parse/fuzzy/FuzzyNode.kt @@ -0,0 +1,36 @@ +package astminer.parse.fuzzy + +import astminer.common.model.Node +import com.google.common.collect.TreeMultiset + +/** + * Node for AST, created by fuzzyc2cpg. + * @param typeLabel - node's label + * @param originalToken - node's token + * @param order - node's order, which used to express the ordering of children in the AST when it matters + */ +class FuzzyNode( + override val typeLabel: String, + originalToken: String?, + order: Int? +) : Node(originalToken) { + private val order = order ?: -1 + override var parent: Node? = null + private val childrenMultiset = TreeMultiset.create( + compareBy({ it.order }, { System.identityHashCode(it) }) + ) + + override val children + get() = childrenMultiset.toList() + + fun addChild(node: FuzzyNode) { + childrenMultiset.add(node) + node.parent = this + } + + override fun removeChildrenOfType(typeLabel: String) { + childrenMultiset.removeIf { it.typeLabel == typeLabel } + } + + override fun preOrder(): List = super.preOrder().map { it as FuzzyNode } +} diff --git a/src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResultFactory.kt b/src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResultFactory.kt new file mode 100644 index 00000000..da8bfe00 --- /dev/null +++ b/src/main/kotlin/astminer/parse/fuzzy/FuzzyParsingResultFactory.kt @@ -0,0 +1,40 @@ +package astminer.parse.fuzzy + +import astminer.common.model.ParsingResult +import astminer.common.model.PreprocessingParsingResultFactory +import astminer.parse.fuzzy.cpp.FuzzyCppParser +import astminer.parse.fuzzy.cpp.FuzzyFunctionSplitter +import java.io.File + +object FuzzyParsingResultFactory : 
PreprocessingParsingResultFactory { + override fun parse(file: File): ParsingResult { + val actualFile = if (file.nameWithoutExtension.endsWith(preprocessSuffix)) { + val actualFileNameSize = file.nameWithoutExtension.length - preprocessSuffix.length + file.parentFile.resolve("${file.nameWithoutExtension.take(actualFileNameSize)}.${file.extension}") + } else file + return CppFuzzyParsingResult(actualFile) + } + + /** + * Run g++ preprocessor (with [preprocessCommand]) on a given file excluding 'include' directives. + * The result of preprocessing is stored in create file "_preprocessed.cpp" + * @param file file to preprocess + * + */ + override fun preprocess(file: File, outputDir: File?): File { + if (file.extension !in supportedExtensions) return file + val outputFile = outputDir?.resolve(file.name) + ?: file.parentFile.resolve("${file.nameWithoutExtension}$preprocessSuffix.${file.extension}") + preprocessCppCode(file, outputFile, preprocessCommand).runCommand(file.absoluteFile.parentFile) + return outputFile + } + + class CppFuzzyParsingResult(file: File) : ParsingResult(file) { + override val root = FuzzyCppParser().parseFile(file) + override val splitter = FuzzyFunctionSplitter() + } + + private val supportedExtensions = listOf("c", "cpp") + private const val preprocessCommand: String = "g++ -E" + private const val preprocessSuffix = "_preprocessed" +} diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt new file mode 100644 index 00000000..711293b7 --- /dev/null +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppFunctionInfo.kt @@ -0,0 +1,66 @@ +package astminer.parse.fuzzy.cpp + +import astminer.common.model.EnclosingElement +import astminer.common.model.EnclosingElementType +import astminer.common.model.FunctionInfo +import astminer.common.model.FunctionInfoParameter +import astminer.parse.findEnclosingElementBy +import astminer.parse.fuzzy.FuzzyNode +import 
mu.KotlinLogging + +private val logger = KotlinLogging.logger("Fuzzyparser-Cpp-function-info") + +class FuzzyCppFunctionInfo(override val root: FuzzyNode, override val filePath: String) : FunctionInfo { + + override val returnType: String? = collectReturnType() + override val enclosingElement: EnclosingElement? = collectEnclosingClass() + override val nameNode: FuzzyNode? = collectNameNode() + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } + + private fun collectNameNode(): FuzzyNode? = root.getChildOfType(METHOD_NAME_NODE) as? FuzzyNode + + private fun collectReturnType(): String? = + root.getChildOfType(METHOD_RETURN_NODE)?.getChildOfType(METHOD_RETURN_TYPE_NODE)?.originalToken + + private fun collectEnclosingClass(): EnclosingElement? { + val enclosingClass = findEnclosingClass() ?: return null + val enclosingClassName = findEnclosingClassName(enclosingClass) + return EnclosingElement( + root = enclosingClass, + type = EnclosingElementType.Class, + name = enclosingClassName + ) + } + + private fun findEnclosingClass(): FuzzyNode? = + root.findEnclosingElementBy { it.typeLabel == CLASS_DECLARATION_NODE } + + private fun findEnclosingClassName(enclosingClass: FuzzyNode): String? 
= + enclosingClass.getChildOfType(CLASS_NAME_NODE)?.originalToken + + private fun collectParameters(): List { + val parameters = root.getChildrenOfType(METHOD_PARAMETER_NODE) + return parameters.map { param -> + val type = param.getChildOfType(PARAMETER_TYPE_NODE)?.originalToken + val name = param.getChildOfType(PARAMETER_NAME_NODE)?.originalToken ?: "" + FunctionInfoParameter(name, type) + } + } + + companion object { + private const val METHOD_NAME_NODE = "NAME" + private const val METHOD_RETURN_NODE = "METHOD_RETURN" + private const val METHOD_RETURN_TYPE_NODE = "TYPE_FULL_NAME" + + private const val CLASS_DECLARATION_NODE = "TYPE_DECL" + private const val CLASS_NAME_NODE = "NAME" + + private const val METHOD_PARAMETER_NODE = "METHOD_PARAMETER_IN" + private const val PARAMETER_NAME_NODE = "NAME" + private const val PARAMETER_TYPE_NODE = "TYPE_FULL_NAME" + } +} diff --git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt new file mode 100644 index 00000000..fe641865 --- /dev/null +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyCppParser.kt @@ -0,0 +1,176 @@ +package astminer.parse.fuzzy.cpp + +import astminer.common.model.Parser +import astminer.parse.ParsingException +import astminer.parse.fuzzy.FuzzyNode +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.EdgeTypes +import io.shiftleft.codepropertygraph.generated.NodeKeys +import io.shiftleft.codepropertygraph.generated.NodeTypes +import io.shiftleft.fuzzyc2cpg.FuzzyC2Cpg +import overflowdb.Edge +import overflowdb.Node +import scala.Option +import scala.collection.immutable.Set +import java.io.File +import java.io.InputStream + +/** + * Parser of C/C++ files based on [FuzzyC2Cpg]. + * By default, it assumes that files have been preprocessed and skips all macroses. + */ +class FuzzyCppParser : Parser { + + /** + * Parse input stream and create an AST. 
+ * If you already have a file with code you need to parse, better use [parseFile], + * otherwise temporary file for input stream will be created because of fuzzyc2cpg API. + * @param content to parse + * @return root of AST if content was parsed, null otherwise + */ + override fun parseInputStream(content: InputStream): FuzzyNode { + val file = File.createTempFile("fuzzy", ".cpp") + file.deleteOnExit() + file.outputStream().use { + content.copyTo(it) + } + return parseFile(file) + } + + /** + * Parse a single file and create an AST. + * @param file to parse + * @return root of an AST (null if parsing failed) + */ + override fun parseFile(file: File): FuzzyNode { + // We need some tweaks to create Scala sets from Kotlin code + val pathSetScalaBuilder = Set.newBuilder() + pathSetScalaBuilder.addOne(file.path) + val pathSet = pathSetScalaBuilder.result() + val extensionSetScalaBuilder = Set.newBuilder() + extensionSetScalaBuilder.addOne(".${file.extension}") + val extensionSet = extensionSetScalaBuilder.result() + + // Kotlin cannot use default value Scala:None for the argument, so we create it manually + val optionalOutputPath: Option = Option.empty() + + val cpg = FuzzyC2Cpg().runAndOutput(pathSet, extensionSet, optionalOutputPath) + return cpg2Nodes(cpg, file.path) + } + + /** + * Convert [cpg][io.shiftleft.codepropertygraph.Cpg] created by fuzzyc2cpg + * to list of [FuzzyNode][astminer.parse.fuzzy.FuzzyNode]. + * Cpg may contain graphs for several files, in that case several ASTs will be created. 
+ * @param cpg to be converted + * @param filePath to the parsed file that will be used if parsing failed + * @return list of AST roots + */ + private fun cpg2Nodes(cpg: Cpg, filePath: String): FuzzyNode { + val g = cpg.graph() + val vertexToNode = mutableMapOf() + g.E().forEach { + if (it.label() == EdgeTypes.AST) { + addNodesFromEdge(it, vertexToNode) + } + } + g.V().forEach { + if (it.label() == NodeTypes.FILE) { + val actualFilePath = it.property("NAME").toString() + if (File(actualFilePath).absolutePath != File(filePath).absolutePath) { + println("While parsing $filePath, actually parsed $actualFilePath") + } + return vertexToNode[it] ?: throw ParsingException("Fuzzy", "C++") + } + } + throw ParsingException("Fuzzy", "C++") + } + + private fun addNodesFromEdge(e: Edge, map: MutableMap) { + val parentNode = map.getOrPut(e.outNode()) { createNodeFromVertex(e.outNode()) } + val childNode = map.getOrPut(e.inNode()) { createNodeFromVertex(e.inNode()) } + parentNode.addChild(childNode) + } + + private fun createNodeFromVertex(v: Node): FuzzyNode { + val token: String? = v.property(NodeKeys.CODE) + val order: Int? 
= v.property(NodeKeys.ORDER) + + for (replaceableNodeKey in replaceableNodeKeys) { + if (replaceableNodeKey.condition(v)) { + val node = FuzzyNode(v.property(replaceableNodeKey.key).toString(), token, order) + v.propertyKeys().forEach { k -> + val property = v.property(k) ?: return@forEach + node.metadata[k] = property.toString() + } + return node + } + } + + val node = FuzzyNode(v.label(), token, order) + v.propertyKeys().forEach { k -> + val property = v.property(k)?.toString() ?: return@forEach + for (expandableNodeKey in expandableNodeKeys) { + if (expandableNodeKey.key == k && expandableNodeKey.supportedNodeLabels.contains(v.label())) { + val keyNode = FuzzyNode(k, property, expandableNodeKey.order) + node.addChild(keyNode) + return@forEach + } + } + node.metadata[k] = property + } + return node + } + + companion object { + data class ExpandableNodeKey( + val key: String, + val supportedNodeLabels: List, + val order: Int + ) + + private val expandableNodeKeys = listOf( + ExpandableNodeKey( + "NAME", + listOf( + NodeTypes.TYPE, NodeTypes.TYPE_DECL, NodeTypes.TYPE_PARAMETER, NodeTypes.MEMBER, + NodeTypes.TYPE_ARGUMENT, NodeTypes.METHOD, NodeTypes.METHOD_PARAMETER_IN, NodeTypes.LOCAL, + NodeTypes.MODIFIER, NodeTypes.IDENTIFIER, NodeTypes.CALL, NodeTypes.UNKNOWN + ), + 0 + ), + ExpandableNodeKey( + "TYPE_FULL_NAME", + listOf( + NodeTypes.TYPE, + NodeTypes.METHOD_RETURN, + NodeTypes.METHOD_PARAMETER_IN, + NodeTypes.LOCAL, + NodeTypes.IDENTIFIER, + NodeTypes.UNKNOWN + ), + 0 + ), + ExpandableNodeKey( + "ALIAS_TYPE_FULL_NAME", + listOf( + NodeTypes.TYPE_DECL, + NodeTypes.UNKNOWN + ), + 0 + ) + ) + + data class ReplaceableNodeKey(val key: String, val condition: (Node) -> Boolean) + + private val replaceableNodeKeys = listOf( + ReplaceableNodeKey("NAME") { v -> + v.propertyKeys().contains("NAME") && + v.property("NAME").toString().startsWith("") + }, + ReplaceableNodeKey("PARSER_TYPE_NAME") { v -> + v.propertyKeys().contains("PARSER_TYPE_NAME") + } + ) + } +} diff 
--git a/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt new file mode 100644 index 00000000..6af517cd --- /dev/null +++ b/src/main/kotlin/astminer/parse/fuzzy/cpp/FuzzyFunctionSplitter.kt @@ -0,0 +1,13 @@ +package astminer.parse.fuzzy.cpp + +import astminer.common.model.* +import astminer.parse.fuzzy.FuzzyNode + +class FuzzyFunctionSplitter : TreeFunctionSplitter { + private val methodNode = "METHOD" + + override fun splitIntoFunctions(root: FuzzyNode, filePath: String): Collection> { + val methodRoots = root.preOrder().filter { it.typeLabel == methodNode } + return methodRoots.map { FuzzyCppFunctionInfo(it, filePath) } + } +} diff --git a/src/main/kotlin/astminer/parse/fuzzy/utils.kt b/src/main/kotlin/astminer/parse/fuzzy/utils.kt new file mode 100644 index 00000000..8ec52105 --- /dev/null +++ b/src/main/kotlin/astminer/parse/fuzzy/utils.kt @@ -0,0 +1,26 @@ +package astminer.parse.fuzzy + +import java.io.File +import java.util.concurrent.TimeUnit + +fun String.runCommand(workingDir: File) { + ProcessBuilder("/bin/sh", "-c", this) + .directory(workingDir) + .redirectOutput(ProcessBuilder.Redirect.INHERIT) + .redirectError(ProcessBuilder.Redirect.INHERIT) + .start() + .waitFor(60, TimeUnit.MINUTES) +} + +fun preprocessCppCode(inputFile: File, outputFile: File, preprocessCommand: String) = """ + grep '^\s*#\s*include' "${inputFile.absolutePath}" >__tmp_include.cpp + grep -v '^\s*#\s*include\b' "${inputFile.absolutePath}" >__tmp_code.cpp + touch __tmp_preprocessed.cpp + if [ -s __tmp_code.cpp ] + then + $preprocessCommand __tmp_code.cpp | grep -v ^# >__tmp_preprocessed.cpp + fi + cat __tmp_include.cpp > "${outputFile.absolutePath}" + cat __tmp_preprocessed.cpp >> "${outputFile.absolutePath}" + rm __tmp_*.cpp +""".trimIndent() diff --git a/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt new file mode 100644 index 
00000000..f8db3f21 --- /dev/null +++ b/src/main/kotlin/astminer/parse/gumtree/GumTreeNode.kt @@ -0,0 +1,30 @@ +package astminer.parse.gumtree + +import astminer.common.model.Node +import com.github.gumtreediff.tree.ITree +import com.github.gumtreediff.tree.TreeContext + +class GumTreeNode(val wrappedNode: ITree, val context: TreeContext, override var parent: GumTreeNode?) : + Node(wrappedNode.label) { + override val typeLabel: String + get() = context.getTypeLabel(wrappedNode) + + override val children: MutableList by lazy { + wrappedNode.children.map { GumTreeNode(it, context, this) }.toMutableList() + } + + override fun removeChildrenOfType(typeLabel: String) { + children.removeIf { it.typeLabel == typeLabel } + } + + override fun getChildOfType(typeLabel: String): GumTreeNode? = + getChildrenOfType(typeLabel).firstOrNull() + + override fun getChildrenOfType(typeLabel: String): List { + val children = super.getChildrenOfType(typeLabel) + return children.filterIsInstance() + .apply { if (size != children.size) throw TypeCastException("Node have children of different types") } + } + + override fun preOrder(): List = super.preOrder().map { it as GumTreeNode } +} diff --git a/src/main/kotlin/astminer/parse/gumtree/GumtreeParsingResult.kt b/src/main/kotlin/astminer/parse/gumtree/GumtreeParsingResult.kt new file mode 100644 index 00000000..4de7c5ae --- /dev/null +++ b/src/main/kotlin/astminer/parse/gumtree/GumtreeParsingResult.kt @@ -0,0 +1,27 @@ +package astminer.parse.gumtree + +import astminer.common.model.ParsingResult +import astminer.common.model.ParsingResultFactory +import astminer.parse.gumtree.java.GumTreeJavaFunctionSplitter +import astminer.parse.gumtree.java.GumTreeJavaParser +import astminer.parse.gumtree.python.GumTreePythonFunctionSplitter +import astminer.parse.gumtree.python.GumTreePythonParser +import java.io.File + +object GumtreeJavaParsingResultFactory : ParsingResultFactory { + override fun parse(file: File): ParsingResult = 
JavaGumtreeParsingResult(file) + + class JavaGumtreeParsingResult(file: File) : ParsingResult(file) { + override val root = GumTreeJavaParser().parseFile(file) + override val splitter = GumTreeJavaFunctionSplitter() + } +} + +object GumtreePythonParsingResultFactory : ParsingResultFactory { + override fun parse(file: File): ParsingResult = PythonGumtreeParsingResult(file) + + class PythonGumtreeParsingResult(file: File) : ParsingResult(file) { + override val root = GumTreePythonParser().parseFile(file) + override val splitter = GumTreePythonFunctionSplitter() + } +} diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt new file mode 100644 index 00000000..5e38c372 --- /dev/null +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionInfo.kt @@ -0,0 +1,76 @@ +package astminer.parse.gumtree.java + +import astminer.common.model.EnclosingElement +import astminer.common.model.EnclosingElementType +import astminer.common.model.FunctionInfo +import astminer.common.model.FunctionInfoParameter +import astminer.parse.gumtree.GumTreeNode +import mu.KotlinLogging + +private val logger = KotlinLogging.logger("Gumtree-Java-function-info") + +class GumTreeJavaFunctionInfo( + override val root: GumTreeNode, + override val filePath: String +) : FunctionInfo { + + override val nameNode: GumTreeNode? = root.getChildOfType(TypeLabels.simpleName) + override val returnType: String? = root.getElementType() + override val enclosingElement: EnclosingElement? = collectEnclosingClass() + override val parameters: List? 
= + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } + + override val modifiers: List = root + .children + .filter { it.typeLabel == "Modifier" } + .mapNotNull { it.originalToken } + + override val annotations: List = root + .children + .filter { it.typeLabel == "MarkerAnnotation" } + .mapNotNull { it.children.first().originalToken } + + override val isConstructor: Boolean = root.typeLabel == "Initializer" + + private fun collectEnclosingClass(): EnclosingElement? { + val enclosingClassNode = getEnclosingClassNode(root.parent) ?: return null + val enclosingClassName = enclosingClassNode.getChildOfType(TypeLabels.simpleName)?.originalToken + return EnclosingElement( + root = enclosingClassNode, + type = EnclosingElementType.Class, + name = enclosingClassName + ) + } + + private fun getEnclosingClassNode(node: GumTreeNode?): GumTreeNode? { + if (node == null || node.typeLabel == TypeLabels.typeDeclaration) { + return node + } + return getEnclosingClassNode(node.parent) + } + + private fun collectParameters(): List { + val params = root.getChildrenOfType(TypeLabels.singleVariableDeclaration) + return params.map { node -> + FunctionInfoParameter(node.getElementName(), node.getElementType()) + } + } + + private fun GumTreeNode.getElementName(): String = + getChildOfType(TypeLabels.simpleName)?.originalToken ?: error("No name found for element") + + private fun GumTreeNode.getElementType(): String? 
= children.firstOrNull { it.isTypeNode() }?.originalToken + + private fun GumTreeNode.isTypeNode() = typeLabel.endsWith("Type") + + companion object { + private object TypeLabels { + const val simpleName = "SimpleName" + const val typeDeclaration = "TypeDeclaration" + const val singleVariableDeclaration = "SingleVariableDeclaration" + } + } +} diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt new file mode 100644 index 00000000..eeaf23ef --- /dev/null +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitter.kt @@ -0,0 +1,13 @@ +package astminer.parse.gumtree.java + +import astminer.common.model.* +import astminer.parse.gumtree.GumTreeNode + +class GumTreeJavaFunctionSplitter : TreeFunctionSplitter { + private val methodDeclaration = "MethodDeclaration" + + override fun splitIntoFunctions(root: GumTreeNode, filePath: String): Collection> { + val methodRoots = root.preOrder().filter { it.typeLabel == methodDeclaration } + return methodRoots.map { GumTreeJavaFunctionInfo(it, filePath) } + } +} diff --git a/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt new file mode 100644 index 00000000..41c35bb3 --- /dev/null +++ b/src/main/kotlin/astminer/parse/gumtree/java/GumTreeJavaParser.kt @@ -0,0 +1,29 @@ +package astminer.parse.gumtree.java + +import astminer.common.model.Parser +import astminer.parse.ParsingException +import astminer.parse.gumtree.GumTreeNode +import com.github.gumtreediff.client.Run +import com.github.gumtreediff.gen.SyntaxException +import com.github.gumtreediff.gen.jdt.JdtTreeGenerator +import com.github.gumtreediff.tree.TreeContext +import mu.KotlinLogging +import java.io.InputStream +import java.io.InputStreamReader + +private val logger = KotlinLogging.logger("GumTree-JavaParser") + +class GumTreeJavaParser : Parser { + init { + 
Run.initGenerators() + } + + override fun parseInputStream(content: InputStream): GumTreeNode = try { + val treeContext = JdtTreeGenerator().generate(InputStreamReader(content)) + wrapGumTreeNode(treeContext) + } catch (e: SyntaxException) { + throw ParsingException(parserType = "Gumtree", language = "Java", exc = e) + } +} + +fun wrapGumTreeNode(treeContext: TreeContext): GumTreeNode = GumTreeNode(treeContext.root, treeContext, null) diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt new file mode 100644 index 00000000..78f80496 --- /dev/null +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionInfo.kt @@ -0,0 +1,93 @@ +package astminer.parse.gumtree.python + +import astminer.common.model.EnclosingElement +import astminer.common.model.EnclosingElementType +import astminer.common.model.FunctionInfo +import astminer.common.model.FunctionInfoParameter +import astminer.parse.findEnclosingElementBy +import astminer.parse.gumtree.GumTreeNode +import mu.KotlinLogging + +private val logger = KotlinLogging.logger("Gumtree-Java-function-info") + +class GumTreePythonFunctionInfo( + override val root: GumTreeNode, + override val filePath: String +) : FunctionInfo { + + override val nameNode: GumTreeNode = root + override val enclosingElement: EnclosingElement? = collectEnclosingClass() + override val returnType: String? = getElementType(root)?.typeLabel + override val parameters: List? = + try { collectParameters() } catch (e: IllegalStateException) { + logger.warn { e.message } + null + } + + private fun getElementType(node: GumTreeNode): GumTreeNode? 
{ + if (node.typeLabel == TypeLabels.arg) { + return node.getChildOfType(TypeLabels.nameLoad) + } + // if return statement has "Constant-`Type`" return value => function type is `Type` + if (TypeLabels.methodDefinitions.contains(node.typeLabel)) { + return node.getChildOfType(TypeLabels.body)?.getChildOfType(TypeLabels.returnTypeLabel)?.let { + it.children.firstOrNull { child -> + child.typeLabel.startsWith(TypeLabels.constantType) + } + } + } + return null + } + + private fun collectEnclosingClass(): EnclosingElement? { + val enclosing = findEnclosingClass() ?: return null + return EnclosingElement( + type = EnclosingElementType.Class, + name = enclosing.originalToken, + root = enclosing + ) + } + + private fun findEnclosingClass(): GumTreeNode? = + root.findEnclosingElementBy { it.typeLabel == TypeLabels.classDefinition } + + private fun collectParameters(): List { + val arguments = root.getChildrenOfType(TypeLabels.arguments).flatMap { it.children } + val params = arguments.flatMap { node -> + when (node.typeLabel) { + in TypeLabels.funcArgsTypesNodes -> + node.children + .filter { it.typeLabel == TypeLabels.arg } + TypeLabels.vararg, TypeLabels.kwarg -> listOf(node) + else -> emptyList() + } + } + return params.mapNotNull { + FunctionInfoParameter(it.originalToken ?: return@mapNotNull null, getElementType(it)?.originalToken) + } + } + + companion object { + private object TypeLabels { + const val classDefinition = "ClassDef" + const val functionDefinition = "FunctionDef" + const val asyncFunctionDefinition = "AsyncFunctionDef" + const val nameLoad = "Name_Load" + const val posOnlyArgs = "posonlyargs" + const val kwOnlyArgs = "kwonlyargs" + const val arguments = "arguments" + const val vararg = "vararg" + const val kwarg = "kwarg" + const val args = "args" + const val arg = "arg" + + const val body = "body" + const val returnTypeLabel = "Return" + const val passTypeLabel = "Pass" + const val constantType = "Constant-" + + val methodDefinitions = 
listOf(functionDefinition, asyncFunctionDefinition) + val funcArgsTypesNodes = listOf(args, posOnlyArgs, kwOnlyArgs) + } + } +} diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt new file mode 100644 index 00000000..1db706ca --- /dev/null +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitter.kt @@ -0,0 +1,19 @@ +package astminer.parse.gumtree.python + +import astminer.common.model.* +import astminer.parse.gumtree.GumTreeNode + +class GumTreePythonFunctionSplitter : TreeFunctionSplitter { + override fun splitIntoFunctions(root: GumTreeNode, filePath: String): Collection> { + val functionRoots = root.preOrder().filter { TypeLabels.methodDefinitions.contains(it.typeLabel) } + return functionRoots.map { GumTreePythonFunctionInfo(it, filePath) } + } + + companion object { + private object TypeLabels { + const val functionDefinition = "FunctionDef" + const val asyncFunctionDefinition = "AsyncFunctionDef" + val methodDefinitions = listOf(functionDefinition, asyncFunctionDefinition) + } + } +} diff --git a/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt new file mode 100644 index 00000000..1af64aca --- /dev/null +++ b/src/main/kotlin/astminer/parse/gumtree/python/GumTreePythonParser.kt @@ -0,0 +1,29 @@ +package astminer.parse.gumtree.python + +import astminer.common.model.Parser +import astminer.common.model.ParserNotInstalledException +import astminer.parse.ParsingException +import astminer.parse.gumtree.GumTreeNode +import com.github.gumtreediff.client.Run +import com.github.gumtreediff.gen.python.PythonTreeGenerator +import com.github.gumtreediff.tree.TreeContext +import java.io.IOException +import java.io.InputStream +import java.io.InputStreamReader + +class GumTreePythonParser : Parser { + init { + Run.initGenerators() + } + + 
override fun parseInputStream(content: InputStream): GumTreeNode = try { + val context = PythonTreeGenerator().generate(InputStreamReader(content)) + wrapGumTreeNode(context) + } catch (e: RuntimeException) { + throw ParsingException("GumTree", "Python", e) + } catch (e: IOException) { + throw ParserNotInstalledException("Gumtree", "Python", e) + } +} + +fun wrapGumTreeNode(treeContext: TreeContext): GumTreeNode = GumTreeNode(treeContext.root, treeContext, null) diff --git a/src/main/kotlin/astminer/parse/java/GumTreeJavaNode.kt b/src/main/kotlin/astminer/parse/java/GumTreeJavaNode.kt deleted file mode 100644 index d8d4e163..00000000 --- a/src/main/kotlin/astminer/parse/java/GumTreeJavaNode.kt +++ /dev/null @@ -1,46 +0,0 @@ -package astminer.parse.java - -import com.github.gumtreediff.tree.ITree -import com.github.gumtreediff.tree.TreeContext -import astminer.common.model.Node - -class GumTreeJavaNode(val wrappedNode: ITree, val context: TreeContext, val parent: GumTreeJavaNode?) : Node { - private val metadata: MutableMap = HashMap() - - override fun getMetadata(key: String): Any? { - return metadata[key] - } - - override fun setMetadata(key: String, value: Any) { - metadata[key] = value - } - - override fun isLeaf(): Boolean { - return childrenList.isEmpty() - } - - private val childrenList: MutableList by lazy { - wrappedNode.children.map { GumTreeJavaNode(it, context, this) }.toMutableList() - } - - override fun getTypeLabel(): String { - return context.getTypeLabel(wrappedNode) - } - - override fun getChildren(): List { - return childrenList - } - - override fun getParent(): Node? 
{ - return parent - } - - override fun getToken(): String { - return wrappedNode.label - } - - override fun removeChildrenOfType(typeLabel: String) { - childrenList.removeIf { it.getTypeLabel() == typeLabel } - } - -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/java/GumTreeJavaParser.kt b/src/main/kotlin/astminer/parse/java/GumTreeJavaParser.kt deleted file mode 100644 index 2112f5cd..00000000 --- a/src/main/kotlin/astminer/parse/java/GumTreeJavaParser.kt +++ /dev/null @@ -1,23 +0,0 @@ -package astminer.parse.java - -import com.github.gumtreediff.client.Run -import com.github.gumtreediff.gen.jdt.JdtTreeGenerator -import com.github.gumtreediff.tree.TreeContext -import astminer.common.model.Parser -import java.io.InputStream -import java.io.InputStreamReader - -class GumTreeJavaParser : Parser { - init { - Run.initGenerators() - } - - override fun parseInputStream(content: InputStream): GumTreeJavaNode? { - val treeContext = JdtTreeGenerator().generate(InputStreamReader(content)) - return wrapGumTreeNode(treeContext) - } -} - -fun wrapGumTreeNode(treeContext: TreeContext): GumTreeJavaNode { - return GumTreeJavaNode(treeContext.root, treeContext, null) -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/parse/java/GumTreeMethodSplitter.kt b/src/main/kotlin/astminer/parse/java/GumTreeMethodSplitter.kt deleted file mode 100644 index a66fc261..00000000 --- a/src/main/kotlin/astminer/parse/java/GumTreeMethodSplitter.kt +++ /dev/null @@ -1,73 +0,0 @@ -package astminer.parse.java - -import astminer.common.model.* -import astminer.common.preOrder - -private fun GumTreeJavaNode.isTypeNode() = getTypeLabel().endsWith("Type") - -class GumTreeMethodSplitter : TreeMethodSplitter { - - companion object { - private object TypeLabels { - const val methodDeclaration = "MethodDeclaration" - const val simpleName = "SimpleName" - const val typeDeclaration = "TypeDeclaration" - const val singleVariableDeclaration = "SingleVariableDeclaration" 
- } - } - - override fun splitIntoMethods(root: GumTreeJavaNode): Collection> { - val methodRoots = root.preOrder().filter { it.getTypeLabel() == TypeLabels.methodDeclaration } - return methodRoots.map { collectMethodInfo(it as GumTreeJavaNode) } - } - - private fun collectMethodInfo(methodNode: GumTreeJavaNode): MethodInfo { - val methodReturnType = getElementType(methodNode) - val methodName = getElementName(methodNode) - - val classRoot = getEnclosingClass(methodNode) - val className = classRoot?.let { getElementName(it) } - - val parameters = getParameters(methodNode) - - return MethodInfo( - MethodNode(methodNode, methodReturnType, methodName), - ElementNode(classRoot, className), - parameters - ) - } - - private fun getElementName(node: GumTreeJavaNode) = node.getChildren().map { - it as GumTreeJavaNode - }.firstOrNull { - it.getTypeLabel() == TypeLabels.simpleName - } - - private fun getElementType(node: GumTreeJavaNode) = node.getChildren().map { - it as GumTreeJavaNode - }.firstOrNull { - it.isTypeNode() - } - - private fun getEnclosingClass(node: GumTreeJavaNode): GumTreeJavaNode? { - if (node.getTypeLabel() == TypeLabels.typeDeclaration) { - return node - } - val parentNode = node.getParent() as? 
GumTreeJavaNode - return parentNode?.let { getEnclosingClass(it) } - } - - private fun getParameters(methodNode: GumTreeJavaNode): List> { - val params = methodNode.getChildren().filter { - it.getTypeLabel() == TypeLabels.singleVariableDeclaration - } - return params.map { - val node = it as GumTreeJavaNode - ParameterNode( - node, - getElementType(node), - getElementName(node) - ) - }.toList() - } -} \ No newline at end of file diff --git a/src/main/kotlin/astminer/paths/Code2VecPathStorage.kt b/src/main/kotlin/astminer/paths/Code2VecPathStorage.kt deleted file mode 100644 index 595b459a..00000000 --- a/src/main/kotlin/astminer/paths/Code2VecPathStorage.kt +++ /dev/null @@ -1,16 +0,0 @@ -package astminer.paths - -import astminer.common.model.PathContextId - -class Code2VecPathStorage(outputFolderPath: String, - pathsLimit: Long = Long.MAX_VALUE, - tokensLimit: Long = Long.MAX_VALUE -) : CountingPathStorage(outputFolderPath, pathsLimit, tokensLimit) { - - override fun pathContextIdsToString(pathContextIds: List, label: String): String { - val joinedPathContexts = pathContextIds.joinToString(" ") { pathContextId -> - "${pathContextId.startTokenId},${pathContextId.pathId},${pathContextId.endTokenId}" - } - return "$label $joinedPathContexts" - } -} diff --git a/src/main/kotlin/astminer/paths/CountingPathStorage.kt b/src/main/kotlin/astminer/paths/CountingPathStorage.kt deleted file mode 100644 index c671b138..00000000 --- a/src/main/kotlin/astminer/paths/CountingPathStorage.kt +++ /dev/null @@ -1,61 +0,0 @@ -package astminer.paths - -import astminer.common.model.* -import astminer.common.storage.* -import java.io.File -import java.io.PrintWriter - -abstract class CountingPathStorage(override val directoryPath: String, - override val tokensLimit: Long, - override val pathsLimit: Long -) : PathStorage { - - protected val tokensMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() - protected val orientedNodeTypesMap: RankedIncrementalIdStorage = 
RankedIncrementalIdStorage() - protected val pathsMap: RankedIncrementalIdStorage> = RankedIncrementalIdStorage() - - private val pathsFile: File - private val labeledPathContextIdsWriter: PrintWriter - - init { - File(directoryPath).mkdirs() - pathsFile = File("$directoryPath/path_contexts.csv") - pathsFile.createNewFile() - labeledPathContextIdsWriter = PrintWriter(pathsFile) - } - - abstract fun pathContextIdsToString(pathContextIds: List, label: LabelType): String - - private fun dumpPathContexts(labeledPathContextIds: LabeledPathContextIds) { - val pathContextIdsString = labeledPathContextIds.pathContexts.filter { - tokensMap.getIdRank(it.startTokenId) <= tokensLimit && - tokensMap.getIdRank(it.endTokenId) <= tokensLimit && - pathsMap.getIdRank(it.pathId) <= pathsLimit - } - labeledPathContextIdsWriter.println(pathContextIdsToString(pathContextIdsString, labeledPathContextIds.label)) - } - - private fun storePathContext(pathContext: PathContext): PathContextId { - val startTokenId = tokensMap.record(pathContext.startToken) - val endTokenId = tokensMap.record(pathContext.endToken) - val orientedNodesIds = pathContext.orientedNodeTypes.map { orientedNodeTypesMap.record(it) } - val pathId = pathsMap.record(orientedNodesIds) - return PathContextId(startTokenId, pathId, endTokenId) - } - - override fun store(labeledPathContexts: LabeledPathContexts) { - val labeledPathContextIds = LabeledPathContextIds( - labeledPathContexts.label, - labeledPathContexts.pathContexts.map { storePathContext(it) } - ) - dumpPathContexts(labeledPathContextIds) - } - - override fun close() { - dumpIdStorageToCsv(tokensMap, "token", tokenToCsvString, File("$directoryPath/tokens.csv"), tokensLimit) - dumpIdStorageToCsv(orientedNodeTypesMap, "node_type", orientedNodeToCsvString, File("$directoryPath/node_types.csv"), Long.MAX_VALUE) - dumpIdStorageToCsv(pathsMap, "path", pathToCsvString, File("$directoryPath/paths.csv"), pathsLimit) - - labeledPathContextIdsWriter.close() - } -} diff --git 
a/src/main/kotlin/astminer/paths/CsvPathStorage.kt b/src/main/kotlin/astminer/paths/CsvPathStorage.kt deleted file mode 100644 index 05f5d7cf..00000000 --- a/src/main/kotlin/astminer/paths/CsvPathStorage.kt +++ /dev/null @@ -1,16 +0,0 @@ -package astminer.paths - -import astminer.common.model.PathContextId - -class CsvPathStorage(outputFolderPath: String, - pathsLimit: Long = Long.MAX_VALUE, - tokensLimit: Long = Long.MAX_VALUE -) : CountingPathStorage(outputFolderPath, pathsLimit, tokensLimit) { - - override fun pathContextIdsToString(pathContextIds: List, label: String): String { - val joinedPathContexts = pathContextIds.joinToString(";") { pathContextId -> - "${pathContextId.startTokenId} ${pathContextId.pathId} ${pathContextId.endTokenId}" - } - return "$label,$joinedPathContexts" - } -} diff --git a/src/main/kotlin/astminer/paths/PathMiner.kt b/src/main/kotlin/astminer/paths/PathMiner.kt index 8bfab93d..b00239cd 100644 --- a/src/main/kotlin/astminer/paths/PathMiner.kt +++ b/src/main/kotlin/astminer/paths/PathMiner.kt @@ -5,10 +5,9 @@ import astminer.common.model.Node data class PathRetrievalSettings(val maxLength: Int, val maxWidth: Int) -class PathMiner(val settings: PathRetrievalSettings) { +class PathMiner(private val settings: PathRetrievalSettings) { private val pathWorker = PathWorker() - fun retrievePaths(tree: Node): Collection { - return pathWorker.retrievePaths(tree, settings.maxLength, settings.maxWidth) - } -} \ No newline at end of file + fun retrievePaths(tree: Node): Collection = + pathWorker.retrievePaths(tree, settings.maxLength, settings.maxWidth) +} diff --git a/src/main/kotlin/astminer/paths/PathUtil.kt b/src/main/kotlin/astminer/paths/PathUtil.kt index f342c560..ad8c9cc8 100644 --- a/src/main/kotlin/astminer/paths/PathUtil.kt +++ b/src/main/kotlin/astminer/paths/PathUtil.kt @@ -2,11 +2,11 @@ package astminer.paths import astminer.common.model.* -fun toPathContext(path: ASTPath, getToken: (Node) -> String = { node -> node.getToken() }): 
PathContext { +fun toPathContext(path: ASTPath, getToken: (Node) -> String = { node -> node.token }): PathContext { val startToken = getToken(path.upwardNodes.first()) val endToken = getToken(path.downwardNodes.last()) - val astNodes = path.upwardNodes.map { OrientedNodeType(it.getTypeLabel(), Direction.UP) } + - OrientedNodeType(path.topNode.getTypeLabel(), Direction.TOP) + - path.downwardNodes.map { OrientedNodeType(it.getTypeLabel(), Direction.DOWN) } + val astNodes = path.upwardNodes.map { OrientedNodeType(it.typeLabel, Direction.UP) } + + OrientedNodeType(path.topNode.typeLabel, Direction.TOP) + + path.downwardNodes.map { OrientedNodeType(it.typeLabel, Direction.DOWN) } return PathContext(startToken, astNodes, endToken) } diff --git a/src/main/kotlin/astminer/paths/PathWorker.kt b/src/main/kotlin/astminer/paths/PathWorker.kt index ad1bceff..a19bd487 100644 --- a/src/main/kotlin/astminer/paths/PathWorker.kt +++ b/src/main/kotlin/astminer/paths/PathWorker.kt @@ -3,27 +3,16 @@ package astminer.paths import astminer.common.model.ASTPath import astminer.common.model.Node import astminer.common.model.PathPiece -import astminer.common.postOrderIterator import kotlin.math.min class PathWorker { - companion object { - private const val PATH_PIECES_KEY = "path_pieces" - - private fun Node.setPathPieces(pathPieces: List) { - this.setMetadata(PATH_PIECES_KEY, pathPieces) - } - - private fun Node.getPathPieces(): List? = this.getMetadata(PATH_PIECES_KEY) as List? - } - fun retrievePaths(tree: Node) = retrievePaths(tree, null, null) - fun updatePathPieces( - currentNode: Node, - pathPiecesPerChild: List?>, - maxLength: Int? + private fun updatePathPieces( + currentNode: Node, + pathPiecesPerChild: List?>, + maxLength: Int? 
) = pathPiecesPerChild.filterNotNull().flatMap { childPieces -> childPieces.filter { pathPiece -> maxLength == null || pathPiece.size <= maxLength @@ -32,10 +21,11 @@ class PathWorker { } } - fun collapsePiecesToPaths( - currentNode: Node, - pathPiecesPerChild: List?>, - maxLength: Int?, maxWidth: Int? + private fun collapsePiecesToPaths( + currentNode: Node, + pathPiecesPerChild: List?>, + maxLength: Int?, + maxWidth: Int? ): Collection { val paths: MutableCollection = ArrayList() val childrenCount = pathPiecesPerChild.size @@ -59,11 +49,11 @@ class PathWorker { val paths: MutableList = ArrayList() iterator.forEach { currentNode -> if (currentNode.isLeaf()) { - if (currentNode.getToken().isNotEmpty()) { + if (currentNode.token.isNotEmpty()) { currentNode.setPathPieces(listOf(listOf(currentNode))) } } else { - val pathPiecesPerChild = currentNode.getChildren().map { it.getPathPieces() } + val pathPiecesPerChild = currentNode.children.map { it.getPathPieces() } val currentNodePathPieces = updatePathPieces(currentNode, pathPiecesPerChild, maxLength) val currentNodePaths = collapsePiecesToPaths(currentNode, pathPiecesPerChild, maxLength, maxWidth) @@ -73,4 +63,16 @@ class PathWorker { } return paths } -} \ No newline at end of file + + companion object { + private const val PATH_PIECES_KEY = "path_pieces" + + private fun Node.setPathPieces(pathPieces: List) { + this.metadata[PATH_PIECES_KEY] = pathPieces + } + + // In runtime all generics upcast to upper bound, therefore it's impossible to check type inside List + @Suppress("UNCHECKED_CAST") + private fun Node.getPathPieces(): List? = this.metadata[PATH_PIECES_KEY] as? 
List + } +} diff --git a/src/main/kotlin/astminer/pipeline/Pipeline.kt b/src/main/kotlin/astminer/pipeline/Pipeline.kt new file mode 100644 index 00000000..0ba4468b --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/Pipeline.kt @@ -0,0 +1,80 @@ +package astminer.pipeline + +import astminer.common.getProjectFilesWithExtension +import astminer.common.model.* +import astminer.config.FileExtension +import astminer.config.PipelineConfig +import astminer.parse.getParsingResultFactory +import astminer.pipeline.branch.FilePipelineBranch +import astminer.pipeline.branch.FunctionPipelineBranch +import astminer.pipeline.branch.IllegalLabelExtractorException +import me.tongfei.progressbar.ProgressBar +import java.io.File + +/** + * Pipeline runs all the steps needed to parse, process and save data. + * @param config The pipeline config that defines the pipeline + */ +class Pipeline(private val config: PipelineConfig) { + private val inputDirectory = File(config.inputDir) + private val outputDirectory = File(config.outputDir) + + private val filters = config.filters.map { it.filterImpl } + private val labelExtractor = config.labelExtractor.labelExtractorImpl + + private val holdoutMap = findDatasetHoldouts(inputDirectory) + private val isDataset = holdoutMap.size > 1 + + private val branch = when (labelExtractor) { + is FileLabelExtractor -> FilePipelineBranch(filters, labelExtractor) + is FunctionLabelExtractor -> FunctionPipelineBranch(filters, labelExtractor) + else -> throw IllegalLabelExtractorException(labelExtractor::class.simpleName) + } + + private fun createStorageDirectory(extension: FileExtension): File { + val outputDirectoryForExtension = outputDirectory.resolve(extension.fileExtension) + outputDirectoryForExtension.mkdir() + return outputDirectoryForExtension + } + + private fun createStorage(extension: FileExtension): Storage { + val storagePath = createStorageDirectory(extension).path + return config.storage.createStorage(storagePath) + } + + private fun 
parseLanguage(language: FileExtension) { + val parsingResultFactory = getParsingResultFactory(language, config.parser.name) + createStorage(language).use { storage -> + for ((holdoutType, holdoutDir) in holdoutMap) { + val holdoutFiles = getProjectFilesWithExtension(holdoutDir, language.fileExtension) + printHoldoutStat(holdoutFiles, holdoutType) + val progressBar = ProgressBar("", holdoutFiles.size.toLong()) + parsingResultFactory.parseFilesInThreads(holdoutFiles, config.numOfThreads) { parseResult -> + val labeledResults = branch.process(parseResult) + storage.storeSynchronously(labeledResults, holdoutType) + progressBar.step() + } + progressBar.close() + } + } + } + + private fun printHoldoutStat(files: List, holdoutType: DatasetHoldout) { + val output = StringBuilder("${files.size} file(s) found") + if (isDataset) { output.append(" in ${holdoutType.name}") } + println(output.toString()) + } + + /** + * Runs the pipeline that is defined in the [config]. + */ + fun run() { + println("Working in ${config.numOfThreads} thread(s)") + if (isDataset) { println("Dataset structure found") } + for (language in config.parser.languages) { + println("Parsing $language") + parseLanguage(language) + } + println("Done!") + } +} diff --git a/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt new file mode 100644 index 00000000..bb0128a1 --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/branch/Exceptions.kt @@ -0,0 +1,13 @@ +package astminer.pipeline.branch + +/** + * This exception is thrown when label extractor's granularity is implemented incorrectly. + */ +class IllegalLabelExtractorException(problemName: String?) : + IllegalStateException("Unknown label extractor `${problemName ?: "anonymous"}`") + +/** + * This exception is thrown when the given filter is not implemented for the given granularity. + */ +class IllegalFilterException(granularity: String, filterName: String?) 
: + IllegalStateException("Unknown filter `${filterName ?: "anonymous"}` for $granularity granularity") diff --git a/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt new file mode 100644 index 00000000..81963e99 --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/branch/FilePipelineBranch.kt @@ -0,0 +1,31 @@ +package astminer.pipeline.branch + +import astminer.common.model.* +import astminer.common.model.FileFilter + +/** + * PipelineBranch for pipeline with file-level granularity (FilePipelineConfig). + * Works with files as a whole. Tests parsed files with filters and extracts a label from them. + */ +class FilePipelineBranch( + filters: List, + private val labelExtractor: FileLabelExtractor +) : PipelineBranch { + + private val filters: List = filters.map { filter -> + filter as? FileFilter + ?: throw IllegalFilterException("file", filter::class.simpleName) + } + + private fun passesThroughFilters(parseResult: ParsingResult) = + filters.all { filter -> filter.validate(parseResult) } + + override fun process(parsingResult: ParsingResult): List> { + return if (passesThroughFilters(parsingResult)) { + val labeledResult = labelExtractor.process(parsingResult) ?: return emptyList() + listOf(labeledResult) + } else { + emptyList() + } + } +} diff --git a/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt new file mode 100644 index 00000000..08231b20 --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/branch/FunctionPipelineBranch.kt @@ -0,0 +1,27 @@ +package astminer.pipeline.branch + +import astminer.common.model.* + +/** + * PipelineBranch for pipeline with function-level granularity (FunctionPipelineConfig). + * Extracts functions from the parsed files. + * Then tests functions with filters, processes them and extracts labels from each function. 
+ */ +class FunctionPipelineBranch( + filters: List, + private val labelExtractor: FunctionLabelExtractor +) : PipelineBranch { + + private val filters: List = filters.map { filter -> + filter as? FunctionFilter + ?: throw IllegalFilterException("function", filter::class.simpleName) + } + + private fun passesThroughFilters(functionInfo: FunctionInfo) = + filters.all { filter -> filter.validate(functionInfo) } + + override fun process(parsingResult: ParsingResult): List> = + parsingResult.splitIntoFunctions() + .filter { functionInfo -> passesThroughFilters(functionInfo) } + .mapNotNull { functionInfo -> labelExtractor.process(functionInfo) } +} diff --git a/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt new file mode 100644 index 00000000..35559c57 --- /dev/null +++ b/src/main/kotlin/astminer/pipeline/branch/PipelineBranch.kt @@ -0,0 +1,18 @@ +package astminer.pipeline.branch + +import astminer.common.model.LabeledResult +import astminer.common.model.Node +import astminer.common.model.ParsingResult + +/** + * PipelineBranch is a part of the pipeline that encapsulate inside itself granularity based logic. + * It accepts parsed files (LanguageHandler) and returns labeled results. + */ +interface PipelineBranch { + /** + * Extracts labeled results from LanguageHandler + * May mutate the AST. 
+ * Should have no other side-effects + */ + fun process(parsingResult: ParsingResult): List> +} diff --git a/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt new file mode 100644 index 00000000..9ef41b0b --- /dev/null +++ b/src/main/kotlin/astminer/storage/ast/CsvAstStorage.kt @@ -0,0 +1,72 @@ +package astminer.storage.ast + +import astminer.common.model.DatasetHoldout +import astminer.common.model.LabeledResult +import astminer.common.model.Node +import astminer.common.model.Storage +import astminer.common.storage.RankedIncrementalIdStorage +import astminer.common.storage.dumpIdStorageToCsv +import astminer.common.storage.nodeTypeToCsvString +import astminer.common.storage.tokenToCsvString +import java.io.File +import java.io.PrintWriter + +/** + * Stores multiple ASTs by their roots and saves them in .csv format. + * Output consists of 3 .csv files: with node types, with tokens and with ASTs. + */ +class CsvAstStorage(override val outputDirectoryPath: String) : Storage { + + private val tokensMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() + private val nodeTypesMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() + + private val astsPrintWriters = mutableMapOf() + + init { + File(outputDirectoryPath).mkdirs() + } + + override fun store(labeledResult: LabeledResult, holdout: DatasetHoldout) { + for (node in labeledResult.root.preOrder()) { + tokensMap.record(node.token) + nodeTypesMap.record(node.typeLabel) + } + val writer = astsPrintWriters.getOrPut(holdout) { holdout.resolveHoldout() } + dumpAst(labeledResult.root, labeledResult.label, writer) + } + + override fun close() { + dumpTokenStorage(File("$outputDirectoryPath/tokens.csv")) + dumpNodeTypesStorage(File("$outputDirectoryPath/node_types.csv")) + + astsPrintWriters.values.map { it.close() } + } + + private fun dumpTokenStorage(file: File) { + dumpIdStorageToCsv(tokensMap, "token", tokenToCsvString, file) + } + + private 
fun dumpNodeTypesStorage(file: File) { + dumpIdStorageToCsv(nodeTypesMap, "node_type", nodeTypeToCsvString, file) + } + + private fun dumpAst(root: Node, id: String, writer: PrintWriter) { + writer.println("$id,${astString(root)}") + } + + internal fun astString(node: Node): String { + return "${tokensMap.getId(node.token)} ${nodeTypesMap.getId(node.typeLabel)}{${ + node.children.joinToString(separator = "", transform = ::astString) + }}" + } + + private fun DatasetHoldout.resolveHoldout(): PrintWriter { + val holdoutDir = File(outputDirectoryPath).resolve(this.dirName) + holdoutDir.mkdirs() + val astFile = holdoutDir.resolve("asts.csv") + astFile.createNewFile() + val newWriter = PrintWriter(astFile) + newWriter.println("id,ast") + return newWriter + } +} diff --git a/src/main/kotlin/astminer/ast/DotAstStorage.kt b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt similarity index 60% rename from src/main/kotlin/astminer/ast/DotAstStorage.kt rename to src/main/kotlin/astminer/storage/ast/DotAstStorage.kt index 876ea2bb..fc147270 100644 --- a/src/main/kotlin/astminer/ast/DotAstStorage.kt +++ b/src/main/kotlin/astminer/storage/ast/DotAstStorage.kt @@ -1,9 +1,9 @@ -package astminer.ast +package astminer.storage.ast -import astminer.common.getNormalizedToken -import astminer.common.model.AstStorage +import astminer.common.model.DatasetHoldout +import astminer.common.model.LabeledResult import astminer.common.model.Node -import astminer.common.preOrder +import astminer.common.model.Storage import astminer.common.storage.RankedIncrementalIdStorage import java.io.File import java.io.PrintWriter @@ -12,34 +12,36 @@ import java.io.PrintWriter * Stores multiple ASTs in dot format (https://en.wikipedia.org/wiki/DOT_(graph_description_language)) * Output consist of separate .dot files for each AST and one full description in .csv format */ -class DotAstStorage(override val directoryPath: String) : AstStorage { +class DotAstStorage(override val outputDirectoryPath: String) 
: Storage { internal data class FilePath(val parentPath: String, val fileName: String) - private val astDirectoryPath: File + private val astDirectoryPaths = mutableMapOf() private val astFilenameFormat = "ast_%d.dot" private val descriptionFileStream: PrintWriter private var index: Long = 0 init { - File(directoryPath).mkdirs() - astDirectoryPath = File(directoryPath, "asts") - astDirectoryPath.mkdirs() - val descriptionFile = File(directoryPath, "description.csv") + File(outputDirectoryPath).mkdirs() + val descriptionFile = File(outputDirectoryPath, "description.csv") descriptionFile.createNewFile() descriptionFileStream = PrintWriter(descriptionFile) descriptionFileStream.write("dot_file,source_file,label,node_id,token,type\n") } - override fun store(root: Node, label: String, filePath: String) { + override fun store(labeledResult: LabeledResult, holdout: DatasetHoldout) { // Use filename as a label for ast // TODO: save full signature for method - val normalizedLabel = normalizeAstLabel(label) - val normalizedFilepath = normalizeFilepath(filePath) - val nodesMap = dumpAst(root, File(astDirectoryPath, astFilenameFormat.format(index)), normalizedLabel) + val normalizedLabel = normalizeAstLabel(labeledResult.label) + val normalizedFilepath = normalizeFilepath(labeledResult.filePath) + val astDirectoryPath = astDirectoryPaths.getOrPut(holdout) { holdout.resolveHoldout() } + val nodesMap = + dumpAst(labeledResult.root, File(astDirectoryPath, astFilenameFormat.format(index)), normalizedLabel) val nodeDescriptionFormat = "${astFilenameFormat.format(index)},$normalizedFilepath,$normalizedLabel,%d,%s,%s" - for (node in root.preOrder()) { - descriptionFileStream.write(nodeDescriptionFormat.format(nodesMap.getId(node) - 1, node.getNormalizedToken(), node.getTypeLabel()) + "\n") + for (node in labeledResult.root.preOrder()) { + descriptionFileStream.write( + nodeDescriptionFormat.format(nodesMap.getId(node) - 1, node.token, node.typeLabel) + "\n" + ) } ++index } @@ -48,7 
+50,7 @@ class DotAstStorage(override val directoryPath: String) : AstStorage { descriptionFileStream.close() } - private fun dumpAst(root: Node, file: File, astName: String) : RankedIncrementalIdStorage { + private fun dumpAst(root: Node, file: File, astName: String): RankedIncrementalIdStorage { val nodesMap = RankedIncrementalIdStorage() // dot parsers (e.g. pydot) can't parse graph/digraph if its name is "graph" val fixedAstName = if (astName == "graph" || astName == "digraph") "_$astName" else astName @@ -57,10 +59,8 @@ class DotAstStorage(override val directoryPath: String) : AstStorage { out.println("digraph $fixedAstName {") for (node in root.preOrder()) { val rootId = nodesMap.record(node) - 1 - val childrenIds = node.getChildren().map { nodesMap.record(it) - 1 } - out.println( - "$rootId -- {${childrenIds.joinToString(" ") { it.toString() }}};" - ) + val childrenIds = node.children.map { nodesMap.record(it) - 1 } + out.println("$rootId -- {${childrenIds.joinToString(" ") { it.toString() }}};") } out.println("}") @@ -68,16 +68,23 @@ class DotAstStorage(override val directoryPath: String) : AstStorage { return nodesMap } + private fun DatasetHoldout.resolveHoldout(): File { + val outputDir = File(outputDirectoryPath) + val asts = outputDir.resolve(this.dirName).resolve("asts") + asts.mkdirs() + return asts + } + // Label should contain only latin letters, numbers and underscores, other symbols replace with an underscore internal fun normalizeAstLabel(label: String): String = - label.replace("[^A-z^0-9^_]".toRegex(), "_") + label.replace("[^A-z0-9_]".toRegex(), "_") /** * Filepath should contain only latin letters, numbers, underscores, hyphens, backslashes and dots * Underscore replace other symbols */ internal fun normalizeFilepath(filepath: String): String = - filepath.replace("[^A-z^0-9^_^\\-^.^/]".toRegex(), "_") + filepath.replace("[^A-z0-9_\\-./]".toRegex(), "_") /** * Split the full path to specified file into the parent's path, and the file name @@ 
-88,5 +95,4 @@ class DotAstStorage(override val directoryPath: String) : AstStorage { val fileObject = File(fullPath) return FilePath(fileObject.parentFile?.path ?: "", fileObject.name) } - } diff --git a/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt b/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt new file mode 100644 index 00000000..3ce2463e --- /dev/null +++ b/src/main/kotlin/astminer/storage/ast/JsonAstStorage.kt @@ -0,0 +1,96 @@ +package astminer.storage.ast + +import astminer.common.model.DatasetHoldout +import astminer.common.model.LabeledResult +import astminer.common.model.Node +import astminer.common.model.Storage +import kotlinx.serialization.Serializable +import kotlinx.serialization.encodeToString +import kotlinx.serialization.json.Json +import java.io.File +import java.io.PrintWriter + +private typealias Id = Int + +/** + * Formats the output in the json format by flattening the trees. + * Each line in the output file is a single json object that corresponds to one of the labeled trees. + * Each tree is flattened and represented as a list of nodes. + */ +class JsonAstStorage(override val outputDirectoryPath: String, private val withPaths: Boolean) : Storage { + private val treeFlattener = TreeFlattener() + + private val datasetWriters = mutableMapOf() + + init { + val outputDirectory = File(outputDirectoryPath) + outputDirectory.mkdirs() + } + + @Serializable + private data class LabeledAst(val label: String, val path: String? 
= null, val ast: List) + + @Serializable + private data class OutputNode(val token: String, val typeLabel: String, val children: List) + + private fun TreeFlattener.EnumeratedNode.toOutputNode() = + OutputNode(node.token, node.typeLabel, children.map { it.id }) + + override fun store(labeledResult: LabeledResult, holdout: DatasetHoldout) { + val outputNodes = treeFlattener.flatten(labeledResult.root).map { it.toOutputNode() } + val path = if (withPaths) labeledResult.filePath else null + val labeledAst = LabeledAst(labeledResult.label, path, outputNodes) + val writer = datasetWriters.getOrPut(holdout) { holdout.resolveHoldout() } + writer.println(Json.encodeToString(labeledAst)) + } + + override fun close() { + datasetWriters.values.map { it.close() } + } + + private fun DatasetHoldout.resolveHoldout(): PrintWriter { + val holdoutDir = File(outputDirectoryPath).resolve(this.dirName) + holdoutDir.mkdirs() + val astFile = holdoutDir.resolve("asts.jsonl") + astFile.createNewFile() + return PrintWriter(astFile) + } +} + +/** + * Gives ids to all nodes in the tree and flattens the tree + */ +class TreeFlattener { + private var currentId: Id = 0 + + /** + * Node that has been given an Id. + * Also all his children have been given ids. + */ + data class EnumeratedNode(val id: Id, val node: Node, val children: List) + + private fun enumerateTree(node: Node): EnumeratedNode { + val nodeId = currentId + currentId += 1 + return EnumeratedNode(nodeId, node, node.children.map { enumerateTree(it) }) + } + + private fun putFlattenedTree(enumeratedNode: EnumeratedNode, flattenedTree: MutableList) { + flattenedTree.add(enumeratedNode) + for (child in enumeratedNode.children) { + putFlattenedTree(child, flattenedTree) + } + } + + /** + * Enumerates the given tree and returns the flattened tree. 
+ * Enumerated node's id must be equal to its index in the returned list + */ + fun flatten(node: Node): List { + currentId = 0 + val enumeratedTree = enumerateTree(node) + val result = mutableListOf() + putFlattenedTree(enumeratedTree, result) + return result + } +} diff --git a/src/main/kotlin/astminer/storage/path/Code2SeqPathStorage.kt b/src/main/kotlin/astminer/storage/path/Code2SeqPathStorage.kt new file mode 100644 index 00000000..0c5c01c0 --- /dev/null +++ b/src/main/kotlin/astminer/storage/path/Code2SeqPathStorage.kt @@ -0,0 +1,42 @@ +package astminer.storage.path + +import astminer.common.model.LabeledPathContexts +import astminer.common.model.PathContext +import astminer.common.storage.RankedIncrementalIdStorage +import astminer.common.storage.dumpIdStorageToCsv +import java.io.File + +class Code2SeqPathStorage( + outputDirectoryPath: String, + config: PathBasedStorageConfig, + private val nodesToNumbers: Boolean = true +) : PathBasedStorage(outputDirectoryPath, config) { + + private val nodeTypesMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() + + private fun pathContextToString(pathContext: PathContext): String { + val stringNodeSequence = if (nodesToNumbers) { + pathContext.orientedNodeTypes.joinToString("|") { nodeTypesMap.record(it.typeLabel).toString() } + } else { + pathContext.orientedNodeTypes.joinToString("|") { it.typeLabel } + } + return "${pathContext.startToken},$stringNodeSequence,${pathContext.endToken}" + } + + override fun labeledPathContextsToString(labeledPathContexts: LabeledPathContexts): String { + val pathContexts = labeledPathContexts.pathContexts.map { pathContextToString(it) } + return "${labeledPathContexts.label} ${pathContexts.joinToString(" ")}" + } + + override fun close() { + super.close() + if (nodesToNumbers) { + dumpIdStorageToCsv( + nodeTypesMap, + "node_type", + { it }, + File("$outputDirectoryPath/node_types.csv") + ) + } + } +} diff --git a/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt 
b/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt new file mode 100644 index 00000000..39018ba9 --- /dev/null +++ b/src/main/kotlin/astminer/storage/path/Code2VecPathStorage.kt @@ -0,0 +1,73 @@ +package astminer.storage.path + +import astminer.common.model.* +import astminer.common.storage.* +import java.io.File + +class Code2VecPathStorage(outputDirectoryPath: String, private val config: PathBasedStorageConfig) : + PathBasedStorage(outputDirectoryPath, config) { + + private val tokensMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() + private val orientedNodeTypesMap: RankedIncrementalIdStorage = RankedIncrementalIdStorage() + private val pathsMap: RankedIncrementalIdStorage> = RankedIncrementalIdStorage() + + private fun dumpPathContexts(labeledPathContextIds: LabeledPathContextIds): String { + val pathContextIdsString = labeledPathContextIds.pathContexts.filter { + val isNumberOfTokensValid = config.maxTokens == null || + tokensMap.getIdRank(it.startTokenId) <= config.maxTokens && + tokensMap.getIdRank(it.endTokenId) <= config.maxTokens + val isNumberOfPathsValid = config.maxPaths == null || pathsMap.getIdRank(it.pathId) <= config.maxPaths + + isNumberOfTokensValid && isNumberOfPathsValid + } + + return pathContextIdsToString(pathContextIdsString, labeledPathContextIds.label) + } + + private fun storePathContext(pathContext: PathContext): PathContextId { + val startTokenId = tokensMap.record(pathContext.startToken) + val endTokenId = tokensMap.record(pathContext.endToken) + val orientedNodesIds = pathContext.orientedNodeTypes.map { orientedNodeTypesMap.record(it) } + val pathId = pathsMap.record(orientedNodesIds) + return PathContextId(startTokenId, pathId, endTokenId) + } + + override fun labeledPathContextsToString(labeledPathContexts: LabeledPathContexts): String { + val labeledPathContextIds = LabeledPathContextIds( + labeledPathContexts.label, + labeledPathContexts.pathContexts.map { storePathContext(it) } + ) + return 
dumpPathContexts(labeledPathContextIds) + } + + private fun pathContextIdsToString(pathContextIds: List, label: String): String { + val joinedPathContexts = pathContextIds.joinToString(" ") { pathContextId -> + "${pathContextId.startTokenId},${pathContextId.pathId},${pathContextId.endTokenId}" + } + return "$label $joinedPathContexts" + } + + override fun close() { + super.close() + dumpIdStorageToCsv( + tokensMap, + "token", + tokenToCsvString, + File("$outputDirectoryPath/tokens.csv"), + config.maxTokens + ) + dumpIdStorageToCsv( + orientedNodeTypesMap, + "node_type", + orientedNodeToCsvString, + File("$outputDirectoryPath/node_types.csv") + ) + dumpIdStorageToCsv( + pathsMap, + "path", + pathToCsvString, + File("$outputDirectoryPath/paths.csv"), + config.maxPaths + ) + } +} diff --git a/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt new file mode 100644 index 00000000..d41ad286 --- /dev/null +++ b/src/main/kotlin/astminer/storage/path/PathBasedStorage.kt @@ -0,0 +1,85 @@ +package astminer.storage.path + +import astminer.common.model.* +import astminer.common.model.LabeledResult +import astminer.common.model.Storage +import astminer.paths.PathMiner +import astminer.paths.PathRetrievalSettings +import astminer.paths.toPathContext +import java.io.File +import java.io.PrintWriter + +/** + * Config for CountingPathStorage which contains all hyperparameters for path extraction. + * @property maxPathLength The maximum length of a single path (based on the formal math definition of path length) + * @property maxPathWidth The maximum width of a single path (based on the formal math definition of path width) + * @property maxTokens The maximum number of tokens saved per extraction + * @property maxPaths The maximum number of paths saved per extraction + * @property maxPathContextsPerEntity The maximum number of path contexts that should be extracted from tree. 
+ * In other words, the maximum number of path contexts to save from each file/method (depending on granularity) + */ +data class PathBasedStorageConfig( + val maxPathLength: Int, + val maxPathWidth: Int, + val maxTokens: Long? = null, + val maxPaths: Long? = null, + val maxPathContextsPerEntity: Int? = null +) + +/** + * Base class for all path storages. Extracts paths from given LabellingResult and stores it in a specified format. + * @property outputDirectoryPath The path to the output directory. + * @property config The config that contains hyperparameters for path extraction. + */ +abstract class PathBasedStorage( + final override val outputDirectoryPath: String, + private val config: PathBasedStorageConfig, +) : Storage { + + private val pathMiner = PathMiner(PathRetrievalSettings(config.maxPathLength, config.maxPathWidth)) + private val datasetFileWriters = mutableMapOf() + + init { + File(outputDirectoryPath).mkdirs() + } + + private fun retrievePaths(node: Node) = if (config.maxPathContextsPerEntity != null) { + pathMiner.retrievePaths(node).shuffled().take(config.maxPathContextsPerEntity) + } else { + pathMiner.retrievePaths(node) + } + + private fun retrieveLabeledPathContexts(labeledResult: LabeledResult): LabeledPathContexts { + val paths = retrievePaths(labeledResult.root) + return LabeledPathContexts( + labeledResult.label, + paths.map { astPath -> + toPathContext(astPath) { it.token.replace("\n", "\\n") } + } + ) + } + + abstract fun labeledPathContextsToString(labeledPathContexts: LabeledPathContexts): String + + /** + * Extract paths from [labeledResult] and store them in the specified format. 
+ */ + override fun store(labeledResult: LabeledResult, holdout: DatasetHoldout) { + val labeledPathContexts = retrieveLabeledPathContexts(labeledResult) + val output = labeledPathContextsToString(labeledPathContexts) + val writer = datasetFileWriters.getOrPut(holdout) { holdout.resolveWriter() } + writer.println(output) + } + + override fun close() { + datasetFileWriters.values.map { it.close() } + } + + private fun DatasetHoldout.resolveWriter(): PrintWriter { + val holdoutDir = File(outputDirectoryPath).resolve(this.dirName) + holdoutDir.mkdirs() + val pathContextFile = holdoutDir.resolve("path_contexts.c2s") + pathContextFile.createNewFile() + return PrintWriter(pathContextFile) + } +} diff --git a/src/main/resources/simplelogger.properties b/src/main/resources/simplelogger.properties new file mode 100644 index 00000000..bd4d1cd0 --- /dev/null +++ b/src/main/resources/simplelogger.properties @@ -0,0 +1 @@ +org.slf4j.simpleLogger.logFile = log.txt diff --git a/src/test/kotlin/astminer/Utils.kt b/src/test/kotlin/astminer/Utils.kt new file mode 100644 index 00000000..5f421c54 --- /dev/null +++ b/src/test/kotlin/astminer/Utils.kt @@ -0,0 +1,18 @@ +package astminer + +import astminer.common.model.Node +import astminer.common.model.Parser +import java.io.File + +fun checkExecutable(execName: String): Boolean { + val execFolders = System.getenv("PATH").split(File.pathSeparator) + execFolders.forEach { + val folderFiles = File(it).list() ?: return@forEach + if (folderFiles.contains(execName)) { + return true + } + } + return false +} + +fun Parser.parseFiles(files: List) = files.map { parseFile(it) } diff --git a/src/test/kotlin/astminer/ast/CsvAstStorageTest.kt b/src/test/kotlin/astminer/ast/CsvAstStorageTest.kt deleted file mode 100644 index 5f4398cf..00000000 --- a/src/test/kotlin/astminer/ast/CsvAstStorageTest.kt +++ /dev/null @@ -1,18 +0,0 @@ -package astminer.ast - -import astminer.common.createSmallTree -import org.junit.Assert -import org.junit.Test - -class 
CsvAstStorageTest { - - @Test - fun testAstString() { - val root = createSmallTree() - val storage = CsvAstStorage(".") - storage.store(root, "entityId") - - Assert.assertEquals(storage.astString(root), "1 1{2 2{}3 3{4 4{}}}") - } - -} \ No newline at end of file diff --git a/src/test/kotlin/astminer/ast/DotAstStorageTest.kt b/src/test/kotlin/astminer/ast/DotAstStorageTest.kt deleted file mode 100644 index d98ce4f2..00000000 --- a/src/test/kotlin/astminer/ast/DotAstStorageTest.kt +++ /dev/null @@ -1,89 +0,0 @@ -package astminer.ast - -import astminer.common.createSmallTree -import org.junit.Test -import java.io.File -import kotlin.test.assertEquals - -class DotAstStorageTest { - - @Test - fun testDotStorageOnSmallTree() { - val root = createSmallTree() - val storage = DotAstStorage("test_examples") - storage.store(root, "entityId") - - storage.close() - - val trueLines = listOf( - "digraph entityId {", - "0 -- {1 2};", - "1 -- {};", - "2 -- {3};", - "3 -- {};", - "}" - ) - val storageLines = File(File("test_examples", "asts"), "ast_0.dot").readLines() - - File("test_examples").deleteRecursively() - - assertEquals(trueLines, storageLines) - } - - @Test - fun testLabelNormalization() { - val label = "some/kind/of/random/path" - val storage = DotAstStorage(".") - val normalizedLabel = storage.normalizeAstLabel(label) - - assertEquals("some_kind_of_random_path", normalizedLabel) - } - - @Test - fun testBindingNormalization() { - val label = "\$supposeToBeListener" - val storage = DotAstStorage(".") - val normalizedLabel = storage.normalizeAstLabel(label) - - assertEquals("_supposeToBeListener", normalizedLabel) - } - - @Test - fun testLabelWithCommaNormalization() { - val labelWithComma = "some,bad,label" - val storage = DotAstStorage(".") - val normalizedLabel = storage.normalizeAstLabel(labelWithComma) - - assertEquals("some_bad_label", normalizedLabel) - } - - @Test - fun testSplittingFullPath() { - val fullPath = "/path1/path2/path_3/path.4/file.name" - val storage 
= DotAstStorage(".") - val (path, fileName) = storage.splitFullPath(fullPath) - - assertEquals("/path1/path2/path_3/path.4", path) - assertEquals("file.name", fileName) - } - - @Test - fun testSplittingFileName() { - val fullPath = "file.name" - val storage = DotAstStorage(".") - val (path, fileName) = storage.splitFullPath(fullPath) - - assertEquals("", path) - assertEquals("file.name", fileName) - } - - @Test - fun testFilepathNormalization() { - // real life example - val badFilepath = "interviews/Leet-Code/binary-search/pow(x,n).java" - val storage = DotAstStorage(".") - val normalizedFilepath = storage.normalizeFilepath(badFilepath) - - assertEquals("interviews/Leet-Code/binary-search/pow_x_n_.java", normalizedFilepath) - } -} \ No newline at end of file diff --git a/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt b/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt deleted file mode 100644 index eb69e393..00000000 --- a/src/test/kotlin/astminer/cli/Code2VecExtractorTest.kt +++ /dev/null @@ -1,26 +0,0 @@ -package astminer.cli - -import astminer.cli.util.CliArgs -import astminer.cli.util.languagesToString -import astminer.cli.util.verifyPathContextExtraction -import org.junit.Test -import java.io.File - -internal class Code2VecExtractorTest { - private val testDataDir = File("src/test/resources") - private val code2VecExtractor = Code2VecExtractor() - - @Test - fun testDefaultExtraction() { - val extractedDataDir = createTempDir("extractedData") - val languages = listOf("java", "py") - val cliArgs = CliArgs.Builder(testDataDir, extractedDataDir) - .extensions(languagesToString(languages)) - .build() - - code2VecExtractor.main(cliArgs.args) - verifyPathContextExtraction(extractedDataDir, languages, false) - } -} - - diff --git a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt b/src/test/kotlin/astminer/cli/LabelExtractorTest.kt deleted file mode 100644 index 5e3c44be..00000000 --- a/src/test/kotlin/astminer/cli/LabelExtractorTest.kt +++ /dev/null @@ 
-1,90 +0,0 @@ -package astminer.cli - -import astminer.common.getNormalizedToken -import astminer.common.model.ElementNode -import astminer.common.model.MethodInfo -import astminer.common.model.MethodNode -import astminer.common.model.ParseResult -import astminer.parse.antlr.SimpleNode -import org.junit.Test -import kotlin.test.assertEquals -import kotlin.test.assertTrue - -internal class LabelExtractorTest { - - companion object { - private const val PATH_STRING = "random/folder/file.txt" - private const val FOLDER = "folder" - private const val FILENAME = "file.txt" - private const val METHOD_NAME = "method" - private val DUMMY_ROOT = SimpleNode("", null, null) - } - - @Test - fun testEmptyFilePathExtractor() { - val labelExtractor = FilePathExtractor() - val emptyParseResult = ParseResult(null, PATH_STRING) - val labeledParseResults = labelExtractor.toLabeledData(emptyParseResult) - assertTrue { labeledParseResults.isEmpty() } - } - - @Test - fun testNonEmptyFilePathExtractor() { - val labelExtractor = FilePathExtractor() - val nonEmptyParseResult = ParseResult(DUMMY_ROOT, PATH_STRING) - val labeledParseResults = labelExtractor.toLabeledData(nonEmptyParseResult) - assertEquals(1, labeledParseResults.size) - val (root, label) = labeledParseResults[0] - assertEquals(DUMMY_ROOT, root) - assertEquals(PATH_STRING, label) - } - - @Test - fun testEmptyFolderExtractor() { - val labelExtractor = FolderExtractor() - val emptyParseResult = ParseResult(null, PATH_STRING) - val labeledParseResults = labelExtractor.toLabeledData(emptyParseResult) - assertTrue { labeledParseResults.isEmpty() } - } - - @Test - fun testNonEmptyFolderExtractor() { - val labelExtractor = FolderExtractor() - val nonEmptyParseResult = ParseResult(DUMMY_ROOT, PATH_STRING) - val labeledParseResults = labelExtractor.toLabeledData(nonEmptyParseResult) - assertEquals(1, labeledParseResults.size) - val (root, label) = labeledParseResults[0] - assertEquals(DUMMY_ROOT, root) - assertEquals(FOLDER, label) - 
} - - @Test - fun testMethodNameExtractor() { - val nameNode = SimpleNode("", DUMMY_ROOT, METHOD_NAME) - val methodInfo = MethodInfo( - MethodNode(DUMMY_ROOT, null, nameNode), - ElementNode(null, null), - emptyList() - ) - processNodeToken(nameNode, false) - val methodNameExtractor = MethodNameExtractor(false) - val label = methodNameExtractor.extractLabel(methodInfo, PATH_STRING) - assertEquals(METHOD_NAME, label) - assertEquals(METHOD_NAME, nameNode.getNormalizedToken()) - } - - @Test - fun testMethodNameExtractorHide() { - val nameNode = SimpleNode("", DUMMY_ROOT, METHOD_NAME) - val methodInfo = MethodInfo( - MethodNode(DUMMY_ROOT, null, nameNode), - ElementNode(null, null), - emptyList() - ) - processNodeToken(nameNode, false) - val methodNameExtractor = MethodNameExtractor(true) - val label = methodNameExtractor.extractLabel(methodInfo, PATH_STRING) - assertEquals(METHOD_NAME, label) - assertEquals("METHOD_NAME", nameNode.getNormalizedToken()) - } -} diff --git a/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt b/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt deleted file mode 100644 index 39adb422..00000000 --- a/src/test/kotlin/astminer/cli/PathContextsExtractorTest.kt +++ /dev/null @@ -1,24 +0,0 @@ -package astminer.cli - -import astminer.cli.util.CliArgs -import astminer.cli.util.languagesToString -import astminer.cli.util.verifyPathContextExtraction -import org.junit.Test -import java.io.File - -internal class PathContextsExtractorTest { - private val testDataDir = File("src/test/resources") - private val pathContextsExtractor = PathContextsExtractor() - - @Test - fun testDefaultExtraction() { - val extractedDataDir = createTempDir("extractedData") - val languages = listOf("java", "py") - val cliArgs = CliArgs.Builder(testDataDir, extractedDataDir) - .extensions(languagesToString(languages)) - .build() - - pathContextsExtractor.main(cliArgs.args) - verifyPathContextExtraction(extractedDataDir, languages, false) - } -} \ No newline at 
end of file diff --git a/src/test/kotlin/astminer/cli/util/CliArgs.kt b/src/test/kotlin/astminer/cli/util/CliArgs.kt deleted file mode 100644 index 734db41b..00000000 --- a/src/test/kotlin/astminer/cli/util/CliArgs.kt +++ /dev/null @@ -1,44 +0,0 @@ -package astminer.cli.util - -import java.io.File - -class CliArgs private constructor(val args: List) { - - data class Builder(val testDataDir: File, val extractedDataDir: File) { - val args = mutableListOf( - "--project", testDataDir.path, - "--output", extractedDataDir.path - ) - fun extensions(extensions: String) = apply { - args.add("--lang") - args.add(extensions) - } - - fun maxPathLength(l: Int) = apply { - args.add("--maxL") - args.add(l.toString()) - } - - fun maxPathWidth(w: Int) = apply { - args.add("--maxW") - args.add(w.toString()) - } - - fun maxPathContexts(maxPC: Int)= apply { - args.add("--maxContexts") - args.add(maxPC.toString()) - } - - fun maxTokens(nTokens: Long) = apply { - args.add("--maxTokens") - args.add(nTokens.toString()) - } - - fun maxPaths(nPaths: Long) = apply { - args.add("--maxPaths") - args.add(nPaths.toString()) - } - - fun build() = CliArgs(args) - } -} diff --git a/src/test/kotlin/astminer/cli/util/Util.kt b/src/test/kotlin/astminer/cli/util/Util.kt deleted file mode 100644 index 30d119fd..00000000 --- a/src/test/kotlin/astminer/cli/util/Util.kt +++ /dev/null @@ -1,3 +0,0 @@ -package astminer.cli.util - -fun languagesToString(languages: List) = languages.joinToString(",") diff --git a/src/test/kotlin/astminer/common/DummyNode.kt b/src/test/kotlin/astminer/common/DummyNode.kt new file mode 100644 index 00000000..91aaa046 --- /dev/null +++ b/src/test/kotlin/astminer/common/DummyNode.kt @@ -0,0 +1,90 @@ +package astminer.common + +import astminer.common.model.* +import java.io.File + +class DummyNode( + override val typeLabel: String, + override val children: MutableList = mutableListOf() +) : Node(typeLabel) { + + override val parent: Node? 
= null + + init { + // Tokens may change after normalization, for tests we want tokens to be unchanged + technicalToken = typeLabel + } + + override fun removeChildrenOfType(typeLabel: String) { + children.removeIf { it.typeLabel == typeLabel } + } + + fun toParseResult() = DummyParsingResult(File("."), this) + + fun labeledWith(label: String) = LabeledResult(this, label, "") +} + +class DummyParsingResult(file: File, override val root: DummyNode) : ParsingResult(file) { + override val splitter: TreeFunctionSplitter = object : TreeFunctionSplitter { + override fun splitIntoFunctions(root: DummyNode, filePath: String) = listOf>() + } +} + +/** + * Returns a small tree. + * Diagram: + * 1 + * / \ + * / \ + * 2 3 + * / | \ / \ + * 4 5 6 7 8 + * + */ +fun createDummyTree(): DummyNode { + val node4 = DummyNode("4", mutableListOf()) + val node5 = DummyNode("5", mutableListOf()) + val node6 = DummyNode("6", mutableListOf()) + val node7 = DummyNode("7", mutableListOf()) + val node8 = DummyNode("8", mutableListOf()) + + val node2 = DummyNode("2", mutableListOf(node4, node5, node6)) + val node3 = DummyNode("3", mutableListOf(node7, node8)) + + return DummyNode("1", mutableListOf(node2, node3)) +} + +/** + * Returns a small tree. 
+ * Diagram: + * 1 + * / \ + * 2 3 + * \ + * 4 + */ +fun createSmallTree(): DummyNode { + val node4 = DummyNode("4", mutableListOf()) + val node3 = DummyNode("3", mutableListOf(node4)) + val node2 = DummyNode("2", mutableListOf()) + val node1 = DummyNode("1", mutableListOf(node2, node3)) + + return node1 +} + +/** + * Creates a bamboo + * Diagram for [size] 3: + * 1 + * \ + * 2 + * \ + * 3 + */ +fun createBamboo(size: Int): DummyNode { + var root = DummyNode(size.toString(), mutableListOf()) + for (i in 1 until size) { + root = DummyNode((size - i).toString(), mutableListOf(root)) + } + return root +} diff --git a/src/test/kotlin/astminer/common/FileParsingUtilTest.kt b/src/test/kotlin/astminer/common/FileParsingUtilTest.kt index 0ea4a52d..cd7948ac 100644 --- a/src/test/kotlin/astminer/common/FileParsingUtilTest.kt +++ b/src/test/kotlin/astminer/common/FileParsingUtilTest.kt @@ -22,7 +22,10 @@ class FileParsingUtilTest { file.writeText(text) file = changeExtensionTo(file, newExtension) - Assert.assertTrue("File extension should be changed but its content should not", file.extension == newExtension && file.readText() == text) + Assert.assertTrue( + "File extension should be changed but its content should not", + file.extension == newExtension && file.readText() == text + ) file.delete() } @@ -35,7 +38,11 @@ class FileParsingUtilTest { file.writeText(text) addClassWrapper(file, "Foo") - Assert.assertEquals("File wrapper should be added with braces and newlines", file.readText(), "class Foo {\n$text\n}") + Assert.assertEquals( + "File wrapper should be added with braces and newlines", + file.readText(), + "class Foo {\n$text\n}" + ) file.delete() } @@ -51,5 +58,4 @@ class FileParsingUtilTest { val file = File("src/test/resources/common/NonParsableFile.java") Assert.assertTrue("This file has syntax errors", hasSyntaxErrors(file)) } - -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/common/TestUtils.kt 
b/src/test/kotlin/astminer/common/TestUtils.kt deleted file mode 100644 index cbeaeed1..00000000 --- a/src/test/kotlin/astminer/common/TestUtils.kt +++ /dev/null @@ -1,61 +0,0 @@ -package astminer.common - -import astminer.common.model.Node - - -class DummyNode(val data: String, val childrenList: MutableList) : Node { - override fun setMetadata(key: String, value: Any) { - - } - - override fun getMetadata(key: String): Any? { - return null - } - - override fun isLeaf(): Boolean { - return childrenList.isEmpty() - } - - override fun getTypeLabel(): String { - return data - } - - override fun getChildren(): List { - return childrenList - } - - override fun getParent(): Node? { - TODO("not implemented") //To change body of created functions use File | Settings | File Templates. - } - - override fun getToken(): String { - return data - } - - override fun removeChildrenOfType(typeLabel: String) { - childrenList.removeIf { it.getTypeLabel() == typeLabel } - } - -} - -fun createDummyTree(): DummyNode { - val node4 = DummyNode("4", mutableListOf()) - val node5 = DummyNode("5", mutableListOf()) - val node6 = DummyNode("6", mutableListOf()) - val node7 = DummyNode("7", mutableListOf()) - val node8 = DummyNode("8", mutableListOf()) - - val node2 = DummyNode("2", mutableListOf(node4, node5, node6)) - val node3 = DummyNode("3", mutableListOf(node7, node8)) - - return DummyNode("1", mutableListOf(node2, node3)) -} - -fun createSmallTree(): DummyNode { - val node4 = DummyNode("4", mutableListOf()) - val node3 = DummyNode("3", mutableListOf(node4)) - val node2 = DummyNode("2", mutableListOf()) - val node1 = DummyNode("1", mutableListOf(node2, node3)) - - return node1 -} diff --git a/src/test/kotlin/astminer/common/TreeUtilTest.kt b/src/test/kotlin/astminer/common/TreeUtilTest.kt index 8834a931..7a263f7e 100644 --- a/src/test/kotlin/astminer/common/TreeUtilTest.kt +++ b/src/test/kotlin/astminer/common/TreeUtilTest.kt @@ -4,32 +4,32 @@ import org.junit.Assert import org.junit.Test 
class TreeUtilTest { + private val defaultToken = "EMPTY" + @Test - fun testPostOrder() { + fun testPreOrder() { val root = createDummyTree() - val dataList = root.postOrderIterator().asSequence().map { it.getTypeLabel() } + val dataList = root.preOrderIterator().asSequence().map { it.typeLabel } - Assert.assertArrayEquals(arrayOf("4", "5", "6", "2", "7", "8", "3", "1"), dataList.toList().toTypedArray()) + Assert.assertArrayEquals(arrayOf("1", "2", "4", "5", "6", "3", "7", "8"), dataList.toList().toTypedArray()) } @Test - fun testPreOrder() { + fun testPostOrder() { val root = createDummyTree() - val dataList = root.preOrderIterator().asSequence().map { it.getTypeLabel() } + val dataList = root.postOrderIterator().asSequence().map { it.typeLabel } - Assert.assertArrayEquals(arrayOf("1", "2", "4", "5", "6", "3", "7", "8"), dataList.toList().toTypedArray()) + Assert.assertArrayEquals(arrayOf("4", "5", "6", "2", "7", "8", "3", "1"), dataList.toList().toTypedArray()) } - private val defaultToken = "EMPTY_TOKEN" - @Test fun testNormalizeTokenCleaning() { val token = " Token THAT \n contains Whi\"t,es''pace characters!!!and pu.n.c.t.u.a.tion \n" val expectedToken = "token" + "that" + "contains" + "whitespace" + "characters" + "and" + "punctuation" Assert.assertEquals( - "All whitespace characters and punctuation should be removed, keeping only letters", - expectedToken, - normalizeToken(token, defaultToken) + "All whitespace characters and punctuation should be removed, keeping only letters", + expectedToken, + normalizeToken(token, defaultToken) ) } @@ -38,20 +38,20 @@ class TreeUtilTest { val token = "* *\n" val expectedToken = "*_*" Assert.assertEquals( - "Token without letters have whitespaces replaced with underscores", - expectedToken, - normalizeToken(token, defaultToken) + "Token without letters have whitespaces replaced with underscores", + expectedToken, + normalizeToken(token, defaultToken) ) } @Test fun testNormalizeEmptyToken() { val token = "\n\n" - val 
expectedToken = DEFAULT_TOKEN + val expectedToken = EMPTY_TOKEN Assert.assertEquals( - "Token without letters have whitespaces replaced with underscores", - expectedToken, - normalizeToken(token, defaultToken) + "Token without letters have whitespaces replaced with underscores", + expectedToken, + normalizeToken(token, defaultToken) ) } @@ -60,9 +60,9 @@ class TreeUtilTest { val token = "fun_withReallyLong_And_ComplicatedName" val expectedToken = listOf("fun", "with", "really", "long", "and", "complicated", "name") Assert.assertEquals( - "Token with snake, camel and combined case should be split into list of its parts", - expectedToken, - splitToSubtokens(token) + "Token with snake, camel and combined case should be split into list of its parts", + expectedToken, + splitToSubtokens(token) ) } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/common/storage/RankedIncrementalIdStorageTest.kt b/src/test/kotlin/astminer/common/storage/RankedIncrementalIdStorageTest.kt index 7ca1a5cb..38b796b0 100644 --- a/src/test/kotlin/astminer/common/storage/RankedIncrementalIdStorageTest.kt +++ b/src/test/kotlin/astminer/common/storage/RankedIncrementalIdStorageTest.kt @@ -44,7 +44,7 @@ class RankedIncrementalIdStorageTest { val correctRanks = listOf(2, 3, 4, 1, 5) for (i in items.indices) { - for (rep in 0 until counts[i]) { + repeat(counts[i]) { storage.record(items[i]) } } @@ -55,4 +55,4 @@ class RankedIncrementalIdStorageTest { assertEquals(correctRanks[i].toLong(), storage.getKeyRank(items[i])) } } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/featureextraction/PrettyNode.kt b/src/test/kotlin/astminer/featureextraction/PrettyNode.kt new file mode 100644 index 00000000..d0867ce0 --- /dev/null +++ b/src/test/kotlin/astminer/featureextraction/PrettyNode.kt @@ -0,0 +1,50 @@ +package astminer.featureextraction + +import astminer.common.model.Node + +class PrettyNode(override val typeLabel: String, originalToken: String) : 
Node(originalToken) { + override var children: MutableList = ArrayList() + override var parent: PrettyNode? = null + set(value) { + value?.addChild(this) + field = value + } + + fun addChild(node: PrettyNode) = children.add(node) + + fun toPrettyString(indent: Int = 0, indentSymbol: String = "--"): String = with(StringBuilder()) { + repeat(indent) { append(indentSymbol) } + append(typeLabel) + if (token.isNotEmpty()) { + appendLine(" : $token") + } else { + appendLine() + } + children.forEach { append(it.toPrettyString(indent + 1, indentSymbol)) } + toString() + } + + override fun removeChildrenOfType(typeLabel: String) { + children.removeIf { it.typeLabel == typeLabel } + } +} + +fun restoreFromPrettyPrint(prettyPrintedTree: String, indentSymbol: String = "--"): PrettyNode { + val lastNodeByIndent = HashMap() + val tree = prettyPrintedTree.lines().map { s -> + val (node, indent) = restorePrintedNode(s, indentSymbol) + lastNodeByIndent[indent] = node + node.parent = lastNodeByIndent[indent - 1] + node + } + return tree.first() +} + +fun restorePrintedNode(printedNode: String, indentSymbol: String = "--"): Pair { + val indents = Regex("^($indentSymbol)*").find(printedNode)?.value ?: "" + val nodeString = printedNode.substringAfter(indents) + val type = nodeString.substringBefore(" : ") + val token = nodeString.substringAfter(" : ", "") + val indent = indents.length / indentSymbol.length + return PrettyNode(type, token) to indent +} diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTest.kt b/src/test/kotlin/astminer/featureextraction/TreeFeatureTest.kt index 97167ba4..3a5a1a2a 100644 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTest.kt +++ b/src/test/kotlin/astminer/featureextraction/TreeFeatureTest.kt @@ -9,42 +9,42 @@ class TreeFeatureTest { @Test fun testDepthFeature() { val printedTree = File("src/test/resources/featureextraction/prettyTree.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! 
+ val tree: PrettyNode = restoreFromPrettyPrint(printedTree) Assert.assertEquals(4, Depth.compute(tree)) } @Test fun testNumberOfNodes() { val printedTree = File("src/test/resources/featureextraction/prettyTree.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! + val tree: PrettyNode = restoreFromPrettyPrint(printedTree) Assert.assertEquals(12, NumberOfNodes.compute(tree)) } @Test fun testBranchingFactorOfLeaf() { val printedTree = File("src/test/resources/featureextraction/prettyLeaf.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! + val tree: PrettyNode = restoreFromPrettyPrint(printedTree) Assert.assertTrue(BranchingFactor.compute(tree) == 0.0) } @Test fun testBranchingFactor() { val printedTree = File("src/test/resources/featureextraction/prettyTree_bf.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! + val tree: PrettyNode = restoreFromPrettyPrint(printedTree) Assert.assertTrue(BranchingFactor.compute(tree) == 2.0) } @Test fun testCompressiblePathLengthsInLeaf() { val printedTree = File("src/test/resources/featureextraction/prettyLeaf.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! + val tree: PrettyNode = restoreFromPrettyPrint(printedTree) Assert.assertEquals(listOf(), CompressiblePathLengths.compute(tree)) } @Test fun testCompressiblePathLengths() { val printedTree = File("src/test/resources/featureextraction/prettyTree_paths.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! + val tree: PrettyNode = restoreFromPrettyPrint(printedTree) val expected = listOf(4, 1, 2, 2, 4, 4, 5).sorted() Assert.assertEquals(expected, CompressiblePathLengths.compute(tree).sorted()) } @@ -52,7 +52,7 @@ class TreeFeatureTest { @Test fun testNodeTypes() { val printedTree = File("src/test/resources/featureextraction/prettyTree.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! 
+ val tree: PrettyNode = restoreFromPrettyPrint(printedTree) val expected = listOf("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11").sorted() Assert.assertEquals(expected, NodeTypes.compute(tree).sorted()) } @@ -60,9 +60,8 @@ class TreeFeatureTest { @Test fun testTokens() { val printedTree = File("src/test/resources/featureextraction/prettyTree.txt").readText() - val tree: PrettyNode = restoreFromPrettyPrint(printedTree)!! + val tree: PrettyNode = restoreFromPrettyPrint(printedTree) val expected = listOf("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l").sorted() Assert.assertEquals(expected, Tokens.compute(tree).sorted()) } - -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt deleted file mode 100644 index c0558bd6..00000000 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtil.kt +++ /dev/null @@ -1,68 +0,0 @@ -package astminer.featureextraction - -import astminer.common.model.Node - -class PrettyNode(private val type: String, private val token: String) : Node { - private var children: MutableList = ArrayList() - private var parent: PrettyNode? = null - private val metadata: MutableMap = HashMap() - - override fun getChildren(): MutableList = children - - override fun getParent(): PrettyNode? = parent - - fun addChild(node: PrettyNode) = children.add(node) - - fun setParent(node: PrettyNode?) 
{ - node?.addChild(this) - parent = node - } - - fun toPrettyString(indent: Int = 0, indentSymbol: String = "--") : String = with(StringBuilder()) { - repeat(indent) { append(indentSymbol) } - append(getTypeLabel()) - if (getToken().isNotEmpty()) { - appendln(" : ${getToken()}") - } else { - appendln() - } - getChildren().forEach { append(it.toPrettyString(indent + 1, indentSymbol)) } - toString() - } - - override fun getToken(): String = token - - override fun isLeaf(): Boolean = children.isEmpty() - - override fun getMetadata(key: String): Any? = metadata[key] - - override fun setMetadata(key: String, value: Any) = metadata.set(key, value) - - override fun getTypeLabel(): String = type - - override fun removeChildrenOfType(typeLabel: String) { - children.removeIf { it.getTypeLabel() == typeLabel } - } - -} - -fun restoreFromPrettyPrint(prettyPrintedTree: String, indentSymbol: String = "--") : PrettyNode? { - val lastNodeByIndent = HashMap() - val tree = prettyPrintedTree.lines().map { s -> - val (node, indent) = restorePrintedNode(s, indentSymbol) - lastNodeByIndent[indent] = node - node.setParent(lastNodeByIndent[indent - 1]) - node - } - return tree.first() -} - - -fun restorePrintedNode(printedNode: String, indentSymbol: String = "--") : Pair { - val indents = Regex("^($indentSymbol)*").find(printedNode)?.value ?: "" - val nodeString = printedNode.substringAfter(indents) - val type = nodeString.substringBefore(" : ") - val token = nodeString.substringAfter(" : ", "") - val indent = indents.length / indentSymbol.length - return PrettyNode(type, token) to indent -} diff --git a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt index 2ca6fce2..4e44b56b 100644 --- a/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt +++ b/src/test/kotlin/astminer/featureextraction/TreeFeatureTestUtilTest.kt @@ -16,18 +16,18 @@ class TreeFeatureTestUtilTest { val node6 = 
PrettyNode("6", "g") val node7 = PrettyNode("7", "h") - node1.setParent(node0) - node2.setParent(node0) - node3.setParent(node0) - node4.setParent(node1) - node5.setParent(node4) - node6.setParent(node1) - node7.setParent(node3) + node1.parent = node0 + node2.parent = node0 + node3.parent = node0 + node4.parent = node1 + node5.parent = node4 + node6.parent = node1 + node7.parent = node3 val prettyTree = node0.toPrettyString() - val restoredTree = restoreFromPrettyPrint(prettyTree)!! + val restoredTree = restoreFromPrettyPrint(prettyTree) val prettyRestoredTree = restoredTree.toPrettyString() Assert.assertEquals(prettyTree, prettyRestoredTree) } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/filters/FileFiltersTest.kt b/src/test/kotlin/astminer/filters/FileFiltersTest.kt new file mode 100644 index 00000000..80557632 --- /dev/null +++ b/src/test/kotlin/astminer/filters/FileFiltersTest.kt @@ -0,0 +1,32 @@ +package astminer.filters + +import astminer.common.createBamboo +import org.junit.Test +import kotlin.test.assertFalse +import kotlin.test.assertTrue + +internal class FileFiltersTest { + @Test + fun `test TreeSizeFilter for 100 should exclude bamboo of length 101`() { + val node = createBamboo(101).toParseResult() + assertFalse { TreeSizeFilter(maxSize = 100).validate(node) } + } + + @Test + fun `test TreeSizeFilter for 10 should not exclude bamboo of length 5`() { + val node = createBamboo(5).toParseResult() + assertTrue { TreeSizeFilter(maxSize = 10).validate(node) } + } + + @Test + fun `test TreeSizeFilter for minSize 10 should exclude bamboo of size 5`() { + val node = createBamboo(5).toParseResult() + assertFalse { TreeSizeFilter(minSize = 10).validate(node) } + } + + @Test + fun `test TreeSizeFilter for minSize 10 should not exclude bamboo of size 100`() { + val node = createBamboo(100).toParseResult() + assertTrue { TreeSizeFilter(minSize = 10).validate(node) } + } +} diff --git 
a/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt new file mode 100644 index 00000000..bc804f71 --- /dev/null +++ b/src/test/kotlin/astminer/filters/FunctionFiltersTest.kt @@ -0,0 +1,131 @@ +package astminer.filters + +import astminer.common.createBamboo +import astminer.common.model.FunctionInfo +import astminer.common.model.Node +import astminer.parse.antlr.AntlrNode +import org.junit.Test +import kotlin.test.assertFalse +import kotlin.test.assertTrue + +class FunctionFiltersTest { + @Test + fun `test ModifierFilter should exclude function if it has the excluded modifier`() { + val excludedModifiers = listOf("a", "b") + val functionInfo = object : FunctionInfo { + override val modifiers: List = listOf("b", "c") + } + assertFalse { ModifierFilter(excludedModifiers).validate(functionInfo) } + } + + @Test + fun `test ModifierFilter should not exclude function if it does not have the excluded modifier`() { + val excludedModifiers = listOf("a", "b") + val functionInfo = object : FunctionInfo { + override val modifiers: List = listOf("c", "d") + } + assertTrue { ModifierFilter(excludedModifiers).validate(functionInfo) } + } + + @Test + fun `test AnnotationFilter should exclude function if it has the excluded modifier`() { + val excludedModifiers = listOf("a", "b") + val functionInfo = object : FunctionInfo { + override val annotations: List = listOf("a", "c") + } + assertFalse { AnnotationFilter(excludedModifiers).validate(functionInfo) } + } + + @Test + fun `test AnnotationFilter should not exclude function if it does not have the excluded modifier`() { + val excludedModifiers = listOf("a", "b") + val functionInfo = object : FunctionInfo { + override val annotations: List = listOf("y", "x") + } + assertTrue { AnnotationFilter(excludedModifiers).validate(functionInfo) } + } + + @Test + fun `test ConstructorFilter should exclude constructor functions`() { + val functionInfo = object : FunctionInfo { + 
override val isConstructor = true + } + assertFalse { ConstructorFilter.validate(functionInfo) } + } + + @Test + fun `test ConstructorFilter should not exclude non-constructor functions`() { + val functionInfo = object : FunctionInfo { + override val isConstructor = false + } + assertTrue { ConstructorFilter.validate(functionInfo) } + } + + @Test + fun `test FunctionNameWordsNumberFilter for 50 should exclude function with name of 100 words`() { + val functionInfo = object : FunctionInfo { + override val name = "Word".repeat(100) + } + assertFalse { FunctionNameWordsNumberFilter(50).validate(functionInfo) } + } + + @Test + fun `test WordsNumberFilter for 101 should not exclude function with name of 100 words`() { + val functionInfo = object : FunctionInfo { + override val name = "Word".repeat(100) + } + assertTrue { FunctionNameWordsNumberFilter(101).validate(functionInfo) } + } + + @Test + fun `test WordsNumberFilter for 50 should exclude function with name of 100 words`() { + val functionInfo = object : FunctionInfo { + override val root = AntlrNode("", null, "Word".repeat(100)) + } + assertFalse { WordsNumberFilter(50).validate(functionInfo) } + } + + @Test + fun `test WordsNumberFilter for 2 should exclude function that has a child of 3 words`() { + val root = AntlrNode("", null, "word") + val child = AntlrNode("", root, "wordWordWord") + root.replaceChildren(listOf(child)) + + val functionInfo = object : FunctionInfo { + override val root = root + } + assertFalse { WordsNumberFilter(2).validate(functionInfo) } + } + + @Test + fun `test TreeSizeFilter for 100 should exclude bamboo of length 101`() { + val functionInfo = object : FunctionInfo { + override val root = createBamboo(101) + } + assertFalse { TreeSizeFilter(maxSize = 100).validate(functionInfo) } + } + + @Test + fun `test TreeSizeFilter for 10 should not exclude bamboo of length 5`() { + val functionInfo = object : FunctionInfo { + override val root = createBamboo(5) + } + assertTrue { 
TreeSizeFilter(maxSize = 10).validate(functionInfo) } + } + + @Test + fun `test TreeSizeFilter for minSize 100 should exclude bamboo of length 5`() { + val functionInfo = object : FunctionInfo { + override val root = createBamboo(5) + } + assertFalse { TreeSizeFilter(minSize = 100).validate(functionInfo) } + } + + @Test + fun `test TreeSizeFilter for (10, 100) should not exclude bambo of size 50 `() { + val functionInfo = object : FunctionInfo { + override val root = createBamboo(50) + } + assertTrue { TreeSizeFilter(10, 100).validate(functionInfo) } + } +} diff --git a/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt b/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt new file mode 100644 index 00000000..bce37ead --- /dev/null +++ b/src/test/kotlin/astminer/labelextractor/FileNameExtractorTest.kt @@ -0,0 +1,24 @@ +package astminer.labelextractor + +import astminer.common.DummyNode +import astminer.common.DummyParsingResult +import astminer.common.model.LabeledResult +import org.junit.Test +import java.io.File +import kotlin.test.assertEquals + +class FileNameExtractorTest { + @Test + fun `test file path extractor returns the same root and file path and labels with file path`() { + val nonEmptyParseResult = DummyParsingResult(File(PATH), dummyRoot) + val labeledParseResult = FileNameExtractor.process(nonEmptyParseResult) + + assertEquals(LabeledResult(dummyRoot, FILE_NAME, PATH), labeledParseResult) + } + + companion object { + private const val FILE_NAME = "file.txt" + private const val PATH = "random/folder/$FILE_NAME" + private var dummyRoot = DummyNode("") + } +} diff --git a/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt b/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt new file mode 100644 index 00000000..ed2fbbe7 --- /dev/null +++ b/src/test/kotlin/astminer/labelextractor/FolderNameExtractorTest.kt @@ -0,0 +1,34 @@ +package astminer.labelextractor + +import astminer.common.DummyNode 
+import astminer.common.DummyParsingResult +import astminer.common.model.LabeledResult +import org.junit.Test +import java.io.File +import kotlin.test.assertEquals +import kotlin.test.assertNull + +class FolderNameExtractorTest { + + @Test + fun `test folder extractor returns null when folder is empty or not found`() { + val nonEmptyParseResult = DummyParsingResult(File(""), dummyRoot) + val labeledParseResult = FolderNameExtractor.process(nonEmptyParseResult) + + assertNull(labeledParseResult) + } + + @Test + fun `test folder extractor extracts folder when it is not empty`() { + val nonEmptyParseResult = DummyParsingResult(File(PATH), dummyRoot) + val labeledParseResult = FolderNameExtractor.process(nonEmptyParseResult) + + assertEquals(LabeledResult(dummyRoot, FOLDER, PATH), labeledParseResult) + } + + companion object { + private const val PATH = "random/folder/file.txt" + private const val FOLDER = "folder" + private var dummyRoot = DummyNode("") + } +} diff --git a/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt b/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt new file mode 100644 index 00000000..e15966f8 --- /dev/null +++ b/src/test/kotlin/astminer/labelextractor/FunctionNameLabelExtractorTest.kt @@ -0,0 +1,58 @@ +package astminer.labelextractor + +import astminer.common.DummyNode +import astminer.common.model.FunctionInfo +import astminer.common.model.LabeledResult +import astminer.common.model.Node +import org.junit.Before +import org.junit.Test +import kotlin.test.assertEquals + +class FunctionNameLabelExtractorTest { + + lateinit var functionRoot: Node + + private val functionInfo: FunctionInfo + get() = object : FunctionInfo { + override val nameNode = functionRoot + override val filePath = PATH + override val root = functionRoot + } + + @Before + fun init() { + val leafNodeWithRecursiveCall = DummyNode(FUNCTION_NAME) + val emptyIntermediateNode = DummyNode("", mutableListOf(leafNodeWithRecursiveCall)) 
+ functionRoot = DummyNode(FUNCTION_NAME, mutableListOf(emptyIntermediateNode)) + } + + @Test + fun `test FunctionNameProblem extracts correct method name`() { + val labeledResult = FunctionNameLabelExtractor.process(functionInfo) + assertEquals(LabeledResult(functionRoot, FUNCTION_NAME, PATH), labeledResult) + } + + @Test + fun `test FunctionNameProblem hides function name node token with METHOD_NAME`() { + FunctionNameLabelExtractor.process(functionInfo) + assertEquals("METHOD_NAME", functionInfo.nameNode?.token) + } + + @Test + fun `test FunctionNameProblem hides function root token with METHOD_NAME if it is the name node`() { + FunctionNameLabelExtractor.process(functionInfo) + assertEquals("METHOD_NAME", functionInfo.root.token) + } + + @Test + fun `test function name problem should hide recursive call tokens with SELF`() { + FunctionNameLabelExtractor.process(functionInfo) + val recursiveCallNode = functionInfo.root.children.firstOrNull()?.children?.firstOrNull() + assertEquals("SELF", recursiveCallNode?.token) + } + + companion object { + private const val PATH = "random/folder/file.txt" + private const val FUNCTION_NAME = "method" + } +} diff --git a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt index 189158ed..e573af41 100644 --- a/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/AntrlUtilTest.kt @@ -1,6 +1,5 @@ package astminer.parse.antlr -import astminer.common.preOrder import astminer.parse.antlr.java.JavaParser import org.junit.Assert import org.junit.Test @@ -15,9 +14,9 @@ class AntrlUtilTest { val node = parser.parseInputStream(FileInputStream(file)) var adoptedNodesSize = 0 - node?.preOrder()?.forEach { node -> - adoptedNodesSize += node.getChildren().filter { it.getParent() != node }.size + node.preOrder().forEach { curNode -> + adoptedNodesSize += curNode.children.filter { it.parent != curNode }.size } Assert.assertEquals("There should 
be no children with different parent", 0, adoptedNodesSize) } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt b/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt index 0cc761bc..db997fff 100644 --- a/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/java/ANTLRJavaParserTest.kt @@ -1,6 +1,7 @@ package astminer.parse.antlr.java import astminer.common.getProjectFilesWithExtension +import astminer.parseFiles import org.junit.Assert import org.junit.Test import java.io.File @@ -57,9 +58,8 @@ class ANTLRJavaParserTest { fun testProjectParsing() { val parser = JavaParser() val projectRoot = File("src/test/resources/arrayCalls") - val trees = parser.parseFiles( - getProjectFilesWithExtension(projectRoot, "java")).map { it.root } - Assert.assertEquals("There is only 5 file with .java extension in 'testData/arrayCalls' folder",5, trees.size) + val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "java")) + Assert.assertEquals("There is only 5 file with .java extension in 'testData/arrayCalls' folder", 5, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt new file mode 100644 index 00000000..73822194 --- /dev/null +++ b/src/test/kotlin/astminer/parse/antlr/java/JavaFunctionSplitterTest.kt @@ -0,0 +1,115 @@ +package astminer.parse.antlr.java + +import astminer.common.model.FunctionInfo +import astminer.parse.antlr.AntlrNode +import org.junit.Test +import java.io.File +import kotlin.test.BeforeTest +import kotlin.test.assertEquals +import kotlin.test.assertNotNull + +class JavaFunctionSplitterTest { + + var functionInfos: Collection> = listOf() + + @BeforeTest + fun parseTree() { + 
val testTree = parser.parseInputStream(File(FILE_PATH).inputStream()) + assertNotNull(testTree) + functionInfos = functionSplitter.splitIntoFunctions(testTree, FILE_PATH) + } + + @Test + fun testValidSplitting() { + assertEquals(N_FUNCTIONS, functionInfos.size, "Test file contains $N_FUNCTIONS methods") + } + + @Test + fun testReturnVoid() { + val methodVoid = functionInfos.find { it.name == "functionReturningVoid" } + assertNotNull(methodVoid) + assertEquals("void", methodVoid.returnType) + } + + @Test + fun testReturnInt() { + val methodInt = functionInfos.find { it.name == "functionReturningInt" } + assertNotNull(methodInt) + assertEquals("int", methodInt.returnType) + } + + @Test + fun testReturnStrings() { + val methodStrings = functionInfos.find { it.name == "functionReturningStrings" } + assertNotNull(methodStrings) + assertEquals("String[]", methodStrings.returnType) + } + + @Test + fun testReturnClass() { + val methodClass = functionInfos.find { it.name == "functionReturningClass" } + assertNotNull(methodClass) + assertEquals("Class1", methodClass.returnType) + } + + @Test + fun testFunctionInClass() { + val methodClass = functionInfos.find { it.name == "functionInClass1" } + assertNotNull(methodClass) + assertEquals("Class1", methodClass.enclosingElement?.name) + } + + @Test + fun testFunctionInNestedClass() { + val methodClass = functionInfos.find { it.name == "functionInClass2" } + assertNotNull(methodClass) + assertEquals("Class2", methodClass.enclosingElement?.name) + } + + @Test + fun testNoParameters() { + val methodNoParameters = functionInfos.find { it.name == "functionWithNoParameters" } + assertNotNull(methodNoParameters) + assertEquals(0, methodNoParameters.parameters?.size) + } + + @Test + fun testOneParameter() { + val methodOneParameter = functionInfos.find { it.name == "functionWithOneParameter" } + assertNotNull(methodOneParameter) + assertEquals(1, methodOneParameter.parameters?.size) + val parameter = 
methodOneParameter.parameters?.get(0) + assertEquals("p1", parameter?.name) + assertEquals("int", parameter?.type) + } + + @Test + fun testThreeParameters() { + val methodThreeParameters = functionInfos.find { it.name == "functionWithThreeParameters" } + assertNotNull(methodThreeParameters) + assertEquals(3, methodThreeParameters.parameters?.size) + val methodTypes = listOf("Class", "String[][]", "int[]") + for (i in 0 until 3) { + val parameter = methodThreeParameters.parameters?.get(i) + assertEquals("p${i + 1}", parameter?.name) + assertEquals(methodTypes[i], parameter?.type) + } + } + + @Test + fun testWeirdArrayParameter() { + val methodWeirdArrayParameter = functionInfos.find { it.name == "functionWithStrangeArrayParameter" } + assertNotNull(methodWeirdArrayParameter) + assertEquals(1, methodWeirdArrayParameter.parameters?.size) + val weirdParameter = methodWeirdArrayParameter.parameters?.get(0) + assertEquals(weirdParameter?.name, "arr[]") + assertEquals(weirdParameter?.type, "int") + } + + companion object { + const val FILE_PATH = "src/test/resources/methodSplitting/testMethodSplitting.java" + const val N_FUNCTIONS = 10 + val functionSplitter = JavaFunctionSplitter() + val parser = JavaParser() + } +} diff --git a/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt deleted file mode 100644 index 4e89ffef..00000000 --- a/src/test/kotlin/astminer/parse/antlr/java/JavaMethodSplitterTest.kt +++ /dev/null @@ -1,103 +0,0 @@ -package astminer.parse.antlr.java - -import astminer.common.model.MethodInfo -import astminer.parse.antlr.SimpleNode -import org.junit.Test -import kotlin.test.assertEquals -import java.io.File -import kotlin.test.BeforeTest -import kotlin.test.assertNotNull - -class JavaMethodSplitterTest { - companion object { - const val N_FUNCTIONS = 9 - val methodSplitter = JavaMethodSplitter() - val parser = JavaParser() - } - - var methodInfos: Collection> = listOf() 
- - @BeforeTest - fun parseTree() { - val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.java").inputStream()) - assertNotNull(testTree) - methodInfos = methodSplitter.splitIntoMethods(testTree) - } - - @Test - fun testValidSplitting() { - assertEquals(N_FUNCTIONS, methodInfos.size, "Test file contains $N_FUNCTIONS methods") - } - - @Test - fun testReturnVoid() { - val methodVoid = methodInfos.find { it.name() == "functionReturningVoid" } - assertNotNull(methodVoid) - assertEquals( "void", methodVoid.returnType()) - } - - @Test - fun testReturnInt() { - val methodInt = methodInfos.find { it.name() == "functionReturningInt" } - assertNotNull(methodInt) - assertEquals( "int", methodInt.returnType()) - } - - @Test - fun testReturnStrings() { - val methodStrings = methodInfos.find { it.name() == "functionReturningStrings" } - assertNotNull(methodStrings) - assertEquals( "String[]", methodStrings.returnType()) - } - - @Test - fun testReturnClass() { - val methodClass = methodInfos.find { it.name() == "functionReturningClass" } - assertNotNull(methodClass) - assertEquals( "Class1", methodClass.returnType()) - } - - @Test - fun testFunctionInClass() { - val methodClass = methodInfos.find { it.name() == "functionInClass1" } - assertNotNull(methodClass) - assertEquals( "Class1", methodClass.enclosingElementName()) - } - - @Test - fun testFunctionInNestedClass() { - val methodClass = methodInfos.find { it.name() == "functionInClass2" } - assertNotNull(methodClass) - assertEquals( "Class2", methodClass.enclosingElementName()) - } - - @Test - fun testNoParameters() { - val methodNoParameters = methodInfos.find { it.name() == "functionWithNoParameters" } - assertNotNull(methodNoParameters) - assertEquals(0, methodNoParameters.methodParameters.size) - } - - @Test - fun testOneParameter() { - val methodOneParameter = methodInfos.find { it.name() == "functionWithOneParameter" } - assertNotNull(methodOneParameter) - assertEquals(1, 
methodOneParameter.methodParameters.size) - val parameter = methodOneParameter.methodParameters[0] - assertEquals("p1", parameter.name()) - assertEquals("int", parameter.returnType()) - } - - @Test - fun testThreeParameters() { - val methodThreeParameters = methodInfos.find { it.name() == "functionWithThreeParameters" } - assertNotNull(methodThreeParameters) - assertEquals(3, methodThreeParameters.methodParameters.size) - val methodTypes = listOf("Class", "String[][]", "int[]") - for (i in 0 until 3) { - val parameter = methodThreeParameters.methodParameters[i] - assertEquals("p${i + 1}", parameter.name()) - assertEquals(methodTypes[i], parameter.returnType()) - } - } -} \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/ANTLRJavaScriptParserTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/ANTLRJavaScriptParserTest.kt index be76de3a..335da12e 100644 --- a/src/test/kotlin/astminer/parse/antlr/javascript/ANTLRJavaScriptParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/javascript/ANTLRJavaScriptParserTest.kt @@ -14,5 +14,4 @@ class ANTLRJavaScriptParserTest { val node = parser.parseInputStream(FileInputStream(file)) assertNotNull(node, "Parse tree for a valid file should not be null") } - -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt new file mode 100644 index 00000000..977013fb --- /dev/null +++ b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptFunctionSplitterTest.kt @@ -0,0 +1,63 @@ +package astminer.parse.antlr.javascript + +import astminer.common.model.EnclosingElementType +import astminer.common.model.FunctionInfo +import astminer.parse.antlr.AntlrNode +import org.junit.Test +import java.io.File +import kotlin.test.BeforeTest +import kotlin.test.assertEquals +import kotlin.test.assertNotNull + +class JavaScriptFunctionSplitterTest { + 
+ var functionInfos: Collection> = listOf() + + @BeforeTest + fun parseTree() { + val testTree = parser.parseInputStream(File(testFilePath).inputStream()) + assertNotNull(testTree) + functionInfos = functionSplitter.splitIntoFunctions(testTree, testFilePath) + } + + @Test + fun testValidSplitting() { + assertEquals(N_METHODS, functionInfos.size, "Test file contains $N_METHODS methods") + } + + @Test + fun testValidMethodInfo() { + fun EnclosingElementType.getEnclosingElementType(): String { + return when (this) { + EnclosingElementType.Function -> "fun" + EnclosingElementType.Class -> "class" + EnclosingElementType.Method -> "method" + EnclosingElementType.VariableDeclaration -> "var" + else -> "" + } + } + + fun FunctionInfo.getJsonInfo(): String { + return "info : {" + + "name : $name, " + + "args : ${parameters?.joinToString(", ") { it.name }}, " + + "enclosing element : ${enclosingElement?.type?.getEnclosingElementType()}, " + + "enclosing element name : ${enclosingElement?.name}" + + "}" + } + + val actualJsonInfos = functionInfos.map { it.getJsonInfo() }.sorted() + + val text = File(testFilePath).readText() + val expectedJsonInfos = Regex("info : \\{.*}").findAll(text).toList().map { it.value }.sorted() + + assertEquals(expectedJsonInfos, actualJsonInfos) + } + + companion object { + const val N_METHODS = 47 + const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.js" + val functionSplitter = JavaScriptFunctionSplitter() + val parser = JavaScriptParser() + } +} diff --git a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt deleted file mode 100644 index ee814f8b..00000000 --- a/src/test/kotlin/astminer/parse/antlr/javascript/JavaScriptMethodSplitterTest.kt +++ /dev/null @@ -1,62 +0,0 @@ -package astminer.parse.antlr.javascript - -import astminer.common.model.MethodInfo -import astminer.parse.antlr.SimpleNode -import 
org.junit.Test -import java.io.File -import kotlin.test.BeforeTest -import kotlin.test.assertEquals -import kotlin.test.assertNotNull - - -class JavaScriptMethodSplitterTest { - companion object { - const val N_METHODS = 47 - const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.js" - val methodSplitter = JavaScriptMethodSplitter() - val parser = JavaScriptParser() - } - - var methodInfos: Collection> = listOf() - - @BeforeTest - fun parseTree() { - val testTree = parser.parseInputStream(File(testFilePath).inputStream()) - assertNotNull(testTree) - methodInfos = methodSplitter.splitIntoMethods(testTree) - } - - @Test - fun testValidSplitting() { - assertEquals(N_METHODS, methodInfos.size, "Test file contains $N_METHODS methods") - } - - @Test - fun testValidMethodInfo() { - fun String.getEnclosingElementType(): String { - return when { - "functionDeclaration" in this -> "fun" - "classDeclaration" in this -> "class" - "methodDefinition" in this -> "method" - "variableDeclaration" in this -> "var" - else -> "" - } - } - - fun MethodInfo.getJsonInfo(): String { - return "info : {" + - "name : ${name()}, " + - "args : ${methodParameters.map { it.name() }.joinToString(", ")}, " + - "enclosing element : ${enclosingElement.root?.getTypeLabel()?.getEnclosingElementType()}, " + - "enclosing element name : ${enclosingElementName()}" + - "}" - } - - val actualJsonInfos = methodInfos.map { it.getJsonInfo() }.sorted() - - val text = File(testFilePath).readText() - val expectedJsonInfos = Regex("info : \\{.*\\}").findAll(text).toList().map { it.value }.sorted() - - assertEquals(expectedJsonInfos, actualJsonInfos) - } -} \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt new file mode 100644 index 00000000..3be0583d --- /dev/null +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPFunctionSplitterTest.kt @@ -0,0 
+1,66 @@ +package astminer.parse.antlr.php + +import astminer.common.model.EnclosingElementType +import astminer.common.model.FunctionInfo +import astminer.parse.antlr.AntlrNode +import org.junit.Test +import java.io.File +import kotlin.test.BeforeTest +import kotlin.test.assertEquals +import kotlin.test.assertNotNull + +internal class ANTLRPHPFunctionSplitterTest { + + private var functionInfos: Collection> = listOf() + + @BeforeTest + fun parseTree() { + val testTree = parser.parseInputStream(File(testFilePath).inputStream()) + assertNotNull(testTree) + functionInfos = functionSplitter.splitIntoFunctions(testTree, testFilePath) + } + + @Test + fun testValidSplitting() { + assertEquals(N_METHODS, functionInfos.size, "Test file contains $N_METHODS methods") + } + + @Test + fun testValidMethodInfo() { + fun EnclosingElementType.getEnclosingElementType(): String { + return when (this) { + EnclosingElementType.Function -> "function" + EnclosingElementType.Class -> "class" + EnclosingElementType.Method -> "method" + EnclosingElementType.VariableDeclaration -> "variable" + else -> "" + } + } + + fun FunctionInfo.getJsonInfo(): String = listOf( + "info : {", + "name: $name, ", + "args: ${parameters?.joinToString(", ") { + listOfNotNull(it.type, it.name).joinToString(" ") + }}, ", + "enclosing element: ${enclosingElement?.type?.getEnclosingElementType()}, ", + "enclosing element name: ${enclosingElement?.name}, ", + "return type: $returnType", + "}" + ).joinToString("") + + val actualJsonInfos = functionInfos.map { it.getJsonInfo() + '\n' }.sorted() + + val text = File(testFilePath).readText() + val expectedJsonInfos = Regex("info : \\{.*}").findAll(text).toList().map { it.value + '\n' }.sorted() + + assertEquals(expectedJsonInfos, actualJsonInfos) + } + + companion object { + const val N_METHODS = 18 + const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.php" + val functionSplitter = PHPFunctionSplitter() + val parser = PHPParser() + } +} diff 
--git a/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPParserText.kt b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPParserText.kt new file mode 100644 index 00000000..da7b4caa --- /dev/null +++ b/src/test/kotlin/astminer/parse/antlr/php/ANTLRPHPParserText.kt @@ -0,0 +1,17 @@ +package astminer.parse.antlr.php + +import org.junit.Test +import java.io.File +import java.io.FileInputStream +import kotlin.test.assertNotNull + +internal class ANTLRPHPParserText { + + @Test + fun testNodeIsNotNull() { + val parser = PHPParser() + val file = File("src/test/resources/examples/1.php") + val node = parser.parseInputStream(FileInputStream(file)) + assertNotNull(node) + } +} diff --git a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt index 543b96fb..e85eadb0 100644 --- a/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt +++ b/src/test/kotlin/astminer/parse/antlr/python/ANTLRPythonParserTest.kt @@ -1,6 +1,7 @@ package astminer.parse.antlr.python import astminer.common.getProjectFilesWithExtension +import astminer.parseFiles import org.junit.Assert import org.junit.Test import java.io.File @@ -12,7 +13,7 @@ class ANTLRPythonParserTest { val parser = PythonParser() val file = File("src/test/resources/examples/1.py") - val node = parser.parseFile(file).root + val node = parser.parseFile(file) Assert.assertNotNull("Parse tree for a valid file should not be null", node) } @@ -20,8 +21,8 @@ class ANTLRPythonParserTest { fun testProjectParsing() { val parser = PythonParser() val projectRoot = File("src/test/resources/examples") - val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "py")).map { it.root } - Assert.assertEquals("There is only 1 file with .py extension in 'testData/examples' folder",1, trees.size) + val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "py")) + Assert.assertEquals("There is only 1 file with .py extension 
in 'testData/examples' folder", 1, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt new file mode 100644 index 00000000..0e8bc21e --- /dev/null +++ b/src/test/kotlin/astminer/parse/antlr/python/PythonFunctionSplitterTest.kt @@ -0,0 +1,160 @@ +package astminer.parse.antlr.python + +import astminer.common.model.EnclosingElementType +import astminer.common.model.FunctionInfo +import astminer.parse.antlr.AntlrNode +import org.junit.Test +import java.io.File +import kotlin.test.BeforeTest +import kotlin.test.assertEquals +import kotlin.test.assertNotNull +import kotlin.test.assertNull + +class PythonFunctionSplitterTest { + + var functionInfos: Collection> = listOf() + + @BeforeTest + fun parseTree() { + val testTree = parser.parseInputStream(File(FILE_PATH).inputStream()) + assertNotNull(testTree) + functionInfos = functionSplitter.splitIntoFunctions(testTree, FILE_PATH) + } + + @Test + fun testValidSplitting() { + assertEquals(N_FUNCTIONS, functionInfos.size, "Test file contains $N_FUNCTIONS methods") + } + + @Test + fun testFunctionNotInClass() { + val functionClass = functionInfos.find { it.name == "fun_with_no_class" } + assertNotNull(functionClass) + assertNull(functionClass.enclosingElement) + } + + @Test + fun testFunctionInClass() { + val functionClass = functionInfos.find { it.name == "fun_in_class1" } + assertNotNull(functionClass) + assertEquals(EnclosingElementType.Class, functionClass.enclosingElement?.type) + assertEquals("Class1", functionClass.enclosingElement?.name) + } + + @Test + fun testFunctionInNestedClass() { + val functionClass = functionInfos.find { it.name == "fun_in_class2" } + assertNotNull(functionClass) + assertEquals(EnclosingElementType.Class, functionClass.enclosingElement?.type) + 
assertEquals("Class2", functionClass.enclosingElement?.name) + } + + @Test + fun testNoParameters() { + val functionNoParameters = functionInfos.find { it.name == "function_with_no_parameters" } + assertNotNull(functionNoParameters) + assertEquals(0, functionNoParameters.parameters?.size) + } + + @Test + fun testOneParameter() { + val functionOneParameter = functionInfos.find { it.name == "function_with_one_parameter" } + assertNotNull(functionOneParameter) + assertEquals(1, functionOneParameter.parameters?.size) + val parameter = functionOneParameter.parameters?.get(0) + assertEquals("p1", parameter?.name) + } + + @Test + fun testOneTypedParameter() { + val functionOneTypedParameter = functionInfos.find { it.name == "function_with_one_typed_parameter" } + assertNotNull(functionOneTypedParameter) + assertEquals(1, functionOneTypedParameter.parameters?.size) + val parameter = functionOneTypedParameter.parameters?.get(0) + assertEquals("p1", parameter?.name) + assertEquals("int", parameter?.type) + } + + @Test + fun functionWithComplexParameter() { + val functionOneTypedParameter = functionInfos.find { it.name == "function_with_complex_parameter" } + assertNotNull(functionOneTypedParameter) + assertEquals(1, functionOneTypedParameter.parameters?.size) + val parameter = functionOneTypedParameter.parameters?.get(0) + assertEquals("p1", parameter?.name) + assertEquals("List[int]", parameter?.type) + } + + @Test + fun testThreeParameters() { + val functionThreeParameters = functionInfos.find { it.name == "function_with_three_parameters" } + assertNotNull(functionThreeParameters) + assertEquals(3, functionThreeParameters.parameters?.size) + val parameters = functionThreeParameters.parameters!! 
+ assertEquals("p1", parameters[0].name) + + assertEquals("p2", parameters[1].name) + + assertEquals("p3", parameters[2].name) + assertEquals("int", parameters[2].type) + } + + @Test + fun testParameterInClass() { + val functionOneParameter = functionInfos.find { it.name == "fun_with_parameter_in_class" } + assertNotNull(functionOneParameter) + assertEquals(2, functionOneParameter.parameters?.size) + val parameter = functionOneParameter.parameters?.get(1) + assertEquals("p1", parameter?.name) + } + + @Test + fun testTypedParameterInClass() { + val functionOneTypedParameter = functionInfos.find { it.name == "fun_with_typed_parameter_in_class" } + assertNotNull(functionOneTypedParameter) + assertEquals(2, functionOneTypedParameter.parameters?.size) + val parameter = functionOneTypedParameter.parameters?.get(1) + assertEquals("p1", parameter?.name) + assertEquals("int", parameter?.type) + } + + @Test + fun testEnclosingFunction() { + val functionInsideFunction = functionInfos.find { it.name == "function_inside_function" } + assertNotNull(functionInsideFunction) + val enclosingElement = functionInsideFunction.enclosingElement + + assertNotNull(enclosingElement) + assertEquals("function_containing_function", enclosingElement.name) + assertEquals(EnclosingElementType.Function, enclosingElement.type) + } + + @Test + fun testEnclosingMethod() { + val functionInsideMethod = functionInfos.find { it.name == "function_inside_method" } + assertNotNull(functionInsideMethod) + val enclosingElement = functionInsideMethod.enclosingElement + + assertNotNull(enclosingElement) + assertEquals("some_method", enclosingElement.name) + assertEquals(EnclosingElementType.Method, enclosingElement.type) + } + + @Test + fun testEnclosingFunctionInsideMethod() { + val funInsideFunInsideMethod = functionInfos.find { it.name == "fun_inside_fun_inside_method" } + assertNotNull(funInsideFunInsideMethod) + val enclosingElement = funInsideFunInsideMethod.enclosingElement + + 
assertNotNull(enclosingElement) + assertEquals("second_function_inside_method", enclosingElement.name) + assertEquals(EnclosingElementType.Function, enclosingElement.type) + } + + companion object { + const val FILE_PATH = "src/test/resources/methodSplitting/testMethodSplitting.py" + const val N_FUNCTIONS = 17 + val functionSplitter = PythonFunctionSplitter() + val parser = PythonParser() + } +} diff --git a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt deleted file mode 100644 index 2a668002..00000000 --- a/src/test/kotlin/astminer/parse/antlr/python/PythonMethodSplitterTest.kt +++ /dev/null @@ -1,80 +0,0 @@ -package astminer.parse.antlr.python - -import astminer.common.model.MethodInfo -import astminer.parse.antlr.SimpleNode -import org.junit.Test -import kotlin.test.assertEquals -import java.io.File -import kotlin.test.BeforeTest -import kotlin.test.assertNotNull -import kotlin.test.assertNull - -class PythonMethodSplitterTest { - companion object { - const val N_FUNCTIONS = 6 - val methodSplitter = PythonMethodSplitter() - val parser = PythonParser() - } - - var methodInfos: Collection> = listOf() - - @BeforeTest - fun parseTree() { - val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.py").inputStream()) - assertNotNull(testTree) - methodInfos = methodSplitter.splitIntoMethods(testTree) - } - - @Test - fun testValidSplitting() { - assertEquals(N_FUNCTIONS, methodInfos.size, "Test file contains $N_FUNCTIONS methods") - } - - @Test - fun testFunctionNotInClass() { - val methodClass = methodInfos.find { it.name() == "funWithNoClass" } - assertNotNull(methodClass) - assertNull(methodClass.enclosingElement.root) - } - - @Test - fun testFunctionInClass() { - val methodClass = methodInfos.find { it.name() == "funInClass1" } - assertNotNull(methodClass) - assertEquals( "Class1", methodClass.enclosingElementName()) - } - - 
@Test - fun testFunctionInNestedClass() { - val methodClass = methodInfos.find { it.name() == "funInClass2" } - assertNotNull(methodClass) - assertEquals( "Class2", methodClass.enclosingElementName()) - } - - @Test - fun testNoParameters() { - val methodNoParameters = methodInfos.find { it.name() == "functionWithNoParameters" } - assertNotNull(methodNoParameters) - assertEquals(0, methodNoParameters.methodParameters.size) - } - - @Test - fun testOneParameter() { - val methodOneParameter = methodInfos.find { it.name() == "functionWithOneParameter" } - assertNotNull(methodOneParameter) - assertEquals(1, methodOneParameter.methodParameters.size) - val parameter = methodOneParameter.methodParameters[0] - assertEquals("p1", parameter.name()) - } - - @Test - fun testThreeParameters() { - val methodThreeParameters = methodInfos.find { it.name() == "functionWithThreeParameters" } - assertNotNull(methodThreeParameters) - assertEquals(3, methodThreeParameters.methodParameters.size) - for (i in 0 until 3) { - val parameter = methodThreeParameters.methodParameters[i] - assertEquals("p${i + 1}", parameter.name()) - } - } -} \ No newline at end of file diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt index 619459cb..531f7934 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyCppParserTest.kt @@ -1,22 +1,22 @@ package astminer.parse.cpp +import astminer.checkExecutable +import astminer.common.forFilesWithSuffix import astminer.common.getProjectFilesWithExtension -import astminer.examples.forFilesWithSuffix +import astminer.parse.fuzzy.FuzzyNode +import astminer.parse.fuzzy.FuzzyParsingResultFactory +import astminer.parse.fuzzy.cpp.FuzzyCppParser +import astminer.parseFiles import org.junit.Assert +import org.junit.Assume +import org.junit.Before import org.junit.Test import java.io.File class FuzzyCppParserTest { - @Test - fun 
testNodeIsNotNull() { - val parser = FuzzyCppParser() - val file = File("src/test/resources/fuzzy/test.cpp") - - val nodes = parser.parseFiles(listOf(file)) - Assert.assertTrue("Parse tree for a valid file should not be null", - nodes.size == 1 && nodes[0].root != null) - } + @Before + fun checkGPP() = Assume.assumeTrue(checkExecutable("g++")) @Test fun testInputStreamParsing() { @@ -26,7 +26,7 @@ class FuzzyCppParserTest { val parser = FuzzyCppParser() folder.forFilesWithSuffix(".cpp") { file -> n++ - parser.parseInputStream(file.inputStream())?.let { nodes.add(it) } + parser.parseInputStream(file.inputStream()).let { nodes.add(it) } } Assert.assertEquals(n, nodes.size) } @@ -35,65 +35,59 @@ class FuzzyCppParserTest { fun testProjectParsing() { val folder = File("src/test/resources/fuzzy/") val parser = FuzzyCppParser() - val nodes = parser.parseFiles(getProjectFilesWithExtension(folder,"cpp")).map { it.root } + val nodes = parser.parseFiles(getProjectFilesWithExtension(folder, "cpp")) Assert.assertEquals( - "There is only 3 file with .cpp extension in 'testData/examples' folder", - 3, - nodes.filterNotNull().size + "There is only 3 file with .cpp extension in 'testData/examples' folder", + 3, + nodes.size ) } @Test fun testPreprocessingDefine() { val folder = File("src/test/resources/fuzzy") - val preprocessedFolder = folder.resolve("preprocessed") - preprocessedFolder.mkdir() val defineFileName = "preprocDefineTest.cpp" - val parser = FuzzyCppParser() + val preprocessedFileName = "preprocDefineTest_preprocessed.cpp" - parser.preprocessFile(folder.resolve(defineFileName), preprocessedFolder) + FuzzyParsingResultFactory.preprocess(folder.resolve(defineFileName)) Assert.assertEquals( - "'define' directives should be replaced", - "for (int i = (0); i < (10); ++i) { }", - preprocessedFolder.resolve(defineFileName).readInOneLine() + "'define' directives should be replaced", + "for (int i = (0); i < (10); ++i) { }", + 
folder.resolve(preprocessedFileName).readInOneLine() ) - preprocessedFolder.deleteRecursively() + folder.resolve(preprocessedFileName).delete() } @Test fun testPreprocessingInclude() { val folder = File("src/test/resources/fuzzy") - val preprocessedFolder = folder.resolve("preprocessed") - preprocessedFolder.mkdir() val includeFileName = "preprocIncludeTest.cpp" - val parser = FuzzyCppParser() + val preprocessedFileName = "preprocIncludeTest_preprocessed.cpp" - parser.preprocessFile(folder.resolve(includeFileName), preprocessedFolder) + FuzzyParsingResultFactory.preprocess(folder.resolve(includeFileName)) Assert.assertEquals( - "'include' directives should not be replaced", - folder.resolve(includeFileName).readInOneLine(), - preprocessedFolder.resolve(includeFileName).readInOneLine() + "'include' directives should not be replaced", + folder.resolve(includeFileName).readInOneLine(), + folder.resolve(preprocessedFileName).readInOneLine() ) - preprocessedFolder.deleteRecursively() + folder.resolve(preprocessedFileName).delete() } @Test fun testPreprocessingProject() { val projectRoot = File("src/test/resources/examples/cpp") - val preprocessedRoot = File("src/test/resources/examples/preprocessed") - preprocessedRoot.mkdir() - val parser = FuzzyCppParser() - parser.preprocessProject(projectRoot, preprocessedRoot) - val nodes = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "cpp")).map { it.root } + val files = getProjectFilesWithExtension(projectRoot, "cpp") + val nodes = FuzzyParsingResultFactory.parseFiles(files) { it.root }.filterNotNull() Assert.assertEquals( - "Parse tree for a valid file should not be null. There are 5 files in example project.", - 5, - nodes.filterNotNull().size + "Parse tree for a valid file should not be null. 
There are 5 files in example project.", + 5, + nodes.size ) - preprocessedRoot.deleteRecursively() + files.map { "${it.nameWithoutExtension}_preprocessed.${it.extension}" } + .forEach { projectRoot.resolve(it).delete() } } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt index aebff769..3ff65833 100644 --- a/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt +++ b/src/test/kotlin/astminer/parse/cpp/FuzzyMethodSplitterTest.kt @@ -1,28 +1,30 @@ package astminer.parse.cpp -import astminer.common.model.MethodInfo +import astminer.checkExecutable +import astminer.common.model.FunctionInfo +import astminer.parse.fuzzy.FuzzyNode +import astminer.parse.fuzzy.cpp.FuzzyCppParser +import astminer.parse.fuzzy.cpp.FuzzyFunctionSplitter +import org.junit.Assume +import org.junit.Before import org.junit.Test -import kotlin.test.assertEquals import java.io.File -import kotlin.test.BeforeTest +import kotlin.test.assertEquals import kotlin.test.assertNotNull import kotlin.test.assertNull class FuzzyMethodSplitterTest { - companion object { - const val N_FUNCTIONS = 10 - val methodSplitter = FuzzyMethodSplitter() - val parser = FuzzyCppParser() - } - - var methodInfos: Collection> = listOf() + var methodInfos: Collection> = listOf() - @BeforeTest + @Before fun parseTree() { - val testTree = parser.parseInputStream(File("src/test/resources/methodSplitting/testMethodSplitting.cpp").inputStream()) + Assume.assumeTrue(checkExecutable("g++")) + val testTree = parser.parseInputStream( + File("src/test/resources/methodSplitting/testMethodSplitting.cpp").inputStream() + ) assertNotNull(testTree) - methodInfos = methodSplitter.splitIntoMethods(testTree) + methodInfos = methodSplitter.splitIntoFunctions(testTree, FILE_PATH) } @Test @@ -32,79 +34,86 @@ class FuzzyMethodSplitterTest { @Test fun testReturnVoid() { - val methodVoid = methodInfos.find { it.name() == 
"functionReturningVoid" } + val methodVoid = methodInfos.find { it.name == "functionReturningVoid" } assertNotNull(methodVoid) - assertEquals( "void", methodVoid.returnType()) + assertEquals("void", methodVoid.returnType) } @Test fun testReturnInt() { - val methodInt = methodInfos.find { it.name() == "functionReturningInt" } + val methodInt = methodInfos.find { it.name == "functionReturningInt" } assertNotNull(methodInt) - assertEquals( "int", methodInt.returnType()) + assertEquals("int", methodInt.returnType) } @Test fun testReturnString() { - val methodString = methodInfos.find { it.name() == "functionReturningString" } + val methodString = methodInfos.find { it.name == "functionReturningString" } assertNotNull(methodString) - assertEquals( "string", methodString.returnType()) + assertEquals("string", methodString.returnType) } @Test fun testReturnClass() { - val methodClass = methodInfos.find { it.name() == "functionReturningClass" } + val methodClass = methodInfos.find { it.name == "functionReturningClass" } assertNotNull(methodClass) - assertEquals( "Class", methodClass.returnType()) + assertEquals("Class", methodClass.returnType) } @Test fun testFunctionNotInClass() { - val methodClass = methodInfos.find { it.name() == "functionWithNoClass" } + val methodClass = methodInfos.find { it.name == "functionWithNoClass" } assertNotNull(methodClass) - assertNull(methodClass.enclosingElement.root) + assertNull(methodClass.enclosingElement) } @Test fun testFunctionInClass() { - val methodClass = methodInfos.find { it.name() == "functionInClass1" } + val methodClass = methodInfos.find { it.name == "functionInClass1" } assertNotNull(methodClass) - assertEquals( "Class1", methodClass.enclosingElementName()) + assertEquals("Class1", methodClass.enclosingElement?.name) } @Test fun testFunctionInNestedClass() { - val methodClass = methodInfos.find { it.name() == "functionInClass2" } + val methodClass = methodInfos.find { it.name == "functionInClass2" } 
assertNotNull(methodClass) - assertEquals( "Class2", methodClass.enclosingElementName()) + assertEquals("Class2", methodClass.enclosingElement?.name) } @Test fun testNoParameters() { - val methodNoParameters = methodInfos.find { it.name() == "functionWithNoParameters" } + val methodNoParameters = methodInfos.find { it.name == "functionWithNoParameters" } assertNotNull(methodNoParameters) - assertEquals(0, methodNoParameters.methodParameters.size) + assertEquals(0, methodNoParameters.parameters?.size) } @Test fun testOneParameter() { - val methodOneParameter = methodInfos.find { it.name() == "functionWithOneParameter" } + val methodOneParameter = methodInfos.find { it.name == "functionWithOneParameter" } assertNotNull(methodOneParameter) - assertEquals(1, methodOneParameter.methodParameters.size) - val parameter = methodOneParameter.methodParameters[0] - assertEquals("p1", parameter.name()) - assertEquals("int", parameter.returnType()) + assertEquals(1, methodOneParameter.parameters?.size) + val parameter = methodOneParameter.parameters?.get(0) + assertEquals("p1", parameter?.name) + assertEquals("int", parameter?.type) } @Test fun testThreeParameters() { - val methodThreeParameters = methodInfos.find { it.name() == "functionWithThreeParameters" } + val methodThreeParameters = methodInfos.find { it.name == "functionWithThreeParameters" } assertNotNull(methodThreeParameters) - assertEquals(3, methodThreeParameters.methodParameters.size) + assertEquals(3, methodThreeParameters.parameters?.size) for (i in 0 until 3) { - val parameter = methodThreeParameters.methodParameters[i] - assertEquals("p${i + 1}", parameter.name()) - assertEquals("int", parameter.returnType()) + val parameter = methodThreeParameters.parameters?.get(i) + assertEquals("p${i + 1}", parameter?.name) + assertEquals("int", parameter?.type) } } -} \ No newline at end of file + + companion object { + const val FILE_PATH = "src/test/resources/methodSplitting/testMethodSplitting.cpp" + const val 
N_FUNCTIONS = 10 + val methodSplitter = FuzzyFunctionSplitter() + val parser = FuzzyCppParser() + } +} diff --git a/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt new file mode 100644 index 00000000..2dfab373 --- /dev/null +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaFunctionSplitterTest.kt @@ -0,0 +1,80 @@ +package astminer.parse.gumtree.java + +import astminer.common.model.FunctionInfo +import astminer.parse.gumtree.GumTreeNode +import org.junit.Test +import java.io.File +import kotlin.test.assertEquals + +private fun createTree(filename: String): GumTreeNode = + GumTreeJavaParser().parseInputStream(File(filename).inputStream()) + +private fun createAndSplitTree(filename: String): Collection> = + GumTreeJavaFunctionSplitter().splitIntoFunctions(createTree(filename), filename) + +class GumTreeJavaFunctionSplitterTest { + @Test + fun testMethodExtraction1() { + val functionInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/1.java") + + assertEquals(1, functionInfos.size) + with(functionInfos.first()) { + assertEquals("fun", name) + assertEquals("void", returnType) + assertEquals("SingleFunction", enclosingElement?.name) + assertEquals(listOf("args", "param"), parameters?.map { it.name }) + assertEquals(listOf("String[]", "int"), parameters?.map { it.type }) + } + } + + @Test + fun testMethodExtraction2() { + val functionInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/2.java") + + assertEquals(1, functionInfos.size) + with(functionInfos.first()) { + assertEquals("main", name) + assertEquals("void", returnType) + assertEquals("InnerClass", enclosingElement?.name) + assertEquals(listOf("args"), parameters?.map { it.name }) + assertEquals(listOf("String[]"), parameters?.map { it.type }) + } + } + + @Test + fun testMethodExtraction3() { + val functionInfos = 
createAndSplitTree("src/test/resources/gumTreeMethodSplitter/3.java") + + assertEquals(2, functionInfos.size) + with(functionInfos.first()) { + assertEquals("main", name) + assertEquals("void", returnType) + assertEquals("InnerClass", enclosingElement?.name) + assertEquals(listOf("args"), parameters?.map { it.name }) + assertEquals(listOf("String[]"), parameters?.map { it.type }) + } + with(functionInfos.last()) { + assertEquals("fun", name) + assertEquals("void", returnType) + assertEquals("SingleMethodInnerClass", enclosingElement?.name) + assertEquals(listOf("args", "param"), parameters?.map { it.name }) + assertEquals(listOf("String[]", "int"), parameters?.map { it.type }) + } + } + + @Test + fun testMethodExtraction4() { + val functionInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/4.java") + + assertEquals(1, functionInfos.size) + with(functionInfos.first()) { + assertEquals("fun", name) + assertEquals("int", returnType) + assertEquals("SingleFunction", enclosingElement?.name) + assertEquals(listOf("args", "param"), parameters?.map { it.name }) + assertEquals(listOf("int", "SingleFunction"), parameters?.map { it.type }) + } + } + +// TODO: add more tests +} diff --git a/src/test/kotlin/astminer/parse/java/GumTreeJavaParserTest.kt b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt similarity index 80% rename from src/test/kotlin/astminer/parse/java/GumTreeJavaParserTest.kt rename to src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt index e9098a97..a727ad72 100644 --- a/src/test/kotlin/astminer/parse/java/GumTreeJavaParserTest.kt +++ b/src/test/kotlin/astminer/parse/gumtree/java/GumTreeJavaParserTest.kt @@ -1,6 +1,7 @@ -package astminer.parse.java +package astminer.parse.gumtree.java import astminer.common.getProjectFilesWithExtension +import astminer.parseFiles import org.junit.Assert import org.junit.Test import java.io.* @@ -11,7 +12,7 @@ class GumTreeJavaParserTest { val parser = 
GumTreeJavaParser() val file = File("src/test/resources/examples/1.java") - val node = parser.parseFile(file).root + val node = parser.parseFile(file) Assert.assertNotNull("Parse tree for a valid file should not be null", node) } @@ -19,8 +20,9 @@ class GumTreeJavaParserTest { fun testProjectParsing() { val parser = GumTreeJavaParser() val projectRoot = File("src/test/resources/examples") - val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "java")).map { it.root } - Assert.assertEquals("There is only 2 file with .java extension in 'testData/examples' folder",2, trees.size) + + val trees = parser.parseFiles(getProjectFilesWithExtension(projectRoot, "java")) + Assert.assertEquals("There is only 2 file with .java extension in 'testData/examples' folder", 2, trees.size) trees.forEach { Assert.assertNotNull("Parse tree for a valid file should not be null", it) } } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt new file mode 100644 index 00000000..124926af --- /dev/null +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonFunctionSplitterTest.kt @@ -0,0 +1,179 @@ +package astminer.parse.gumtree.python + +import astminer.checkExecutable +import astminer.common.model.FunctionInfo +import astminer.parse.gumtree.GumTreeNode +import org.junit.Assume +import org.junit.Before +import org.junit.Test +import java.io.File +import kotlin.test.assertEquals +import kotlin.test.assertNotNull + +class GumTreePythonFunctionSplitterTest { + private fun parse(filename: String): GumTreeNode = + GumTreePythonParser().parseInputStream(File(filename).inputStream()) + + private fun splitFunctions(filename: String): Collection> = + GumTreePythonFunctionSplitter().splitIntoFunctions(parse(filename), filename) + + private fun createPath(file: String) = 
"src/test/resources/gumTreeMethodSplitter/$file" + + @Before + fun checkPythonParser() = Assume.assumeTrue(checkExecutable("pythonparser")) + + @Test + fun methodsCountTest() { + assertEquals(7, splitFunctions(createPath("1.py")).size) + assertEquals(9, splitFunctions(createPath("2.py")).size) + assertEquals(3, splitFunctions(createPath("3.py")).size) + assertEquals(5, splitFunctions(createPath("4.py")).size) + } + + @Test + fun funcNamesTest() { + val realNames = setOf( + "no_args_func", + "with_args_no_typed", + "with_typed_args", + "with_typed_return_no_args", + "full_typed", + "func_dif_args_typed_return", + "complex_args_full_typed" + ) + val functionInfos = splitFunctions(createPath("1.py")) + val parsedNames = functionInfos.map { it.name }.toSet() + assertEquals(realNames, parsedNames) + } + + @Test + fun methodInfoTest1TypedArgs() { + val functionInfos = splitFunctions(createPath("1.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "complex_args_full_typed" } + assertNotNull(functionInfo) + with(functionInfo) { + assertEquals("complex_args_full_typed", name) + assertEquals(null, returnType) + assertEquals(1, parameters?.size) + assertEquals(listOf("node"), parameters?.map { it.name }?.toList()) + assertEquals(listOf("JsonNodeType"), parameters?.map { it.type }?.toList()) + } + } + + @Test + fun methodInfoTest2ManyArgs() { + val functionInfos = splitFunctions(createPath("1.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "func_dif_args_typed_return" } + assertNotNull(functionInfo) + with(functionInfo) { + assertEquals("func_dif_args_typed_return", name) + assertEquals("Constant-int", returnType) + assertEquals(6, parameters?.size) + assertEquals(listOf("a", "b", "c", "d", "e", "f"), parameters?.map { it.name }?.toList()) + assertEquals(emptyList(), parameters?.mapNotNull { it.type }?.toList()) + } + } + + @Test + fun methodInfoTest3EnclosingClass() { + val functionInfos = splitFunctions(createPath("2.py")) + val function = 
functionInfos.firstOrNull { it.name == "foo_typed" } + assertNotNull(function) + with(function) { + assertEquals("foo_typed", name) + assertEquals("A", enclosingElement?.name) + assertEquals(null, returnType) + assertEquals(3, parameters?.size) + assertEquals(listOf("self", "x", "y"), parameters?.map { it.name }?.toList()) + assertEquals(listOf(null, "int", "int"), parameters?.map { it.type }?.toList()) + } + } + + @Test + fun methodInfoTest4EnclosingClass() { + val functionInfos = splitFunctions(createPath("2.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "bar_typed" } + assertNotNull(functionInfo) + with(functionInfo) { + assertEquals("bar_typed", name) + assertEquals("C", enclosingElement?.name) + assertEquals(null, returnType) + assertEquals(2, parameters?.size) + assertEquals(listOf("self", "x"), parameters?.map { it.name }?.toList()) + assertEquals(listOf(null, "int"), parameters?.map { it.type }?.toList()) + } + } + + @Test + fun methodInfoTest5AsyncDef() { + val functionInfos = splitFunctions(createPath("3.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "async_schrecklich_typed" } + assertNotNull(functionInfo) + with(functionInfo) { + assertEquals("async_schrecklich_typed", name) + assertEquals("AsyncFunctionDef", root.typeLabel) + assertEquals(null, enclosingElement?.name) + assertEquals("Constant-int", returnType) + assertEquals(4, parameters?.size) + assertEquals(listOf("event", "x", "args", "kwargs"), parameters?.map { it.name }?.toList()) + assertEquals(listOf("str", "int", null, null), parameters?.map { it.type }?.toList()) + } + } + + @Test + fun methodInfoTest6Doc() { + val functionInfos = splitFunctions(createPath("3.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "async_simple_no_typed" } + assertNotNull(functionInfo) + with(functionInfo) { + assertEquals("async_simple_no_typed", name) + assertEquals("AsyncFunctionDef", root.typeLabel) + assertEquals(null, enclosingElement?.name) + 
assertEquals( + "\n async doc\n ", + root.getChildOfType("body") + ?.getChildOfType("Expr") + ?.getChildOfType("Constant-str") + ?.originalToken + ) + assertEquals(4, parameters?.size) + assertEquals( + listOf("gh", "original_issue", "branch", "backport_pr_number"), + parameters?.map { it.name }?.toList() + ) + assertEquals(listOf(null, null, null, null), parameters?.map { it.type }?.toList()) + } + } + + @Test + fun methodInfoTest7InnerFunc() { + val functionInfos = splitFunctions(createPath("4.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "foo_2" } + assertNotNull(functionInfo) + with(functionInfo) { + assertEquals("foo_2", name) + assertEquals("foo_1", functionInfo.root.parent?.wrappedNode?.parent?.label) + assertEquals(null, enclosingElement?.name) + assertEquals("Constant-NoneType", returnType) + assertEquals(1, parameters?.size) + assertEquals(listOf("c"), parameters?.map { it.name }?.toList()) + assertEquals(listOf(null), parameters?.map { it.type }?.toList()) + } + } + + @Test + fun methodInfoTest8InnerFunc() { + val functionInfos = splitFunctions(createPath("4.py")) + val functionInfo = functionInfos.firstOrNull { it.name == "bar_2" } + assertNotNull(functionInfo) + with(functionInfo) { + assertEquals("bar_2", name) + assertEquals("bar_1", functionInfo.root.parent?.wrappedNode?.parent?.label) + assertEquals(null, enclosingElement?.name) + assertEquals("Constant-int", returnType) + assertEquals(2, parameters?.size) + assertEquals(listOf("d", "e"), parameters?.map { it.name }?.toList()) + assertEquals(listOf("int", "int"), parameters?.map { it.type }?.toList()) + } + } +} diff --git a/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt new file mode 100644 index 00000000..56d150e4 --- /dev/null +++ b/src/test/kotlin/astminer/parse/gumtree/python/GumTreePythonParserTest.kt @@ -0,0 +1,52 @@ +package astminer.parse.gumtree.python + +import 
astminer.checkExecutable +import astminer.parse.ParsingException +import org.junit.After +import org.junit.Assume +import org.junit.Before +import org.junit.Test +import java.io.File +import kotlin.test.assertFalse +import kotlin.test.assertNotNull +import kotlin.test.assertTrue + +class GumTreePythonParserTest { + private val parser: GumTreePythonParser = GumTreePythonParser() + private val testFolder = File(".python_parser_test_tmp") + private val testFile = testFolder.resolve("test_file.py") + + @Before + fun mkdir() { + Assume.assumeTrue(checkExecutable("pythonparser")) + testFolder.mkdirs() + testFile.createNewFile() + } + + @After + fun rmdir() { + testFolder.deleteRecursively() + } + + @Test(expected = Test.None::class) + fun emptyFile() { + val node = parser.parseInputStream(testFile.inputStream()) + assertNotNull(node) + assertTrue(node.wrappedNode.children.isEmpty()) + } + + @Test(expected = ParsingException::class) + fun invalidCode() { + testFile.writeText("INVALID PYTHON CODE") + parser.parseInputStream(testFile.inputStream()) + } + + @Test(expected = Test.None::class) + fun goodFile() { + val node = parser.parseInputStream( + File("src/test/resources/gumTreeMethodSplitter/1.py").inputStream() + ) + assertNotNull(node) + assertFalse(node.wrappedNode.children.isEmpty()) + } +} diff --git a/src/test/kotlin/astminer/parse/java/GumTreeMethodSplitterTest.kt b/src/test/kotlin/astminer/parse/java/GumTreeMethodSplitterTest.kt deleted file mode 100644 index a59e6a8c..00000000 --- a/src/test/kotlin/astminer/parse/java/GumTreeMethodSplitterTest.kt +++ /dev/null @@ -1,81 +0,0 @@ -package astminer.parse.java - -import astminer.common.model.MethodInfo -import org.junit.Test -import java.io.File -import kotlin.test.assertEquals - -private fun createTree(filename: String): GumTreeJavaNode { - val parser = GumTreeJavaParser() - return parser.parseInputStream(File(filename).inputStream()) as GumTreeJavaNode -} - -private fun createAndSplitTree(filename: String): 
Collection> { - return GumTreeMethodSplitter().splitIntoMethods(createTree(filename)) -} - -class GumTreeMethodSplitterTest { - @Test - fun testMethodExtraction1() { - val methodInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/1.java") - - assertEquals(1, methodInfos.size) - with(methodInfos.first()) { - assertEquals("fun", name()) - assertEquals("void", returnType()) - assertEquals("SingleFunction", enclosingElementName()) - assertEquals(listOf("args", "param"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("String[]", "int"), methodParameters.map { it.returnType() }.toList()) - } - - } - - @Test - fun testMethodExtraction2() { - val methodInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/2.java") - - assertEquals(1, methodInfos.size) - with(methodInfos.first()) { - assertEquals("main", name()) - assertEquals("void", returnType()) - assertEquals("InnerClass", enclosingElementName()) - assertEquals(listOf("args"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("String[]"), methodParameters.map { it.returnType() }.toList()) - } - } - - @Test - fun testMethodExtraction3() { - val methodInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/3.java") - - assertEquals(2, methodInfos.size) - with(methodInfos.first()) { - assertEquals("main", name()) - assertEquals("void", returnType()) - assertEquals("InnerClass", enclosingElementName()) - assertEquals(listOf("args"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("String[]"), methodParameters.map { it.returnType() }.toList()) - } - with(methodInfos.last()) { - assertEquals("fun", name()) - assertEquals("void", returnType()) - assertEquals("SingleMethodInnerClass", enclosingElementName()) - assertEquals(listOf("args", "param"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("String[]", "int"), methodParameters.map { it.returnType() }.toList()) - } - } - - @Test - fun 
testMethodExtraction4() { - val methodInfos = createAndSplitTree("src/test/resources/gumTreeMethodSplitter/4.java") - - assertEquals(1, methodInfos.size) - with(methodInfos.first()) { - assertEquals("fun", name()) - assertEquals("int", returnType()) - assertEquals("SingleFunction", enclosingElementName()) - assertEquals(listOf("args", "param"), methodParameters.map { it.name() }.toList()) - assertEquals(listOf("int", "SingleFunction"), methodParameters.map { it.returnType() }.toList()) - } - } -} \ No newline at end of file diff --git a/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt b/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt index ba41566b..3e7ef0d3 100644 --- a/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt +++ b/src/test/kotlin/astminer/paths/PathWorkerTestBase.kt @@ -1,7 +1,6 @@ package astminer.paths import astminer.common.model.Node -import astminer.common.postOrder import org.junit.Assert import org.junit.Test @@ -20,11 +19,14 @@ abstract class PathWorkerTestBase { val nLeaves = tree.postOrder().count { it.isLeaf() } val allPaths = PathWorker().retrievePaths(tree) - val expectedCount = (nLeaves * (nLeaves - 1)) / 2 + val expectedCount = nLeaves * (nLeaves - 1) / 2 - Assert.assertEquals("A tree with $nLeaves leaves contains $expectedCount paths, " + + Assert.assertEquals( + "A tree with $nLeaves leaves contains $expectedCount paths, " + "one per distinct ordered pair of leaves. 
Worker returned ${allPaths.size}", - expectedCount, allPaths.size) + expectedCount, + allPaths.size + ) } @Test @@ -49,9 +51,9 @@ abstract class PathWorkerTestBase { for (maxWidth in 1..leavesCount) { val paths = PathWorker().retrievePaths(tree, maxLength, maxWidth) Assert.assertEquals( - "Unexpected paths count with length $maxLength and width $maxWidth", - allPathCharacteristics.count { (w, len) -> w <= maxWidth && len <= maxLength }, - paths.size + "Unexpected paths count with length $maxLength and width $maxWidth", + allPathCharacteristics.count { (w, len) -> w <= maxWidth && len <= maxLength }, + paths.size ) } } @@ -71,4 +73,4 @@ abstract class PathWorkerTestBase { } } } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt b/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt index c195e54c..41de26aa 100644 --- a/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt +++ b/src/test/kotlin/astminer/paths/PathWorkerTestUtil.kt @@ -2,19 +2,15 @@ package astminer.paths import astminer.common.model.ASTPath import astminer.common.model.Node -import astminer.common.postOrder -import astminer.parse.antlr.SimpleNode +import astminer.parse.antlr.AntlrNode import org.junit.Assert -fun simpleNode(number: Int, parent: Node?): SimpleNode { - return SimpleNode("$number", parent, "node_$number") -} +fun simpleNode(number: Int, parent: AntlrNode?): AntlrNode = + AntlrNode("$number", parent, "node_$number") -fun simpleNodes(numbers: List, parent: Node?): List { - return numbers.map { simpleNode(it, parent) } -} +fun simpleNodes(numbers: List, parent: AntlrNode?): List = numbers.map { simpleNode(it, parent) } -fun getParentStack(node: Node): List = (node.getParent()?.let { getParentStack(it) } ?: emptyList()) + node +fun getParentStack(node: Node): List = (node.parent?.let { getParentStack(it) } ?: emptyList()) + node fun getAllPathCharacteristics(root: Node): Collection> { val leaves = root.postOrder().filter { it.isLeaf() } @@ 
-28,8 +24,8 @@ fun getAllPathCharacteristics(root: Node): Collection> { var rightDepth = rightStack.size leftStack.zip(rightStack).zipWithNext { (left1, right1), (left2, right2) -> if (left1 == right1 && left2 != right2) { - val leftIndex = left1.getChildren().indexOf(left2) - val rightIndex = left1.getChildren().indexOf(right2) + val leftIndex = left1.children.indexOf(left2) + val rightIndex = left1.children.indexOf(right2) allPathCharacteristics.add(Pair(rightIndex - leftIndex, leftDepth + rightDepth - 1)) return@zipWithNext } @@ -43,14 +39,14 @@ fun getAllPathCharacteristics(root: Node): Collection> { } fun ASTPath.allNodesAreDistinct(): Boolean { - return this.upwardNodes.size == this.upwardNodes.toSet().size - && this.downwardNodes.size == this.downwardNodes.toSet().size + return this.upwardNodes.size == this.upwardNodes.toSet().size && + this.downwardNodes.size == this.downwardNodes.toSet().size } fun ASTPath.isSimple(): Boolean { - return this.upwardNodes.toSet().intersect(this.downwardNodes.toSet()).isEmpty() - && !this.upwardNodes.contains(this.topNode) - && !this.downwardNodes.contains(this.topNode) + return this.upwardNodes.toSet().intersect(this.downwardNodes.toSet()).isEmpty() && + !this.upwardNodes.contains(this.topNode) && + !this.downwardNodes.contains(this.topNode) } fun ASTPath.piecesMatch(): Boolean = this.upwardNodes.last() === this.downwardNodes.first() @@ -58,7 +54,7 @@ fun ASTPath.piecesMatch(): Boolean = this.upwardNodes.last() === this.downwardNo fun assertPathIsValid(path: ASTPath) { Assert.assertTrue("Nodes in each of the path pieces should be distinct", path.allNodesAreDistinct()) Assert.assertTrue( - "Path should be simple: upward and downward pieces should not intersect or contain top node", - path.isSimple() + "Path should be simple: upward and downward pieces should not intersect or contain top node", + path.isSimple() ) -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/paths/SampleTreePathWorkerTest.kt 
b/src/test/kotlin/astminer/paths/SampleTreePathWorkerTest.kt index 682baf97..4d6a5c20 100644 --- a/src/test/kotlin/astminer/paths/SampleTreePathWorkerTest.kt +++ b/src/test/kotlin/astminer/paths/SampleTreePathWorkerTest.kt @@ -8,22 +8,22 @@ class SampleTreePathWorkerTest : PathWorkerTestBase() { val rootChildren = simpleNodes(listOf(2, 3), root) val (node2, node3) = rootChildren - root.setChildren(rootChildren) + root.replaceChildren(rootChildren) val node2Children = simpleNodes(listOf(4, 5), node2) val (_, node5) = node2Children - node2.setChildren(node2Children) + node2.replaceChildren(node2Children) val node3Children = simpleNodes(listOf(6, 7, 8), node3) val (_, node7, _) = node3Children - node3.setChildren(node3Children) + node3.replaceChildren(node3Children) val node5Children = simpleNodes(listOf(9, 10, 11), node5) - node5.setChildren(node5Children) + node5.replaceChildren(node5Children) val node7Children = simpleNodes(listOf(12, 13), node7) - node7.setChildren(node7Children) + node7.replaceChildren(node7Children) return root } -} \ No newline at end of file +} diff --git a/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt new file mode 100644 index 00000000..3cfaee15 --- /dev/null +++ b/src/test/kotlin/astminer/pipeline/Code2VecExtractionTest.kt @@ -0,0 +1,29 @@ +package astminer.pipeline + +import astminer.config.* +import astminer.pipeline.util.verifyPathContextExtraction +import org.junit.Test +import java.io.File +import java.nio.file.Files + +internal class Code2VecExtractionTest { + private val testDataDir = File("src/test/resources") + + @Test + fun `test code2vec path extraction from files generates correct folders and files when no dataset`() { + val extractedDataDir = Files.createTempDirectory("extractedData") + + val languages = listOf(FileExtension.Java, FileExtension.Python) + + val config = PipelineConfig( + inputDir = testDataDir.path, + outputDir = 
extractedDataDir.toAbsolutePath().toString(), + parser = ParserConfig(ParserType.Antlr, languages), + labelExtractor = FileNameExtractorConfig(), + storage = Code2VecPathStorageConfig(8, 3) + ) + Pipeline(config).run() + + verifyPathContextExtraction(extractedDataDir.toFile(), languages.map { it.fileExtension }, false) + } +} diff --git a/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt b/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt new file mode 100644 index 00000000..114b03f7 --- /dev/null +++ b/src/test/kotlin/astminer/pipeline/PipelineMultiThreadStressTest.kt @@ -0,0 +1,120 @@ +package astminer.pipeline + +import astminer.config.* +import org.junit.AfterClass +import org.junit.BeforeClass +import org.junit.Test +import java.io.BufferedReader +import java.io.File +import java.io.FileReader +import kotlin.test.assertEquals + +class PipelineMultiThreadStressTest { + @Test + fun jsonStorageTest() { + val outputPath = tempOutputDir.resolve("json").path + val config = PipelineConfig( + inputDir = tempInputDir.path, + outputDir = outputPath, + parser = ParserConfig( + name = ParserType.Antlr, + languages = listOf(FileExtension.Java) + ), + filters = listOf(), + labelExtractor = FunctionNameExtractorConfig(), + storage = JsonAstStorageConfig(), + numOfThreads = 8 + ) + Pipeline(config).run() + val expectedNumOfAst = numOfFiles * numOfMethods + val actualNumOfAst = countLines("$outputPath/java/data/asts.jsonl") + assertEquals(expected = expectedNumOfAst.toLong(), actual = actualNumOfAst) + } + + @Test + fun code2vecStorageTest() { + val outputPath = tempOutputDir.resolve("code2vec").path + val config = PipelineConfig( + inputDir = tempInputDir.path, + outputDir = outputPath, + parser = ParserConfig( + name = ParserType.Antlr, + languages = listOf(FileExtension.Java) + ), + filters = listOf(), + labelExtractor = FunctionNameExtractorConfig(), + storage = Code2VecPathStorageConfig( + maxPaths = null, + maxTokens = null, + 
maxPathContextsPerEntity = null, + maxPathLength = 1000, + maxPathWidth = 1000 + ), + numOfThreads = 8 + ) + Pipeline(config).run() + val expectedNumOfPathContexts = numOfFiles * numOfMethods + val actualNumOfPathContexts = countLines("$outputPath/java/data/path_contexts.c2s") + assertEquals(expected = expectedNumOfPathContexts.toLong(), actual = actualNumOfPathContexts) + } + + @Test + fun code2seqStorageTest() { + val outputPath = tempOutputDir.resolve("code2seq").path + val config = PipelineConfig( + inputDir = tempInputDir.path, + outputDir = outputPath, + parser = ParserConfig( + name = ParserType.Antlr, + languages = listOf(FileExtension.Java) + ), + filters = listOf(), + labelExtractor = FunctionNameExtractorConfig(), + storage = Code2SeqPathStorageConfig( + maxPathContextsPerEntity = null, + maxPathLength = 1000, + maxPathWidth = 1000 + ), + numOfThreads = 8 + ) + Pipeline(config).run() + val expectedNumOfPathContexts = numOfFiles * numOfMethods + val actualNumOfPathContexts = countLines("$outputPath/java/data/path_contexts.c2s") + assertEquals(expected = expectedNumOfPathContexts.toLong(), actual = actualNumOfPathContexts) + } + + private fun countLines(filePath: String): Long { + val reader = BufferedReader(FileReader(filePath)) + var numOfLines = 0L + while (reader.readLine() != null) { numOfLines++ } + return numOfLines + } + + companion object { + private const val numOfFiles = 3000 + private const val numOfMethods = 100 + private val tempInputDir = File("src/test/resources/someData") + private val tempOutputDir = File("src/test/resources/someOutput") + + @BeforeClass + @JvmStatic + fun setup() { + tempInputDir.mkdirs() + repeat(numOfFiles) { index -> + val newFile = File.createTempFile("someFile", ".java", tempInputDir) + newFile.writeText("class someClass$index {\n") + repeat(numOfMethods) { + newFile.appendText("public void someMethod${it + index * numOfMethods}() {} \n") + } + newFile.appendText("}") + } + } + + @AfterClass + @JvmStatic + fun 
tearDown() { + tempInputDir.deleteRecursively() + tempOutputDir.deleteRecursively() + } + } +} diff --git a/src/test/kotlin/astminer/cli/util/OutputVerification.kt b/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt similarity index 71% rename from src/test/kotlin/astminer/cli/util/OutputVerification.kt rename to src/test/kotlin/astminer/pipeline/util/OutputVerification.kt index 89c75d32..33c60822 100644 --- a/src/test/kotlin/astminer/cli/util/OutputVerification.kt +++ b/src/test/kotlin/astminer/pipeline/util/OutputVerification.kt @@ -1,4 +1,4 @@ -package astminer.cli.util +package astminer.pipeline.util import java.io.File import kotlin.test.assertTrue @@ -22,18 +22,25 @@ internal fun checkExtractedDir(extractedDataDir: File, languages: List) internal fun validPathContextsFile(name: String, batching: Boolean): Boolean { return if (batching) { - name.startsWith("path_contexts_") && name.endsWith(".csv") + name.startsWith("path_contexts_") && name.endsWith(".c2s") } else { - name == "path_contexts.csv" + name == "path_contexts.c2s" } } +internal fun validPathContextHoldout(holdoutDir: File, batching: Boolean): Boolean { + val holdoutFiles = checkNotNull(holdoutDir.listFiles()) + return holdoutFiles.all { validPathContextsFile(it.name, batching) } +} + internal fun checkPathContextsDir(languageDir: File, batching: Boolean) { val expectedFiles = listOf("tokens.csv", "paths.csv", "node_types.csv") languageDir.listFiles()?.forEach { file -> with(file) { + val isDescriptionFile = expectedFiles.contains(name) + val isPathContextHoldout = this.isDirectory && validPathContextHoldout(this, batching) assertTrue( - expectedFiles.contains(name) || validPathContextsFile(name, batching), + isDescriptionFile || isPathContextHoldout, "Unexpected file $name in ${languageDir.name}" ) } diff --git a/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt new file mode 100644 index 00000000..ee4e1873 --- /dev/null 
+++ b/src/test/kotlin/astminer/storage/ast/CsvAstStorageTest.kt @@ -0,0 +1,67 @@ +package astminer.storage.ast + +import astminer.common.createBamboo +import astminer.common.createDummyTree +import astminer.common.createSmallTree +import org.junit.Assert +import org.junit.Test +import java.io.File +import kotlin.test.AfterTest + +class CsvAstStorageTest { + private fun generateCorrectAstStringForBamboo(from: Int, to: Int): String { + if (from == to) { + return "$from $from{}" + } + val child = generateCorrectAstStringForBamboo(from + 1, to) + return "$from $from{$child}" + } + + @AfterTest + fun removeTestOutput() { + File(OUTPUT_FOLDER).deleteRecursively() + } + + @Test + fun testAstString() { + val root = createSmallTree() + val storage = CsvAstStorage(OUTPUT_FOLDER) + storage.store(root.labeledWith("entityId")) + + Assert.assertEquals(storage.astString(root), "1 1{2 2{}3 3{4 4{}}}") + } + + @Test + fun `test ast string for bigger tree`() { + val root = createDummyTree() + val storage = CsvAstStorage(OUTPUT_FOLDER) + storage.store(root.labeledWith("entityId")) + + val expected = "1 1{2 2{3 3{}4 4{}5 5{}}6 6{7 7{}8 8{}}}" + Assert.assertEquals(expected, storage.astString(root)) + } + + @Test + fun `test ast string for small bamboo`() { + val bamboo = createBamboo(10) + val storage = CsvAstStorage(OUTPUT_FOLDER) + storage.store(bamboo.labeledWith("entityId")) + + val expected = generateCorrectAstStringForBamboo(1, 10) + Assert.assertEquals(expected, storage.astString(bamboo)) + } + + @Test + fun `test ast string for big bamboo`() { + val bamboo = createBamboo(100) + val storage = CsvAstStorage(OUTPUT_FOLDER) + storage.store(bamboo.labeledWith("entityId")) + + val expected = generateCorrectAstStringForBamboo(1, 100) + Assert.assertEquals(expected, storage.astString(bamboo)) + } + + companion object { + private const val OUTPUT_FOLDER = "test_output" + } +} diff --git a/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt 
b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt new file mode 100644 index 00000000..f14a82e7 --- /dev/null +++ b/src/test/kotlin/astminer/storage/ast/DotAstStorageTest.kt @@ -0,0 +1,146 @@ +package astminer.storage.ast + +import astminer.common.* +import org.junit.Test +import java.io.File +import kotlin.test.AfterTest +import kotlin.test.assertEquals + +class DotAstStorageTest { + private fun testOnTree(root: DummyNode, expectedLines: List) { + DotAstStorage("test_examples").use { storage -> + storage.store(root.labeledWith("entityId")) + } + + val storageLines = File(File("test_examples/data", "asts"), "ast_0.dot").readLines() + + File("test_examples").deleteRecursively() + + assertEquals(expectedLines, storageLines) + } + + private fun getBambooLines(size: Int): List { + val lines = mutableListOf() + lines.add("digraph entityId {") + for (i in 0..size - 2) { + lines.add("$i -- {${i + 1}};") + } + lines.add("${size - 1} -- {};") + lines.add("}") + return lines + } + + @AfterTest + fun removeOutput() { + File(OUTPUT_FOLDER).deleteRecursively() + } + + @Test + fun testDotStorageOnSmallTree() { + val root = createSmallTree() + val storage = DotAstStorage(OUTPUT_FOLDER) + storage.store(root.labeledWith("entityId")) + + storage.close() + + val trueLines = listOf( + "digraph entityId {", + "0 -- {1 2};", + "1 -- {};", + "2 -- {3};", + "3 -- {};", + "}" + ) + val storageLines = File(File("$OUTPUT_FOLDER/data", "asts"), "ast_0.dot").readLines() + assertEquals(trueLines, storageLines) + } + + @Test + fun `test dot storage on dummy tree`() { + val trueLines = listOf( + "digraph entityId {", + "0 -- {1 2};", + "1 -- {3 4 5};", + "3 -- {};", + "4 -- {};", + "5 -- {};", + "2 -- {6 7};", + "6 -- {};", + "7 -- {};", + "}" + ) + + testOnTree(createDummyTree(), trueLines) + } + + @Test + fun `test dot storage on small bamboo`() { + testOnTree(createBamboo(10), getBambooLines(10)) + } + + @Test + fun `test dot storage on big bamboo`() { + testOnTree(createBamboo(100), 
getBambooLines(100)) + } + + @Test + fun testLabelNormalization() { + val label = "some/kind/of/random/path" + val storage = DotAstStorage(OUTPUT_FOLDER) + val normalizedLabel = storage.normalizeAstLabel(label) + + assertEquals("some_kind_of_random_path", normalizedLabel) + } + + @Test + fun testBindingNormalization() { + val label = "\$supposeToBeListener" + val storage = DotAstStorage(OUTPUT_FOLDER) + val normalizedLabel = storage.normalizeAstLabel(label) + + assertEquals("_supposeToBeListener", normalizedLabel) + } + + @Test + fun testLabelWithCommaNormalization() { + val labelWithComma = "some,bad,label" + val storage = DotAstStorage(OUTPUT_FOLDER) + val normalizedLabel = storage.normalizeAstLabel(labelWithComma) + + assertEquals("some_bad_label", normalizedLabel) + } + + @Test + fun testSplittingFullPath() { + val fullPath = "/path1/path2/path_3/path.4/file.name" + val storage = DotAstStorage(OUTPUT_FOLDER) + val (path, fileName) = storage.splitFullPath(fullPath) + + assertEquals("/path1/path2/path_3/path.4", path) + assertEquals("file.name", fileName) + } + + @Test + fun testSplittingFileName() { + val fullPath = "file.name" + val storage = DotAstStorage(OUTPUT_FOLDER) + val (path, fileName) = storage.splitFullPath(fullPath) + + assertEquals("", path) + assertEquals("file.name", fileName) + } + + @Test + fun testFilepathNormalization() { + // real life example + val badFilepath = "interviews/Leet-Code/binary-search/pow(x,n).java" + val storage = DotAstStorage(OUTPUT_FOLDER) + val normalizedFilepath = storage.normalizeFilepath(badFilepath) + + assertEquals("interviews/Leet-Code/binary-search/pow_x_n_.java", normalizedFilepath) + } + + companion object { + private const val OUTPUT_FOLDER = "test_output" + } +} diff --git a/src/test/kotlin/astminer/storage/ast/TreeFlattenerTest.kt b/src/test/kotlin/astminer/storage/ast/TreeFlattenerTest.kt new file mode 100644 index 00000000..f1cae49c --- /dev/null +++ b/src/test/kotlin/astminer/storage/ast/TreeFlattenerTest.kt 
@@ -0,0 +1,86 @@ +package astminer.storage.ast + +import astminer.common.DummyNode +import astminer.common.createBamboo +import astminer.common.createDummyTree +import astminer.common.createSmallTree +import org.junit.Before +import org.junit.Test +import kotlin.test.assertEquals + +internal class TreeFlattenerTest { + private lateinit var treeFlattener: TreeFlattener + + @Before + fun init() { + treeFlattener = TreeFlattener() + } + + private data class EnumeratedResult(val id: Int, val typeLabel: String, val children: List = emptyList()) + + private fun enumerate(node: DummyNode): List { + val enumeratedNodes = treeFlattener.flatten(node) + return enumeratedNodes.map { EnumeratedResult(it.id, it.node.typeLabel, it.children.map { child -> child.id }) } + } + + @Test + fun `test for 1 node`() { + val node = DummyNode("a") + val expected = listOf(EnumeratedResult(0, "a")) + assertEquals(expected, enumerate(node)) + } + + @Test + fun `test should be reusable`() { + treeFlattener.flatten(DummyNode("something previous")) + val node = DummyNode("a") + val expected = listOf(EnumeratedResult(0, "a")) + assertEquals(expected, enumerate(node)) + } + + @Test + fun `test on small bamboo`() { + val bamboo = createBamboo(10) + val expected = List(10) { i -> + EnumeratedResult(i, (i + 1).toString(), if (i == 9) emptyList() else listOf(i + 1)) + } + assertEquals(expected, enumerate(bamboo)) + } + + @Test + fun `test on big bamboo`() { + val bamboo = createBamboo(1000) + val expected = List(1000) { i -> + EnumeratedResult(i, (i + 1).toString(), if (i == 999) emptyList() else listOf(i + 1)) + } + assertEquals(expected, enumerate(bamboo)) + } + + @Test + fun `test on very small dummy tree`() { + val node = createSmallTree() + val expected = listOf( + EnumeratedResult(0, "1", listOf(1, 2)), + EnumeratedResult(1, "2"), + EnumeratedResult(2, "3", listOf(3)), + EnumeratedResult(3, "4") + ) + assertEquals(expected, enumerate(node)) + } + + @Test + fun `test on small dummy tree`() { + val 
node = createDummyTree() + val expected = listOf( + EnumeratedResult(0, "1", listOf(1, 5)), + EnumeratedResult(1, "2", listOf(2, 3, 4)), + EnumeratedResult(2, "4"), + EnumeratedResult(3, "5"), + EnumeratedResult(4, "6"), + EnumeratedResult(5, "3", listOf(6, 7)), + EnumeratedResult(6, "7"), + EnumeratedResult(7, "8") + ) + assertEquals(expected, enumerate(node)) + } +} diff --git a/src/test/resources/examples/1.php b/src/test/resources/examples/1.php new file mode 100644 index 00000000..28972163 --- /dev/null +++ b/src/test/resources/examples/1.php @@ -0,0 +1,28 @@ +_add($a, $b); + } + + public function sub($a, $b) { + return $a - $b; + } + + public function mul($a, $b) { + return $a * $b; + } + + /** + * Защищённый метод + * @param interge + * @return interge + */ + protected function _add($a, $b) { + return $a + $b; + } +} + +$server = new Yar_Server(new Operator()); +$server->handle(); +?> \ No newline at end of file diff --git a/src/test/resources/examples/test_dir_name_extension.java/1.java b/src/test/resources/examples/test_dir_name_extension.java/1.java index 52c71b04..179582b5 100644 --- a/src/test/resources/examples/test_dir_name_extension.java/1.java +++ b/src/test/resources/examples/test_dir_name_extension.java/1.java @@ -1,5 +1,12 @@ class SingleFunction { + int x; + + @Override void fun(String[] args, int param) { System.out.println("Hello again world!"); } + + public SingleFunction { + x = 5; + } } diff --git a/src/test/resources/gumTreeMethodSplitter/1.py b/src/test/resources/gumTreeMethodSplitter/1.py new file mode 100644 index 00000000..0fd6c75d --- /dev/null +++ b/src/test/resources/gumTreeMethodSplitter/1.py @@ -0,0 +1,44 @@ +from typing import Dict, List, Union + + +def no_args_func(): + """ + :return: None + """ + return None + + +def with_args_no_typed(a, b, c, d = 42): + return a, b, c + + +def with_typed_args(a: int, z: str): + return None + + +def with_typed_return_no_args() -> str: + x: int = 42 + return "str" + + +def full_typed(filename: 
str) -> str: + """ + :param filename: path to file + :return: string with file content + """ + with open(filename, 'rt') as f: + content = f.read() + return content + + +def func_dif_args_typed_return(a, b, /, c, d, *, e, f) -> int: + """ + python doc + """ + return 42 + + +JsonNodeType = Dict[str, Union[str, List[int]]] + +def complex_args_full_typed(node: JsonNodeType) -> JsonNodeType: + return node diff --git a/src/test/resources/gumTreeMethodSplitter/2.py b/src/test/resources/gumTreeMethodSplitter/2.py new file mode 100644 index 00000000..3b487363 --- /dev/null +++ b/src/test/resources/gumTreeMethodSplitter/2.py @@ -0,0 +1,43 @@ +class A: + def __init__(self): + """ + outer init + """ + pass + + + def __add__(self, other): + pass + + + def foo(self, x): + return x * x + + + def foo_typed(self, x: int, y: int) -> int: + return x * y + + + class B: + def __init__(self): + """ + inner init + """ + pass + + + def __get__(self, instance, owner): + pass + + + def foo_typed(self, x: int, y: int) -> int: + return x + y + + + class C: + + def __init__(self): + pass + + def bar_typed(self, x: int) -> int: + return x diff --git a/src/test/resources/gumTreeMethodSplitter/3.py b/src/test/resources/gumTreeMethodSplitter/3.py new file mode 100644 index 00000000..254dc280 --- /dev/null +++ b/src/test/resources/gumTreeMethodSplitter/3.py @@ -0,0 +1,16 @@ + +async def async_simple_no_typed(gh, original_issue, branch, backport_pr_number): + """ + async doc + """ + pass + + +@router.register("pull_request", action="opened") +@router.register("pull_request", action="edited") +async def async_schrecklich_typed(event: str, x: int , *args, **kwargs) -> int: + + def inner(): + pass + + return 42 diff --git a/src/test/resources/gumTreeMethodSplitter/4.py b/src/test/resources/gumTreeMethodSplitter/4.py new file mode 100644 index 00000000..e13209ba --- /dev/null +++ b/src/test/resources/gumTreeMethodSplitter/4.py @@ -0,0 +1,22 @@ +def foo(a: int): + + def foo_1(b): + + def foo_2(c): + 
return None + + return foo_2(b) + + + def bar_1(b: int, c: int) -> int: + + def bar_2(d: int, e: int) -> int: + return 42 + + return bar_2(b, c) + + + foo_1(a) + bar_1(a, a) + + return None diff --git a/src/test/resources/methodSplitting/testMethodSplitting.java b/src/test/resources/methodSplitting/testMethodSplitting.java index e3667b08..621e502b 100644 --- a/src/test/resources/methodSplitting/testMethodSplitting.java +++ b/src/test/resources/methodSplitting/testMethodSplitting.java @@ -24,4 +24,6 @@ void functionWithNoParameters() {} void functionWithOneParameter(int p1) {} void functionWithThreeParameters(Class p1, String[][] p2, int[]... p3) {} + + void functionWithStrangeArrayParameter(int arr[]) {} } \ No newline at end of file diff --git a/src/test/resources/methodSplitting/testMethodSplitting.php b/src/test/resources/methodSplitting/testMethodSplitting.php new file mode 100644 index 00000000..0201ecca --- /dev/null +++ b/src/test/resources/methodSplitting/testMethodSplitting.php @@ -0,0 +1,87 @@ + $x + $y; + +// #9 info : {name: null, args: $x, enclosing element: variable, enclosing element name: $arrow2, return type: null} +// #10 info : {name: null, args: $y, enclosing element: function, enclosing element name: null, return type: null} +$arrow2 = fn($x) => fn($y) => $x * $y; + +// #11 info : {name: null, args: $x, enclosing element: null, enclosing element name: null, return type: null} +fn($x = 42) => $x; + +// #12 info : {name: null, args: &$x, enclosing element: null, enclosing element name: null, return type: null} +fn(&$x) => $x; + +// #13 info : {name: null, args: $x, enclosing element: null, enclosing element name: null, return type: null} +fn&($x) => $x; + +// #14 info : {name: null, args: $x, ...$rest, enclosing element: null, enclosing element name: null, return type: null} +fn($x, ...$rest) => $rest; + +////////////////// METHOD FUNCTIONS ////////////////// + +class someClass { + // #15 info : {name: someFunc, args: , enclosing element: class, 
enclosing element name: someClass, return type: null} + public function someFunc() { + return 42; + } + + // #16 info : {name: funcWithParams, args: $a, $b, enclosing element: class, enclosing element name: someClass, return type: null} + public function funcWithParams($a, $b) { + + // #17 info : {name: innerFunction, args: , enclosing element: method, enclosing element name: funcWithParams, return type: null} + function innerFunction() { + + // #18 info : {name: superInnerFunction, args: , enclosing element: function, enclosing element name: innerFunction, return type: null} + function superInnerFunction() { + return 42; + } + return 42; + } + return 42; + } +} \ No newline at end of file diff --git a/src/test/resources/methodSplitting/testMethodSplitting.py b/src/test/resources/methodSplitting/testMethodSplitting.py index befebc43..e6663fe4 100644 --- a/src/test/resources/methodSplitting/testMethodSplitting.py +++ b/src/test/resources/methodSplitting/testMethodSplitting.py @@ -1,21 +1,48 @@ -def funWithNoClass(): +def fun_with_no_class(): pass class Class1: - def funInClass1(): + def fun_in_class1(self): pass class Class2: - def funInClass2(): + def fun_in_class2(self): pass -def functionWithNoParameters(): +def function_with_no_parameters(): pass -def functionWithOneParameter(p1): +def function_with_one_parameter(p1): pass -def functionWithThreeParameters(p1, p2 = 4, p3: int = 3): +def function_with_one_typed_parameter(p1: int): pass + +def function_with_complex_parameter(p1: List[int]): + pass + +def function_with_three_parameters(p1, p2 = 4, p3: int = 3): + pass + +class Class3: + def fun_with_parameter_in_class(self, p1): + pass + + def fun_with_typed_parameter_in_class(self, p1: int): + pass + +def function_containing_function(): + def function_inside_function(): + pass + pass + +class Class4: + def some_method(self): + def function_inside_method(): + pass + def second_method(self): + def second_function_inside_method(): + def fun_inside_fun_inside_method(): 
+ pass \ No newline at end of file