diff --git a/README.md b/README.md index 129ccbc5..0851c456 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Supported languages of the input: - [x] Java - [x] Python - [x] C/C++ -- [x] Javascript (beta) (see [issue](https://github.com/vovak/astminer/issues/22)) +- [x] Javascript ### Version history @@ -46,14 +46,14 @@ In other tasks, if you feed C/C++ file with macroses, they will be dropped as we Extract ASTs from all the files in supported languages. ```shell script -./cli.sh parse --lang py,java,c,cpp --project path/to/project --output path/to/result --storage dot +./cli.sh parse --lang py,java,c,cpp,js --project path/to/project --output path/to/result --storage dot ``` #### PathContexts Extract path contexts from all the files in supported languages and store in form `fileName triplesOfPathContexts`. ```shell script -./cli.sh pathContexts --lang py,java,c,cpp --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P +./cli.sh pathContexts --lang py,java,c,cpp,js --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P ``` #### Code2vec @@ -61,7 +61,7 @@ Extract path contexts from all the files in supported languages and store in for Extract data suitable as input for [code2vec](https://github.com/tech-srl/code2vec) model. Parse all files written in specified language into ASTs, split into methods, and store in form `method|name triplesOfPathContexts`. 
```shell script -./cli.sh code2vec --lang py,java,c,cpp --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P --split-tokens --granularity method +./cli.sh code2vec --lang py,java,c,cpp,js --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P --split-tokens --granularity method ``` ### Integrate in your mining pipeline diff --git a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt index 402c9092..26b3e263 100644 --- a/src/main/kotlin/astminer/cli/Code2VecExtractor.kt +++ b/src/main/kotlin/astminer/cli/Code2VecExtractor.kt @@ -17,12 +17,12 @@ import java.io.File class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null) : CliktCommand() { - private val supportedLanguages = listOf("java", "c", "cpp", "py") + private val supportedLanguages = listOf("java", "c", "cpp", "py", "js") val extensions: List<String> by option( "--lang", help = "Comma-separated list of file extensions that will be parsed.\n" + - "Supports 'c', 'cpp', 'java', 'py', defaults to all these extensions." + "Supports 'c', 'cpp', 'java', 'py', 'js', defaults to all these extensions." 
).split(",").default(supportedLanguages) val projectRoot: String by option( diff --git a/src/main/kotlin/astminer/cli/LabelExtractors.kt b/src/main/kotlin/astminer/cli/LabelExtractors.kt index 1d195449..cb035e9c 100644 --- a/src/main/kotlin/astminer/cli/LabelExtractors.kt +++ b/src/main/kotlin/astminer/cli/LabelExtractors.kt @@ -7,6 +7,7 @@ import astminer.common.preOrder import astminer.common.setNormalizedToken import astminer.parse.antlr.SimpleNode import astminer.parse.antlr.java.JavaMethodSplitter +import astminer.parse.antlr.javascript.JavaScriptMethodSplitter import astminer.parse.antlr.python.PythonMethodSplitter import astminer.parse.cpp.FuzzyMethodSplitter import astminer.parse.cpp.FuzzyNode @@ -76,6 +77,10 @@ abstract class MethodLabelExtractor( val methodSplitter = PythonMethodSplitter() methodSplitter.splitIntoMethods(root as SimpleNode) } + "js" -> { + val methodSplitter = JavaScriptMethodSplitter() + methodSplitter.splitIntoMethods(root as SimpleNode) + } else -> throw UnsupportedOperationException("Unsupported extension $fileExtension") }.filter { methodInfo -> filterPredicates.all { predicate -> diff --git a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt index 7cd53b9d..ff8d4d61 100644 --- a/src/main/kotlin/astminer/cli/PathContextsExtractor.kt +++ b/src/main/kotlin/astminer/cli/PathContextsExtractor.kt @@ -4,6 +4,7 @@ import astminer.common.getNormalizedToken import astminer.common.getProjectFilesWithExtension import astminer.common.model.* import astminer.parse.antlr.java.JavaParser +import astminer.parse.antlr.javascript.JavaScriptParser import astminer.parse.antlr.python.PythonParser import astminer.parse.cpp.FuzzyCppParser import astminer.parse.java.GumTreeJavaParser @@ -33,7 +34,8 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? 
= SupportedLanguage(GumTreeJavaParser(), "java"), SupportedLanguage(FuzzyCppParser(), "c"), SupportedLanguage(FuzzyCppParser(), "cpp"), - SupportedLanguage(PythonParser(), "py") + SupportedLanguage(PythonParser(), "py"), + SupportedLanguage(JavaScriptParser(), "js") ) val extensions: List<String> by option( diff --git a/src/main/kotlin/astminer/cli/ProjectParser.kt b/src/main/kotlin/astminer/cli/ProjectParser.kt index 4e161ecc..7941f1b7 100644 --- a/src/main/kotlin/astminer/cli/ProjectParser.kt +++ b/src/main/kotlin/astminer/cli/ProjectParser.kt @@ -12,12 +12,12 @@ import java.io.File class ProjectParser(private val customLabelExtractor: LabelExtractor? = null) : CliktCommand() { - private val supportedLanguages = listOf("java", "c", "cpp", "py") + private val supportedLanguages = listOf("java", "c", "cpp", "py", "js") val extensions: List<String> by option( "--lang", help = "Comma-separated list of file extensions that will be parsed.\n" + - "Supports 'c', 'cpp', 'java', 'py', defaults to all these extensions." + "Supports 'c', 'cpp', 'java', 'py', 'js', defaults to all these extensions." ).split(",").default(supportedLanguages) val projectRoot: String by option( diff --git a/src/main/kotlin/astminer/cli/utils.kt b/src/main/kotlin/astminer/cli/utils.kt index edf36664..815e4b5e 100644 --- a/src/main/kotlin/astminer/cli/utils.kt +++ b/src/main/kotlin/astminer/cli/utils.kt @@ -10,6 +10,7 @@ import astminer.common.model.Parser import astminer.common.preOrder import astminer.common.setNormalizedToken import astminer.common.splitToSubtokens +import astminer.parse.antlr.javascript.JavaScriptParser fun getParser( extension: String, @@ -28,6 +29,7 @@ fun getParser( "c" -> FuzzyCppParser() "cpp" -> FuzzyCppParser() "py" -> PythonParser() + "js" -> JavaScriptParser() else -> { throw UnsupportedOperationException("Unsupported extension $extension") }