Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add JS to CLI #123

Merged
merged 2 commits into from
Feb 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Supported languages of the input:
- [x] Java
- [x] Python
- [x] C/C++
- [x] Javascript (beta) (see [issue](https://github.com/vovak/astminer/issues/22))
- [x] JavaScript

### Version history

Expand Down Expand Up @@ -46,22 +46,22 @@ In other tasks, if you feed C/C++ file with macroses, they will be dropped as we

Extract ASTs from all the files in supported languages.
```shell script
./cli.sh parse --lang py,java,c,cpp --project path/to/project --output path/to/result --storage dot
./cli.sh parse --lang py,java,c,cpp,js --project path/to/project --output path/to/result --storage dot
```

#### PathContexts

Extract path contexts from all the files in supported languages and store them in the form `fileName triplesOfPathContexts`.
```shell script
./cli.sh pathContexts --lang py,java,c,cpp --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P
./cli.sh pathContexts --lang py,java,c,cpp,js --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P
```

#### Code2vec

Extract data suitable as input for [code2vec](https://github.com/tech-srl/code2vec) model.
Parse all files written in the specified languages into ASTs, split them into methods, and store them in the form `method|name triplesOfPathContexts`.
```shell script
./cli.sh code2vec --lang py,java,c,cpp --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P --split-tokens --granularity method
./cli.sh code2vec --lang py,java,c,cpp,js --project path/to/project --output path/to/results --maxL L --maxW W --maxContexts C --maxTokens T --maxPaths P --split-tokens --granularity method
```

### Integrate in your mining pipeline
Expand Down
4 changes: 2 additions & 2 deletions src/main/kotlin/astminer/cli/Code2VecExtractor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ import java.io.File

class Code2VecExtractor(private val customLabelExtractor: LabelExtractor? = null) : CliktCommand() {

private val supportedLanguages = listOf("java", "c", "cpp", "py")
private val supportedLanguages = listOf("java", "c", "cpp", "py", "js")

val extensions: List<String> by option(
"--lang",
help = "Comma-separated list of file extensions that will be parsed.\n" +
"Supports 'c', 'cpp', 'java', 'py', defaults to all these extensions."
"Supports 'c', 'cpp', 'java', 'py', 'js', defaults to all these extensions."
).split(",").default(supportedLanguages)

val projectRoot: String by option(
Expand Down
5 changes: 5 additions & 0 deletions src/main/kotlin/astminer/cli/LabelExtractors.kt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import astminer.common.preOrder
import astminer.common.setNormalizedToken
import astminer.parse.antlr.SimpleNode
import astminer.parse.antlr.java.JavaMethodSplitter
import astminer.parse.antlr.javascript.JavaScriptMethodSplitter
import astminer.parse.antlr.python.PythonMethodSplitter
import astminer.parse.cpp.FuzzyMethodSplitter
import astminer.parse.cpp.FuzzyNode
Expand Down Expand Up @@ -76,6 +77,10 @@ abstract class MethodLabelExtractor(
val methodSplitter = PythonMethodSplitter()
methodSplitter.splitIntoMethods(root as SimpleNode)
}
"js" -> {
val methodSplitter = JavaScriptMethodSplitter()
methodSplitter.splitIntoMethods(root as SimpleNode)
}
else -> throw UnsupportedOperationException("Unsupported extension $fileExtension")
}.filter { methodInfo ->
filterPredicates.all { predicate ->
Expand Down
4 changes: 3 additions & 1 deletion src/main/kotlin/astminer/cli/PathContextsExtractor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import astminer.common.getNormalizedToken
import astminer.common.getProjectFilesWithExtension
import astminer.common.model.*
import astminer.parse.antlr.java.JavaParser
import astminer.parse.antlr.javascript.JavaScriptParser
import astminer.parse.antlr.python.PythonParser
import astminer.parse.cpp.FuzzyCppParser
import astminer.parse.java.GumTreeJavaParser
Expand Down Expand Up @@ -33,7 +34,8 @@ class PathContextsExtractor(private val customLabelExtractor: LabelExtractor? =
SupportedLanguage(GumTreeJavaParser(), "java"),
SupportedLanguage(FuzzyCppParser(), "c"),
SupportedLanguage(FuzzyCppParser(), "cpp"),
SupportedLanguage(PythonParser(), "py")
SupportedLanguage(PythonParser(), "py"),
SupportedLanguage(JavaScriptParser(), "js")
)

val extensions: List<String> by option(
Expand Down
4 changes: 2 additions & 2 deletions src/main/kotlin/astminer/cli/ProjectParser.kt
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ import java.io.File

class ProjectParser(private val customLabelExtractor: LabelExtractor? = null) : CliktCommand() {

private val supportedLanguages = listOf("java", "c", "cpp", "py")
private val supportedLanguages = listOf("java", "c", "cpp", "py", "js")

val extensions: List<String> by option(
"--lang",
help = "Comma-separated list of file extensions that will be parsed.\n" +
"Supports 'c', 'cpp', 'java', 'py', defaults to all these extensions."
"Supports 'c', 'cpp', 'java', 'py', 'js', defaults to all these extensions."
).split(",").default(supportedLanguages)

val projectRoot: String by option(
Expand Down
2 changes: 2 additions & 0 deletions src/main/kotlin/astminer/cli/utils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import astminer.common.model.Parser
import astminer.common.preOrder
import astminer.common.setNormalizedToken
import astminer.common.splitToSubtokens
import astminer.parse.antlr.javascript.JavaScriptParser

fun getParser(
extension: String,
Expand All @@ -28,6 +29,7 @@ fun getParser(
"c" -> FuzzyCppParser()
"cpp" -> FuzzyCppParser()
"py" -> PythonParser()
"js" -> JavaScriptParser()
else -> {
throw UnsupportedOperationException("Unsupported extension $extension")
}
Expand Down