Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better performance and feedback #171

Merged
merged 20 commits into from
Aug 6, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/main/kotlin/astminer/common/model/ParsingModel.kt
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@ abstract class Node(val originalToken: String?) {
abstract val children: List<Node>
abstract val parent: Node?

val normalizedToken: String = run {
val normalizedToken: String =
originalToken?.let {
val subtokens = splitToSubtokens(it)
if (subtokens.isEmpty()) EMPTY_TOKEN else subtokens.joinToString(TOKEN_DELIMITER)
} ?: EMPTY_TOKEN
}

var technicalToken: String? = null

Expand Down
17 changes: 7 additions & 10 deletions src/main/kotlin/astminer/common/model/ParsingResultModel.kt
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@ import java.io.File
import kotlin.concurrent.thread

private val logger = KotlinLogging.logger("HandlerFactory")
private const val NUM_OF_THREADS = 16

interface ParsingResultFactory {
fun parse(file: File): ParsingResult<out Node>

fun <T> parseFiles(
files: List<File>,
progressBar: ProgressBar? = null,
action: (ParsingResult<out Node>) -> T
): List<T?> {
val results = mutableListOf<T?>()
Expand All @@ -25,24 +23,25 @@ interface ParsingResultFactory {
logger.error(parsingException) { "Failed to parse file ${file.path}" }
results.add(null)
}
progressBar?.step()
}
return results
}

fun <T> parseFilesAsync(files: List<File>, action: (ParsingResult<out Node>) -> T): List<T?> {
fun <T> parseFilesInThreads(
files: List<File>,
numOfThreads: Int,
action: (ParsingResult<out Node>) -> T
): List<T?> {
val results = mutableListOf<T?>()
val threads = mutableListOf<Thread>()
val progressBar = ProgressBar("Parsing progress:", files.size.toLong())

synchronized(results) {
files.chunked(files.size / NUM_OF_THREADS + 1).filter { it.isNotEmpty() }
files.chunked(files.size / numOfThreads + 1).filter { it.isNotEmpty() }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use ceil(files.size / numOfThreads). This is more accurate (the case is taken into account if it is divided entirely), as well as more readable (here I had to hang up, why are you +1 doing)

.map { chunk ->
threads.add(thread { results.addAll(parseFiles(chunk, progressBar, action)) })
threads.add(thread { results.addAll(parseFiles(chunk, action)) })
}
}
threads.map { it.join() }
progressBar.close()
return results
}
}
Expand All @@ -57,11 +56,9 @@ interface PreprocessingParsingResultFactory : ParsingResultFactory {
*/
override fun <T> parseFiles(
files: List<File>,
progressBar: ProgressBar?,
action: (ParsingResult<out Node>) -> T
) =
files.map { file ->
progressBar?.step()
try {
val preprocessedFile = preprocess(file)
val result = action(parse(preprocessedFile))
Expand Down
3 changes: 2 additions & 1 deletion src/main/kotlin/astminer/config/PipelineConfig.kt
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ data class PipelineConfig(
val parser: ParserConfig,
val filters: List<FilterConfig> = emptyList(),
@SerialName("label") val labelExtractor: LabelExtractorConfig,
val storage: StorageConfig
val storage: StorageConfig,
val performance: PerformanceConfig = defaultPerformanceConfig
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use yet another config object? Maybe we can set the number of threads directly in this config?
Also, I suggest setting the number of threads nullable. Null means no thread at all.

)
8 changes: 7 additions & 1 deletion src/main/kotlin/astminer/pipeline/Pipeline.kt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import astminer.parse.getParsingResultFactory
import astminer.pipeline.branch.FilePipelineBranch
import astminer.pipeline.branch.FunctionPipelineBranch
import astminer.pipeline.branch.IllegalLabelExtractorException
import me.tongfei.progressbar.ProgressBar
import java.io.File

/**
Expand Down Expand Up @@ -44,6 +45,7 @@ class Pipeline(private val config: PipelineConfig) {
* Runs the pipeline that is defined in the [config].
*/
fun run() {
println("Working in ${config.performance.numOfThreads}")
for (language in config.parser.languages) {
println("Parsing $language")
val parsingResultFactory = getParsingResultFactory(language, config.parser.name)
Expand All @@ -52,15 +54,19 @@ class Pipeline(private val config: PipelineConfig) {
val files = getProjectFilesWithExtension(inputDirectory, language.fileExtension)
println("${files.size} files retrieved")

val progressBar = ProgressBar("", files.size.toLong())

createStorage(language).use { storage ->
synchronized(storage) {
parsingResultFactory.parseFilesAsync(files) { parseResult ->
parsingResultFactory.parseFilesInThreads(files, config.performance.numOfThreads) { parseResult ->
for (labeledResult in branch.process(parseResult)) {
storage.store(labeledResult)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK, you can update progress bar here

progressBar.step()
}
}
}
progressBar.close()
}
println("Done!")
}
Expand Down