-
Notifications
You must be signed in to change notification settings - Fork 2.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make getting style violations thread-safe and parallelize linting #1103
Changes from 4 commits
8a4671e
28813fc
d82ca68
f24ceac
914aeb1
8774a92
253e1f4
3608cf4
53d0cf6
8906da6
95a468a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
// Copyright © 2016 Realm. All rights reserved. | ||
// | ||
|
||
import Dispatch | ||
import Foundation | ||
|
||
extension Array where Element: NSTextCheckingResult { | ||
|
@@ -42,4 +43,44 @@ extension Array { | |
return dictionary | ||
} | ||
} | ||
|
||
public func parallelForEach(block: @escaping ((Element) -> Void)) { | ||
_ = parallelMap(transform: block) | ||
} | ||
|
||
func parallelFlatMap<T>(transform: @escaping ((Element) -> [T])) -> [T] { | ||
return parallelMap(transform: transform).flatMap { $0 } | ||
} | ||
|
||
func parallelMap<T>(transform: @escaping ((Element) -> T)) -> [T] { | ||
let count = self.count | ||
let maxConcurrentJobs = ProcessInfo.processInfo.activeProcessorCount | ||
|
||
guard count > 1 && maxConcurrentJobs > 1 else { | ||
// skip GCD overhead if we'd only run one at a time anyway | ||
return map(transform) | ||
} | ||
|
||
var result = [(Int, [T])]() | ||
result.reserveCapacity(count) | ||
let group = DispatchGroup() | ||
let uuid = NSUUID().uuidString | ||
let jobCount = Int(ceil(Double(count) / Double(maxConcurrentJobs))) | ||
|
||
let queueLabelPrefix = "io.realm.SwiftLintFramework.map.\(uuid)" | ||
let resultAccumulatorQueue = DispatchQueue(label: "\(queueLabelPrefix).resultAccumulator") | ||
|
||
for jobIndex in stride(from: 0, to: count, by: jobCount) { | ||
let queue = DispatchQueue(label: "\(queueLabelPrefix).\(jobIndex / jobCount)") | ||
queue.async(group: group) { | ||
let jobElements = self[jobIndex..<Swift.min(count, jobIndex + jobCount)] | ||
let jobIndexAndResults = (jobIndex, jobElements.map(transform)) | ||
resultAccumulatorQueue.sync { | ||
result.append(jobIndexAndResults) | ||
} | ||
} | ||
} | ||
group.wait() | ||
return result.sorted { $0.0 < $1.0 }.flatMap { $0.1 } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,35 +33,96 @@ private var syntaxTokensByLinesCache = Cache({ file in file.syntaxTokensByLine() | |
internal typealias AssertHandler = () -> Void | ||
private var assertHandlers = [String: AssertHandler]() | ||
|
||
private var _allDeclarationsByType = [String: [String]]() | ||
private var queueForRebuild = [Structure]() | ||
private struct RebuildQueue { | ||
private let lock = NSLock() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure here, but wouldn't using a queue be faster? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What makes you think that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had the impression that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean, locking in general has considerable overhead, but that goes for whether you're using Actually, some of the patterns here could avoid some locking by using lock-free data structures, since they're append-only and we end up sorting them afterwards, but I couldn't find good resources on lock-free data structures in Swift, so I've punted on that for now. |
||
private var queue = [Structure]() | ||
private var allDeclarationsByType = [String: [String]]() | ||
|
||
mutating func append(_ structure: Structure) { | ||
lock.lock() | ||
defer { lock.unlock() } | ||
queue.append(structure) | ||
} | ||
|
||
mutating func clear() { | ||
lock.lock() | ||
defer { lock.unlock() } | ||
queue.removeAll(keepingCapacity: false) | ||
allDeclarationsByType.removeAll(keepingCapacity: false) | ||
} | ||
|
||
// Must hold lock when calling | ||
private mutating func rebuildIfNecessary() { | ||
guard !queue.isEmpty else { return } | ||
let allDeclarationsByType = queue.flatMap { structure -> (String, [String])? in | ||
guard let firstSubstructureDict = structure.dictionary.substructure.first, | ||
let name = firstSubstructureDict["key.name"] as? String, | ||
let kind = (firstSubstructureDict["key.kind"] as? String).flatMap(SwiftDeclarationKind.init), | ||
kind == .protocol, | ||
case let substructure = firstSubstructureDict.substructure, | ||
!substructure.isEmpty else { | ||
return nil | ||
} | ||
return (name, substructure.flatMap({ $0["key.name"] as? String })) | ||
} | ||
allDeclarationsByType.forEach { self.allDeclarationsByType[$0.0] = $0.1 } | ||
queue.removeAll(keepingCapacity: false) | ||
} | ||
|
||
mutating func getAllDeclarationsByType() -> [String: [String]] { | ||
lock.lock() | ||
defer { lock.unlock() } | ||
rebuildIfNecessary() | ||
return allDeclarationsByType | ||
} | ||
} | ||
|
||
private var queueForRebuild = RebuildQueue() | ||
|
||
private struct Cache<T> { | ||
fileprivate var values = [String: T]() | ||
fileprivate var factory: (File) -> T | ||
private var values = [String: T]() | ||
private let factory: (File) -> T | ||
private let lock = NSLock() | ||
|
||
fileprivate init(_ factory: @escaping (File) -> T) { | ||
self.factory = factory | ||
} | ||
|
||
fileprivate mutating func get(_ file: File) -> T { | ||
let key = file.cacheKey | ||
if let value = values[key] { | ||
return value | ||
lock.lock() | ||
if let cachedValue = values[key] { | ||
lock.unlock() | ||
return cachedValue | ||
} | ||
lock.unlock() | ||
let value = factory(file) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That shouldn't be a problem when There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for change. I thought that it should be locked for functionality of |
||
values[key] = value | ||
set(key: key, value: value) | ||
return value | ||
} | ||
|
||
fileprivate mutating func invalidate(_ file: File) { | ||
if let key = file.path { | ||
values.removeValue(forKey: key) | ||
doLocked { values.removeValue(forKey: key) } | ||
} | ||
} | ||
|
||
fileprivate mutating func clear() { | ||
values.removeAll(keepingCapacity: false) | ||
doLocked { values.removeAll(keepingCapacity: false) } | ||
} | ||
|
||
fileprivate mutating func set(key: String, value: T) { | ||
doLocked { values[key] = value } | ||
} | ||
|
||
fileprivate mutating func unset(key: String) { | ||
doLocked { values.removeValue(forKey: key) } | ||
} | ||
|
||
private func doLocked(block: () -> Void) { | ||
lock.lock() | ||
block() | ||
lock.unlock() | ||
} | ||
} | ||
|
||
|
@@ -77,10 +138,9 @@ extension File { | |
} | ||
set { | ||
if newValue { | ||
let value: [String: SourceKitRepresentable]? = nil | ||
responseCache.values[cacheKey] = value | ||
responseCache.set(key: cacheKey, value: nil) | ||
} else { | ||
responseCache.values.removeValue(forKey: cacheKey) | ||
responseCache.unset(key: cacheKey) | ||
} | ||
} | ||
} | ||
|
@@ -148,37 +208,16 @@ extension File { | |
} | ||
|
||
internal static func clearCaches() { | ||
queueForRebuild = [] | ||
_allDeclarationsByType = [:] | ||
queueForRebuild.clear() | ||
responseCache.clear() | ||
assertHandlers = [:] | ||
assertHandlers.removeAll(keepingCapacity: false) | ||
structureCache.clear() | ||
syntaxMapCache.clear() | ||
syntaxTokensByLinesCache.clear() | ||
syntaxKindsByLinesCache.clear() | ||
} | ||
|
||
internal static var allDeclarationsByType: [String: [String]] { | ||
if !queueForRebuild.isEmpty { | ||
rebuildAllDeclarationsByType() | ||
} | ||
return _allDeclarationsByType | ||
} | ||
} | ||
|
||
private func rebuildAllDeclarationsByType() { | ||
let allDeclarationsByType = queueForRebuild.flatMap { structure -> (String, [String])? in | ||
guard let firstSubstructureDict = structure.dictionary.substructure.first, | ||
let name = firstSubstructureDict["key.name"] as? String, | ||
let kind = (firstSubstructureDict["key.kind"] as? String) | ||
.flatMap(SwiftDeclarationKind.init), | ||
kind == .protocol, | ||
case let substructure = firstSubstructureDict.substructure, | ||
!substructure.isEmpty else { | ||
return nil | ||
} | ||
return (name, substructure.flatMap({ $0["key.name"] as? String })) | ||
return queueForRebuild.getAllDeclarationsByType() | ||
} | ||
allDeclarationsByType.forEach { _allDeclarationsByType[$0.0] = $0.1 } | ||
queueForRebuild = [] | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
// | ||
|
||
import Commandant | ||
import Dispatch | ||
import Foundation | ||
import Result | ||
import SourceKittenFramework | ||
|
@@ -16,6 +17,8 @@ struct LintCommand: CommandProtocol { | |
let verb = "lint" | ||
let function = "Print lint warnings and errors (default command)" | ||
|
||
private static let violationsAccumulatorQueue = DispatchQueue(label: "io.realm.swiftlint.violationsAccumulator") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know probably no one will use multiple linters at once, but shouldn't this queue be per instance? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done in 253e1f4. |
||
|
||
func run(_ options: LintOptions) -> Result<(), CommandantError<()>> { | ||
var fileBenchmark = Benchmark(name: "files") | ||
var ruleBenchmark = Benchmark(name: "rules") | ||
|
@@ -34,7 +37,9 @@ struct LintCommand: CommandProtocol { | |
currentViolations = linter.styleViolations | ||
} | ||
linter.file.invalidateCache() | ||
violations += currentViolations | ||
LintCommand.violationsAccumulatorQueue.sync { | ||
violations += currentViolations | ||
} | ||
reporter.reportViolations(currentViolations, realtimeCondition: true) | ||
}.flatMap { files in | ||
if LintCommand.isWarningThresholdBroken(configuration, violations: violations) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
// | ||
|
||
import Commandant | ||
import Dispatch | ||
import Foundation | ||
import Result | ||
import SourceKittenFramework | ||
|
@@ -66,8 +67,8 @@ extension Configuration { | |
} | ||
|
||
func visitLintableFiles(_ path: String, action: String, useSTDIN: Bool = false, | ||
quiet: Bool = false, useScriptInputFiles: Bool, | ||
visitorBlock: (Linter) -> Void) -> Result<[File], CommandantError<()>> { | ||
quiet: Bool = false, useScriptInputFiles: Bool, parallel: Bool = false, | ||
visitorBlock: @escaping (Linter) -> Void) -> Result<[File], CommandantError<()>> { | ||
return getFiles(path, action: action, useSTDIN: useSTDIN, quiet: quiet, | ||
useScriptInputFiles: useScriptInputFiles) | ||
.flatMap { files -> Result<[File], CommandantError<()>> in | ||
|
@@ -77,15 +78,28 @@ extension Configuration { | |
} | ||
return .success(files) | ||
}.flatMap { files in | ||
let queue = DispatchQueue(label: "io.realm.swiftlint.indexIncrementer") | ||
var index = 0 | ||
let fileCount = files.count | ||
for (index, file) in files.enumerated() { | ||
let visit = { (file: File) -> Void in | ||
if !quiet, let path = file.path { | ||
let filename = path.bridge().lastPathComponent | ||
queuedPrintError("\(action) '\(filename)' (\(index + 1)/\(fileCount))") | ||
} | ||
autoreleasepool { | ||
visitorBlock(Linter(file: file, configuration: configurationForFile(file))) | ||
let increment = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops. Nice catch! Re-added in 3608cf4. |
||
index += 1 | ||
let filename = path.bridge().lastPathComponent | ||
queuedPrintError("\(action) '\(filename)' (\(index)/\(fileCount))") | ||
} | ||
if parallel { | ||
queue.sync(execute: increment) | ||
} else { | ||
increment() | ||
} | ||
} | ||
visitorBlock(Linter(file: file, configuration: self.configurationForFile(file))) | ||
} | ||
if parallel { | ||
files.parallelForEach(block: visit) | ||
} else { | ||
files.forEach(visit) | ||
} | ||
return .success(files) | ||
} | ||
|
@@ -118,6 +132,7 @@ extension Configuration { | |
func visitLintableFiles(_ options: LintOptions, visitorBlock: @escaping (Linter) -> Void) -> | ||
Result<[File], CommandantError<()>> { | ||
return visitLintableFiles(options.path, action: "Linting", useSTDIN: options.useSTDIN, quiet: options.quiet, | ||
useScriptInputFiles: options.useScriptInputFiles, visitorBlock: visitorBlock) | ||
useScriptInputFiles: options.useScriptInputFiles, parallel: true, | ||
visitorBlock: visitorBlock) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
curious whether there is a reason you didn't use
DispatchQueue.concurrentPerform
here?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't use it because I wasn't aware of it! Thanks for sharing 😊
The docs are non-existent, unfortunately: https://developer.apple.com/reference/dispatch/dispatchqueue/2016088-concurrentperform
But doing a bit of digging, the Dispatch Swift overlay seems to implement that function with
dispatch_apply
, which is well documented here: https://developer.apple.com/reference/dispatch/1452846-dispatch_apply_fIt seems to be exactly what I've reimplemented here, so I think I'll refactor to use that instead.