-
Notifications
You must be signed in to change notification settings - Fork 128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve performance of JSONPointer encoding #1099
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
/* | ||
This source file is part of the Swift.org open source project | ||
|
||
Copyright (c) 2021 Apple Inc. and the Swift project authors | ||
Copyright (c) 2021-2024 Apple Inc. and the Swift project authors | ||
Licensed under Apache License v2.0 with Runtime Library Exception | ||
|
||
See https://swift.org/LICENSE.txt for license information | ||
|
@@ -20,7 +20,7 @@ public struct JSONPointer: Codable, CustomStringConvertible, Equatable { | |
public var pathComponents: [String] | ||
|
||
public var description: String { | ||
"/\(pathComponents.map(Self.escape).joined(separator: "/"))" | ||
Self.escaped(pathComponents) | ||
} | ||
|
||
/// Creates a JSON Pointer given its path components. | ||
|
@@ -87,36 +87,78 @@ public struct JSONPointer: Codable, CustomStringConvertible, Equatable { | |
public init(from decoder: Decoder) throws { | ||
let container = try decoder.singleValueContainer() | ||
let stringValue = try container.decode(String.self) | ||
self.pathComponents = stringValue.removingLeadingSlash.components(separatedBy: "/").map(Self.unescape) | ||
self.pathComponents = Self.unescaped(stringValue) | ||
} | ||
|
||
/// Escapes a path component of a JSON pointer. | ||
static func escape(_ pointerPathComponents: String) -> String { | ||
applyEscaping(pointerPathComponents, shouldUnescape: false) | ||
} | ||
|
||
/// Unescaped a path component of a JSON pointer. | ||
static func unescape(_ pointerPathComponents: String) -> String { | ||
applyEscaping(pointerPathComponents, shouldUnescape: true) | ||
private static func escaped(_ pathComponents: [String]) -> String { | ||
// This code is called quite frequently for mixed language content. | ||
// Optimizing it has a measurable impact on the total documentation build time. | ||
|
||
var string: [UTF8.CodeUnit] = [] | ||
string.reserveCapacity( | ||
pathComponents.reduce(0) { acc, component in | ||
acc + 1 /* the "/" separator */ + component.utf8.count | ||
} | ||
) | ||
|
||
for component in pathComponents { | ||
// The leading slash and component separator | ||
string.append(forwardSlash) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume Swift's implementation of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, append is very fast. All it does is:
The next step would be dropping down to unsafe buffers ourselves but then we'd be responsible for validating the capacity (uniqueness isn't a concern since it's a locally scoped variable). Since the replacements we're doing here are longer, the escaped string may be longer than the original string which means validating the capacity is a real concern so we would need to do that ourselves. Because of this we're not gaining anything by using lower level API for this. This means that in practice this code is likely to do one reallocation when it escaped the string. We could bake in a little bit of extra capacity (for example 8 or 16 elements) to avoid this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now that I said that I had to measure reserving capacity for 16 extra characters. 😄 The current changes in this PR is a ~580% improvement compared to the original code in a micro benchmark that only measures encoding lots and lots of Reserving an extra 16 characters to avoid the reallocation when the escaped string grows beyond the length of the original string, is a ~5% improvement over the current changes in this PR which means that it's a ~600% improvement over the original code. Since it's only 1 more line to make that change I'd say it's worth it. |
||
|
||
// The escaped component | ||
for char in component.utf8 { | ||
switch char { | ||
case tilde: | ||
string.append(contentsOf: escapedTilde) | ||
case forwardSlash: | ||
string.append(contentsOf: escapedForwardSlash) | ||
default: | ||
string.append(char) | ||
} | ||
} | ||
} | ||
|
||
return String(decoding: string, as: UTF8.self) | ||
} | ||
|
||
/// Applies an escaping operation to the path component of a JSON pointer. | ||
/// - Parameters: | ||
/// - pointerPathComponent: The path component to escape. | ||
/// - shouldUnescape: Whether this function should unescape or escape the path component. | ||
/// - Returns: The escaped value if `shouldUnescape` is false, otherwise the escaped value. | ||
private static func applyEscaping(_ pointerPathComponent: String, shouldUnescape: Bool) -> String { | ||
EscapedCharacters.allCases | ||
.reduce(pointerPathComponent) { partialResult, characterThatNeedsEscaping in | ||
partialResult | ||
.replacingOccurrences( | ||
of: characterThatNeedsEscaping[ | ||
keyPath: shouldUnescape ? \EscapedCharacters.escaped : \EscapedCharacters.rawValue | ||
], | ||
with: characterThatNeedsEscaping[ | ||
keyPath: shouldUnescape ? \EscapedCharacters.rawValue : \EscapedCharacters.escaped | ||
] | ||
) | ||
private static func unescaped(_ escapedRawString: String) -> [String] { | ||
escapedRawString.removingLeadingSlash.components(separatedBy: "/").map { | ||
// This code is called quite frequently for mixed language content. | ||
// Optimizing it has a measurable impact on the total documentation build time. | ||
|
||
var string: [UTF8.CodeUnit] = [] | ||
string.reserveCapacity($0.utf8.count) | ||
|
||
var remaining = $0.utf8[...] | ||
while let char = remaining.popFirst() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why couldn't you use the simpler There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because I need to iterate over pairs of characters to identify "~0" and "~1". At first I though I could iterate over the character's pairwise but when I tried it I realized that it meant that |
||
guard char == tilde, let escapedCharacterIndicator = remaining.popFirst() else { | ||
string.append(char) | ||
continue | ||
} | ||
|
||
// Check the character | ||
switch escapedCharacterIndicator { | ||
case zero: | ||
string.append(tilde) | ||
case one: | ||
string.append(forwardSlash) | ||
default: | ||
// This string isn't an escaped JSON Pointer. Return it as-is. | ||
return $0 | ||
} | ||
} | ||
|
||
return String(decoding: string, as: UTF8.self) | ||
} | ||
} | ||
} | ||
|
||
// A few UInt8 raw values for various UTF-8 characters that this implementation frequently checks for | ||
|
||
private let tilde = UTF8.CodeUnit(ascii: "~") | ||
private let forwardSlash = UTF8.CodeUnit(ascii: "/") | ||
private let zero = UTF8.CodeUnit(ascii: "0") | ||
private let one = UTF8.CodeUnit(ascii: "1") | ||
|
||
private let escapedTilde = [tilde, zero] | ||
private let escapedForwardSlash = [tilde, one] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would writing an explicit for-loop be faster than calling
reduce
with a closure?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It shouldn't be and I couldn't measure any difference.
reduce(_:)
is an accumulating for-loop and the compiler shouldn't have any problems inlining and optimizing it the same as if I wrote the loop inline myself.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that this is only true because the accumulating value is a primitive (
Int
).Using
reduce(_:)
over collections result in repeated copies that makes it accidentally O(n²) instead of O(n). Usingreduce(into:)
instead for accumulating into collections solves this.