Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse Swift operators as link components for operators with symbol diacritics #1125

Merged
merged 5 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ private struct PathComponentScanner {

mutating func _scanOperatorName() -> Substring? {
// If the next component is a Swift operator, parse the full operator before splitting on "/" ("/" may appear in the operator name)
if remaining.unicodeScalars.prefix(3).allSatisfy(\.isValidSwiftOperatorHead) {
if remaining.unicodeScalars.prefix(3).isValidSwiftOperator() {
return scanUntil(index: remaining.firstIndex(of: Self.swiftOperatorEnd)) + scan(length: 1)
}

Expand Down Expand Up @@ -354,6 +354,20 @@ private extension StringProtocol {
}
}

private extension Collection<Unicode.Scalar> {
    /// Returns whether the unicode scalars in this collection spell a valid Swift operator name.
    ///
    /// An empty collection is never considered a valid operator name.
    /// - Complexity: O(_n_), where _n_ is the length of the collection.
    func isValidSwiftOperator() -> Bool {
        // See https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure#Operators

        // Nothing to validate without a leading scalar.
        guard let head = first else {
            return false
        }

        // The leading scalar is held to the narrower "operator head" rule; every scalar after it
        // may additionally be one of the characters that are only allowed later in the name.
        let tail = dropFirst()
        return head.isValidSwiftOperatorHead
            && !tail.contains(where: { !$0.isValidSwiftOperatorCharacter })
    }
}

private extension Unicode.Scalar {
/// Checks if this unicode scalar is a valid C99 Extended Identifier.
var isValidC99ExtendedIdentifier: Bool {
Expand Down Expand Up @@ -547,10 +561,11 @@ private extension Unicode.Scalar {
}
}

/// A Boolean value that indicates if this scalar is a valid Swift operator head.
var isValidSwiftOperatorHead: Bool {
// See https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure#Operators
switch value {
case
case
// ! % & * + - . / < = > ? ^ | ~
0x21, 0x25, 0x26, 0x2A, 0x2B, 0x2D...0x2F, 0x3C, 0x3D...0x3F, 0x5E, 0x7C, 0x7E,
// ¡ ¢ £ ¤ ¥ ¦ §
Expand All @@ -573,6 +588,12 @@ private extension Unicode.Scalar {
0x2041 ... 0x2053,
// ⁕ ⁖ ⁗ ⁘ ⁙ ⁚ ⁛ ⁜ ⁝ ⁞
0x2055 ... 0x205E,
// Arrows
0x2190 ... 0x21FF,
// Mathematical Operators
0x2200 ... 0x22FF,
// Miscellaneous Technical
0x2300 ... 0x23FF,
// Box Drawing
0x2500 ... 0x257F,
// Block Elements
Expand Down Expand Up @@ -610,4 +631,31 @@ private extension Unicode.Scalar {
return false
}
}

/// A Boolean value that indicates if this scalar may appear in a Swift operator name after its first character.
var isValidSwiftOperatorCharacter: Bool {
    // See https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure#Operators

    // Any scalar that can start an operator can also continue one.
    guard !isValidSwiftOperatorHead else {
        return true
    }

    // The remaining scalars are only accepted in non-leading positions.
    switch value {
    case 0x0300 ... 0x036F:     // Combining Diacritical Marks
        return true
    case 0x1DC0 ... 0x1DFF:     // Combining Diacritical Marks Supplement
        return true
    case 0x20D0 ... 0x20FF:     // Combining Diacritical Marks for Symbols
        return true
    case 0xFE00 ... 0xFE0F:     // Variation Selectors
        return true
    case 0xFE20 ... 0xFE2F:     // Combining Half Marks
        return true
    case 0xE0100 ... 0xE01EF:   // Variation Selectors Supplement
        return true
    default:
        return false
    }
}
}
3 changes: 3 additions & 0 deletions Tests/SwiftDocCTests/Infrastructure/PathHierarchyTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3453,6 +3453,9 @@ class PathHierarchyTests: XCTestCase {
assertParsedPathComponents("MyNumber//=(_:_:)", [("MyNumber", nil), ("/=(_:_:)", nil)])
assertParsedPathComponents("MyNumber////=(_:_:)", [("MyNumber", nil), ("///=(_:_:)", nil)])
assertParsedPathComponents("MyNumber/+/-(_:_:)", [("MyNumber", nil), ("+/-(_:_:)", nil)])

// "☜⃩" is a symbol with a symbol diacritic mark.
assertParsedPathComponents("☜⃩/(_:_:)", [("☜⃩/(_:_:)", nil)])

// Check parsing return values and parameter types
assertParsedPathComponents("..<(_:_:)->Bool", [("..<(_:_:)", .typeSignature(parameterTypes: nil, returnTypes: ["Bool"]))])
Expand Down