From 4b12c498902b5c161e4ca0b627bb353d3030b932 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20R=C3=B6nnqvist?=
Date: Thu, 12 Dec 2024 21:16:33 +0100
Subject: [PATCH] Parse Swift operators as link components for operators with symbol diacritics (#1125)

---
 .../PathHierarchy+PathComponent.swift         | 52 ++++++++++++++++++-
 .../Infrastructure/PathHierarchyTests.swift   |  3 ++
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/Sources/SwiftDocC/Infrastructure/Link Resolution/PathHierarchy+PathComponent.swift b/Sources/SwiftDocC/Infrastructure/Link Resolution/PathHierarchy+PathComponent.swift
index de670d4f2e..531ad22f76 100644
--- a/Sources/SwiftDocC/Infrastructure/Link Resolution/PathHierarchy+PathComponent.swift
+++ b/Sources/SwiftDocC/Infrastructure/Link Resolution/PathHierarchy+PathComponent.swift
@@ -248,7 +248,7 @@ private struct PathComponentScanner {
 
     mutating func _scanOperatorName() -> Substring? {
         // If the next component is a Swift operator, parse the full operator before splitting on "/" ("/" may appear in the operator name)
-        if remaining.unicodeScalars.prefix(3).allSatisfy(\.isValidSwiftOperatorHead) {
+        if remaining.unicodeScalars.prefix(3).isValidSwiftOperator() {
             return scanUntil(index: remaining.firstIndex(of: Self.swiftOperatorEnd)) + scan(length: 1)
         }
 
@@ -354,6 +354,20 @@ private extension StringProtocol {
     }
 }
 
+private extension Collection<Unicode.Scalar> {
+    /// Determines if this sequence of unicode scalars represent a valid Swift operator name
+    /// - Complexity: O(_n_), where _n_ is the length of the collection.
+    func isValidSwiftOperator() -> Bool {
+        // See https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure#Operators
+
+        // The first character of an operator supports fewer characters than the rest of the operator name
+        guard let first, first.isValidSwiftOperatorHead else {
+            return false
+        }
+        return dropFirst().allSatisfy { $0.isValidSwiftOperatorCharacter }
+    }
+}
+
 private extension Unicode.Scalar {
     /// Checks if this unicode scalar is a valid C99 Extended Identifier.
     var isValidC99ExtendedIdentifier: Bool {
@@ -547,10 +561,11 @@ private extension Unicode.Scalar {
         }
     }
 
+    /// A Boolean value that indicates if this scalar is a valid Swift operator head.
     var isValidSwiftOperatorHead: Bool {
         // See https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure#Operators
         switch value {
-        case 
+        case
             // ! % & * + - . / < = > ? ^| ~
             0x21, 0x25, 0x26, 0x2A, 0x2B, 0x2D...0x2F, 0x3C, 0x3D...0x3F, 0x5E, 0x7C, 0x7E,
             // ¡ ¢ £ ¤ ¥ ¦ §
@@ -573,6 +588,12 @@ private extension Unicode.Scalar {
             0x2041 ... 0x2053,
             // ⁕ ⁖ ⁗ ⁘ ⁙ ⁚ ⁛ ⁜ ⁝ ⁞
             0x2055 ... 0x205E,
+            // Arrows
+            0x2190 ... 0x21FF,
+            // Mathematical Operators
+            0x2200 ... 0x22FF,
+            // Miscellaneous Technical
+            0x2300 ... 0x23FF,
             // Box Drawing
             0x2500 ... 0x257F,
             // Block Elements
@@ -610,4 +631,31 @@ private extension Unicode.Scalar {
             return false
         }
     }
+
+    /// A Boolean value that indicates if this scalar is a valid Swift operator character (after the first character).
+    var isValidSwiftOperatorCharacter: Bool {
+        // See https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure#Operators
+        if isValidSwiftOperatorHead {
+            return true
+        }
+
+        switch value {
+        case
+            // Combining Diacritical Marks
+            0x0300 ... 0x036F,
+            // Combining Diacritical Marks Supplement
+            0x1DC0 ... 0x1DFF,
+            // Combining Diacritical Marks for Symbols
+            0x20D0 ... 0x20FF,
+            // Variation Selectors
+            0xFE00 ... 0xFE0F,
+            // Combining Half Marks
+            0xFE20 ... 0xFE2F,
+            // Variation Selectors Supplement
+            0xE0100 ... 0xE01EF:
+            return true
+        default:
+            return false
+        }
+    }
 }
diff --git a/Tests/SwiftDocCTests/Infrastructure/PathHierarchyTests.swift b/Tests/SwiftDocCTests/Infrastructure/PathHierarchyTests.swift
index b982628ed6..261496cce7 100644
--- a/Tests/SwiftDocCTests/Infrastructure/PathHierarchyTests.swift
+++ b/Tests/SwiftDocCTests/Infrastructure/PathHierarchyTests.swift
@@ -3453,6 +3453,9 @@ class PathHierarchyTests: XCTestCase {
         assertParsedPathComponents("MyNumber//=(_:_:)", [("MyNumber", nil), ("/=(_:_:)", nil)])
         assertParsedPathComponents("MyNumber////=(_:_:)", [("MyNumber", nil), ("///=(_:_:)", nil)])
         assertParsedPathComponents("MyNumber/+/-(_:_:)", [("MyNumber", nil), ("+/-(_:_:)", nil)])
+
+        // "☜⃩" is a symbol with a symbol diacritic mark.
+        assertParsedPathComponents("☜⃩/(_:_:)", [("☜⃩/(_:_:)", nil)])
 
         // Check parsing return values and parameter types
         assertParsedPathComponents("..<(_:_:)->Bool", [("..<(_:_:)", .typeSignature(parameterTypes: nil, returnTypes: ["Bool"]))])