Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add String multi-replace via Scanner #227

Merged
merged 3 commits into from
Apr 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Alicerce.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@
0A77982920FCCD24008E269A /* RetryTestCase.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0A77982820FCCD24008E269A /* RetryTestCase.swift */; };
0A77982F20FFF29D008E269A /* Retry.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0A77982E20FFF29D008E269A /* Retry.swift */; };
0A79686120812130005738AF /* LockTestCase.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0ACEB2992080F0E5000D95AD /* LockTestCase.swift */; };
0A7ACC852527467B00AA2213 /* Character.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0A7ACC842527467B00AA2213 /* Character.swift */; };
0A7B504D20B632FA005A08E7 /* *.alicerce.mindera.com.pem in Resources */ = {isa = PBXBuildFile; fileRef = 0A7B504C20B632FA005A08E7 /* *.alicerce.mindera.com.pem */; };
0A7B505020B6D346005A08E7 /* SecCertificate+PublicKey.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0A7B504E20B6D2C4005A08E7 /* SecCertificate+PublicKey.swift */; };
0A7B505220B6D769005A08E7 /* SecCertificate+PublicKeyTestCase.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0A7B505120B6D769005A08E7 /* SecCertificate+PublicKeyTestCase.swift */; };
Expand Down Expand Up @@ -491,6 +492,7 @@
0A76A004209F854C00D46B63 /* Route+TrieNode_IsEmptyAndDescriptionTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Route+TrieNode_IsEmptyAndDescriptionTests.swift"; sourceTree = "<group>"; };
0A77982820FCCD24008E269A /* RetryTestCase.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RetryTestCase.swift; sourceTree = "<group>"; };
0A77982E20FFF29D008E269A /* Retry.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Retry.swift; sourceTree = "<group>"; };
0A7ACC842527467B00AA2213 /* Character.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Character.swift; sourceTree = "<group>"; };
0A7B504C20B632FA005A08E7 /* *.alicerce.mindera.com.pem */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "*.alicerce.mindera.com.pem"; sourceTree = "<group>"; };
0A7B504E20B6D2C4005A08E7 /* SecCertificate+PublicKey.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "SecCertificate+PublicKey.swift"; sourceTree = "<group>"; };
0A7B505120B6D769005A08E7 /* SecCertificate+PublicKeyTestCase.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "SecCertificate+PublicKeyTestCase.swift"; sourceTree = "<group>"; };
Expand Down Expand Up @@ -806,6 +808,7 @@
0A3C2C8E1EA7E18500EFB7D4 /* String.swift */,
0A3C2C8F1EA7E18500EFB7D4 /* Thread.swift */,
1B4D4CB61F05016B00FA4260 /* URLRequest.swift */,
0A7ACC842527467B00AA2213 /* Character.swift */,
);
path = Foundation;
sourceTree = "<group>";
Expand Down Expand Up @@ -2011,6 +2014,7 @@
0A3C2DB71EA7E5DD00EFB7D4 /* CollectionReusableView.swift in Sources */,
9D4E3AA1239A6557007F3050 /* CollectionReusableViewSizer.swift in Sources */,
4838FE3123A94CE0007311F0 /* Array+ConstrainableProxy.swift in Sources */,
0A7ACC852527467B00AA2213 /* Character.swift in Sources */,
0A266F201ED374F5009CD0D7 /* AssertDumpsEqual.swift in Sources */,
0ACEB2922080E6D4000D95AD /* Atomic.swift in Sources */,
0A83885E1EB1F6B000C1E835 /* NSPersistentStoreCoordinator+CoreDataStack.swift in Sources */,
Expand Down
14 changes: 14 additions & 0 deletions Sources/Extensions/Foundation/Character.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import Foundation

/// Named constants for typographic characters used when building non line breaking text.
extension Character {

    /// LINE SEPARATOR (`U+2028`).
    static let lineSeparator: Character = "\u{2028}"
    /// NO-BREAK SPACE (`U+00A0`).
    static let nonBreakingSpace: Character = "\u{00a0}"
    /// NON-BREAKING HYPHEN (`U+2011`).
    static let nonBreakingHyphen: Character = "\u{2011}"
    /// WORD JOINER (`U+2060`).
    static let wordJoiner: Character = "\u{2060}"
    /// EM DASH (`U+2014`), the longer of the two dashes.
    static let emDash: Character = "\u{2014}" // —
    /// EN DASH (`U+2013`), the shorter of the two dashes.
    static let enDash: Character = "\u{2013}" // –

    // from `CharacterSet.newlines`
    /// The newline characters `U+000A`...`U+000D`, `U+0085`, `U+2028` and `U+2029`.
    static let newlines: [Character] = ["\u{A}", "\u{B}", "\u{C}", "\u{D}", "\u{85}", "\u{2028}", "\u{2029}"]
}
97 changes: 97 additions & 0 deletions Sources/Extensions/Foundation/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,100 @@ public extension String {
dump(x, to: &self)
}
}

extension String {

    /// Replaces occurrences of multiple `Character`s with corresponding `String` values using the given mapping, while
    /// skipping (filtering out) an optional set of characters from the output. Being backed by a `Scanner`, a single
    /// pass is made over the receiver.
    ///
    /// Matching is case sensitive, so keys that only differ in case (e.g. `"a"` and `"A"`) are replaced
    /// independently. Text that merely shares a unicode scalar with a multi-scalar key (e.g. a plain `"e"` when the
    /// key is `"e\u{301}"`) is copied to the output unchanged.
    ///
    /// - Note: When `replacementMap` is empty the receiver is returned as is, i.e. `charactersToBeSkipped` is not
    /// applied.
    ///
    /// - Parameters:
    ///   - replacementMap: A dictionary containing the replacement mapping `Character` -> `String`.
    ///   - charactersToBeSkipped: An optional set of characters to skip (i.e. filter out from the input).
    /// - Returns: A modified version of the receiver with the replacement mapping applied.
    public func replacingOccurrencesOfCharacters(
        in replacementMap: [Character: String],
        skippingCharactersIn charactersToBeSkipped: CharacterSet? = nil
    ) -> String {

        guard !replacementMap.isEmpty else { return self }

        let skipSet = charactersToBeSkipped ?? CharacterSet()

        // the scanner must stop at every scalar that may start a replacement, and at every skipped character
        let matchSet = CharacterSet(charactersIn: String(replacementMap.keys)).union(skipSet)

        var result = ""

        let scanner = Scanner(string: self)
        scanner.charactersToBeSkipped = charactersToBeSkipped
        // `Scanner` is case insensitive by default, which would let the key "a" consume an "A" in the input
        scanner.caseSensitive = true

        while !scanner.isAtEnd {

            // copy everything until finding a character to be replaced or skipped
            var chunk: NSString?
            let didCopy = scanner.scanUpToCharacters(from: matchSet, into: &chunk)
            if didCopy, let chunk = chunk {
                result.append(chunk as String)
            }

            // exit early if we're already at the end
            guard !scanner.isAtEnd else { break }

            // find and replace matching character if needed
            let match = replacementMap.first { key, _ in scanner.scanString(String(key), into: nil) }

            if let match = match {
                result.append(match.value)
            } else if !didCopy {
                // Neither scan consumed anything: the scanner stopped at a scalar that belongs to a multi-scalar key
                // without the whole key being present. Copy that character verbatim and step over it, otherwise the
                // loop would never make progress.
                let source = scanner.string as NSString
                var location = scanner.scanLocation

                // step over skipped characters manually, in case the failed scans left them unconsumed
                while location < source.length,
                      (skipSet as NSCharacterSet).characterIsMember(source.character(at: location)) {
                    location += 1
                }

                guard location < source.length else { break }

                // the composed sequence may begin before `location`, so only copy from `location` onwards
                let end = NSMaxRange(source.rangeOfComposedCharacterSequence(at: location))
                result.append(source.substring(with: NSRange(location: location, length: end - location)))
                scanner.scanLocation = end
            }
        }

        return result
    }
}

extension String {

    /// `String` form of `Character.nonBreakingSpace`.
    public static let nonBreakingSpace = String(Character.nonBreakingSpace)
    /// `String` form of `Character.nonBreakingHyphen`.
    public static let nonBreakingHyphen = String(Character.nonBreakingHyphen)
    /// `String` form of `Character.wordJoiner`.
    public static let wordJoiner = String(Character.wordJoiner)
    /// `String` form of `Character.emDash`.
    public static let emDash = String(Character.emDash)
    /// `String` form of `Character.enDash`.
    public static let enDash = String(Character.enDash)

    /// Returns a non line breaking version of `self`. Line breaking characters occurrences are replaced with
    /// corresponding non line breaking variants when existent. Otherwise, word joiner characters are attached to them
    /// to make them non line breaking. Existing newlines can be replaced by any given string, via the optional
    /// `newlineCharacterReplacement` parameter (defaults to `nil`, which preserves newlines).
    ///
    /// The character mapping is:
    /// - space (" ") -> non breaking space (`U+00A0`)
    /// - hyphen ("-") -> non breaking hyphen (`U+2011`)
    /// - em dash ("—") -> word joiner (`U+2060`) + em dash + word joiner (`U+2060`)
    /// - en dash ("–") -> word joiner (`U+2060`) + en dash + word joiner (`U+2060`)
    /// - question mark ("?") -> question mark + word joiner (`U+2060`)
    /// - closing brace ("}") -> closing brace + word joiner (`U+2060`)
    ///
    /// The `newlineCharacterReplacement` acts upon the characters specified in `CharacterSet.newlines`
    /// (`U+000A ~ U+000D`, `U+0085`, `U+2028`, and `U+2029`), some example values are:
    /// - `nil` -> newlines are preserved
    /// - `""` -> newlines are stripped
    /// - `String.nonBreakingSpace` -> output a single line
    ///
    /// - Parameter newlineCharacterReplacement: The replacement string to use for newline characters (defaults to
    /// `nil`).
    /// - Returns: A modified version of the receiver without line breaking characters.
    public func nonLineBreaking(replacingNewlinesWith newlineCharacterReplacement: String? = nil) -> String {

        // map every newline character to the replacement, or use an empty map to leave newlines untouched
        let newlineReplacementMap = newlineCharacterReplacement
            .map { replacement in Dictionary(uniqueKeysWithValues: Character.newlines.map { ($0, replacement) }) }
            ?? [:]

        return replacingOccurrencesOfCharacters(
            in: [
                " ": String.nonBreakingSpace,
                "-": String.nonBreakingHyphen,
                .emDash: String([.wordJoiner, .emDash, .wordJoiner]),
                .enDash: String([.wordJoiner, .enDash, .wordJoiner]),
                "?": "?" + .wordJoiner,
                "}": "}" + .wordJoiner
            ]
            // on a key collision the newline replacement wins
            .merging(newlineReplacementMap) { $1 },
            skippingCharactersIn: nil
        )
    }
}
149 changes: 148 additions & 1 deletion Tests/AlicerceTests/Extensions/Foundation/StringTestCase.swift
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,152 @@ class StringTestCase: XCTestCase {

XCTAssertEqual(intDump, dumpString)
}


// replacingOccurrencesOfCharacters(in:skippingCharactersIn:)

func testReplacingOccurrencesOfCharacters_WithEmptyMap_ShouldReturnSelf() {

    // with nothing to replace and nothing to skip, the receiver must come back untouched
    let pangram = "The quick brown fox jumps over the lazy dog"
    let emptyMap: [Character: String] = [:]

    let output = pangram.replacingOccurrencesOfCharacters(in: emptyMap, skippingCharactersIn: nil)

    XCTAssertEqual(output, pangram)
}

func testReplacingOccurrencesOfCharacters_WithMatchingCharactersInSingleEntryMapAndNilSkippingCharacterSet_ShouldReplaceOccurrences() {

    // every space is expected to turn into an underscore
    let input = "The quick brown fox jumps over the lazy dog"
    let underscored = "The_quick_brown_fox_jumps_over_the_lazy_dog"
    let spaceToUnderscore: [Character: String] = [" ": "_"]

    let output = input.replacingOccurrencesOfCharacters(in: spaceToUnderscore, skippingCharactersIn: nil)

    XCTAssertEqual(output, underscored)
}

func testReplacingOccurrencesOfCharacters_WithMatchingCharactersInMultiEntryMapAndNilSkippingCharacterSet_ShouldReplaceOccurrences() {

    // uppercase hex digits are mapped to lowercase, decimal digits are left alone
    let input = "0123456789ABCDEF"
    let lowercased = "0123456789abcdef"
    let hexLowercasingMap: [Character: String] = [
        "A": "a",
        "B": "b",
        "C": "c",
        "D": "d",
        "E": "e",
        "F": "f",
    ]

    let output = input.replacingOccurrencesOfCharacters(in: hexLowercasingMap, skippingCharactersIn: nil)

    XCTAssertEqual(output, lowercased)
}

func testReplacingOccurrencesOfCharacters_WithMatchingCharactersInMapAndMatchingCharactersInSkippingCharacterSet_ShouldReplaceOccurrencesAndSkip() {

    // decimal digits are filtered out while uppercase hex digits are lowercased
    let input = "0123456789ABCDEF_0A0B0C0D0E0F0"
    let lettersOnly = "abcdef_abcdef"
    let hexLowercasingMap: [Character: String] = [
        "A": "a",
        "B": "b",
        "C": "c",
        "D": "d",
        "E": "e",
        "F": "f",
    ]
    let digits = CharacterSet.decimalDigits

    let output = input.replacingOccurrencesOfCharacters(in: hexLowercasingMap, skippingCharactersIn: digits)

    XCTAssertEqual(output, lettersOnly)
}

// nonLineBreaking()

func testNonLineBreaking_WithNoLineBreakingCharactersInString_ShouldReturnSelf() {

    // none of these characters is part of the non line breaking mapping
    let hexDigits = "0123456789ABCDEF"

    let output = hexDigits.nonLineBreaking()

    XCTAssertEqual(output, hexDigits)
}

func testNonLineBreaking_WithLineBreakingCharactersInString_ShouldReturnANonLineBreakingVersion() {

    // input exercises space, hyphen, both dashes, "?" and "}" ("{" is expected to stay as is)
    let input = "The quick-brown\(String.emDash)fox\(String.enDash)jumps?over{the}lazy dog"

    let nonBreaking =
        """
        The\(String.nonBreakingSpace)quick\(String.nonBreakingHyphen)brown\
        \(String([.wordJoiner, .emDash, .wordJoiner]))fox\
        \(String([.wordJoiner, .enDash, .wordJoiner]))jumps\
        ?\(String.wordJoiner)over{the}\(String.wordJoiner)lazy\(String.nonBreakingSpace)dog
        """

    let output = input.nonLineBreaking()

    XCTAssertEqual(output, nonBreaking)
}

func testNonLineBreaking_WithLineBreakingCharactersAndNewlinesInStringAndNilNewlineReplacement_ShouldReturnANonLineBreakingVersionAndPreserveNewlines() {

    // input mixes line breaking characters with every newline variant
    let input =
        """
        \nThe quick-brown\u{85}\(String.emDash)fox\n\(String.enDash)jumps?\u{2028}\u{2029}over{the}lazy dog\n\
        \u{A}.\u{B},\u{C};\u{D}
        """

    // a `nil` replacement must leave all newline characters in place
    let nonBreaking =
        """
        \nThe\(String.nonBreakingSpace)quick\(String.nonBreakingHyphen)brown\u{85}\
        \(String([.wordJoiner, .emDash, .wordJoiner]))fox\n\
        \(String([.wordJoiner, .enDash, .wordJoiner]))jumps\
        ?\(String.wordJoiner)\u{2028}\u{2029}over\
        {the}\(String.wordJoiner)lazy\(String.nonBreakingSpace)dog\n\
        \u{A}.\u{B},\u{C};\u{D}
        """

    let output = input.nonLineBreaking(replacingNewlinesWith: nil)

    XCTAssertEqual(output, nonBreaking)
}

func testNonLineBreaking_WithLineBreakingCharactersAndNewlinesInStringAndEmptyStringNewlineReplacement_ShouldReturnANonLineBreakingVersionAndReplaceNewlines() {

    // input mixes line breaking characters with every newline variant
    let input =
        """
        \nThe quick-brown\u{85}\(String.emDash)fox\n\(String.enDash)jumps?\u{2028}\u{2029}over{the}lazy dog\n\
        \u{A}.\u{B},\u{C};\u{D}
        """

    // an empty replacement must strip all newline characters
    let nonBreaking =
        """
        The\(String.nonBreakingSpace)quick\(String.nonBreakingHyphen)brown\
        \(String([.wordJoiner, .emDash, .wordJoiner]))fox\
        \(String([.wordJoiner, .enDash, .wordJoiner]))jumps\
        ?\(String.wordJoiner)over\
        {the}\(String.wordJoiner)lazy\(String.nonBreakingSpace)dog\
        .,;
        """

    let output = input.nonLineBreaking(replacingNewlinesWith: "")

    XCTAssertEqual(output, nonBreaking)
}

func testNonLineBreaking_WithLineBreakingCharactersAndNewlinesInStringAndNonNilStringNewlineReplacement_ShouldReturnANonLineBreakingVersionAndReplaceNewlines() {

    // input mixes line breaking characters with every newline variant
    let input =
        """
        \nThe quick-brown\u{85}\(String.emDash)fox\n\(String.enDash)jumps?\u{2028}\u{2029}over{the}lazy dog\n\
        \u{A}.\u{B},\u{C};\u{D}
        """

    // each newline character must be swapped for the given replacement string
    let nonBreaking =
        """
        🦊The\(String.nonBreakingSpace)quick\(String.nonBreakingHyphen)brown🦊\
        \(String([.wordJoiner, .emDash, .wordJoiner]))fox🦊\
        \(String([.wordJoiner, .enDash, .wordJoiner]))jumps\
        ?\(String.wordJoiner)🦊🦊over\
        {the}\(String.wordJoiner)lazy\(String.nonBreakingSpace)dog🦊\
        🦊.🦊,🦊;🦊
        """

    let output = input.nonLineBreaking(replacingNewlinesWith: "🦊")

    XCTAssertEqual(output, nonBreaking)
}
}