diff --git a/CHANGELOG.md b/CHANGELOG.md index ce7bd082c..9da3a415d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,9 @@ None. when all characters matched character set. [JP Simard](https://github.com/jpsim) +* Fix `indexOfByteOffset(offset:)` failing when the string includes some emoji. + [Norio Nomura](https://github.com/norio-nomura) + [#111](https://github.com/jpsim/SourceKitten/pull/111) ## 0.6.2 diff --git a/Source/SourceKittenFramework/String+SourceKitten.swift b/Source/SourceKittenFramework/String+SourceKitten.swift index 2777fb698..be058f778 100644 --- a/Source/SourceKittenFramework/String+SourceKitten.swift +++ b/Source/SourceKittenFramework/String+SourceKitten.swift @@ -22,41 +22,6 @@ private let commentLinePrefixCharacterSet: NSCharacterSet = { }() extension NSString { - /** - Binary search for NSString index equivalent to byte offset. - - - parameter offset: Byte offset. - - - returns: NSString index, if any. - */ - private func indexOfByteOffset(offset: Int) -> Int? { - var usedLength = 0 - - var left = Int(floor(Double(offset)/2)) - var right = min(length, offset + 1) - var midpoint = (left + right) / 2 - - for _ in left.. offset { - right = midpoint - midpoint = (right + left) / 2 - } else { - return midpoint - } - } - return nil - } - public func lineAndCharacterForCharacterOffset(offset: Int) -> (line: Int, character: Int)? { let range = NSRange(location: offset, length: 0) var numberOfLines = 0, index = 0, lineRangeStart = 0, previousIndex = 0 @@ -114,8 +79,9 @@ extension NSString { - returns: An equivalent `NSRange`. */ public func byteRangeToNSRange(start start: Int, length: Int) -> NSRange? 
{ - return indexOfByteOffset(start).flatMap { stringStart in - return indexOfByteOffset(start + length).map { stringEnd in + let string = self as String + return string.indexOfByteOffset(start).flatMap { stringStart in + return string.indexOfByteOffset(start + length).map { stringEnd in return NSRange(location: stringStart, length: stringEnd - stringStart) } } @@ -204,6 +170,17 @@ extension NSString { } extension String { + /** + UTF16 index equivalent to byte offset. + + - parameter offset: Byte offset. + + - returns: UTF16 index, if any. + */ + private func indexOfByteOffset(offset: Int) -> Int? { + return utf8.startIndex.advancedBy(offset).samePositionIn(utf16).map(utf16.startIndex.distanceTo) + } + /// Returns the `#pragma mark`s in the string. /// Just the content; no leading dashes or leading `#pragma mark`. public func pragmaMarks(filename: String, excludeRanges: [NSRange], limitRange: NSRange?) -> [SourceDeclaration] { diff --git a/Source/SourceKittenFrameworkTests/StringTests.swift b/Source/SourceKittenFrameworkTests/StringTests.swift index 1feccb34e..fc91c7963 100644 --- a/Source/SourceKittenFrameworkTests/StringTests.swift +++ b/Source/SourceKittenFrameworkTests/StringTests.swift @@ -124,28 +124,28 @@ class StringTests: XCTestCase { } func testSubstringWithByteRange() { - let string = "😄123" - XCTAssertEqual(string.substringWithByteRange(start: 0, length: 4)!, "😄") - XCTAssertEqual(string.substringWithByteRange(start: 4, length: 1)!, "1") + let string = "👨‍👩‍👧‍👧123" + XCTAssertEqual(string.substringWithByteRange(start: 0, length: 25)!, "👨‍👩‍👧‍👧") + XCTAssertEqual(string.substringWithByteRange(start: 25, length: 1)!, "1") } func testSubstringLinesWithByteRange() { - let string = "😄\n123" - XCTAssertEqual(string.substringLinesWithByteRange(start: 0, length: 0)!, "😄\n") - XCTAssertEqual(string.substringLinesWithByteRange(start: 0, length: 4)!, "😄\n") - 
XCTAssertEqual(string.substringLinesWithByteRange(start: 0, length: 5)!, "😄\n") - XCTAssertEqual(string.substringLinesWithByteRange(start: 0, length: 6)!, string) - XCTAssertEqual(string.substringLinesWithByteRange(start: 6, length: 0)!, "123") + let string = "👨‍👩‍👧‍👧\n123" + XCTAssertEqual(string.substringLinesWithByteRange(start: 0, length: 0)!, "👨‍👩‍👧‍👧\n") + XCTAssertEqual(string.substringLinesWithByteRange(start: 0, length: 25)!, "👨‍👩‍👧‍👧\n") + XCTAssertEqual(string.substringLinesWithByteRange(start: 0, length: 26)!, "👨‍👩‍👧‍👧\n") + XCTAssertEqual(string.substringLinesWithByteRange(start: 0, length: 27)!, string) + XCTAssertEqual(string.substringLinesWithByteRange(start: 27, length: 0)!, "123") } func testLineRangeWithByteRange() { XCTAssert("".lineRangeWithByteRange(start: 0, length: 0) == nil) - let string = "😄\n123" + let string = "👨‍👩‍👧‍👧\n123" XCTAssert(string.lineRangeWithByteRange(start: 0, length: 0)! == (1, 1)) - XCTAssert(string.lineRangeWithByteRange(start: 0, length: 4)! == (1, 1)) - XCTAssert(string.lineRangeWithByteRange(start: 0, length: 5)! == (1, 2)) - XCTAssert(string.lineRangeWithByteRange(start: 0, length: 6)! == (1, 2)) - XCTAssert(string.lineRangeWithByteRange(start: 6, length: 0)! == (2, 2)) + XCTAssert(string.lineRangeWithByteRange(start: 0, length: 25)! == (1, 1)) + XCTAssert(string.lineRangeWithByteRange(start: 0, length: 26)! == (1, 2)) + XCTAssert(string.lineRangeWithByteRange(start: 0, length: 27)! == (1, 2)) + XCTAssert(string.lineRangeWithByteRange(start: 27, length: 0)! == (2, 2)) } }