From d2ba42becdf84ab6770d3db91451fdc6aa8b0796 Mon Sep 17 00:00:00 2001 From: Chris Loer Date: Sun, 20 Nov 2016 09:36:07 -0800 Subject: [PATCH] [core] Full support for line breaking bidirectional text using ICU bidi functionality. - Trim whitespace from labels before determining their max-width for alignment. - Fix crash on labels that contain lines with only a single character of whitespace. --- package.json | 2 +- src/mbgl/layout/symbol_feature.hpp | 2 - src/mbgl/layout/symbol_layout.cpp | 10 +- src/mbgl/text/bidi.cpp | 133 ++++++++++++---- src/mbgl/text/bidi.hpp | 36 ++++- src/mbgl/text/glyph_set.cpp | 248 +++++++++++++++++------------ src/mbgl/text/glyph_set.hpp | 33 +++- src/mbgl/util/i18n.cpp | 10 +- src/mbgl/util/i18n.hpp | 3 - test/util/merge_lines.test.cpp | 48 +++--- 10 files changed, 338 insertions(+), 187 deletions(-) diff --git a/package.json b/package.json index 8b8e57460b1..e07567e5438 100644 --- a/package.json +++ b/package.json @@ -24,7 +24,7 @@ "lodash": "^4.16.4", "mapbox-gl-shaders": "mapbox/mapbox-gl-shaders#597115a1e1bd982944b068f8accde34eada74fc2", "mapbox-gl-style-spec": "mapbox/mapbox-gl-style-spec#7f62a4fc9f21e619824d68abbc4b03cbc1685572", - "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#c32d0c5ac80e3b7393bc17b8944e64fa5cffd90a", + "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#d85534f77e1b06fbfe7aa610c98a363be86fceb4", "mkdirp": "^0.5.1", "node-cmake": "^1.2.1", "request": "^2.72.0", diff --git a/src/mbgl/layout/symbol_feature.hpp b/src/mbgl/layout/symbol_feature.hpp index b1ac3ffe78a..9e0eacaac51 100644 --- a/src/mbgl/layout/symbol_feature.hpp +++ b/src/mbgl/layout/symbol_feature.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include #include @@ -12,7 +11,6 @@ class SymbolFeature { public: GeometryCollection geometry; optional text; - optional writingDirection; optional icon; std::size_t index; }; diff --git a/src/mbgl/layout/symbol_layout.cpp b/src/mbgl/layout/symbol_layout.cpp index 41ad6f6452c..7dad5c94290 100644 --- 
a/src/mbgl/layout/symbol_layout.cpp +++ b/src/mbgl/layout/symbol_layout.cpp @@ -20,8 +20,6 @@ #include #include -#include - namespace mbgl { using namespace style; @@ -92,9 +90,7 @@ SymbolLayout::SymbolLayout(std::string bucketName_, u8string = platform::lowercase(u8string); } - std::u16string u16string = util::utf8_to_utf16::convert(u8string); - ft.text = bidi.bidiTransform(u16string); - ft.writingDirection = bidi.baseWritingDirection(u16string); + ft.text = applyArabicShaping(util::utf8_to_utf16::convert(u8string)); // Loop through all characters of this text and collect unique codepoints. for (char16_t chr : *ft.text) { @@ -198,7 +194,6 @@ void SymbolLayout::prepare(uintptr_t tileUID, if (feature.text) { shapedText = glyphSet->getShaping( /* string */ *feature.text, - /* base direction of text */ *feature.writingDirection, /* maxWidth: ems */ layout.get() != SymbolPlacementType::Line ? layout.get() * 24 : 0, /* lineHeight: ems */ layout.get() * 24, @@ -206,7 +201,8 @@ void SymbolLayout::prepare(uintptr_t tileUID, /* verticalAlign */ verticalAlign, /* justify */ justify, /* spacing: ems */ layout.get() * 24, - /* translate */ Point(layout.get()[0], layout.get()[1])); + /* translate */ Point(layout.get()[0], layout.get()[1]), + /* bidirectional algorithm object */ bidi); // Add the glyphs we need for this label to the glyph atlas. 
if (shapedText) { diff --git a/src/mbgl/text/bidi.cpp b/src/mbgl/text/bidi.cpp index 4c127e9cabc..7d5f6313bc3 100644 --- a/src/mbgl/text/bidi.cpp +++ b/src/mbgl/text/bidi.cpp @@ -2,55 +2,124 @@ #include #include -#include #include namespace mbgl { -BiDi::BiDi() { +// Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining +// logical order +// Output won't be intelligible until the bidirectional algorithm is applied +std::u16string applyArabicShaping(const std::u16string& input) { UErrorCode errorCode = U_ZERO_ERROR; - transform = ubiditransform_open(&errorCode); // Only error is failure to allocate memory, in - // that case ubidi_transform would fall back to - // creating transform object on the fly + + int32_t outputLength = + u_shapeArabic(input.c_str(), static_cast(input.size()), NULL, 0, + (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) | + (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK), + &errorCode); + + // Pre-flighting will always set U_BUFFER_OVERFLOW_ERROR + errorCode = U_ZERO_ERROR; + + std::unique_ptr outputText = std::make_unique(outputLength); + u_shapeArabic(input.c_str(), static_cast(input.size()), outputText.get(), outputLength, + (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) | + (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK), + &errorCode); + + // If the algorithm fails for any reason, fall back to non-transformed text + if (U_FAILURE(errorCode)) + return input; + + return std::u16string(outputText.get(), outputLength); +} + +ProcessedBiDiText::ProcessedBiDiText(BiDi& p_bidi) : bidi(p_bidi) { +} + +void ProcessedBiDiText::mergeParagraphLineBreaks(std::set& lineBreakPoints) { + int32_t paragraphCount = ubidi_countParagraphs(bidi.bidiText); + for (int32_t i = 0; i < paragraphCount; i++) { + UErrorCode errorCode = U_ZERO_ERROR; + int32_t paragraphEndIndex; + ubidi_getParagraphByIndex(bidi.bidiText, i, NULL, &paragraphEndIndex, NULL, &errorCode); + + if (U_FAILURE(errorCode)) + throw 
std::runtime_error(std::string("ProcessedBiDiText::mergeParagraphLineBreaks: ") + + u_errorName(errorCode)); + + lineBreakPoints.insert(paragraphEndIndex); + } +} + +std::vector +ProcessedBiDiText::applyLineBreaking(std::set lineBreakPoints) { + // BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all + // paragraph + // boundaries are included in the set of line break points. The calling code might not include + // the line break because it + // didn't need to wrap at that point, or because the text was separated with a more exotic code + // point such as (U+001C) + mergeParagraphLineBreaks(lineBreakPoints); + + std::vector transformedLines; + int32_t start = 0; + for (int32_t lineBreakPoint : lineBreakPoints) { + transformedLines.push_back(bidi.getLine(start, lineBreakPoint)); + start = lineBreakPoint; + } + + return transformedLines; +} + +BiDi::BiDi() { + bidiText = ubidi_open(); + bidiLine = ubidi_open(); } BiDi::~BiDi() { - if (transform) - ubiditransform_close(transform); + if (bidiText) + ubidi_close(bidiText); + + if (bidiLine) + ubidi_close(bidiLine); } -std::u16string BiDi::bidiTransform(const std::u16string& input) { +ProcessedBiDiText BiDi::processText(const std::u16string& input) { UErrorCode errorCode = U_ZERO_ERROR; - std::unique_ptr outputText = - std::make_unique(input.size() * 2); // Maximum output of ubidi_transform is twice - // the size of input according to - // ubidi_transform.h - uint32_t outputLength = ubiditransform_transform( - transform, input.c_str(), static_cast(input.size()), outputText.get(), - static_cast(input.size()) * 2, - UBIDI_DEFAULT_LTR, // Assume input is LTR unless strong RTL characters are found - UBIDI_LOGICAL, // Input is in logical order - UBIDI_LTR, // Output is in "visual LTR" order - UBIDI_VISUAL, // '' - UBIDI_MIRRORING_ON, // Use mirroring lookups for things like parentheses that need mirroring - // in RTL text - U_SHAPE_LETTERS_SHAPE, // Add options here for handling 
numbers in bidirectional text - &errorCode); + ubidi_setPara(bidiText, input.c_str(), static_cast(input.size()), UBIDI_DEFAULT_LTR, + NULL, &errorCode); - // If the algorithm fails for any reason, fall back to non-transformed text if (U_FAILURE(errorCode)) - return input; + throw std::runtime_error(std::string("BiDi::processText: ") + u_errorName(errorCode)); - return std::u16string(outputText.get(), outputLength); + return ProcessedBiDiText(*this); } -WritingDirection BiDi::baseWritingDirection(const std::u16string& input) { - // This just looks for the first character with a strong direction property, it does not perform - // the BiDi algorithm - return ubidi_getBaseDirection(input.c_str(), static_cast(input.size())) == UBIDI_RTL - ? WritingDirection::RightToLeft - : WritingDirection::LeftToRight; +std::u16string BiDi::getLine(int32_t start, int32_t end) { + UErrorCode errorCode = U_ZERO_ERROR; + ubidi_setLine(bidiText, start, end, bidiLine, &errorCode); + + if (U_FAILURE(errorCode)) + throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(errorCode)); + + // Because we set UBIDI_REMOVE_BIDI_CONTROLS, the output may be smaller than what we reserve + // Setting UBIDI_INSERT_LRM_FOR_NUMERIC would require + // ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi) + int32_t outputLength = ubidi_getProcessedLength(bidiLine); + std::unique_ptr outputText = std::make_unique(outputLength); + + // UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses + // UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that + // they don't show up on screen (some fonts have glyphs representing them) + ubidi_writeReordered(bidiLine, outputText.get(), outputLength, + UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &errorCode); + + if (U_FAILURE(errorCode)) + throw std::runtime_error(std::string("BiDi::getLine (writeReordered): ") + u_errorName(errorCode)); + + return std::u16string(outputText.get(), 
outputLength); } } // end namespace mbgl diff --git a/src/mbgl/text/bidi.hpp b/src/mbgl/text/bidi.hpp index e29bf041e2a..8cbe079e579 100644 --- a/src/mbgl/text/bidi.hpp +++ b/src/mbgl/text/bidi.hpp @@ -1,25 +1,47 @@ #pragma once +#include #include +#include #include -struct UBiDiTransform; +struct UBiDi; namespace mbgl { - -enum class WritingDirection : bool { LeftToRight, RightToLeft }; - + +class BiDi; + +std::u16string applyArabicShaping(const std::u16string&); + +class ProcessedBiDiText { +public: + ProcessedBiDiText(BiDi&); + + std::vector applyLineBreaking(std::set); + +private: + void mergeParagraphLineBreaks(std::set&); + + BiDi& bidi; +}; + class BiDi : private util::noncopyable { public: BiDi(); ~BiDi(); - std::u16string bidiTransform(const std::u16string&); - WritingDirection baseWritingDirection(const std::u16string&); + // Calling processText resets internal state, invalidating any existing ProcessedBiDiText + // objects + ProcessedBiDiText processText(const std::u16string&); + + friend class ProcessedBiDiText; private: - UBiDiTransform* transform; + std::u16string getLine(int32_t start, int32_t end); + + UBiDi* bidiText; + UBiDi* bidiLine; }; } // end namespace mbgl diff --git a/src/mbgl/text/glyph_set.cpp b/src/mbgl/text/glyph_set.cpp index ef556aa537e..f1cb85a03a8 100644 --- a/src/mbgl/text/glyph_set.cpp +++ b/src/mbgl/text/glyph_set.cpp @@ -1,9 +1,11 @@ -#include -#include #include +#include #include -#include +#include + +#include +#include #include namespace mbgl { @@ -28,45 +30,47 @@ void GlyphSet::insert(uint32_t id, SDFGlyph&& glyph) { } } -const std::map &GlyphSet::getSDFs() const { +const std::map& GlyphSet::getSDFs() const { return sdfs; } -const Shaping GlyphSet::getShaping(const std::u16string &string, const WritingDirection writingDirection, const float maxWidth, - const float lineHeight, const float horizontalAlign, - const float verticalAlign, const float justify, - const float spacing, const Point &translate) const { - Shaping 
shaping(translate.x * 24, translate.y * 24, string); +const Shaping GlyphSet::getShaping(const std::u16string& logicalInput, + const float maxWidth, + const float lineHeight, + const float horizontalAlign, + const float verticalAlign, + const float justify, + const float spacing, + const Point& translate, + BiDi& bidi) const { - // the y offset *should* be part of the font metadata - const int32_t yOffset = -17; + // The string stored in shaping.text is used for finding duplicates, but may end up quite + // different from the glyphs that get shown + Shaping shaping(translate.x * 24, translate.y * 24, logicalInput); - float x = 0; - const float y = yOffset; + ProcessedBiDiText processedText = bidi.processText(logicalInput); - // Loop through all characters of this label and shape. - for (char16_t chr : string) { - auto it = sdfs.find(chr); - if (it != sdfs.end()) { - shaping.positionedGlyphs.emplace_back(chr, x, y); - x += it->second.metrics.advance + spacing; - } - } + std::vector reorderedLines = + processedText.applyLineBreaking(determineLineBreaks(logicalInput, spacing, maxWidth)); - if (shaping.positionedGlyphs.empty()) - return shaping; - - lineWrap(shaping, lineHeight, maxWidth, horizontalAlign, verticalAlign, justify, translate, - util::i18n::allowsIdeographicBreaking(string), writingDirection); + shapeLines(shaping, reorderedLines, spacing, lineHeight, horizontalAlign, verticalAlign, + justify, translate); return shaping; } -void align(Shaping &shaping, const float justify, const float horizontalAlign, - const float verticalAlign, const uint32_t maxLineLength, const float lineHeight, - const uint32_t line, const Point &translate) { - const float shiftX = (justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24/* one em */); - const float shiftY = (-verticalAlign * (line + 1) + 0.5) * lineHeight + ::round(translate.y * 24/* one em */); +void align(Shaping& shaping, + const float justify, + const float horizontalAlign, + const float 
verticalAlign, + const float maxLineLength, + const float lineHeight, + const uint32_t lineCount, + const Point& translate) { + const float shiftX = + (justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24 /* one em */); + const float shiftY = + (-verticalAlign * lineCount + 0.5) * lineHeight + ::round(translate.y * 24 /* one em */); for (auto& glyph : shaping.positionedGlyphs) { glyph.x += shiftX; @@ -74,9 +78,16 @@ void align(Shaping &shaping, const float justify, const float horizontalAlign, } } -void justifyLine(std::vector &positionedGlyphs, const std::map &sdfs, uint32_t start, - uint32_t end, float justify) { - PositionedGlyph &glyph = positionedGlyphs[end]; +// justify left = 0, right = 1, center = .5 +void justifyLine(std::vector& positionedGlyphs, + const std::map& sdfs, + uint32_t start, + uint32_t end, + float justify) { + if (!justify) + return; + + PositionedGlyph& glyph = positionedGlyphs[end]; auto it = sdfs.find(glyph.glyph); if (it != sdfs.end()) { const uint32_t lastAdvance = it->second.metrics.advance; @@ -88,81 +99,122 @@ void justifyLine(std::vector &positionedGlyphs, const std::map< } } -void GlyphSet::lineWrap(Shaping &shaping, const float lineHeight, float maxWidth, - const float horizontalAlign, const float verticalAlign, - const float justify, const Point &translate, - bool useBalancedIdeographicBreaking, const WritingDirection writingDirection) const { - float lineFeedOffset = writingDirection == WritingDirection::RightToLeft ? -lineHeight : lineHeight; - - uint32_t lastSafeBreak = 0; +float GlyphSet::determineIdeographicLineWidth(const std::u16string& logicalInput, + const float spacing, + float maxWidth) const { + float totalWidth = 0; + + // totalWidth doesn't include the last character for magical tuning reasons. 
This makes the + // algorithm a little + // more aggressive about trying to fit the text into fewer lines, taking advantage of the + // tolerance for going a little + // over maxWidth + for (uint32_t i = 0; i < logicalInput.size() - 1; i++) { + auto it = sdfs.find(logicalInput[i]); + if (it != sdfs.end()) + totalWidth += it->second.metrics.advance + spacing; + } - uint32_t lengthBeforeCurrentLine = 0; - uint32_t lineStartIndex = 0; - uint32_t line = 0; + int32_t lineCount = std::fmax(1, std::ceil(totalWidth / maxWidth)); + return totalWidth / lineCount; +} + +// We determine line breaks based on shaped text in logical order. Working in visual order would be +// more intuitive, but we can't do that because the visual order may be changed by line breaks! +std::set GlyphSet::determineLineBreaks(const std::u16string& logicalInput, + const float spacing, + float maxWidth) const { + if (!maxWidth) + return {}; + + if (logicalInput.empty()) + return {}; - uint32_t maxLineLength = 0; + if (util::i18n::allowsIdeographicBreaking(logicalInput)) + maxWidth = determineIdeographicLineWidth(logicalInput, spacing, maxWidth); + + std::set lineBreakPoints; + float currentX = 0; + uint32_t lastSafeBreak = 0; + float lastSafeBreakX = 0; - std::vector &positionedGlyphs = shaping.positionedGlyphs; + for (uint32_t i = 0; i < logicalInput.size(); i++) { + auto it = sdfs.find(logicalInput[i]); + if (it == sdfs.end()) + continue; - if (maxWidth) { - if (useBalancedIdeographicBreaking) { - auto lastPositionedGlyph = positionedGlyphs[positionedGlyphs.size() - 1]; - uint32_t estimatedLineCount = std::fmax(1, std::ceil(lastPositionedGlyph.x / maxWidth)); - maxWidth = lastPositionedGlyph.x / estimatedLineCount; + const SDFGlyph& glyph = it->second; + + // Ideographic characters, spaces, and word-breaking punctuation that often appear without + // surrounding spaces. 
+ if (util::i18n::allowsWordBreaking(glyph.id) || + util::i18n::allowsIdeographicBreaking(glyph.id)) { + lastSafeBreak = i; + lastSafeBreakX = currentX; } - for (uint32_t i = 0; i < positionedGlyphs.size(); i++) { - PositionedGlyph &shape = positionedGlyphs[i]; - - shape.x -= lengthBeforeCurrentLine; - shape.y += lineFeedOffset * line; - - if (shape.x > maxWidth && lastSafeBreak > 0) { - - uint32_t lineLength = positionedGlyphs[lastSafeBreak + 1].x; - maxLineLength = util::max(lineLength, maxLineLength); - - for (uint32_t k = lastSafeBreak + 1; k <= i; k++) { - positionedGlyphs[k].y += lineFeedOffset; - positionedGlyphs[k].x -= lineLength; - } - - if (justify) { - // Collapse invisible characters. - uint32_t breakGlyph = positionedGlyphs[lastSafeBreak].glyph; - uint32_t lineEnd = lastSafeBreak; - if (util::i18n::isVisible(breakGlyph)) { - lineEnd--; - } - - justifyLine(positionedGlyphs, sdfs, lineStartIndex, lineEnd, justify); - } - - lineStartIndex = lastSafeBreak + 1; - lastSafeBreak = 0; - lengthBeforeCurrentLine += lineLength; - line++; - } - - // Ideographic characters, spaces, and word-breaking punctuation that often appear without surrounding spaces. 
- if (useBalancedIdeographicBreaking - || util::i18n::allowsWordBreaking(shape.glyph) - || util::i18n::allowsIdeographicBreaking(shape.glyph)) { - lastSafeBreak = i; - } + if (currentX > maxWidth && lastSafeBreak > 0) { + lineBreakPoints.insert(lastSafeBreak); + currentX -= lastSafeBreakX; + lastSafeBreakX = 0; } + + currentX += glyph.metrics.advance + spacing; } - const PositionedGlyph& lastPositionedGlyph = positionedGlyphs.back(); - const auto lastGlyphIt = sdfs.find(lastPositionedGlyph.glyph); - assert(lastGlyphIt != sdfs.end()); - const uint32_t lastLineLength = lastPositionedGlyph.x + lastGlyphIt->second.metrics.advance; - maxLineLength = std::max(maxLineLength, lastLineLength); + return lineBreakPoints; +} + +void GlyphSet::shapeLines(Shaping& shaping, + const std::vector& lines, + const float spacing, + const float lineHeight, + const float horizontalAlign, + const float verticalAlign, + const float justify, + const Point& translate) const { + + // the y offset *should* be part of the font metadata + const int32_t yOffset = -17; + + float x = 0; + float y = yOffset; + + float maxLineLength = 0; - const uint32_t height = (line + 1) * lineHeight; + for (std::u16string line : lines) { + // Collapse whitespace so it doesn't throw off justification + boost::algorithm::trim_if(line, boost::algorithm::is_any_of(u" \t\n\v\f\r")); + + if (line.empty()) + continue; + + uint32_t lineStartIndex = static_cast(shaping.positionedGlyphs.size()); + for (char16_t chr : line) { + auto it = sdfs.find(chr); + if (it == sdfs.end()) + continue; + + const SDFGlyph& glyph = it->second; + shaping.positionedGlyphs.emplace_back(chr, x, y); + x += glyph.metrics.advance + spacing; + } + + if (static_cast(shaping.positionedGlyphs.size()) == lineStartIndex) + continue; + + maxLineLength = util::max(x, maxLineLength); + + justifyLine(shaping.positionedGlyphs, sdfs, lineStartIndex, + static_cast(shaping.positionedGlyphs.size()) - 1, justify); + + x = 0; + y += lineHeight; // Move to next 
line + } - justifyLine(positionedGlyphs, sdfs, lineStartIndex, uint32_t(positionedGlyphs.size()) - 1, justify); - align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight, line, translate); + align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight, + static_cast(lines.size()), translate); + const uint32_t height = lines.size() * lineHeight; // Calculate the bounding box shaping.top += -verticalAlign * height; diff --git a/src/mbgl/text/glyph_set.hpp b/src/mbgl/text/glyph_set.hpp index b4fcf4c3a4d..b48973b6ea7 100644 --- a/src/mbgl/text/glyph_set.hpp +++ b/src/mbgl/text/glyph_set.hpp @@ -9,15 +9,34 @@ namespace mbgl { class GlyphSet { public: void insert(uint32_t id, SDFGlyph&&); - const std::map &getSDFs() const; - const Shaping getShaping(const std::u16string &string, const WritingDirection writingDirection, float maxWidth, float lineHeight, - float horizontalAlign, float verticalAlign, float justify, - float spacing, const Point &translate) const; - void lineWrap(Shaping &shaping, float lineHeight, float maxWidth, float horizontalAlign, - float verticalAlign, float justify, const Point &translate, - bool useBalancedIdeographicBreaking, const WritingDirection writingDirection) const; + const std::map& getSDFs() const; + const Shaping getShaping(const std::u16string& string, + float maxWidth, + float lineHeight, + float horizontalAlign, + float verticalAlign, + float justify, + float spacing, + const Point& translate, + BiDi& bidi) const; private: + float determineIdeographicLineWidth(const std::u16string& logicalInput, + const float spacing, + float maxWidth) const; + std::set determineLineBreaks(const std::u16string& logicalInput, + const float spacing, + float maxWidth) const; + + void shapeLines(Shaping& shaping, + const std::vector& lines, + const float spacing, + float lineHeight, + float horizontalAlign, + float verticalAlign, + float justify, + const Point& translate) const; + std::map sdfs; }; diff --git 
a/src/mbgl/util/i18n.cpp b/src/mbgl/util/i18n.cpp index 97dda763206..4be624d2e55 100644 --- a/src/mbgl/util/i18n.cpp +++ b/src/mbgl/util/i18n.cpp @@ -294,12 +294,6 @@ namespace mbgl { namespace util { namespace i18n { -bool isVisible(uint16_t chr) { - return (chr == 0x0a /* newline */ - || chr == 0x20 /* space */ - || chr == 0x200b /* zero-width space */); -} - bool allowsWordBreaking(uint16_t chr) { return (chr == 0x0a /* newline */ || chr == 0x20 /* space */ @@ -324,6 +318,10 @@ bool allowsIdeographicBreaking(const std::u16string& string) { } bool allowsIdeographicBreaking(uint16_t chr) { + // Allow U+2027 "Interpunct" for hyphenation of Chinese words + if (chr == 0x2027) + return true; + // Return early for characters outside all ideographic ranges. if (chr < 0x2E80) return false; diff --git a/src/mbgl/util/i18n.hpp b/src/mbgl/util/i18n.hpp index c07dc91ed65..f1d3f53f729 100644 --- a/src/mbgl/util/i18n.hpp +++ b/src/mbgl/util/i18n.hpp @@ -6,9 +6,6 @@ namespace mbgl { namespace util { namespace i18n { -/** Returns whether a character is a visible character. */ -bool isVisible(uint16_t chr); - /** Returns whether a line break can be inserted after the character indicated by the given Unicode codepoint due to word breaking. 
*/ bool allowsWordBreaking(uint16_t chr); diff --git a/test/util/merge_lines.test.cpp b/test/util/merge_lines.test.cpp index 8383183e0aa..30cd1af0682 100644 --- a/test/util/merge_lines.test.cpp +++ b/test/util/merge_lines.test.cpp @@ -9,21 +9,21 @@ const std::u16string bbb = u"b"; TEST(MergeLines, SameText) { // merges lines with the same text std::vector input1 = { - { {{{0, 0}, {1, 0}, {2, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{{4, 0}, {5, 0}, {6, 0}}}, bbb, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{{8, 0}, {9, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{{2, 0}, {3, 0}, {4, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{{6, 0}, {7, 0}, {8, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{{5, 0}, {6, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 } + { {{{0, 0}, {1, 0}, {2, 0}}}, aaa, {}, 0 }, + { {{{4, 0}, {5, 0}, {6, 0}}}, bbb, {}, 0 }, + { {{{8, 0}, {9, 0}}}, aaa, {}, 0 }, + { {{{2, 0}, {3, 0}, {4, 0}}}, aaa, {}, 0 }, + { {{{6, 0}, {7, 0}, {8, 0}}}, aaa, {}, 0 }, + { {{{5, 0}, {6, 0}}}, aaa, {}, 0 } }; const std::vector expected1 = { - { {{{0, 0}, {1, 0}, {2, 0}, {3, 0}, {4, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{{4, 0}, {5, 0}, {6, 0}}}, bbb, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{{5, 0}, {6, 0}, {7, 0}, {8, 0}, {9, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 } + { {{{0, 0}, {1, 0}, {2, 0}, {3, 0}, {4, 0}}}, aaa, {}, 0 }, + { {{{4, 0}, {5, 0}, {6, 0}}}, bbb, {}, 0 }, + { {{{5, 0}, {6, 0}, {7, 0}, {8, 0}, {9, 0}}}, aaa, {}, 0 }, + { {{}}, aaa, {}, 0 }, + { {{}}, aaa, {}, 0 }, + { {{}}, aaa, {}, 0 } }; mbgl::util::mergeLines(input1); @@ -36,15 +36,15 @@ TEST(MergeLines, SameText) { TEST(MergeLines, BothEnds) { // mergeLines handles merge from both 
ends std::vector input2 = { - { {{{0, 0}, {1, 0}, {2, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{{4, 0}, {5, 0}, {6, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{{2, 0}, {3, 0}, {4, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 } + { {{{0, 0}, {1, 0}, {2, 0}}}, aaa, {}, 0 }, + { {{{4, 0}, {5, 0}, {6, 0}}}, aaa, {}, 0 }, + { {{{2, 0}, {3, 0}, {4, 0}}}, aaa, {}, 0 } }; const std::vector expected2 = { - { {{{0, 0}, {1, 0}, {2, 0}, {3, 0}, {4, 0}, {5, 0}, {6, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 } + { {{{0, 0}, {1, 0}, {2, 0}, {3, 0}, {4, 0}, {5, 0}, {6, 0}}}, aaa, {}, 0 }, + { {{}}, aaa, {}, 0 }, + { {{}}, aaa, {}, 0 } }; mbgl::util::mergeLines(input2); @@ -57,15 +57,15 @@ TEST(MergeLines, BothEnds) { TEST(MergeLines, CircularLines) { // mergeLines handles circular lines std::vector input3 = { - { {{{0, 0}, {1, 0}, {2, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{{2, 0}, {3, 0}, {4, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{{4, 0}, {0, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 } + { {{{0, 0}, {1, 0}, {2, 0}}}, aaa, {}, 0 }, + { {{{2, 0}, {3, 0}, {4, 0}}}, aaa, {}, 0 }, + { {{{4, 0}, {0, 0}}}, aaa, {}, 0 } }; const std::vector expected3 = { - { {{{0, 0}, {1, 0}, {2, 0}, {3, 0}, {4, 0}, {0, 0}}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 }, - { {{}}, aaa, mbgl::WritingDirection::LeftToRight, {}, 0 } + { {{{0, 0}, {1, 0}, {2, 0}, {3, 0}, {4, 0}, {0, 0}}}, aaa, {}, 0 }, + { {{}}, aaa, {}, 0 }, + { {{}}, aaa, {}, 0 } }; mbgl::util::mergeLines(input3);