Skip to content
This repository has been archived by the owner on Aug 8, 2023. It is now read-only.

Commit

Permalink
[core] Full support for line breaking bidirectional text using ICU bi…
Browse files Browse the repository at this point in the history
…di functionality.

 - Trim whitespace from labels before determining their max-width for alignment.
 - Fix crash on labels that contain lines with only a single character of whitespace.
  • Loading branch information
ChrisLoer committed Nov 28, 2016
1 parent 4577c56 commit 24b9538
Show file tree
Hide file tree
Showing 10 changed files with 337 additions and 187 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"lodash": "^4.16.4",
"mapbox-gl-shaders": "mapbox/mapbox-gl-shaders#597115a1e1bd982944b068f8accde34eada74fc2",
"mapbox-gl-style-spec": "mapbox/mapbox-gl-style-spec#7f62a4fc9f21e619824d68abbc4b03cbc1685572",
"mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#c32d0c5ac80e3b7393bc17b8944e64fa5cffd90a",
"mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#65ecebbe29ffdc16c01cfafc6ae347b0d6a02464",
"mkdirp": "^0.5.1",
"node-cmake": "^1.2.1",
"request": "^2.72.0",
Expand Down
2 changes: 0 additions & 2 deletions src/mbgl/layout/symbol_feature.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#pragma once

#include <mbgl/text/bidi.hpp>
#include <mbgl/tile/geometry_tile_data.hpp>
#include <mbgl/util/optional.hpp>

Expand All @@ -12,7 +11,6 @@ class SymbolFeature {
public:
GeometryCollection geometry;
optional<std::u16string> text;
optional<WritingDirection> writingDirection;
optional<std::string> icon;
std::size_t index;
};
Expand Down
10 changes: 3 additions & 7 deletions src/mbgl/layout/symbol_layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
#include <mbgl/util/platform.hpp>
#include <mbgl/util/logging.hpp>

#include <mbgl/text/bidi.hpp>

namespace mbgl {

using namespace style;
Expand Down Expand Up @@ -92,9 +90,7 @@ SymbolLayout::SymbolLayout(std::string bucketName_,
u8string = platform::lowercase(u8string);
}

std::u16string u16string = util::utf8_to_utf16::convert(u8string);
ft.text = bidi.bidiTransform(u16string);
ft.writingDirection = bidi.baseWritingDirection(u16string);
ft.text = applyArabicShaping(util::utf8_to_utf16::convert(u8string));

// Loop through all characters of this text and collect unique codepoints.
for (char16_t chr : *ft.text) {
Expand Down Expand Up @@ -198,15 +194,15 @@ void SymbolLayout::prepare(uintptr_t tileUID,
if (feature.text) {
shapedText = glyphSet->getShaping(
/* string */ *feature.text,
/* base direction of text */ *feature.writingDirection,
/* maxWidth: ems */ layout.get<SymbolPlacement>() != SymbolPlacementType::Line ?
layout.get<TextMaxWidth>() * 24 : 0,
/* lineHeight: ems */ layout.get<TextLineHeight>() * 24,
/* horizontalAlign */ horizontalAlign,
/* verticalAlign */ verticalAlign,
/* justify */ justify,
/* spacing: ems */ layout.get<TextLetterSpacing>() * 24,
/* translate */ Point<float>(layout.get<TextOffset>()[0], layout.get<TextOffset>()[1]));
/* translate */ Point<float>(layout.get<TextOffset>()[0], layout.get<TextOffset>()[1]),
/* bidirectional algorithm object */ bidi);

// Add the glyphs we need for this label to the glyph atlas.
if (shapedText) {
Expand Down
132 changes: 100 additions & 32 deletions src/mbgl/text/bidi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,55 +2,123 @@

#include <mbgl/text/bidi.hpp>
#include <unicode/ubidi.h>
#include <unicode/ubiditransform.h>
#include <unicode/ushape.h>

namespace mbgl {

BiDi::BiDi() {
// Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining
// logical order
// Output won't be intelligible until the bidirectional algorithm is applied
std::u16string applyArabicShaping(const std::u16string& input) {
UErrorCode errorCode = U_ZERO_ERROR;
transform = ubiditransform_open(&errorCode); // Only error is failure to allocate memory, in
// that case ubidi_transform would fall back to
// creating transform object on the fly

int32_t outputLength =
u_shapeArabic(input.c_str(), static_cast<int32_t>(input.size()), NULL, 0,
(U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) |
(U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK),
&errorCode);

// Pre-flighting will always set U_BUFFER_OVERFLOW_ERROR
errorCode = U_ZERO_ERROR;

std::unique_ptr<UChar[]> outputText = std::make_unique<UChar[]>(outputLength);
u_shapeArabic(input.c_str(), static_cast<int32_t>(input.size()), outputText.get(), outputLength,
(U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) |
(U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK),
&errorCode);

// If the algorithm fails for any reason, fall back to non-transformed text
if (U_FAILURE(errorCode))
return input;

return std::u16string(outputText.get(), outputLength);
}

// Binds this result object to the BiDi instance that produced it. Only a reference is stored, so
// the BiDi object must outlive this ProcessedBiDiText.
ProcessedBiDiText::ProcessedBiDiText(BiDi& p_bidi)
    : bidi(p_bidi) {
}

// Adds the end index of every paragraph ICU detected in the processed text to lineBreakPoints.
//
// lineBreakPoints: in/out set of break indices; paragraph end indices are inserted into it
//                  (indices that are already present are left as-is by std::set).
// Throws std::runtime_error if ICU reports a failure while querying a paragraph.
void ProcessedBiDiText::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) {
    // The paragraph count does not change while we iterate, so query it once instead of on
    // every pass through the loop condition.
    const int32_t paragraphCount = ubidi_countParagraphs(bidi.bidiText);

    for (int32_t i = 0; i < paragraphCount; i++) {
        UErrorCode errorCode = U_ZERO_ERROR;
        int32_t paragraphEndIndex = 0;
        // Only the paragraph's end index is needed; the start index and level are not requested.
        ubidi_getParagraphByIndex(bidi.bidiText, i, nullptr, &paragraphEndIndex, nullptr,
                                  &errorCode);

        if (U_FAILURE(errorCode))
            throw std::runtime_error(std::string("ProcessedBiDiText::mergeParagraphLineBreaks: ") +
                                     u_errorName(errorCode));

        lineBreakPoints.insert(paragraphEndIndex);
    }
}

// Splits the processed text at the given break indices and returns one reordered string per line.
//
// lineBreakPoints: indices at which the caller wants lines to end. Taken by value because
//                  paragraph boundaries are merged into the local copy before splitting.
// Returns the lines in ascending index order (the iteration order of std::set).
std::vector<std::u16string>
ProcessedBiDiText::applyLineBreaking(std::set<int32_t> lineBreakPoints) {
    // BiDi::getLine will error if called across a paragraph boundary, so every paragraph
    // boundary has to be present in the set of line break points. The calling code might not
    // have included one because it didn't need to wrap at that point, or because the text was
    // separated with a more exotic code point such as (U+001C).
    mergeParagraphLineBreaks(lineBreakPoints);

    std::vector<std::u16string> lines;
    int32_t lineStart = 0;
    for (const int32_t lineEnd : lineBreakPoints) {
        lines.push_back(bidi.getLine(lineStart, lineEnd));
        // The next line begins where this one ended.
        lineStart = lineEnd;
    }

    return lines;
}

// Opens the two ICU BiDi handles this object works with: bidiText is the one processText()
// passes to ubidi_setPara, and bidiLine is the one getLine() passes to ubidi_setLine as the
// per-line target.
BiDi::BiDi()
    : bidiText(ubidi_open()),
      bidiLine(ubidi_open()) {
}

BiDi::~BiDi() {
if (transform)
ubiditransform_close(transform);
if (bidiText)
ubidi_close(bidiText);

if (bidiLine)
ubidi_close(bidiLine);
}

std::u16string BiDi::bidiTransform(const std::u16string& input) {
ProcessedBiDiText BiDi::processText(const std::u16string& input) {
UErrorCode errorCode = U_ZERO_ERROR;

std::unique_ptr<UChar[]> outputText =
std::make_unique<UChar[]>(input.size() * 2); // Maximum output of ubidi_transform is twice
// the size of input according to
// ubidi_transform.h
uint32_t outputLength = ubiditransform_transform(
transform, input.c_str(), static_cast<int32_t>(input.size()), outputText.get(),
static_cast<int32_t>(input.size()) * 2,
UBIDI_DEFAULT_LTR, // Assume input is LTR unless strong RTL characters are found
UBIDI_LOGICAL, // Input is in logical order
UBIDI_LTR, // Output is in "visual LTR" order
UBIDI_VISUAL, // ''
UBIDI_MIRRORING_ON, // Use mirroring lookups for things like parentheses that need mirroring
// in RTL text
U_SHAPE_LETTERS_SHAPE, // Add options here for handling numbers in bidirectional text
&errorCode);
ubidi_setPara(bidiText, input.c_str(), static_cast<int32_t>(input.size()), UBIDI_DEFAULT_LTR,
NULL, &errorCode);

// If the algorithm fails for any reason, fall back to non-transformed text
if (U_FAILURE(errorCode))
return input;
throw std::runtime_error(std::string("BiDi::processText: ") + u_errorName(errorCode));

return std::u16string(outputText.get(), outputLength);
return ProcessedBiDiText(*this);
}

WritingDirection BiDi::baseWritingDirection(const std::u16string& input) {
// This just looks for the first character with a strong direction property, it does not perform
// the BiDi algorithm
return ubidi_getBaseDirection(input.c_str(), static_cast<int32_t>(input.size())) == UBIDI_RTL
? WritingDirection::RightToLeft
: WritingDirection::LeftToRight;
std::u16string BiDi::getLine(int32_t start, int32_t end) {
UErrorCode errorCode = U_ZERO_ERROR;
ubidi_setLine(bidiText, start, end, bidiLine, &errorCode);

if (U_FAILURE(errorCode))
throw std::runtime_error("msg");

// Because we set UBIDI_REMOVE_BIDI_CONTROLS, the output may be smaller than what we reserve
// Setting UBIDI_INSERT_LRM_FOR_NUMERIC would require
// ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)
int32_t outputLength = ubidi_getProcessedLength(bidiLine);
std::unique_ptr<UChar[]> outputText = std::make_unique<UChar[]>(outputLength);

// UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses
// UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that
// they don't show up on screen (some fonts have glyphs representing them)
ubidi_writeReordered(bidiLine, outputText.get(), outputLength,
UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &errorCode);

if (U_FAILURE(errorCode))
throw std::runtime_error(std::string("BiDi::getLine: ") + u_errorName(errorCode));

return std::u16string(outputText.get(), outputLength);
}

} // end namespace mbgl
36 changes: 29 additions & 7 deletions src/mbgl/text/bidi.hpp
Original file line number Diff line number Diff line change
@@ -1,25 +1,47 @@
#pragma once

#include <set>
#include <string>
#include <vector>

#include <mbgl/util/noncopyable.hpp>

struct UBiDiTransform;
struct UBiDi;

namespace mbgl {

enum class WritingDirection : bool { LeftToRight, RightToLeft };


class BiDi;

std::u16string applyArabicShaping(const std::u16string&);

// Handle to the text most recently processed by a BiDi object. It stores only a reference to
// that BiDi object, so it must not outlive it.
class ProcessedBiDiText {
public:
    // Wraps the BiDi object whose state holds the processed text.
    ProcessedBiDiText(BiDi& p_bidi);

    // Breaks the processed text at the given indices and returns the resulting lines.
    // The set is taken by value.
    std::vector<std::u16string> applyLineBreaking(std::set<int32_t> lineBreakPoints);

private:
    // Adds paragraph end indices to the supplied set of break points.
    void mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints);

    BiDi& bidi;
};

class BiDi : private util::noncopyable {
public:
BiDi();
~BiDi();

std::u16string bidiTransform(const std::u16string&);
WritingDirection baseWritingDirection(const std::u16string&);
// Calling processText resets internal state, invalidating any existing ProcessedBiDiText
// objects
ProcessedBiDiText processText(const std::u16string&);

friend class ProcessedBiDiText;

private:
UBiDiTransform* transform;
std::u16string getLine(int32_t start, int32_t end);

UBiDi* bidiText;
UBiDi* bidiLine;
};

} // end namespace mbgl
Loading

0 comments on commit 24b9538

Please sign in to comment.