Skip to content
This repository has been archived by the owner on Aug 8, 2023. It is now read-only.

Full support for bidirectional line breaking #7123

Merged
merged 1 commit into from
Nov 30, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"lodash": "^4.16.4",
"mapbox-gl-shaders": "mapbox/mapbox-gl-shaders#597115a1e1bd982944b068f8accde34eada74fc2",
"mapbox-gl-style-spec": "mapbox/mapbox-gl-style-spec#7f62a4fc9f21e619824d68abbc4b03cbc1685572",
"mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#c32d0c5ac80e3b7393bc17b8944e64fa5cffd90a",
"mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#d85534f77e1b06fbfe7aa610c98a363be86fceb4",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mapbox/mapbox-gl-test-suite#186 was rebased onto that repository’s master branch, so mapbox/mapbox-gl-test-suite@d85534f has been orphaned. This line should have been changed to point to mapbox/mapbox-gl-test-suite@0c6f3e0 before this PR was merged.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, sorry about that, and thanks for fixing!

"mkdirp": "^0.5.1",
"node-cmake": "^1.2.1",
"request": "^2.72.0",
Expand Down
2 changes: 0 additions & 2 deletions src/mbgl/layout/symbol_feature.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#pragma once

#include <mbgl/text/bidi.hpp>
#include <mbgl/tile/geometry_tile_data.hpp>
#include <mbgl/util/optional.hpp>

Expand All @@ -12,7 +11,6 @@ class SymbolFeature {
public:
GeometryCollection geometry;
optional<std::u16string> text;
optional<WritingDirection> writingDirection;
optional<std::string> icon;
std::size_t index;
};
Expand Down
10 changes: 3 additions & 7 deletions src/mbgl/layout/symbol_layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
#include <mbgl/util/platform.hpp>
#include <mbgl/util/logging.hpp>

#include <mbgl/text/bidi.hpp>

namespace mbgl {

using namespace style;
Expand Down Expand Up @@ -92,9 +90,7 @@ SymbolLayout::SymbolLayout(std::string bucketName_,
u8string = platform::lowercase(u8string);
}

std::u16string u16string = util::utf8_to_utf16::convert(u8string);
ft.text = bidi.bidiTransform(u16string);
ft.writingDirection = bidi.baseWritingDirection(u16string);
ft.text = applyArabicShaping(util::utf8_to_utf16::convert(u8string));

// Loop through all characters of this text and collect unique codepoints.
for (char16_t chr : *ft.text) {
Expand Down Expand Up @@ -198,15 +194,15 @@ void SymbolLayout::prepare(uintptr_t tileUID,
if (feature.text) {
shapedText = glyphSet->getShaping(
/* string */ *feature.text,
/* base direction of text */ *feature.writingDirection,
/* maxWidth: ems */ layout.get<SymbolPlacement>() != SymbolPlacementType::Line ?
layout.get<TextMaxWidth>() * 24 : 0,
/* lineHeight: ems */ layout.get<TextLineHeight>() * 24,
/* horizontalAlign */ horizontalAlign,
/* verticalAlign */ verticalAlign,
/* justify */ justify,
/* spacing: ems */ layout.get<TextLetterSpacing>() * 24,
/* translate */ Point<float>(layout.get<TextOffset>()[0], layout.get<TextOffset>()[1]));
/* translate */ Point<float>(layout.get<TextOffset>()[0], layout.get<TextOffset>()[1]),
/* bidirectional algorithm object */ bidi);

// Add the glyphs we need for this label to the glyph atlas.
if (shapedText) {
Expand Down
133 changes: 101 additions & 32 deletions src/mbgl/text/bidi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,55 +2,124 @@

#include <mbgl/text/bidi.hpp>
#include <unicode/ubidi.h>
#include <unicode/ubiditransform.h>
#include <unicode/ushape.h>

namespace mbgl {

BiDi::BiDi() {
// Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining
// logical order
// Output won't be intelligible until the bidirectional algorithm is applied
std::u16string applyArabicShaping(const std::u16string& input) {
UErrorCode errorCode = U_ZERO_ERROR;
transform = ubiditransform_open(&errorCode); // Only error is failure to allocate memory, in
// that case ubidi_transform would fall back to
// creating transform object on the fly

int32_t outputLength =
u_shapeArabic(input.c_str(), static_cast<int32_t>(input.size()), NULL, 0,
(U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) |
(U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK),
&errorCode);

// Pre-flighting will always set U_BUFFER_OVERFLOW_ERROR
errorCode = U_ZERO_ERROR;

std::unique_ptr<UChar[]> outputText = std::make_unique<UChar[]>(outputLength);
u_shapeArabic(input.c_str(), static_cast<int32_t>(input.size()), outputText.get(), outputLength,
(U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) |
(U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK),
&errorCode);

// If the algorithm fails for any reason, fall back to non-transformed text
if (U_FAILURE(errorCode))
return input;

return std::u16string(outputText.get(), outputLength);
}

// Binds this object to the BiDi instance it will query for paragraph and line data.
// Only a reference is kept, so the BiDi object must outlive this ProcessedBiDiText.
ProcessedBiDiText::ProcessedBiDiText(BiDi& p_bidi)
    : bidi(p_bidi) {
}

void ProcessedBiDiText::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) {
int32_t paragraphCount = ubidi_countParagraphs(bidi.bidiText);
for (int32_t i = 0; i < paragraphCount; i++) {
UErrorCode errorCode = U_ZERO_ERROR;
int32_t paragraphEndIndex;
ubidi_getParagraphByIndex(bidi.bidiText, i, NULL, &paragraphEndIndex, NULL, &errorCode);

if (U_FAILURE(errorCode))
throw std::runtime_error(std::string("ProcessedBiDiText::mergeParagraphLineBreaks: ") +
u_errorName(errorCode));

lineBreakPoints.insert(paragraphEndIndex);
}
}

// Splits the processed text into lines at the given break points and returns one
// reordered string per line, in order.
//
// lineBreakPoints: indices at which a line ends. Taken by value because the set is
//                  extended locally with the paragraph boundaries before use.
// Returns: one string per resulting line, each produced by BiDi::getLine.
// Throws: std::runtime_error, propagated from mergeParagraphLineBreaks or BiDi::getLine
//         when ICU reports a failure.
std::vector<std::u16string>
ProcessedBiDiText::applyLineBreaking(std::set<int32_t> lineBreakPoints) {
    // BiDi::getLine will error if called across a paragraph boundary, so we need to
    // ensure that all paragraph boundaries are included in the set of line break
    // points. The calling code might not include such a break because it didn't need
    // to wrap at that point, or because the text was separated with a more exotic
    // code point such as U+001C.
    mergeParagraphLineBreaks(lineBreakPoints);

    std::vector<std::u16string> transformedLines;
    // Exactly one line is produced per break point, so the final size is known here.
    transformedLines.reserve(lineBreakPoints.size());

    // std::set iterates in ascending order, so each line spans from the previous
    // break point (or 0 for the first line) up to the current one.
    int32_t start = 0;
    for (const int32_t lineBreakPoint : lineBreakPoints) {
        transformedLines.push_back(bidi.getLine(start, lineBreakPoint));
        start = lineBreakPoint;
    }

    return transformedLines;
}

BiDi::BiDi() {
bidiText = ubidi_open();
bidiLine = ubidi_open();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why two handles?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just the way the ICU interface works. ubidi_setLine wants two UBiDi* objects: one "input" representing the whole run of text, and one "output" representing bidirectional information for a single line.

}

BiDi::~BiDi() {
if (transform)
ubiditransform_close(transform);
if (bidiText)
ubidi_close(bidiText);

if (bidiLine)
ubidi_close(bidiLine);
}

std::u16string BiDi::bidiTransform(const std::u16string& input) {
ProcessedBiDiText BiDi::processText(const std::u16string& input) {
UErrorCode errorCode = U_ZERO_ERROR;

std::unique_ptr<UChar[]> outputText =
std::make_unique<UChar[]>(input.size() * 2); // Maximum output of ubidi_transform is twice
// the size of input according to
// ubidi_transform.h
uint32_t outputLength = ubiditransform_transform(
transform, input.c_str(), static_cast<int32_t>(input.size()), outputText.get(),
static_cast<int32_t>(input.size()) * 2,
UBIDI_DEFAULT_LTR, // Assume input is LTR unless strong RTL characters are found
UBIDI_LOGICAL, // Input is in logical order
UBIDI_LTR, // Output is in "visual LTR" order
UBIDI_VISUAL, // ''
UBIDI_MIRRORING_ON, // Use mirroring lookups for things like parentheses that need mirroring
// in RTL text
U_SHAPE_LETTERS_SHAPE, // Add options here for handling numbers in bidirectional text
&errorCode);
ubidi_setPara(bidiText, input.c_str(), static_cast<int32_t>(input.size()), UBIDI_DEFAULT_LTR,
NULL, &errorCode);

// If the algorithm fails for any reason, fall back to non-transformed text
if (U_FAILURE(errorCode))
return input;
throw std::runtime_error(std::string("BiDi::processText: ") + u_errorName(errorCode));

return std::u16string(outputText.get(), outputLength);
return ProcessedBiDiText(*this);
}

WritingDirection BiDi::baseWritingDirection(const std::u16string& input) {
// This just looks for the first character with a strong direction property, it does not perform
// the BiDi algorithm
return ubidi_getBaseDirection(input.c_str(), static_cast<int32_t>(input.size())) == UBIDI_RTL
? WritingDirection::RightToLeft
: WritingDirection::LeftToRight;
std::u16string BiDi::getLine(int32_t start, int32_t end) {
UErrorCode errorCode = U_ZERO_ERROR;
ubidi_setLine(bidiText, start, end, bidiLine, &errorCode);

if (U_FAILURE(errorCode))
throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(errorCode));

// Because we set UBIDI_REMOVE_BIDI_CONTROLS, the output may be smaller than what we reserve
// Setting UBIDI_INSERT_LRM_FOR_NUMERIC would require
// ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)
int32_t outputLength = ubidi_getProcessedLength(bidiLine);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const

std::unique_ptr<UChar[]> outputText = std::make_unique<UChar[]>(outputLength);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can just use auto here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, right! I left the C++ world at C++98, and I'm still getting used to all the great additions...


// UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses
// UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that
// they don't show up on screen (some fonts have glyphs representing them)
ubidi_writeReordered(bidiLine, outputText.get(), outputLength,
UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &errorCode);

if (U_FAILURE(errorCode))
throw std::runtime_error(std::string("BiDi::getLine (writeReordered): ") + u_errorName(errorCode));

return std::u16string(outputText.get(), outputLength);
}

} // end namespace mbgl
36 changes: 29 additions & 7 deletions src/mbgl/text/bidi.hpp
Original file line number Diff line number Diff line change
@@ -1,25 +1,47 @@
#pragma once

#include <set>
#include <string>
#include <vector>

#include <mbgl/util/noncopyable.hpp>

struct UBiDiTransform;
struct UBiDi;

namespace mbgl {

enum class WritingDirection : bool { LeftToRight, RightToLeft };


class BiDi;

std::u16string applyArabicShaping(const std::u16string&);

class ProcessedBiDiText {
public:
ProcessedBiDiText(BiDi&);

std::vector<std::u16string> applyLineBreaking(std::set<int32_t>);

private:
void mergeParagraphLineBreaks(std::set<int32_t>&);

BiDi& bidi;
};

class BiDi : private util::noncopyable {
public:
BiDi();
~BiDi();

std::u16string bidiTransform(const std::u16string&);
WritingDirection baseWritingDirection(const std::u16string&);
// Calling processText resets internal state, invalidating any existing ProcessedBiDiText
// objects
ProcessedBiDiText processText(const std::u16string&);

friend class ProcessedBiDiText;

private:
UBiDiTransform* transform;
std::u16string getLine(int32_t start, int32_t end);

UBiDi* bidiText;
UBiDi* bidiLine;
};

} // end namespace mbgl
Loading