Skip to content

Commit

Permalink
Make CodepointWidthDetector::GetWidth faster (#3727)
Browse files Browse the repository at this point in the history
This is a subset of #3578 which I think is harmless and the first step towards making things right.
References #3546 #3578 

## Detailed Description of the Pull Request / Additional comments

For more robust Unicode support, `CodepointWidthDetector` should provide concrete width information rather than a simple boolean of `IsWide`. Currently only `IsWide` is widely used and optimized using quick lookup table and fallback cache. This PR moves those optimization into `GetWidth`.

## Validation Steps Performed

API remains unchanged. Things are not broken.
  • Loading branch information
skyline75489 authored Apr 4, 2020
1 parent 9421553 commit 4f8acb4
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 52 deletions.
117 changes: 66 additions & 51 deletions src/types/CodepointWidthDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,29 +319,47 @@ CodepointWidthDetector::CodepointWidthDetector() noexcept :
}

// Routine Description:
// - returns the width type of codepoint by searching the map generated from the unicode spec
// - returns the width type of codepoint as fast as we can by using quick lookup table and fallback cache.
// Arguments:
// - glyph - the utf16 encoded codepoint to search for
// Return Value:
// - the width type of the codepoint
CodepointWidth CodepointWidthDetector::GetWidth(const std::wstring_view glyph) const
{
if (glyph.empty())
THROW_HR_IF(E_INVALIDARG, glyph.empty());
if (glyph.size() == 1)
{
return CodepointWidth::Invalid;
}

const auto codepoint = _extractCodepoint(glyph);
const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint);
// We first attempt to look at our custom quick lookup table of char width preferences.
const auto width = GetQuickCharWidth(glyph.front());

// For characters that are not _in_ the table, lower_bound will return the nearest item that is.
// We must check its bounds to make sure that our hit was a true hit.
if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound)
// If it's invalid, the quick width had no opinion, so go to the lookup table.
if (width == CodepointWidth::Invalid)
{
return _lookupGlyphWidthWithCache(glyph);
}
// If it's ambiguous, the quick width wanted us to ask the font directly, try that if we can.
// If not, go to the lookup table.
else if (width == CodepointWidth::Ambiguous)
{
if (_pfnFallbackMethod)
{
return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Ambiguous;
}
else
{
return _lookupGlyphWidthWithCache(glyph);
}
}
// Otherwise, return Width as it is.
else
{
return width;
}
}
else
{
return it->width;
return _lookupGlyphWidthWithCache(glyph);
}

return CodepointWidth::Narrow;
}

// Routine Description:
Expand Down Expand Up @@ -369,74 +387,71 @@ bool CodepointWidthDetector::IsWide(const wchar_t wch) const noexcept
// - true if codepoint is wide
bool CodepointWidthDetector::IsWide(const std::wstring_view glyph) const
{
THROW_HR_IF(E_INVALIDARG, glyph.empty());
if (glyph.size() == 1)
{
// We first attempt to look at our custom quick lookup table of char width preferences.
const auto width = GetQuickCharWidth(glyph.front());
return GetWidth(glyph) == CodepointWidth::Wide;
}

// If it's invalid, the quick width had no opinion, so go to the lookup table.
if (width == CodepointWidth::Invalid)
{
return _lookupIsWide(glyph);
}
// If it's ambiguous, the quick width wanted us to ask the font directly, try that if we can.
// If not, go to the lookup table.
else if (width == CodepointWidth::Ambiguous)
{
if (_pfnFallbackMethod)
{
return _checkFallbackViaCache(glyph);
}
else
{
return _lookupIsWide(glyph);
}
}
// Otherwise, return Wide as True and Narrow as False.
else
{
return width == CodepointWidth::Wide;
}
// Routine Description:
// - returns the width type of codepoint by searching the map generated from the unicode spec
// Arguments:
// - glyph - the utf16 encoded codepoint to search for
// Return Value:
// - the width type of the codepoint
CodepointWidth CodepointWidthDetector::_lookupGlyphWidth(const std::wstring_view glyph) const
{
if (glyph.empty())
{
return CodepointWidth::Invalid;
}
else

const auto codepoint = _extractCodepoint(glyph);
const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint);

// For characters that are not _in_ the table, lower_bound will return the nearest item that is.
// We must check its bounds to make sure that our hit was a true hit.
if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound)
{
return _lookupIsWide(glyph);
return it->width;
}

return CodepointWidth::Narrow;
}

// Routine Description:
// - checks if codepoint is wide using fallback methods.
// - returns the width type of codepoint using fallback methods.
// Arguments:
// - glyph - the utf16 encoded codepoint to check width of
// Return Value:
// - true if codepoint is wide or if it can't be confirmed to be narrow
bool CodepointWidthDetector::_lookupIsWide(const std::wstring_view glyph) const noexcept
// - the width type of the codepoint
CodepointWidth CodepointWidthDetector::_lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept
{
try
{
// Use our generated table to try to lookup the width based on the Unicode standard.
const CodepointWidth width = GetWidth(glyph);
const CodepointWidth width = _lookupGlyphWidth(glyph);

// If it's ambiguous, then ask the font if we can.
if (width == CodepointWidth::Ambiguous)
{
if (_pfnFallbackMethod)
{
return _checkFallbackViaCache(glyph);
return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Ambiguous;
}
else
{
return CodepointWidth::Ambiguous;
}
}
// If it's not ambiguous, it should say wide or narrow. Turn that into True = Wide or False = Narrow.
// If it's not ambiguous, it should say wide or narrow.
else
{
return width == CodepointWidth::Wide;
return width;
}
}
CATCH_LOG();

// If we got this far, we couldn't figure it out.
// It's better to be too wide than too narrow.
return true;
return CodepointWidth::Wide;
}

// Routine Description:
Expand Down
3 changes: 2 additions & 1 deletion src/types/inc/CodepointWidthDetector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class CodepointWidthDetector final
#endif

private:
bool _lookupIsWide(const std::wstring_view glyph) const noexcept;
CodepointWidth _lookupGlyphWidth(const std::wstring_view glyph) const;
CodepointWidth _lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept;
bool _checkFallbackViaCache(const std::wstring_view glyph) const;
static unsigned int _extractCodepoint(const std::wstring_view glyph) noexcept;

Expand Down

0 comments on commit 4f8acb4

Please sign in to comment.