Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make CodepointWidthDetector::GetWidth faster #3727

Merged
3 commits merged into from
Apr 4, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 62 additions & 51 deletions src/types/CodepointWidthDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,29 +319,47 @@ CodepointWidthDetector::CodepointWidthDetector() noexcept :
}

// Routine Description:
// - returns the width type of codepoint by searching the map generated from the unicode spec
// - returns the width type of the codepoint as fast as we can by using the quick lookup table and the fallback cache.
// Arguments:
// - glyph - the utf16 encoded codepoint to search for
// Return Value:
// - the width type of the codepoint
CodepointWidth CodepointWidthDetector::GetWidth(const std::wstring_view glyph) const
{
if (glyph.empty())
THROW_HR_IF(E_INVALIDARG, glyph.empty());
if (glyph.size() == 1)
{
return CodepointWidth::Invalid;
}

const auto codepoint = _extractCodepoint(glyph);
const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint);
// We first attempt to look at our custom quick lookup table of char width preferences.
const auto width = GetQuickCharWidth(glyph.front());

// For characters that are not _in_ the table, lower_bound will return the nearest item that is.
// We must check its bounds to make sure that our hit was a true hit.
if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound)
// If it's invalid, the quick width had no opinion, so go to the lookup table.
if (width == CodepointWidth::Invalid)
{
return _lookupGlyphWidthWithCache(glyph);
}
// If it's ambiguous, the quick width wanted us to ask the font directly, try that if we can.
// If not, go to the lookup table.
else if (width == CodepointWidth::Ambiguous)
{
if (_pfnFallbackMethod)
{
return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Narrow;
}
else
{
return _lookupGlyphWidthWithCache(glyph);
}
}
// Otherwise, the quick lookup gave a definite Wide or Narrow answer; return it as is.
else
{
return width;
}
}
else
{
return it->width;
return _lookupGlyphWidthWithCache(glyph);
}

return CodepointWidth::Narrow;
}

// Routine Description:
Expand Down Expand Up @@ -369,74 +387,67 @@ bool CodepointWidthDetector::IsWide(const wchar_t wch) const noexcept
// - true if codepoint is wide
bool CodepointWidthDetector::IsWide(const std::wstring_view glyph) const
{
THROW_HR_IF(E_INVALIDARG, glyph.empty());
if (glyph.size() == 1)
{
// We first attempt to look at our custom quick lookup table of char width preferences.
const auto width = GetQuickCharWidth(glyph.front());
return GetWidth(glyph) == CodepointWidth::Wide;
}

// If it's invalid, the quick width had no opinion, so go to the lookup table.
if (width == CodepointWidth::Invalid)
{
return _lookupIsWide(glyph);
}
// If it's ambiguous, the quick width wanted us to ask the font directly, try that if we can.
// If not, go to the lookup table.
else if (width == CodepointWidth::Ambiguous)
{
if (_pfnFallbackMethod)
{
return _checkFallbackViaCache(glyph);
}
else
{
return _lookupIsWide(glyph);
}
}
// Otherwise, return Wide as True and Narrow as False.
else
{
return width == CodepointWidth::Wide;
}
// Routine Description:
// - returns the width type of codepoint by searching the map generated from the unicode spec
// Arguments:
// - glyph - the utf16 encoded codepoint to search for
// Return Value:
// - the width type of the codepoint
CodepointWidth CodepointWidthDetector::_lookupGlyphWidth(const std::wstring_view glyph) const
DHowett-MSFT marked this conversation as resolved.
Show resolved Hide resolved
{
if (glyph.empty())
{
return CodepointWidth::Invalid;
}
else

const auto codepoint = _extractCodepoint(glyph);
const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint);

// For characters that are not _in_ the table, lower_bound will return the nearest item that is.
// We must check its bounds to make sure that our hit was a true hit.
if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound)
{
return _lookupIsWide(glyph);
return it->width;
}

return CodepointWidth::Narrow;
}

// Routine Description:
// - checks if codepoint is wide using fallback methods.
// - returns the width type of codepoint using fallback methods.
// Arguments:
// - glyph - the utf16 encoded codepoint to check width of
// Return Value:
// - true if codepoint is wide or if it can't be confirmed to be narrow
bool CodepointWidthDetector::_lookupIsWide(const std::wstring_view glyph) const noexcept
// - the width type of the codepoint
CodepointWidth CodepointWidthDetector::_lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept
{
try
{
// Use our generated table to try to lookup the width based on the Unicode standard.
const CodepointWidth width = GetWidth(glyph);
const CodepointWidth width = _lookupGlyphWidth(glyph);

// If it's ambiguous, then ask the font if we can.
if (width == CodepointWidth::Ambiguous)
{
if (_pfnFallbackMethod)
{
return _checkFallbackViaCache(glyph);
return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Narrow;
}
}
// If it's not ambiguous, it should say wide or narrow. Turn that into True = Wide or False = Narrow.
// If it's not ambiguous, it should say wide or narrow.
else
{
return width == CodepointWidth::Wide;
return width;
}
}
CATCH_LOG();

// If we got this far, we couldn't figure it out.
// It's better to be too wide than too narrow.
return true;
return CodepointWidth::Wide;
}

// Routine Description:
Expand Down
3 changes: 2 additions & 1 deletion src/types/inc/CodepointWidthDetector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class CodepointWidthDetector final
#endif

private:
bool _lookupIsWide(const std::wstring_view glyph) const noexcept;
CodepointWidth _lookupGlyphWidth(const std::wstring_view glyph) const;
CodepointWidth _lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept;
bool _checkFallbackViaCache(const std::wstring_view glyph) const;
static unsigned int _extractCodepoint(const std::wstring_view glyph) noexcept;

Expand Down