Skip to content

Commit

Permalink
Re-factor the fallbackToUnicode functionality (PR 9192 follow-up)
Browse files Browse the repository at this point in the history
Rather than having to create and check a *separate* `ToUnicodeMap` to handle these cases, we can simply use the `fallbackToUnicode`-data (when it exists) to directly supplement *missing* /ToUnicode entries in the regular `ToUnicodeMap` instead.
  • Loading branch information
Snuffleupagus committed Jun 9, 2021
1 parent 882ca4c commit 7337987
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 14 deletions.
13 changes: 6 additions & 7 deletions src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -3176,10 +3176,10 @@ class PartialEvaluator {
}

/**
* @returns {ToUnicodeMap}
* @returns {Array}
* @private
*/
_buildSimpleFontToUnicode(properties, forceGlyphs = false) {
_simpleFontToUnicode(properties, forceGlyphs = false) {
assert(!properties.composite, "Must be a simple font.");

const toUnicode = [];
Expand Down Expand Up @@ -3240,7 +3240,7 @@ class PartialEvaluator {
Number.isNaN(code) &&
Number.isInteger(parseInt(codeStr, 16))
) {
return this._buildSimpleFontToUnicode(
return this._simpleFontToUnicode(
properties,
/* forceGlyphs */ true
);
Expand Down Expand Up @@ -3273,7 +3273,7 @@ class PartialEvaluator {
}
toUnicode[charcode] = String.fromCharCode(glyphsUnicodeMap[glyphName]);
}
return new ToUnicodeMap(toUnicode);
return toUnicode;
}

/**
Expand All @@ -3292,8 +3292,7 @@ class PartialEvaluator {
// text-extraction. For simple fonts, containing encoding information,
// use a fallback ToUnicode map to improve this (fixes issue8229.pdf).
if (!properties.composite && properties.hasEncoding) {
properties.fallbackToUnicode =
this._buildSimpleFontToUnicode(properties);
properties.fallbackToUnicode = this._simpleFontToUnicode(properties);
}
return properties.toUnicode;
}
Expand All @@ -3304,7 +3303,7 @@ class PartialEvaluator {
// in practice it seems better to always try to create a toUnicode map
// based on the default encoding.
if (!properties.composite /* is simple font */) {
return this._buildSimpleFontToUnicode(properties);
return new ToUnicodeMap(this._simpleFontToUnicode(properties));
}

// If the font is a composite font that uses one of the predefined CMaps
Expand Down
35 changes: 28 additions & 7 deletions src/core/fonts.js
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,29 @@ function adjustToUnicode(properties, builtInEncoding) {
}
}

/**
 * Fills gaps in `properties.toUnicode` using `properties.fallbackToUnicode`.
 *
 * Every char code present in the fallback data, for which the regular
 * `toUnicode` map reports no entry via `has`, is collected and handed to
 * `toUnicode.amend` in a single call. Nothing happens when there is no
 * fallback data, when `toUnicode` is an `IdentityToUnicodeMap`, or when
 * no char code turned out to be missing.
 *
 * NOTE: This function should only be called at the *end* of font-parsing,
 *       after e.g. `adjustToUnicode` has run, to prevent any issues.
 *
 * @param {Object} properties - Font properties; `toUnicode` is read (and
 *   possibly amended in place), `fallbackToUnicode` is only read.
 */
function amendFallbackToUnicode(properties) {
  const { fallbackToUnicode, toUnicode: primaryMap } = properties;

  if (!fallbackToUnicode || primaryMap instanceof IdentityToUnicodeMap) {
    return;
  }

  // The fallback data is a (possibly sparse) array indexed by char code;
  // `for...in` visits only the indices that are actually populated.
  const missingEntries = [];
  for (const code in fallbackToUnicode) {
    if (!primaryMap.has(code)) {
      // No `ToUnicode` entry in the font dictionary, use the fallback one.
      missingEntries[code] = fallbackToUnicode[code];
    }
  }

  if (missingEntries.length === 0) {
    return;
  }
  primaryMap.amend(missingEntries);
}

class Glyph {
constructor(
originalCharCode,
Expand Down Expand Up @@ -854,8 +877,6 @@ class Font {
this.defaultEncoding = properties.defaultEncoding;

this.toUnicode = properties.toUnicode;
this.fallbackToUnicode = properties.fallbackToUnicode || new ToUnicodeMap();

this.toFontChar = [];

if (properties.type === "Type3") {
Expand Down Expand Up @@ -941,6 +962,7 @@ class Font {
return;
}

amendFallbackToUnicode(properties);
this.data = data;
this.fontType = getFontType(type, subtype, properties.isStandardFont);

Expand Down Expand Up @@ -1099,6 +1121,8 @@ class Font {
}
this.toFontChar = map;
}

amendFallbackToUnicode(properties);
this.loadedName = fontName.split("-")[0];
this.fontType = getFontType(type, subtype, properties.isStandardFont);
}
Expand Down Expand Up @@ -2957,15 +2981,12 @@ class Font {
width = isNum(width) ? width : this.defaultWidth;
const vmetric = this.vmetrics && this.vmetrics[widthCode];

let unicode =
this.toUnicode.get(charcode) ||
this.fallbackToUnicode.get(charcode) ||
charcode;
let unicode = this.toUnicode.get(charcode) || charcode;
if (typeof unicode === "number") {
unicode = String.fromCharCode(unicode);
}

let isInFont = charcode in this.toFontChar;
let isInFont = this.toFontChar[charcode] !== undefined;
// First try the toFontChar map, if it's not there then try falling
// back to the char code.
fontCharCode = this.toFontChar[charcode] || charcode;
Expand Down

0 comments on commit 7337987

Please sign in to comment.