diff --git a/src/core/fonts.js b/src/core/fonts.js index 355aca39956c4..b2c06c9c56e0d 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -1610,7 +1610,12 @@ var Font = (function FontClosure() { continue; } - if (platformId === 0 && encodingId === 0) { + if ( + platformId === 0 && + (encodingId === /* Unicode Default */ 0 || + encodingId === /* Unicode 1.1 */ 1 || + encodingId === /* Unicode BMP */ 3) + ) { useTable = true; // Continue the loop since there still may be a higher priority // table. @@ -2792,32 +2797,24 @@ var Font = (function FontClosure() { var cmapEncodingId = cmapTable.encodingId; var cmapMappings = cmapTable.mappings; var cmapMappingsLength = cmapMappings.length; + let baseEncoding = []; + if ( + properties.hasEncoding && + (properties.baseEncodingName === "MacRomanEncoding" || + properties.baseEncodingName === "WinAnsiEncoding") + ) { + baseEncoding = getEncoding(properties.baseEncodingName); + } - // The spec seems to imply that if the font is symbolic the encoding - // should be ignored, this doesn't appear to work for 'preistabelle.pdf' - // where the the font is symbolic and it has an encoding. + // If the font has an encoding and is not symbolic then follow the + // rules in section 9.6.6.4 of the spec on how to map 3,1 and 1,0 + // cmaps. if ( - (properties.hasEncoding && - ((cmapPlatformId === 3 && cmapEncodingId === 1) || - (cmapPlatformId === 1 && cmapEncodingId === 0))) || - (cmapPlatformId === -1 && - cmapEncodingId === -1 && // Temporary hack - !!getEncoding(properties.baseEncodingName)) + properties.hasEncoding && + !this.isSymbolicFont && + ((cmapPlatformId === 3 && cmapEncodingId === 1) || + (cmapPlatformId === 1 && cmapEncodingId === 0)) ) { - // Temporary hack - // When no preferred cmap table was found and |baseEncodingName| is - // one of the predefined encodings, we seem to obtain a better - // |charCodeToGlyphId| map from the code below (fixes bug 1057544). - // TODO: Note that this is a hack which should be removed as soon as - // we have proper support for more exotic cmap tables. - - var baseEncoding = []; - if ( - properties.baseEncodingName === "MacRomanEncoding" || - properties.baseEncodingName === "WinAnsiEncoding" - ) { - baseEncoding = getEncoding(properties.baseEncodingName); - } var glyphsUnicodeMap = getGlyphsUnicode(); for (let charCode = 0; charCode < 256; charCode++) { var glyphName, standardGlyphName; @@ -2845,29 +2842,15 @@ var Font = (function FontClosure() { unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName); } - var found = false; for (let i = 0; i < cmapMappingsLength; ++i) { if (cmapMappings[i].charCode !== unicodeOrCharCode) { continue; } charCodeToGlyphId[charCode] = cmapMappings[i].glyphId; - found = true; break; } - if (!found && properties.glyphNames) { - // Try to map using the post table. - var glyphId = properties.glyphNames.indexOf(glyphName); - // The post table ought to use the same kind of glyph names as the - // `differences` array, but check the standard ones as a fallback. - if (glyphId === -1 && standardGlyphName !== glyphName) { - glyphId = properties.glyphNames.indexOf(standardGlyphName); - } - if (glyphId > 0 && hasGlyph(glyphId)) { - charCodeToGlyphId[charCode] = glyphId; - } - } } - } else if (cmapPlatformId === 0 && cmapEncodingId === 0) { + } else if (cmapPlatformId === 0) { // Default Unicode semantics, use the charcodes as is. for (let i = 0; i < cmapMappingsLength; ++i) { charCodeToGlyphId[cmapMappings[i].charCode] = @@ -2897,6 +2880,19 @@ var Font = (function FontClosure() { charCodeToGlyphId[charCode] = cmapMappings[i].glyphId; } } + + // Last, try to map any missing charcodes using the post table. + if (properties.glyphNames && baseEncoding.length) { + for (let i = 0; i < 256; ++i) { + if (charCodeToGlyphId[i] === undefined && baseEncoding[i]) { + glyphName = baseEncoding[i]; + const glyphId = properties.glyphNames.indexOf(glyphName); + if (glyphId > 0 && hasGlyph(glyphId)) { + charCodeToGlyphId[i] = glyphId; + } + } + } + } } if (charCodeToGlyphId.length === 0) { diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 3da0e6caba35d..8b3ad5998351a 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -88,6 +88,7 @@ !issue10665_reduced.pdf !issue11016_reduced.pdf !issue11045.pdf +!bug1057544.pdf !issue11150_reduced.pdf !issue11242_reduced.pdf !issue11279.pdf @@ -192,6 +193,7 @@ !issue4260_reduced.pdf !bug1250079.pdf !bug1473809.pdf +!issue12120_reduced.pdf !pdfjsbad1586.pdf !freeculture.pdf !issue6006.pdf diff --git a/test/pdfs/bug1057544.pdf b/test/pdfs/bug1057544.pdf new file mode 100644 index 0000000000000..4c1611c4cba49 Binary files /dev/null and b/test/pdfs/bug1057544.pdf differ diff --git a/test/pdfs/issue12120_reduced.pdf b/test/pdfs/issue12120_reduced.pdf new file mode 100644 index 0000000000000..7f791793aea0a Binary files /dev/null and b/test/pdfs/issue12120_reduced.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 9a9370bd84be7..3f56139dd57fb 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1991,6 +1991,12 @@ "type": "eq", "about": "MediaBox and CropBox with indirect objects." }, + { "id": "bug1057544", + "file": "pdfs/bug1057544.pdf", + "md5": "49ad71b82ead1ee0fe4ddb41aa9e30b4", + "rounds": 1, + "type": "eq" + }, { "id": "issue2642", "file": "pdfs/issue2642.pdf", "md5": "b6679861fdce3bbab0c1fa51bb7f5077", @@ -4204,6 +4210,12 @@ "lastPage": 2, "type": "eq" }, + { "id": "issue12120_reduced", + "file": "pdfs/issue12120_reduced.pdf", + "md5": "b4570dcee26ac3121ad3322e19ed1a6a", + "rounds": 1, + "type": "eq" + }, { "id": "issue4883", "file": "pdfs/issue4883.pdf", "md5": "2fac0d9a189ca5fcef8626153d050be8",