From 56424967f274591ff3cb7496c7fc9df9ff769e18 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Thu, 27 Aug 2020 16:04:17 +0200 Subject: [PATCH] Fix encoding issues when printing/saving a form with non-ascii characters --- src/core/annotation.js | 110 ++++++++----- src/core/cmap.js | 16 ++ src/core/fonts.js | 88 ++++++++++ src/shared/util.js | 16 ++ test/unit/annotation_spec.js | 307 +++++++++++++++++++++++++++++++++-- test/unit/util_spec.js | 24 +++ 6 files changed, 506 insertions(+), 55 deletions(-) diff --git a/src/core/annotation.js b/src/core/annotation.js index 04c1b0d355930..d0b4199be9288 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -23,10 +23,12 @@ import { assert, escapeString, getModificationDate, + isAscii, isString, OPS, shadow, stringToPDFString, + stringToUTF16BEString, unreachable, Util, warn, @@ -1222,7 +1224,7 @@ class WidgetAnnotation extends Annotation { appearance = newTransform.encryptString(appearance); } - dict.set("V", value); + dict.set("V", isAscii(value) ? value : stringToUTF16BEString(value)); dict.set("AP", AP); dict.set("M", `D:${getModificationDate()}`); @@ -1298,25 +1300,29 @@ class WidgetAnnotation extends Annotation { const defaultAppearance = this.data.defaultAppearance; const alignment = this.data.textAlignment; - if (this.data.comb) { - return this._getCombAppearance( + if (this.data.multiLine) { + return this._getMultilineAppearance( defaultAppearance, value, + font, + fontSize, totalWidth, + totalHeight, + alignment, hPadding, vPadding ); } - if (this.data.multiLine) { - return this._getMultilineAppearance( + // TODO: need to handle chars which are not in the font. 
+ const encodedString = font.encodeString(value).join(""); + + if (this.data.comb) { + return this._getCombAppearance( defaultAppearance, - value, font, - fontSize, + encodedString, totalWidth, - totalHeight, - alignment, hPadding, vPadding ); @@ -1327,13 +1333,15 @@ class WidgetAnnotation extends Annotation { return ( "/Tx BMC q BT " + defaultAppearance + - ` 1 0 0 1 ${hPadding} ${vPadding} Tm (${escapeString(value)}) Tj` + + ` 1 0 0 1 ${hPadding} ${vPadding} Tm (${escapeString( + encodedString + )}) Tj` + " ET Q EMC" ); } const renderedText = this._renderText( - value, + encodedString, font, fontSize, totalWidth, @@ -1373,10 +1381,21 @@ class WidgetAnnotation extends Annotation { _computeFontSize(font, fontName, fontSize, height) { if (fontSize === null || fontSize === 0) { - const em = font.charsToGlyphs("M")[0].width / 1000; - // According to https://en.wikipedia.org/wiki/Em_(typography) - // an average cap height should be 70% of 1em - const capHeight = 0.7 * em; + let capHeight; + if (font.capHeight) { + capHeight = font.capHeight; + } else { + const glyphs = font.charsToGlyphs(font.encodeString("M").join("")); + if (glyphs.length === 1 && glyphs[0].width) { + const em = glyphs[0].width / 1000; + // According to https://en.wikipedia.org/wiki/Em_(typography) + // an average cap height should be 70% of 1em + capHeight = 0.7 * em; + } else { + capHeight = 0.7; + } + } + // 1.5 * capHeight * fontSize seems to be a good value for lineHeight fontSize = Math.max(1, Math.floor(height / (1.5 * capHeight))); @@ -1510,11 +1529,12 @@ class TextWidgetAnnotation extends WidgetAnnotation { this.data.maxLen !== null; } - _getCombAppearance(defaultAppearance, text, width, hPadding, vPadding) { + _getCombAppearance(defaultAppearance, font, text, width, hPadding, vPadding) { const combWidth = (width / this.data.maxLen).toFixed(2); const buf = []; - for (const character of text) { - buf.push(`(${escapeString(character)}) Tj`); + const positions = font.getCharPositions(text); + 
for (const [start, end] of positions) { + buf.push(`(${escapeString(text.substring(start, end))}) Tj`); } const renderedComb = buf.join(` ${combWidth} 0 Td `); @@ -1568,49 +1588,61 @@ class TextWidgetAnnotation extends WidgetAnnotation { } _splitLine(line, font, fontSize, width) { - if (line.length <= 1) { + // TODO: need to handle chars which are not in the font. + line = font.encodeString(line).join(""); + + const glyphs = font.charsToGlyphs(line); + + if (glyphs.length <= 1) { // Nothing to split return [line]; } + const positions = font.getCharPositions(line); const scale = fontSize / 1000; - const whitespace = font.charsToGlyphs(" ")[0].width * scale; const chunks = []; - let lastSpacePos = -1, + let lastSpacePosInStringStart = -1, + lastSpacePosInStringEnd = -1, + lastSpacePos = -1, startChunk = 0, currentWidth = 0; - for (let i = 0, ii = line.length; i < ii; i++) { - const character = line.charAt(i); - if (character === " ") { - if (currentWidth + whitespace > width) { + for (let i = 0, ii = glyphs.length; i < ii; i++) { + const [start, end] = positions[i]; + const glyph = glyphs[i]; + const glyphWidth = glyph.width * scale; + if (glyph.unicode === " ") { + if (currentWidth + glyphWidth > width) { // We can break here - chunks.push(line.substring(startChunk, i)); - startChunk = i; - currentWidth = whitespace; + chunks.push(line.substring(startChunk, start)); + startChunk = start; + currentWidth = glyphWidth; + lastSpacePosInStringStart = -1; lastSpacePos = -1; } else { - currentWidth += whitespace; + currentWidth += glyphWidth; + lastSpacePosInStringStart = start; + lastSpacePosInStringEnd = end; lastSpacePos = i; } } else { - const charWidth = font.charsToGlyphs(character)[0].width * scale; - if (currentWidth + charWidth > width) { + if (currentWidth + glyphWidth > width) { // We must break to the last white position (if available) - if (lastSpacePos !== -1) { - chunks.push(line.substring(startChunk, lastSpacePos + 1)); - startChunk = i = lastSpacePos + 1; 
- lastSpacePos = -1; + if (lastSpacePosInStringStart !== -1) { + chunks.push(line.substring(startChunk, lastSpacePosInStringEnd)); + startChunk = lastSpacePosInStringEnd; + i = lastSpacePos + 1; + lastSpacePosInStringStart = -1; currentWidth = 0; } else { // Just break in the middle of the word - chunks.push(line.substring(startChunk, i)); - startChunk = i; - currentWidth = charWidth; + chunks.push(line.substring(startChunk, start)); + startChunk = start; + currentWidth = glyphWidth; } } else { - currentWidth += charWidth; + currentWidth += glyphWidth; } } } diff --git a/src/core/cmap.js b/src/core/cmap.js index 838b4a332c186..54c2477777e53 100644 --- a/src/core/cmap.js +++ b/src/core/cmap.js @@ -338,6 +338,22 @@ class CMap { out.length = 1; } + getCharCodeLength(charCode) { + const codespaceRanges = this.codespaceRanges; + for (let n = 0, nn = codespaceRanges.length; n < nn; n++) { + // Check each codespace range to see if it falls within. + const codespaceRange = codespaceRanges[n]; + for (let k = 0, kk = codespaceRange.length; k < kk; ) { + const low = codespaceRange[k++]; + const high = codespaceRange[k++]; + if (charCode >= low && charCode <= high) { + return n + 1; + } + } + } + return 1; + } + get length() { return this._map.length; } diff --git a/src/core/fonts.js b/src/core/fonts.js index 890e4ca293070..cc3bfce70f5d8 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -590,6 +590,7 @@ var Font = (function FontClosure() { this.defaultWidth = properties.defaultWidth; this.composite = properties.composite; this.cMap = properties.cMap; + this.capHeight = properties.capHeight / PDF_GLYPH_SPACE_UNITS; this.ascent = properties.ascent / PDF_GLYPH_SPACE_UNITS; this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS; this.fontMatrix = properties.fontMatrix; @@ -3351,9 +3352,93 @@ var Font = (function FontClosure() { return (charsCache[charsCacheKey] = glyphs); }, + /** + * Chars can have different sizes (depends on the encoding). 
+ * @param {String} a string encoded with font encoding. + * @returns {Array<Array<number>>} the positions of each char in the string. + */ + getCharPositions(chars) { + // This function doesn't use a cache because + // it's called only when saving or printing. + const positions = []; + + if (this.cMap) { + const c = Object.create(null); + let i = 0; + while (i < chars.length) { + this.cMap.readCharCode(chars, i, c); + const length = c.length; + positions.push([i, i + length]); + i += length; + } + } else { + for (let i = 0, ii = chars.length; i < ii; ++i) { + positions.push([i, i + 1]); + } + } + + return positions; + }, + get glyphCacheValues() { return Object.values(this.glyphCache); }, + + /** + * Encode a js string using font encoding. + * The resulting array contains an encoded string at even positions + * (can be empty) and a non-encoded one at odd positions. + * @param {String} a js string. + * @returns {Array} an array of encoded strings or non-encoded ones. + */ + encodeString(str) { + const buffers = []; + const currentBuf = []; + + // buffers will contain: encoded, non-encoded, encoded, ... + // currentBuf is pushed in buffers each time there is a change. + // So when buffers.length is odd then the last string is an encoded one + // and currentBuf contains non-encoded chars. + const hasCurrentBufErrors = () => buffers.length % 2 === 1; + + for (let i = 0, ii = str.length; i < ii; i++) { + const unicode = str.codePointAt(i); + if (unicode > 0xd7ff && (unicode < 0xe000 || unicode > 0xfffd)) { + // unicode is represented by two uint16 + i++; + } + if (this.toUnicode) { + const char = String.fromCodePoint(unicode); + const charCode = this.toUnicode.charCodeOf(char); + if (charCode !== -1) { + if (hasCurrentBufErrors()) { + buffers.push(currentBuf.join("")); + currentBuf.length = 0; + } + const charCodeLength = this.cMap + ? 
this.cMap.getCharCodeLength(charCode) + : 1; + for (let j = charCodeLength - 1; j >= 0; j--) { + currentBuf.push( + String.fromCharCode((charCode >> (8 * j)) & 0xff) + ); + } + continue; + } + } + + // unicode can't be encoded + if (!hasCurrentBufErrors()) { + buffers.push(currentBuf.join("")); + currentBuf.length = 0; + } + currentBuf.push(String.fromCodePoint(unicode)); + } + + buffers.push(currentBuf.join("")); + + return buffers; + }, }; return Font; @@ -3371,6 +3456,9 @@ var ErrorFont = (function ErrorFontClosure() { charsToGlyphs: function ErrorFont_charsToGlyphs() { return []; }, + encodeString: function ErrorFont_encodeString(chars) { + return [chars]; + }, exportData(extraProperties = false) { return { error: this.error }; }, diff --git a/src/shared/util.js b/src/shared/util.js index 77dddfb9c9464..fc7ef95441e98 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -842,6 +842,20 @@ function escapeString(str) { }); } +function isAscii(str) { + return /^[\x00-\x7F]*$/.test(str); +} + +function stringToUTF16BEString(str) { + const buf = ["\xFE\xFF"]; + for (let i = 0, ii = str.length; i < ii; i++) { + const char = str.charCodeAt(i); + buf.push(String.fromCharCode((char >> 8) & 0xff)); + buf.push(String.fromCharCode(char & 0xff)); + } + return buf.join(""); +} + function stringToUTF8String(str) { return decodeURIComponent(escape(str)); } @@ -1044,6 +1058,7 @@ export { getModificationDate, getVerbosityLevel, info, + isAscii, isArrayBuffer, isArrayEqual, isBool, @@ -1061,6 +1076,7 @@ export { string32, stringToBytes, stringToPDFString, + stringToUTF16BEString, stringToUTF8String, utf8StringToString, warn, diff --git a/test/unit/annotation_spec.js b/test/unit/annotation_spec.js index 75a454d576b89..0abf022a50403 100644 --- a/test/unit/annotation_spec.js +++ b/test/unit/annotation_spec.js @@ -32,10 +32,18 @@ import { import { createIdFactory, XRefMock } from "./test_utils.js"; import { Dict, Name, Ref, RefSetCache } from "../../src/core/primitives.js"; 
import { Lexer, Parser } from "../../src/core/parser.js"; +import { DOMCMapReaderFactory } from "../../src/display/display_utils.js"; +import { isNodeJS } from "../../src/shared/is_node.js"; +import { NodeCMapReaderFactory } from "../../src/display/node_utils.js"; import { PartialEvaluator } from "../../src/core/evaluator.js"; import { StringStream } from "../../src/core/stream.js"; import { WorkerTask } from "../../src/core/worker.js"; +const cMapUrl = { + dom: "../../external/bcmaps/", + node: "./external/bcmaps/", +}; + describe("annotation", function () { class PDFManagerMock { constructor(params) { @@ -82,6 +90,30 @@ describe("annotation", function () { pdfManagerMock = new PDFManagerMock({ docBaseUrl: null, }); + + let CMapReaderFactory; + if (isNodeJS) { + CMapReaderFactory = new NodeCMapReaderFactory({ + baseUrl: cMapUrl.node, + isCompressed: true, + }); + } else { + CMapReaderFactory = new DOMCMapReaderFactory({ + baseUrl: cMapUrl.dom, + isCompressed: true, + }); + } + + const builtInCMapCache = new Map(); + builtInCMapCache.set( + "UniJIS-UTF16-H", + CMapReaderFactory.fetch({ name: "UniJIS-UTF16-H" }) + ); + builtInCMapCache.set( + "Adobe-Japan1-UCS2", + CMapReaderFactory.fetch({ name: "Adobe-Japan1-UCS2" }) + ); + idFactoryMock = createIdFactory(/* pageIndex = */ 0); partialEvaluator = new PartialEvaluator({ xref: new XRefMock(), @@ -89,7 +121,9 @@ describe("annotation", function () { pageIndex: 0, idFactory: createIdFactory(/* pageIndex = */ 0), fontCache: new RefSetCache(), + builtInCMapCache, }); + done(); }); @@ -1419,7 +1453,7 @@ describe("annotation", function () { }); describe("TextWidgetAnnotation", function () { - let textWidgetDict, fontRefObj; + let textWidgetDict, helvRefObj, gothRefObj; beforeEach(function (done) { textWidgetDict = new Dict(); @@ -1432,11 +1466,38 @@ describe("annotation", function () { helvDict.set("Type", Name.get("Font")); helvDict.set("Subtype", Name.get("Type1")); - const fontRef = Ref.get(314, 0); - fontRefObj = { ref: 
fontRef, data: helvDict }; + const gothDict = new Dict(); + gothDict.set("BaseFont", Name.get("MSGothic")); + gothDict.set("Type", Name.get("Font")); + gothDict.set("Subtype", Name.get("Type0")); + gothDict.set("Encoding", Name.get("UniJIS-UTF16-H")); + gothDict.set("Name", Name.get("MSGothic")); + + const cidSysInfoDict = new Dict(); + cidSysInfoDict.set("Ordering", "Japan1"); + cidSysInfoDict.set("Registry", "Adobe"); + cidSysInfoDict.set("Supplement", "5"); + + const fontDescriptorDict = new Dict(); + fontDescriptorDict.set("FontName", Name.get("MSGothic")); + fontDescriptorDict.set("CapHeight", "680"); + + const gothDescendantDict = new Dict(); + gothDescendantDict.set("BaseFont", Name.get("MSGothic")); + gothDescendantDict.set("CIDSystemInfo", cidSysInfoDict); + gothDescendantDict.set("Subtype", Name.get("CIDFontType2")); + gothDescendantDict.set("Type", Name.get("Font")); + gothDescendantDict.set("FontDescriptor", fontDescriptorDict); + + gothDict.set("DescendantFonts", [gothDescendantDict]); + + const helvRef = Ref.get(314, 0); + const gothRef = Ref.get(159, 0); + helvRefObj = { ref: helvRef, data: helvDict }; + gothRefObj = { ref: gothRef, data: gothDict }; const resourceDict = new Dict(); const fontDict = new Dict(); - fontDict.set("Helv", fontRef); + fontDict.set("Helv", helvRef); resourceDict.set("Font", fontDict); textWidgetDict.set("DA", "/Helv 5 Tf"); @@ -1447,7 +1508,7 @@ describe("annotation", function () { }); afterEach(function () { - textWidgetDict = fontRefObj = null; + textWidgetDict = helvRefObj = gothRefObj = null; }); it("should handle unknown text alignment, maximum length and flags", function (done) { @@ -1614,7 +1675,7 @@ describe("annotation", function () { const textWidgetRef = Ref.get(271, 0); const xref = new XRefMock([ { ref: textWidgetRef, data: textWidgetDict }, - fontRefObj, + helvRefObj, ]); const task = new WorkerTask("test print"); partialEvaluator.xref = xref; @@ -1644,6 +1705,46 @@ describe("annotation", function () { }, 
done.fail); }); + it("should render regular text in Japanese for printing", function (done) { + textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref); + textWidgetDict.set("DA", "/Goth 5 Tf"); + + const textWidgetRef = Ref.get(271, 0); + const xref = new XRefMock([ + { ref: textWidgetRef, data: textWidgetDict }, + gothRefObj, + ]); + const task = new WorkerTask("test print"); + partialEvaluator.xref = xref; + + AnnotationFactory.create( + xref, + textWidgetRef, + pdfManagerMock, + idFactoryMock + ) + .then(annotation => { + const id = annotation.data.id; + const annotationStorage = {}; + annotationStorage[id] = { value: "こんにちは世界の" }; + return annotation._getAppearance( + partialEvaluator, + task, + annotationStorage + ); + }, done.fail) + .then(appearance => { + const utf16String = + "\x30\x53\x30\x93\x30\x6b\x30\x61" + + "\x30\x6f\x4e\x16\x75\x4c\x30\x6e"; + expect(appearance).toEqual( + "/Tx BMC q BT /Goth 5 Tf 1 0 0 1 0 0 Tm" + + ` 2.00 2.00 Td (${utf16String}) Tj ET Q EMC` + ); + done(); + }, done.fail); + }); + it("should render regular text for printing using normal appearance", function (done) { const textWidgetRef = Ref.get(271, 0); @@ -1658,7 +1759,7 @@ describe("annotation", function () { const xref = new XRefMock([ { ref: textWidgetRef, data: textWidgetDict }, - fontRefObj, + helvRefObj, ]); const task = new WorkerTask("test print"); partialEvaluator.xref = xref; @@ -1699,7 +1800,7 @@ describe("annotation", function () { const textWidgetRef = Ref.get(271, 0); const xref = new XRefMock([ { ref: textWidgetRef, data: textWidgetDict }, - fontRefObj, + helvRefObj, ]); const task = new WorkerTask("test print"); partialEvaluator.xref = xref; @@ -1729,13 +1830,53 @@ describe("annotation", function () { }, done.fail); }); + it("should render auto-sized text in Japanese for printing", function (done) { + textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref); + textWidgetDict.set("DA", "/Goth 0 Tf"); + + const textWidgetRef = Ref.get(271, 0); 
+ const xref = new XRefMock([ + { ref: textWidgetRef, data: textWidgetDict }, + gothRefObj, + ]); + const task = new WorkerTask("test print"); + partialEvaluator.xref = xref; + + AnnotationFactory.create( + xref, + textWidgetRef, + pdfManagerMock, + idFactoryMock + ) + .then(annotation => { + const id = annotation.data.id; + const annotationStorage = {}; + annotationStorage[id] = { value: "こんにちは世界の" }; + return annotation._getAppearance( + partialEvaluator, + task, + annotationStorage + ); + }, done.fail) + .then(appearance => { + const utf16String = + "\x30\x53\x30\x93\x30\x6b\x30\x61" + + "\x30\x6f\x4e\x16\x75\x4c\x30\x6e"; + expect(appearance).toEqual( + "/Tx BMC q BT /Goth 9 Tf 1 0 0 1 0 0 Tm" + + ` 2.00 2.00 Td (${utf16String}) Tj ET Q EMC` + ); + done(); + }, done.fail); + }); + it("should not render a password for printing", function (done) { textWidgetDict.set("Ff", AnnotationFieldFlag.PASSWORD); const textWidgetRef = Ref.get(271, 0); const xref = new XRefMock([ { ref: textWidgetRef, data: textWidgetDict }, - fontRefObj, + helvRefObj, ]); const task = new WorkerTask("test print"); partialEvaluator.xref = xref; @@ -1768,7 +1909,7 @@ describe("annotation", function () { const textWidgetRef = Ref.get(271, 0); const xref = new XRefMock([ { ref: textWidgetRef, data: textWidgetDict }, - fontRefObj, + helvRefObj, ]); const task = new WorkerTask("test print"); partialEvaluator.xref = xref; @@ -1808,6 +1949,45 @@ describe("annotation", function () { }, done.fail); }); + it("should render multiline text in Japanese for printing", function (done) { + textWidgetDict.set("Ff", AnnotationFieldFlag.MULTILINE); + textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref); + textWidgetDict.set("DA", "/Goth 5 Tf"); + + const textWidgetRef = Ref.get(271, 0); + const xref = new XRefMock([ + { ref: textWidgetRef, data: textWidgetDict }, + gothRefObj, + ]); + const task = new WorkerTask("test print"); + partialEvaluator.xref = xref; + + AnnotationFactory.create( + xref, + 
textWidgetRef, + pdfManagerMock, + idFactoryMock + ) + .then(annotation => { + const id = annotation.data.id; + const annotationStorage = {}; + annotationStorage[id] = { value: "こんにちは世界の" }; + return annotation._getAppearance( + partialEvaluator, + task, + annotationStorage + ); + }, done.fail) + .then(appearance => { + expect(appearance).toEqual( + "/Tx BMC q BT /Goth 5 Tf 1 0 0 1 0 10 Tm " + + "2.00 -5.00 Td (\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f) Tj\n" + + "0.00 -5.00 Td (\x4e\x16\x75\x4c\x30\x6e) Tj ET Q EMC" + ); + done(); + }, done.fail); + }); + it("should render multiline text with various EOL for printing", function (done) { textWidgetDict.set("Ff", AnnotationFieldFlag.MULTILINE); textWidgetDict.set("Rect", [0, 0, 128, 10]); @@ -1815,7 +1995,7 @@ describe("annotation", function () { const textWidgetRef = Ref.get(271, 0); const xref = new XRefMock([ { ref: textWidgetRef, data: textWidgetDict }, - fontRefObj, + helvRefObj, ]); const task = new WorkerTask("test print"); partialEvaluator.xref = xref; @@ -1881,7 +2061,7 @@ describe("annotation", function () { const textWidgetRef = Ref.get(271, 0); const xref = new XRefMock([ { ref: textWidgetRef, data: textWidgetDict }, - fontRefObj, + helvRefObj, ]); const task = new WorkerTask("test print"); partialEvaluator.xref = xref; @@ -1914,9 +2094,55 @@ describe("annotation", function () { }, done.fail); }); + it("should render comb with Japanese text for printing", function (done) { + textWidgetDict.set("Ff", AnnotationFieldFlag.COMB); + textWidgetDict.set("MaxLen", 4); + textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref); + textWidgetDict.set("DA", "/Goth 5 Tf"); + textWidgetDict.set("Rect", [0, 0, 32, 10]); + + const textWidgetRef = Ref.get(271, 0); + const xref = new XRefMock([ + { ref: textWidgetRef, data: textWidgetDict }, + gothRefObj, + ]); + const task = new WorkerTask("test print"); + partialEvaluator.xref = xref; + + AnnotationFactory.create( + xref, + textWidgetRef, + pdfManagerMock, + 
idFactoryMock + ) + .then(annotation => { + const id = annotation.data.id; + const annotationStorage = {}; + annotationStorage[id] = { value: "こんにちは世界の" }; + return annotation._getAppearance( + partialEvaluator, + task, + annotationStorage + ); + }, done.fail) + .then(appearance => { + expect(appearance).toEqual( + "/Tx BMC q BT /Goth 5 Tf 1 0 0 1 2 2 Tm" + + " (\x30\x53) Tj 8.00 0 Td (\x30\x93) Tj 8.00 0 Td (\x30\x6b) Tj" + + " 8.00 0 Td (\x30\x61) Tj 8.00 0 Td (\x30\x6f) Tj" + + " 8.00 0 Td (\x4e\x16) Tj 8.00 0 Td (\x75\x4c) Tj" + + " 8.00 0 Td (\x30\x6e) Tj ET Q EMC" + ); + done(); + }, done.fail); + }); + it("should save text", function (done) { const textWidgetRef = Ref.get(123, 0); - const xref = new XRefMock([{ ref: textWidgetRef, data: textWidgetDict }]); + const xref = new XRefMock([ + { ref: textWidgetRef, data: textWidgetDict }, + helvRefObj, + ]); partialEvaluator.xref = xref; const task = new WorkerTask("test save"); @@ -1935,17 +2161,17 @@ describe("annotation", function () { expect(data.length).toEqual(2); const [oldData, newData] = data; expect(oldData.ref).toEqual(Ref.get(123, 0)); - expect(newData.ref).toEqual(Ref.get(1, 0)); + expect(newData.ref).toEqual(Ref.get(2, 0)); oldData.data = oldData.data.replace(/\(D:[0-9]+\)/, "(date)"); expect(oldData.data).toEqual( "123 0 obj\n" + "<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Helv 5 Tf) /DR " + "<< /Font << /Helv 314 0 R>>>> /Rect [0 0 32 10] " + - "/V (hello world) /AP << /N 1 0 R>> /M (date)>>\nendobj\n" + "/V (hello world) /AP << /N 2 0 R>> /M (date)>>\nendobj\n" ); expect(newData.data).toEqual( - "1 0 obj\n<< /Length 77 /Subtype /Form /Resources " + + "2 0 obj\n<< /Length 77 /Subtype /Form /Resources " + "<< /Font << /Helv 314 0 R>>>> /BBox [0 0 32 10]>> stream\n" + "/Tx BMC q BT /Helv 5 Tf 1 0 0 1 0 0 Tm 2.00 2.00 Td (hello world) Tj " + "ET Q EMC\nendstream\nendobj\n" @@ -2039,6 +2265,55 @@ describe("annotation", function () { done(); }, done.fail); }); + + it("should save Japanese text", 
function (done) { + textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref); + textWidgetDict.set("DA", "/Goth 5 Tf"); + + const textWidgetRef = Ref.get(123, 0); + const xref = new XRefMock([ + { ref: textWidgetRef, data: textWidgetDict }, + gothRefObj, + ]); + partialEvaluator.xref = xref; + const task = new WorkerTask("test save"); + + AnnotationFactory.create( + xref, + textWidgetRef, + pdfManagerMock, + idFactoryMock + ) + .then(annotation => { + const annotationStorage = {}; + annotationStorage[annotation.data.id] = { value: "こんにちは世界の" }; + return annotation.save(partialEvaluator, task, annotationStorage); + }, done.fail) + .then(data => { + const utf16String = + "\x30\x53\x30\x93\x30\x6b\x30\x61" + + "\x30\x6f\x4e\x16\x75\x4c\x30\x6e"; + expect(data.length).toEqual(2); + const [oldData, newData] = data; + expect(oldData.ref).toEqual(Ref.get(123, 0)); + expect(newData.ref).toEqual(Ref.get(2, 0)); + + oldData.data = oldData.data.replace(/\(D:[0-9]+\)/, "(date)"); + expect(oldData.data).toEqual( + "123 0 obj\n" + + "<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Goth 5 Tf) /DR " + + "<< /Font << /Helv 314 0 R /Goth 159 0 R>>>> /Rect [0 0 32 10] " + + `/V (\xfe\xff${utf16String}) /AP << /N 2 0 R>> /M (date)>>\nendobj\n` + ); + expect(newData.data).toEqual( + "2 0 obj\n<< /Length 82 /Subtype /Form /Resources " + + "<< /Font << /Helv 314 0 R /Goth 159 0 R>>>> /BBox [0 0 32 10]>> stream\n" + + `/Tx BMC q BT /Goth 5 Tf 1 0 0 1 0 0 Tm 2.00 2.00 Td (${utf16String}) Tj ` + + "ET Q EMC\nendstream\nendobj\n" + ); + done(); + }, done.fail); + }); }); describe("ButtonWidgetAnnotation", function () { diff --git a/test/unit/util_spec.js b/test/unit/util_spec.js index b462ee744c92f..52c9f41d7894f 100644 --- a/test/unit/util_spec.js +++ b/test/unit/util_spec.js @@ -21,6 +21,7 @@ import { escapeString, getModificationDate, isArrayBuffer, + isAscii, isBool, isNum, isSameOrigin, @@ -29,6 +30,7 @@ import { string32, stringToBytes, stringToPDFString, + 
stringToUTF16BEString, } from "../../src/shared/util.js"; describe("util", function () { @@ -346,4 +348,26 @@ describe("util", function () { expect(encodeToXmlString(str)).toEqual(str); }); }); + + describe("isAscii", function () { + it("handles ascii/non-ascii strings", function () { + expect(isAscii("hello world")).toEqual(true); + expect(isAscii("こんにちは世界の")).toEqual(false); + expect(isAscii("hello world in Japanese is こんにちは世界の")).toEqual( + false + ); + }); + }); + + describe("stringToUTF16BEString", function () { + it("should encode a string in UTF16BE with a BOM", function () { + expect(stringToUTF16BEString("hello world")).toEqual( + "\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d" + ); + expect(stringToUTF16BEString("こんにちは世界の")).toEqual( + "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61" + + "\x30\x6f\x4e\x16\x75\x4c\x30\x6e" + ); + }); + }); });