Skip to content

Commit

Permalink
Fix encoding issues when printing/saving a form with non-ascii characters
Browse files Browse the repository at this point in the history
  • Loading branch information
calixteman committed Jan 3, 2021
1 parent 187542d commit 1a6adde
Show file tree
Hide file tree
Showing 6 changed files with 555 additions and 73 deletions.
151 changes: 94 additions & 57 deletions src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ import {
assert,
escapeString,
getModificationDate,
isAscii,
isString,
OPS,
shadow,
stringToPDFString,
stringToUTF16BEString,
unreachable,
Util,
warn,
Expand Down Expand Up @@ -286,9 +288,7 @@ class Annotation {
this.setAppearance(dict);

this._streams = [];
if (this.appearance) {
this._streams.push(this.appearance);
}
this._addStream(this.appearance);

// Expose public properties using a data object.
this.data = {
Expand All @@ -306,6 +306,17 @@ class Annotation {
this._fallbackFontDict = null;
}

/**
* @private
*/
_addStream(...streams) {
for (const stream of streams) {
if (stream) {
this._streams.push(stream);
}
}
}

/**
* @private
*/
Expand Down Expand Up @@ -953,7 +964,7 @@ class MarkupAnnotation extends Annotation {

// This method is only called if there is no appearance for the annotation,
// so `this.appearance` is not pushed yet in the `Annotation` constructor.
this._streams.push(this.appearance, appearanceStream);
this._addStream(this.appearance, appearanceStream);
}
}

Expand Down Expand Up @@ -1222,7 +1233,7 @@ class WidgetAnnotation extends Annotation {
appearance = newTransform.encryptString(appearance);
}

dict.set("V", value);
dict.set("V", isAscii(value) ? value : stringToUTF16BEString(value));
dict.set("AP", AP);
dict.set("M", `D:${getModificationDate()}`);

Expand Down Expand Up @@ -1286,37 +1297,42 @@ class WidgetAnnotation extends Annotation {

const fontInfo = await this._getFontData(evaluator, task);
const [font, fontName] = fontInfo;
const fontSize = this._computeFontSize(...fontInfo, totalHeight);
this._fontName = fontName;

let descent = font.descent;
if (isNaN(descent)) {
descent = 0;
}
const lineHeight = totalHeight;
const fontSize = this._computeFontSize(...fontInfo, lineHeight);

const descent = font.descent;
const vPadding = isNaN(descent)
? defaultPadding
: Math.abs(descent) * fontSize;

const vPadding = defaultPadding + Math.abs(descent) * fontSize;
const defaultAppearance = this.data.defaultAppearance;
const alignment = this.data.textAlignment;

if (this.data.comb) {
return this._getCombAppearance(
if (this.data.multiLine) {
return this._getMultilineAppearance(
defaultAppearance,
value,
font,
fontSize,
totalWidth,
totalHeight,
alignment,
hPadding,
vPadding
);
}

if (this.data.multiLine) {
return this._getMultilineAppearance(
// TODO: need to handle chars which are not in the font
const encodedString = font.encodeString(value).join("");

if (this.data.comb) {
return this._getCombAppearance(
defaultAppearance,
value,
font,
fontSize,
encodedString,
totalWidth,
totalHeight,
alignment,
hPadding,
vPadding
);
Expand All @@ -1327,13 +1343,15 @@ class WidgetAnnotation extends Annotation {
return (
"/Tx BMC q BT " +
defaultAppearance +
` 1 0 0 1 ${hPadding} ${vPadding} Tm (${escapeString(value)}) Tj` +
` 1 0 0 1 ${hPadding} ${vPadding} Tm (${escapeString(
encodedString
)}) Tj` +
" ET Q EMC"
);
}

const renderedText = this._renderText(
value,
encodedString,
font,
fontSize,
totalWidth,
Expand Down Expand Up @@ -1373,10 +1391,24 @@ class WidgetAnnotation extends Annotation {

_computeFontSize(font, fontName, fontSize, height) {
if (fontSize === null || fontSize === 0) {
const em = font.charsToGlyphs("M")[0].width / 1000;
// According to https://en.wikipedia.org/wiki/Em_(typography)
// an average cap height should be 70% of 1em
const capHeight = 0.7 * em;
let capHeight;
if (font.capHeight) {
capHeight = font.capHeight;
} else {
const glyphs = font.charsToGlyphs(
font.encodeString("M").join(""),
true
);
if (glyphs.length === 1 && glyphs[0].width) {
const em = glyphs[0].width / 1000;
// According to https://en.wikipedia.org/wiki/Em_(typography)
// an average cap height should be 70% of 1em
capHeight = 0.7 * em;
} else {
capHeight = 0.7;
}
}

// 1.5 * capHeight * fontSize seems to be a good value for lineHeight
fontSize = Math.max(1, Math.floor(height / (1.5 * capHeight)));

Expand Down Expand Up @@ -1510,11 +1542,12 @@ class TextWidgetAnnotation extends WidgetAnnotation {
this.data.maxLen !== null;
}

_getCombAppearance(defaultAppearance, text, width, hPadding, vPadding) {
_getCombAppearance(defaultAppearance, font, text, width, hPadding, vPadding) {
const combWidth = (width / this.data.maxLen).toFixed(2);
const buf = [];
for (const character of text) {
buf.push(`(${escapeString(character)}) Tj`);
const positions = font.getCharPosition(text);
for (const [start, end] of positions) {
buf.push(`(${escapeString(text.substring(start, end))}) Tj`);
}

const renderedComb = buf.join(` ${combWidth} 0 Td `);
Expand Down Expand Up @@ -1568,49 +1601,59 @@ class TextWidgetAnnotation extends WidgetAnnotation {
}

_splitLine(line, font, fontSize, width) {
if (line.length <= 1) {
// TODO: need to handle chars which are not in the font
line = font.encodeString(line).join("");

const glyphs = font.getGlyphsAndCharPosition(line);

if (glyphs.length <= 1) {
// Nothing to split
return [line];
}

const scale = fontSize / 1000;
const whitespace = font.charsToGlyphs(" ")[0].width * scale;
const chunks = [];

let lastSpacePos = -1,
let lastSpacePosInStringStart = -1,
lastSpacePosInStringEnd = -1,
lastSpacePos = -1,
startChunk = 0,
currentWidth = 0;

for (let i = 0, ii = line.length; i < ii; i++) {
const character = line.charAt(i);
if (character === " ") {
if (currentWidth + whitespace > width) {
for (let i = 0, ii = glyphs.length; i < ii; i++) {
const { glyph, start, end } = glyphs[i];
const glyphWidth = glyph.width * scale;
if (glyph.unicode === " ") {
if (currentWidth + glyphWidth > width) {
// We can break here
chunks.push(line.substring(startChunk, i));
startChunk = i;
currentWidth = whitespace;
chunks.push(line.substring(startChunk, start));
startChunk = start;
currentWidth = glyphWidth;
lastSpacePosInStringStart = -1;
lastSpacePos = -1;
} else {
currentWidth += whitespace;
currentWidth += glyphWidth;
lastSpacePosInStringStart = start;
lastSpacePosInStringEnd = end;
lastSpacePos = i;
}
} else {
const charWidth = font.charsToGlyphs(character)[0].width * scale;
if (currentWidth + charWidth > width) {
if (currentWidth + glyphWidth > width) {
// We must break to the last white position (if available)
if (lastSpacePos !== -1) {
chunks.push(line.substring(startChunk, lastSpacePos + 1));
startChunk = i = lastSpacePos + 1;
lastSpacePos = -1;
if (lastSpacePosInStringStart !== -1) {
chunks.push(line.substring(startChunk, lastSpacePosInStringEnd));
startChunk = lastSpacePosInStringEnd;
i = lastSpacePos + 1;
lastSpacePosInStringStart = -1;
currentWidth = 0;
} else {
// Just break in the middle of the word
chunks.push(line.substring(startChunk, i));
startChunk = i;
currentWidth = charWidth;
chunks.push(line.substring(startChunk, start));
startChunk = start;
currentWidth = glyphWidth;
}
} else {
currentWidth += charWidth;
currentWidth += glyphWidth;
}
}
}
Expand Down Expand Up @@ -1876,10 +1919,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
this.checkedAppearance = normalAppearance.get(this.data.exportValue);
this.uncheckedAppearance = normalAppearance.get("Off") || null;

this._streams.push(this.checkedAppearance);
if (this.uncheckedAppearance) {
this._streams.push(this.uncheckedAppearance);
}
this._addStream(this.checkedAppearance, this.uncheckedAppearance);
this._fallbackFontDict = this.fallbackFontDict;
}

Expand Down Expand Up @@ -1916,10 +1956,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
this.checkedAppearance = normalAppearance.get(this.data.buttonValue);
this.uncheckedAppearance = normalAppearance.get("Off") || null;

this._streams.push(this.checkedAppearance);
if (this.uncheckedAppearance) {
this._streams.push(this.uncheckedAppearance);
}
this._addStream(this.checkedAppearance, this.uncheckedAppearance);
this._fallbackFontDict = this.fallbackFontDict;
}

Expand Down
16 changes: 16 additions & 0 deletions src/core/cmap.js
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,22 @@ class CMap {
out.length = 1;
}

getCharCodeLength(charCode) {
const codespaceRanges = this.codespaceRanges;
for (let n = 0, nn = codespaceRanges.length; n < nn; n++) {
// Check each codespace range to see if it falls within.
const codespaceRange = codespaceRanges[n];
for (let k = 0, kk = codespaceRange.length; k < kk; ) {
const low = codespaceRange[k++];
const high = codespaceRange[k++];
if (charCode >= low && charCode <= high) {
return n + 1;
}
}
}
return 0;
}

get length() {
return this._map.length;
}
Expand Down
Loading

0 comments on commit 1a6adde

Please sign in to comment.