Skip to content

Commit

Permalink
Fix encoding issues when printing/saving a form with non-ASCII characters
Browse files Browse the repository at this point in the history
  • Loading branch information
calixteman committed Sep 19, 2020
1 parent c98046e commit bf5e0d0
Show file tree
Hide file tree
Showing 6 changed files with 533 additions and 58 deletions.
123 changes: 80 additions & 43 deletions src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@ import {
assert,
escapeString,
getModificationDate,
isAscii,
isString,
OPS,
stringToPDFString,
stringToUTF16BEString,
unreachable,
Util,
warn,
Expand Down Expand Up @@ -1145,7 +1147,7 @@ class WidgetAnnotation extends Annotation {
appearance = newTransform.encryptString(appearance);
}

dict.set("V", value);
dict.set("V", isAscii(value) ? value : stringToUTF16BEString(value));
dict.set("AP", AP);
dict.set("M", `D:${getModificationDate()}`);

Expand Down Expand Up @@ -1196,34 +1198,42 @@ class WidgetAnnotation extends Annotation {
const fontSize = this._computeFontSize(...fontInfo, totalHeight);
this._fontName = fontName;

let descent = font.descent;
if (isNaN(descent)) {
descent = 0;
let lineHeight = totalHeight;
if (this.data.multiLine) {
lineHeight /= value.split(/\r\n|\r|\n/).length;
}

const vPadding = defaultPadding + Math.abs(descent) * fontSize;
const descent = font.descent;
const vPadding = isNaN(descent)
? defaultPadding
: Math.abs(descent) * fontSize;

const defaultAppearance = this.data.defaultAppearance;
const alignment = this.data.textAlignment;

if (this.data.comb) {
return this._getCombAppearance(
if (this.data.multiLine) {
return this._getMultilineAppearance(
defaultAppearance,
value,
font,
fontSize,
totalWidth,
totalHeight,
alignment,
hPadding,
vPadding
);
}

if (this.data.multiLine) {
return this._getMultilineAppearance(
// TODO: need to handle chars which are not in the font
const encodedString = font.encodeString(value).join("");

if (this.data.comb) {
return this._getCombAppearance(
defaultAppearance,
value,
font,
fontSize,
encodedString,
totalWidth,
totalHeight,
alignment,
hPadding,
vPadding
);
Expand All @@ -1234,13 +1244,15 @@ class WidgetAnnotation extends Annotation {
return (
"/Tx BMC q BT " +
defaultAppearance +
` 1 0 0 1 ${hPadding} ${vPadding} Tm (${escapeString(value)}) Tj` +
` 1 0 0 1 ${hPadding} ${vPadding} Tm (${escapeString(
encodedString
)}) Tj` +
" ET Q EMC"
);
}

const renderedText = this._renderText(
value,
encodedString,
font,
fontSize,
totalWidth,
Expand Down Expand Up @@ -1280,10 +1292,24 @@ class WidgetAnnotation extends Annotation {

_computeFontSize(font, fontName, fontSize, height) {
if (fontSize === null || fontSize === 0) {
const em = font.charsToGlyphs("M", true)[0].width / 1000;
// According to https://en.wikipedia.org/wiki/Em_(typography)
// an average cap height should be 70% of 1em
const capHeight = 0.7 * em;
let capHeight;
if (font.capHeight) {
capHeight = font.capHeight;
} else {
const glyphs = font.charsToGlyphs(
font.encodeString("M").join(""),
true
);
if (glyphs.length === 1 && glyphs[0].width) {
const em = glyphs[0].width / 1000;
// According to https://en.wikipedia.org/wiki/Em_(typography)
// an average cap height should be 70% of 1em
capHeight = 0.7 * em;
} else {
capHeight = 0.7;
}
}

// 1.5 * capHeight * fontSize seems to be a good value for lineHeight
fontSize = Math.max(1, Math.floor(height / (1.5 * capHeight)));

Expand Down Expand Up @@ -1406,11 +1432,12 @@ class TextWidgetAnnotation extends WidgetAnnotation {
this.data.maxLen !== null;
}

_getCombAppearance(defaultAppearance, text, width, hPadding, vPadding) {
_getCombAppearance(defaultAppearance, font, text, width, hPadding, vPadding) {
const combWidth = (width / this.data.maxLen).toFixed(2);
const buf = [];
for (const character of text) {
buf.push(`(${escapeString(character)}) Tj`);
const positions = font.getCharPosition(text);
for (const [start, end] of positions) {
buf.push(`(${escapeString(text.substring(start, end))}) Tj`);
}

const renderedComb = buf.join(` ${combWidth} 0 Td `);
Expand Down Expand Up @@ -1464,49 +1491,59 @@ class TextWidgetAnnotation extends WidgetAnnotation {
}

_splitLine(line, font, fontSize, width) {
if (line.length <= 1) {
// TODO: need to handle chars which are not in the font
line = font.encodeString(line).join("");

const glyphs = font.getGlyphsAndCharPosition(line);

if (glyphs.length <= 1) {
// Nothing to split
return [line];
}

const scale = fontSize / 1000;
const whitespace = font.charsToGlyphs(" ", true)[0].width * scale;
const chunks = [];

let lastSpacePos = -1,
let lastSpacePosInStringStart = -1,
lastSpacePosInStringEnd = -1,
lastSpacePos = -1,
startChunk = 0,
currentWidth = 0;

for (let i = 0, ii = line.length; i < ii; i++) {
const character = line.charAt(i);
if (character === " ") {
if (currentWidth + whitespace > width) {
for (let i = 0, ii = glyphs.length; i < ii; i++) {
const { glyph, start, end } = glyphs[i];
const glyphWidth = glyph.width * scale;
if (glyph.unicode === " ") {
if (currentWidth + glyphWidth > width) {
// We can break here
chunks.push(line.substring(startChunk, i));
startChunk = i;
currentWidth = whitespace;
chunks.push(line.substring(startChunk, start));
startChunk = start;
currentWidth = glyphWidth;
lastSpacePosInStringStart = -1;
lastSpacePos = -1;
} else {
currentWidth += whitespace;
currentWidth += glyphWidth;
lastSpacePosInStringStart = start;
lastSpacePosInStringEnd = end;
lastSpacePos = i;
}
} else {
const charWidth = font.charsToGlyphs(character, false)[0].width * scale;
if (currentWidth + charWidth > width) {
if (currentWidth + glyphWidth > width) {
// We must break to the last white position (if available)
if (lastSpacePos !== -1) {
chunks.push(line.substring(startChunk, lastSpacePos + 1));
startChunk = i = lastSpacePos + 1;
lastSpacePos = -1;
if (lastSpacePosInStringStart !== -1) {
chunks.push(line.substring(startChunk, lastSpacePosInStringEnd));
startChunk = lastSpacePosInStringEnd;
i = lastSpacePos + 1;
lastSpacePosInStringStart = -1;
currentWidth = 0;
} else {
// Just break in the middle of the word
chunks.push(line.substring(startChunk, i));
startChunk = i;
currentWidth = charWidth;
chunks.push(line.substring(startChunk, start));
startChunk = start;
currentWidth = glyphWidth;
}
} else {
currentWidth += charWidth;
currentWidth += glyphWidth;
}
}
}
Expand Down
16 changes: 16 additions & 0 deletions src/core/cmap.js
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,22 @@ class CMap {
out.length = 1;
}

getCharCodeLength(charCode) {
const codespaceRanges = this.codespaceRanges;
for (let n = 0, nn = codespaceRanges.length; n < nn; n++) {
// Check each codespace range to see if it falls within.
const codespaceRange = codespaceRanges[n];
for (let k = 0, kk = codespaceRange.length; k < kk; ) {
const low = codespaceRange[k++];
const high = codespaceRange[k++];
if (charCode >= low && charCode <= high) {
return n + 1;
}
}
}
return 0;
}

get length() {
return this._map.length;
}
Expand Down
107 changes: 107 additions & 0 deletions src/core/fonts.js
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ var Font = (function FontClosure() {
this.defaultWidth = properties.defaultWidth;
this.composite = properties.composite;
this.cMap = properties.cMap;
this.capHeight = properties.capHeight / PDF_GLYPH_SPACE_UNITS;
this.ascent = properties.ascent / PDF_GLYPH_SPACE_UNITS;
this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS;
this.fontMatrix = properties.fontMatrix;
Expand Down Expand Up @@ -3335,9 +3336,112 @@ var Font = (function FontClosure() {
return (charsCache[charsCacheKey] = glyphs);
},

getGlyphsAndCharPosition: function Font_getGlyphsAndCharPosition(chars) {
const glyphs = [];

if (this.cMap) {
const c = Object.create(null);
let i = 0;
while (i < chars.length) {
this.cMap.readCharCode(chars, i, c);
const length = c.length;
const isSpace = length === 1 && chars.charCodeAt(i) === 0x20;
const glyph = this.charToGlyph(c.charcode, isSpace);
glyphs.push({
start: i,
end: i + length,
glyph,
});
i += length;
}
} else {
for (let i = 0, ii = chars.length; i < ii; ++i) {
const charcode = chars.charCodeAt(i);
const glyph = this.charToGlyph(charcode, charcode === 0x20);
glyphs.push({
start: i,
end: i + 1,
glyph,
});
}
}

return glyphs;
},

getCharPosition: function Font_getCharPosition(chars) {
const positions = [];

if (this.cMap) {
const c = Object.create(null);
let i = 0;
while (i < chars.length) {
this.cMap.readCharCode(chars, i, c);
const length = c.length;
positions.push([i, i + length]);
i += length;
}
} else {
for (let i = 0, ii = chars.length; i < ii; ++i) {
positions.push([i, i + 1]);
}
}

return positions;
},

get glyphCacheValues() {
return Object.values(this.glyphCache);
},

encodeString: function Font_encodeString(str) {
const buffers = [];
let buf = [];
let isError = false;

for (let i = 0, ii = str.length; i < ii; i++) {
const unicode = str.codePointAt(i);
let hasError = false;
if (unicode > 0xffff) {
++i;
}
if (this.toUnicode) {
const char = String.fromCodePoint(unicode);
const charCode = this.toUnicode.charCodeOf(char);
if (charCode === -1) {
hasError = true;
} else {
if (isError) {
buffers.push(buf.join(""));
buf = [];
isError = false;
}
const charCodeLength = this.cMap
? this.cMap.getCharCodeLength(charCode)
: 1;
for (let j = charCodeLength - 1; j >= 0; j--) {
buf.push(String.fromCharCode((charCode >> (8 * j)) & 0xff));
}
}
} else {
hasError = true;
}

if (hasError) {
if (isError) {
buf.push(String.fromCodePoint(unicode));
} else {
buffers.push(buf.join(""));
buf = [String.fromCodePoint(unicode)];
isError = true;
}
}
}

buffers.push(buf.join(""));

return buffers;
},
};

return Font;
Expand All @@ -3355,6 +3459,9 @@ var ErrorFont = (function ErrorFontClosure() {
charsToGlyphs: function ErrorFont_charsToGlyphs() {
return [];
},
// No encoding is performed here: the input is returned untouched, wrapped in
// a one-element array so the result is an array like Font's encodeString.
encodeString: function ErrorFont_encodeString(chars) {
return [chars];
},
exportData(extraProperties = false) {
return { error: this.error };
},
Expand Down
Loading

0 comments on commit bf5e0d0

Please sign in to comment.