Skip to content

Commit

Permalink
Put PDF fonts management in a separate module
Browse files Browse the repository at this point in the history
  • Loading branch information
liZe committed Jun 26, 2022
1 parent 70f9b62 commit 5486875
Show file tree
Hide file tree
Showing 2 changed files with 300 additions and 298 deletions.
304 changes: 6 additions & 298 deletions weasyprint/pdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import hashlib
import io
import math
import zlib
from os.path import basename
from urllib.parse import unquote, urlsplit
Expand All @@ -16,6 +15,7 @@
from ..matrix import Matrix
from ..urls import URLFetchingError
from . import pdfa
from .fonts import build_fonts_dictionary
from .stream import Stream

VARIANTS = {
Expand Down Expand Up @@ -149,8 +149,7 @@ def _use_references(pdf, resources, images):
# Resources
if 'Resources' in x_object.extra:
x_object.extra['Resources'] = _reference_resources(
pdf, x_object.extra['Resources'], images,
resources['Font'])
pdf, x_object.extra['Resources'], images, resources['Font'])

# Patterns
for key, pattern in resources.get('Pattern', {}).items():
Expand Down Expand Up @@ -209,8 +208,7 @@ def _create_bookmarks(bookmarks, pdf, parent=None):
outlines = []
for title, (page, x, y), children, state in bookmarks:
destination = pydyf.Array((
pdf.objects[pdf.pages['Kids'][page * 3]].reference,
'/XYZ', x, y, 0))
pdf.objects[pdf.pages['Kids'][page*3]].reference, '/XYZ', x, y, 0))
outline = pydyf.Dictionary({
'Title': pydyf.String(title), 'Dest': destination})
pdf.add_object(outline)
Expand Down Expand Up @@ -410,8 +408,7 @@ def generate_pdf(pages, url_fetcher, metadata, fonts, target, zoom,
if metadata.description:
pdf.info['Subject'] = pydyf.String(metadata.description)
if metadata.keywords:
pdf.info['Keywords'] = pydyf.String(
', '.join(metadata.keywords))
pdf.info['Keywords'] = pydyf.String(', '.join(metadata.keywords))
if metadata.generator:
pdf.info['Creator'] = pydyf.String(metadata.generator)
if metadata.created:
Expand Down Expand Up @@ -444,297 +441,8 @@ def generate_pdf(pages, url_fetcher, metadata, fonts, target, zoom,
pdf.catalog['Names'] = pydyf.Dictionary()
pdf.catalog['Names']['EmbeddedFiles'] = content.reference

# Embeded fonts
pdf_fonts = pydyf.Dictionary()
fonts_by_file_hash = {}
for font in fonts.values():
fonts_by_file_hash.setdefault(font.hash, []).append(font)
font_references_by_file_hash = {}
for file_hash, file_fonts in fonts_by_file_hash.items():
# TODO: find out why we can have multiple fonts for one font file
font = file_fonts[0]
if font.bitmap:
continue

# Clean font, optimize and handle emojis
cmap = {}
if 'fonts' in optimize_size:
for file_font in file_fonts:
cmap = {**cmap, **file_font.cmap}
font.clean(cmap)

# Include font
if font.type == 'otf':
font_extra = pydyf.Dictionary({'Subtype': '/OpenType'})
else:
font_extra = pydyf.Dictionary(
{'Length1': len(font.file_content)})
font_stream = pydyf.Stream(
[font.file_content], font_extra, compress=True)
pdf.add_object(font_stream)
font_references_by_file_hash[file_hash] = font_stream.reference

for font in fonts.values():
widths = pydyf.Array()
for i in sorted(font.widths):
if i - 1 not in font.widths:
widths.append(i)
current_widths = pydyf.Array()
widths.append(current_widths)
current_widths.append(font.widths[i])
font_file = f'FontFile{3 if font.type == "otf" else 2}'
to_unicode = pydyf.Stream([
b'/CIDInit /ProcSet findresource begin',
b'12 dict begin',
b'begincmap',
b'/CIDSystemInfo',
b'<< /Registry (Adobe)',
b'/Ordering (UCS)',
b'/Supplement 0',
b'>> def',
b'/CMapName /Adobe-Identity-UCS def',
b'/CMapType 2 def',
b'1 begincodespacerange',
b'<0000> <ffff>',
b'endcodespacerange',
f'{len(font.cmap)} beginbfchar'.encode()])
for glyph, text in font.cmap.items():
unicode_codepoints = ''.join(
f'{letter.encode("utf-16-be").hex()}' for letter in text)
to_unicode.stream.append(
f'<{glyph:04x}> <{unicode_codepoints}>'.encode())
to_unicode.stream.extend([
b'endbfchar',
b'endcmap',
b'CMapName currentdict /CMap defineresource pop',
b'end',
b'end'])
pdf.add_object(to_unicode)
font_dictionary = pydyf.Dictionary({
'Type': '/Font',
'Subtype': f'/Type{3 if font.bitmap else 0}',
'BaseFont': font.name,
'ToUnicode': to_unicode.reference,
})

if font.bitmap:
# https://docs.microsoft.com/typography/opentype/spec/ebdt
font_dictionary['FontBBox'] = pydyf.Array([0, 0, 1, 1])
font_dictionary['FontMatrix'] = pydyf.Array([1, 0, 0, 1, 0, 0])
if 'fonts' in optimize_size:
chars = tuple(sorted(font.cmap))
else:
chars = tuple(range(256))
first, last = chars[0], chars[-1]
font_dictionary['FirstChar'] = first
font_dictionary['LastChar'] = last
differences = []
for index, index_widths in zip(widths[::2], widths[1::2]):
differences.append(index)
for i in range(len(index_widths)):
if i + index in chars:
differences.append(f'/{i + index}')
font_dictionary['Encoding'] = pydyf.Dictionary({
'Type': '/Encoding',
'Differences': pydyf.Array(differences),
})
char_procs = pydyf.Dictionary({})
font_glyphs = font.ttfont['EBDT'].strikeData[0]
widths = [0] * (last - first + 1)
glyphs_info = {}
for key, glyph in font_glyphs.items():
glyph_format = glyph.getFormat()
glyph_id = font.ttfont.getGlyphID(key)

# Get and store glyph metrics
if glyph_format == 5:
data = glyph.data
subtables = font.ttfont['EBLC'].strikes[0].indexSubTables
for subtable in subtables:
first_index = subtable.firstGlyphIndex
last_index = subtable.lastGlyphIndex
if first_index <= glyph_id <= last_index:
height = subtable.metrics.height
advance = width = subtable.metrics.width
bearing_x = subtable.metrics.horiBearingX
bearing_y = subtable.metrics.horiBearingY
break
else:
LOGGER.warning(
f'Unknown bitmap metrics for glyph: {glyph_id}')
continue
else:
data_start = 5 if glyph_format in (1, 2, 8) else 8
data = glyph.data[data_start:]
height, width = glyph.data[0:2]
bearing_x = int.from_bytes(
glyph.data[2:3], 'big', signed=True)
bearing_y = int.from_bytes(
glyph.data[3:4], 'big', signed=True)
advance = glyph.data[4]
position_y = bearing_y - height
if glyph_id in chars:
widths[glyph_id - first] = advance
stride = math.ceil(width / 8)
glyph_info = glyphs_info[glyph_id] = {
'width': width,
'height': height,
'x': bearing_x,
'y': position_y,
'stride': stride,
'bitmap': None,
'subglyphs': None,
}

# Decode bitmaps
if glyph_format in (1, 6):
glyph_info['bitmap'] = data
elif glyph_format in (2, 5, 7):
padding = (8 - (width % 8)) % 8
bits = bin(int(data.hex(), 16))[2:]
bits = bits.zfill(8 * len(data))
bitmap_bits = ''.join(
bits[i * width:(i + 1) * width] + padding * '0'
for i in range(height))
glyph_info['bitmap'] = int(bitmap_bits, 2).to_bytes(
height * stride, 'big')
elif glyph_format in (8, 9):
subglyphs = glyph_info['subglyphs'] = []
i = 0 if glyph_format == 9 else 1
number_of_components = int.from_bytes(
data[i:i+2], 'big')
for j in range(number_of_components):
index = (i + 2) + (j * 4)
subglyph_id = int.from_bytes(
data[index:index+2], 'big')
x = int.from_bytes(
data[index+2:index+3], 'big', signed=True)
y = int.from_bytes(
data[index+3:index+4], 'big', signed=True)
subglyphs.append(
{'id': subglyph_id, 'x': x, 'y': y})
else: # pragma: no cover
LOGGER.warning(
f'Unsupported bitmap glyph format: {glyph_format}')
glyph_info['bitmap'] = bytes(height * stride)

for glyph_id, glyph_info in glyphs_info.items():
# Don’t store glyphs that are not in the cmap
if glyph_id not in chars:
continue

# Draw glyph
stride = glyph_info['stride']
width = glyph_info['width']
height = glyph_info['height']
x = glyph_info['x']
y = glyph_info['y']
if glyph_info['bitmap'] is None:
length = height * stride
bitmap_int = int.from_bytes(bytes(length), 'big')
for subglyph in glyph_info['subglyphs']:
sub_x = subglyph['x']
sub_y = subglyph['y']
sub_id = subglyph['id']
if sub_id not in glyphs_info:
LOGGER.warning(f'Unknown subglyph: {sub_id}')
continue
subglyph = glyphs_info[sub_id]
if subglyph['bitmap'] is None:
# TODO: support subglyph in subglyph
LOGGER.warning(
'Unsupported subglyph in subglyph: '
f'{sub_id}')
continue
for row_y in range(subglyph['height']):
row_slice = slice(
row_y * subglyph['stride'],
(row_y + 1) * subglyph['stride'])
row = subglyph['bitmap'][row_slice]
row_int = int.from_bytes(row, 'big')
shift = (
stride * 8 * (height - sub_y - row_y - 1))
stride_difference = stride - subglyph['stride']
if stride_difference > 0:
row_int <<= stride_difference * 8
elif stride_difference < 0:
row_int >>= -stride_difference * 8
if sub_x > 0:
row_int >>= sub_x
elif sub_x < 0:
row_int <<= -sub_x
row_int %= 1 << stride * 8
row_int <<= shift
bitmap_int |= row_int
bitmap = bitmap_int.to_bytes(length, 'big')
else:
bitmap = glyph_info['bitmap']
bitmap_stream = pydyf.Stream([
b'0 0 d0',
f'{width} 0 0 {height} {x} {y} cm'.encode(),
b'BI',
b'/IM true',
b'/W', width,
b'/H', height,
b'/BPC 1',
b'/D [1 0]',
b'ID', bitmap, b'EI'
])
pdf.add_object(bitmap_stream)
char_procs[glyph_id] = bitmap_stream.reference

pdf.add_object(char_procs)
font_dictionary['Widths'] = pydyf.Array(widths)
font_dictionary['CharProcs'] = char_procs.reference

else:
font_descriptor = pydyf.Dictionary({
'Type': '/FontDescriptor',
'FontName': font.name,
'FontFamily': pydyf.String(font.family),
'Flags': font.flags,
'FontBBox': pydyf.Array(font.bbox),
'ItalicAngle': font.italic_angle,
'Ascent': font.ascent,
'Descent': font.descent,
'CapHeight': font.bbox[3],
'StemV': font.stemv,
'StemH': font.stemh,
font_file: font_references_by_file_hash[font.hash],
})
if pdf.version <= b'1.4':
cids = sorted(font.widths)
padded_width = int(math.ceil(cids[-1] / 8))
bits = ['0'] * padded_width * 8
for cid in cids:
bits[cid] = '1'
stream = pydyf.Stream(
(int(''.join(bits), 2).to_bytes(padded_width, 'big'),))
pdf.add_object(stream)
font_descriptor['CIDSet'] = stream.reference
if font.type == 'otf':
font_descriptor['Subtype'] = '/OpenType'
pdf.add_object(font_descriptor)
subfont_dictionary = pydyf.Dictionary({
'Type': '/Font',
'Subtype': f'/CIDFontType{0 if font.type == "otf" else 2}',
'BaseFont': font.name,
'CIDSystemInfo': pydyf.Dictionary({
'Registry': pydyf.String('Adobe'),
'Ordering': pydyf.String('Identity'),
'Supplement': 0,
}),
'CIDToGIDMap': '/Identity',
'W': widths,
'FontDescriptor': font_descriptor.reference,
})
pdf.add_object(subfont_dictionary)
font_dictionary['Encoding'] = '/Identity-H'
font_dictionary['DescendantFonts'] = pydyf.Array(
[subfont_dictionary.reference])
pdf.add_object(font_dictionary)
pdf_fonts[font.hash] = font_dictionary.reference

# Embedded fonts
pdf_fonts = build_fonts_dictionary(pdf, fonts, optimize_size)
pdf.add_object(pdf_fonts)
resources['Font'] = pdf_fonts.reference
_use_references(pdf, resources, images)
Expand Down
Loading

0 comments on commit 5486875

Please sign in to comment.