Skip to content

Commit

Permalink
Improve font management
Browse files Browse the repository at this point in the history
This commit adds many small improvements in the way fonts are managed.

1. Most of the font deduplication logic has been put in add_font(), instead of
draw_first_line().

2. Font objects now have only one (reproducible) hash, based on the font
content and the face index. Before this commit, we had 3 (!) different hashes.
Fixes #1553.

3. Font fields are calculated during initialization, to avoid needlessly
storing parameters.

4. The Harfbuzz face is no longer read twice from the Pango font when a new Font is added.

5. Font face deduplication is now done using the Pango face pointer, instead of
the Harfbuzz face hash extracted from the Pango font. This could save some
time, especially for very long documents always using the same font face.

6. The font name displayed in the PDF now includes the weight and style.
  • Loading branch information
liZe committed Mar 6, 2022
1 parent 7034d67 commit 3b0ae92
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 54 deletions.
86 changes: 46 additions & 40 deletions weasyprint/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,29 +63,39 @@ def _w3c_date_to_pdf(string, attr_name):


class Font:
def __init__(self, font_hash, file_content, pango_font, index):
pango_metrics = pango.pango_font_get_metrics(pango_font, ffi.NULL)
def __init__(self, pango_font):
hb_font = pango.pango_font_get_hb_font(pango_font)
hb_face = harfbuzz.hb_font_get_face(hb_font)
self._font_description = pango.pango_font_describe(pango_font)
self.family = ffi.string(pango.pango_font_description_get_family(
self._font_description))
font_size = pango.pango_font_description_get_size(
self._font_description)
self.index = harfbuzz.hb_face_get_index(hb_face)
hb_blob = ffi.gc(
harfbuzz.hb_face_reference_blob(hb_face),
harfbuzz.hb_blob_destroy)
with ffi.new('unsigned int *') as length:
hb_data = harfbuzz.hb_blob_get_data(hb_blob, length)
self.file_content = ffi.unpack(hb_data, int(length[0]))

pango_metrics = pango.pango_font_get_metrics(pango_font, ffi.NULL)
description = pango.pango_font_describe(pango_font)
font_size = pango.pango_font_description_get_size(description)
self.style = pango.pango_font_description_get_style(description)
self.family = ffi.string(
pango.pango_font_description_get_family(description))
digest = hashlib.sha1(self.file_content + bytes(self.index)).digest()
self.hash = ''.join(chr(65 + letter % 26) for letter in digest[:6])

# Name
description_string = ffi.string(
pango.pango_font_description_to_string(self._font_description))
sha = hashlib.sha256()
sha.update(str(font_hash).encode())
sha.update(description_string)

self.file_content = file_content
self.index = index
self.file_hash = hash(file_content + bytes(index))
self.hash = ''.join(
chr(65 + letter % 26) for letter in sha.digest()[:6])
self.name = (
b'/' + self.hash.encode() + b'+' + self.family.replace(b' ', b''))
self.italic_angle = 0 # TODO: this should be different
pango.pango_font_description_to_string(description))
fields = description_string.split(b' ')
if fields and b'=' in fields[-1]:
fields.pop() # Remove variations
if fields:
fields.pop() # Remove font size
else:
fields = [b'Unknown']
self.name = b'/' + self.hash.encode() + b'+' + b''.join(fields)

# Ascent & descent
if font_size:
self.ascent = int(
pango.pango_font_metrics_get_ascent(pango_metrics) /
Expand All @@ -95,6 +105,9 @@ def __init__(self, font_hash, file_content, pango_font, index):
font_size * 1000)
else:
self.ascent = self.descent = 0

# Various properties
self.italic_angle = 0 # TODO: this should be different
self.upem = harfbuzz.hb_face_get_upem(hb_face)
self.png = harfbuzz.hb_ot_color_has_png(hb_face)
self.svg = harfbuzz.hb_ot_color_has_svg(hb_face)
Expand All @@ -104,17 +117,15 @@ def __init__(self, font_hash, file_content, pango_font, index):
self.widths = {}
self.cmap = {}

@property
def flags(self):
flags = 2 ** (3 - 1) # Symbolic, custom character set
if pango.pango_font_description_get_style(self._font_description):
flags += 2 ** (7 - 1) # Italic
# Font flags
self.flags = 2 ** (3 - 1) # Symbolic, custom character set
if self.style:
self.flags += 2 ** (7 - 1) # Italic
if b'Serif' in self.family.split():
flags += 2 ** (2 - 1) # Serif
self.flags += 2 ** (2 - 1) # Serif
widths = self.widths.values()
if len(widths) > 1 and len(set(widths)) == 1:
flags += 2 ** (1 - 1) # FixedPitch
return flags
self.flags += 2 ** (1 - 1) # FixedPitch


class Stream(pydyf.Stream):
Expand Down Expand Up @@ -217,13 +228,11 @@ def set_alpha(self, alpha, stroke=False, fill=None):
self._states[key] = pydyf.Dictionary({'ca': alpha})
super().set_state(key)

def add_font(self, font_hash, font_content, pango_font, index):
self._document.fonts[font_hash] = Font(
font_hash, font_content, pango_font, index)
return self._document.fonts[font_hash]

def get_fonts(self):
return self._document.fonts
def add_font(self, pango_font):
face = pango.pango_font_get_face(pango_font)
if face not in self._document.fonts:
self._document.fonts[face] = Font(pango_font)
return self._document.fonts[face]

def add_group(self, bounding_box):
states = pydyf.Dictionary()
Expand Down Expand Up @@ -1323,10 +1332,7 @@ def write_pdf(self, target=None, zoom=1, attachments=None, finisher=None):
pdf_fonts = pydyf.Dictionary()
fonts_by_file_hash = {}
for font in self.fonts.values():
if font.file_hash in fonts_by_file_hash:
fonts_by_file_hash[font.file_hash].append(font)
else:
fonts_by_file_hash[font.file_hash] = [font]
fonts_by_file_hash.setdefault(font.hash, []).append(font)
font_references_by_file_hash = {}
for file_hash, fonts in fonts_by_file_hash.items():
content = fonts[0].file_content
Expand Down Expand Up @@ -1409,7 +1415,7 @@ def write_pdf(self, target=None, zoom=1, attachments=None, finisher=None):
'StemV': font.stemv,
'StemH': font.stemh,
(f'FontFile{"3" if font_type == "otf" else "2"}'):
font_references_by_file_hash[font.file_hash],
font_references_by_file_hash[font.hash],
})
if font_type == 'otf':
font_descriptor['Subtype'] = '/OpenType'
Expand Down
18 changes: 4 additions & 14 deletions weasyprint/draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -1087,7 +1087,6 @@ def draw_first_line(stream, textbox, text_overflow, block_ellipsis, x, y,
stream.text_matrix(*matrix.values)
last_font = None
string = ''
fonts = stream.get_fonts()
x_advance = 0
emojis = []
for run in runs:
Expand All @@ -1101,19 +1100,7 @@ def draw_first_line(stream, textbox, text_overflow, block_ellipsis, x, y,

# Font content
pango_font = glyph_item.item.analysis.font
hb_font = pango.pango_font_get_hb_font(pango_font)
hb_face = harfbuzz.hb_font_get_face(hb_font)
font_hash = hash(hb_face)
if font_hash in fonts:
font = fonts[font_hash]
else:
hb_blob = ffi.gc(
harfbuzz.hb_face_reference_blob(hb_face),
harfbuzz.hb_blob_destroy)
hb_data = harfbuzz.hb_blob_get_data(hb_blob, stream.length)
file_content = ffi.unpack(hb_data, int(stream.length[0]))
index = harfbuzz.hb_face_get_index(hb_face)
font = stream.add_font(font_hash, file_content, pango_font, index)
font = stream.add_font(pango_font)

# Positions of the glyphs in the UTF-8 string
utf8_positions = [offset + clusters[i] for i in range(1, num_glyphs)]
Expand Down Expand Up @@ -1178,6 +1165,8 @@ def draw_first_line(stream, textbox, text_overflow, block_ellipsis, x, y,
previous_utf8_position = utf8_position

if font.svg:
hb_font = pango.pango_font_get_hb_font(pango_font)
hb_face = harfbuzz.hb_font_get_face(hb_font)
hb_blob = ffi.gc(
harfbuzz.hb_ot_color_glyph_reference_svg(hb_face, glyph),
harfbuzz.hb_blob_destroy)
Expand All @@ -1189,6 +1178,7 @@ def draw_first_line(stream, textbox, text_overflow, block_ellipsis, x, y,
a = d = font.widths[glyph] / 1000 / font.upem * font_size
emojis.append([image, font, a, d, x_advance, 0])
elif font.png:
hb_font = pango.pango_font_get_hb_font(pango_font)
hb_blob = ffi.gc(
harfbuzz.hb_ot_color_glyph_reference_png(hb_font, glyph),
harfbuzz.hb_blob_destroy)
Expand Down
2 changes: 2 additions & 0 deletions weasyprint/text/ffi.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
typedef ... PangoAttrList;
typedef ... PangoAttrClass;
typedef ... PangoFont;
typedef ... PangoFontFace;
typedef guint PangoGlyph;
typedef gint PangoGlyphUnit;
Expand Down Expand Up @@ -214,6 +215,7 @@
PangoLayoutLine * pango_layout_get_line_readonly (
PangoLayout *layout, int line);
PangoFontFace * pango_font_get_face (PangoFont *font);
hb_font_t * pango_font_get_hb_font (PangoFont *font);
PangoFontDescription * pango_font_description_new (void);
Expand Down

1 comment on commit 3b0ae92

@aschmitz
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just FYI, it looks like there's a probably-measurable improvement from this change (possibly including 2c0bb3a / 2e701b1). In a separate test of the same document from #1587 with master (5ad3b1d), across 10 runs the render took an average of 66.225 s ± 0.839 s (compared to 67.992 ± 1.279 beforehand), which is fairly close but still more than a standard deviation better. Nice! Thanks for the optimization.

Please sign in to comment.